/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their corresponding
 * production in the XML specification. A few productions defining the
 * different character ranges are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
*
 * daniel@veillard.com
 */

/* To avoid EBCDIC trouble when parsing on zOS */
#if defined(__MVS__)
#pragma convert("ISO8859-1")
#endif

#define IN_LIBXML
#include "libxml.h"

/* Platform directory separator character. */
#if defined(_WIN32)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <stdarg.h>
#include <stddef.h>
#include <ctype.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/threads.h>
#include <libxml/globals.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
#include <libxml/xmlschemastypes.h>
#include <libxml/relaxng.h>
#endif

#include "buf.h"
#include "enc.h"

/*
 * Record kept per start tag.
 * NOTE(review): the field meanings below are inferred from their names
 * only; the code using this struct is outside this part of the file,
 * confirm before relying on them.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the element prefix - TODO confirm */
    const xmlChar *URI;     /* presumably the element namespace URI - TODO confirm */
    int line;               /* presumably the line of the start tag - TODO confirm */
    int nsNr;               /* presumably a namespace count - TODO confirm */
};

/*
 * Forward declarations of file-local (static) functions.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);

static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);

static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);

/************************************************************************
 *                                                                      *
 *              Arbitrary limits set in the parser.
See XML_PARSE_HUGE * 102 * * 103 ************************************************************************/ 104 105 #define XML_MAX_HUGE_LENGTH 1000000000 106 107 #define XML_PARSER_BIG_ENTITY 1000 108 #define XML_PARSER_LOT_ENTITY 5000 109 110 /* 111 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 112 * replacement over the size in byte of the input indicates that you have 113 * and exponential behaviour. A value of 10 correspond to at least 3 entity 114 * replacement per byte of input. 115 */ 116 #define XML_PARSER_NON_LINEAR 10 117 118 /* 119 * xmlParserEntityCheck 120 * 121 * Function to check non-linear entity expansion behaviour 122 * This is here to detect and stop exponential linear entity expansion 123 * This is not a limitation of the parser but a safety 124 * boundary feature. It can be disabled with the XML_PARSE_HUGE 125 * parser option. 126 */ 127 static int 128 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 129 xmlEntityPtr ent, size_t replacement) 130 { 131 size_t consumed = 0; 132 int i; 133 134 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 135 return (0); 136 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 137 return (1); 138 139 /* 140 * This may look absurd but is needed to detect 141 * entities problems 142 */ 143 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 144 (ent->content != NULL) && (ent->checked == 0) && 145 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { 146 unsigned long oldnbent = ctxt->nbentities, diff; 147 xmlChar *rep; 148 149 ent->checked = 1; 150 151 ++ctxt->depth; 152 rep = xmlStringDecodeEntities(ctxt, ent->content, 153 XML_SUBSTITUTE_REF, 0, 0, 0); 154 --ctxt->depth; 155 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) { 156 ent->content[0] = 0; 157 } 158 159 diff = ctxt->nbentities - oldnbent + 1; 160 if (diff > INT_MAX / 2) 161 diff = INT_MAX / 2; 162 ent->checked = diff * 2; 163 if (rep != NULL) { 164 if (xmlStrchr(rep, '<')) 165 ent->checked |= 1; 166 
xmlFree(rep); 167 rep = NULL; 168 } 169 } 170 171 /* 172 * Prevent entity exponential check, not just replacement while 173 * parsing the DTD 174 * The check is potentially costly so do that only once in a thousand 175 */ 176 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) && 177 (ctxt->nbentities % 1024 == 0)) { 178 for (i = 0;i < ctxt->inputNr;i++) { 179 consumed += ctxt->inputTab[i]->consumed + 180 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base); 181 } 182 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) { 183 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 184 ctxt->instate = XML_PARSER_EOF; 185 return (1); 186 } 187 consumed = 0; 188 } 189 190 191 192 if (replacement != 0) { 193 if (replacement < XML_MAX_TEXT_LENGTH) 194 return(0); 195 196 /* 197 * If the volume of entity copy reaches 10 times the 198 * amount of parsed data and over the large text threshold 199 * then that's very likely to be an abuse. 200 */ 201 if (ctxt->input != NULL) { 202 consumed = ctxt->input->consumed + 203 (ctxt->input->cur - ctxt->input->base); 204 } 205 consumed += ctxt->sizeentities; 206 207 if (replacement < XML_PARSER_NON_LINEAR * consumed) 208 return(0); 209 } else if (size != 0) { 210 /* 211 * Do the check based on the replacement size of the entity 212 */ 213 if (size < XML_PARSER_BIG_ENTITY) 214 return(0); 215 216 /* 217 * A limit on the amount of text data reasonably used 218 */ 219 if (ctxt->input != NULL) { 220 consumed = ctxt->input->consumed + 221 (ctxt->input->cur - ctxt->input->base); 222 } 223 consumed += ctxt->sizeentities; 224 225 if ((size < XML_PARSER_NON_LINEAR * consumed) && 226 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 227 return (0); 228 } else if (ent != NULL) { 229 /* 230 * use the number of parsed entities in the replacement 231 */ 232 size = ent->checked / 2; 233 234 /* 235 * The amount of data parsed counting entities size only once 236 */ 237 if (ctxt->input != NULL) { 238 consumed = ctxt->input->consumed + 
239 (ctxt->input->cur - ctxt->input->base); 240 } 241 consumed += ctxt->sizeentities; 242 243 /* 244 * Check the density of entities for the amount of data 245 * knowing an entity reference will take at least 3 bytes 246 */ 247 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 248 return (0); 249 } else { 250 /* 251 * strange we got no data for checking 252 */ 253 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && 254 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || 255 (ctxt->nbentities <= 10000)) 256 return (0); 257 } 258 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 259 return (1); 260 } 261 262 /** 263 * xmlParserMaxDepth: 264 * 265 * arbitrary depth limit for the XML documents that we allow to 266 * process. This is not a limitation of the parser but a safety 267 * boundary feature. It can be disabled with the XML_PARSE_HUGE 268 * parser option. 269 */ 270 unsigned int xmlParserMaxDepth = 256; 271 272 273 274 #define SAX2 1 275 #define XML_PARSER_BIG_BUFFER_SIZE 300 276 #define XML_PARSER_BUFFER_SIZE 100 277 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 278 279 /** 280 * XML_PARSER_CHUNK_SIZE 281 * 282 * When calling GROW that's the minimal amount of data 283 * the parser expected to have received. 
It is not a hard 284 * limit but an optimization when reading strings like Names 285 * It is not strictly needed as long as inputs available characters 286 * are followed by 0, which should be provided by the I/O level 287 */ 288 #define XML_PARSER_CHUNK_SIZE 100 289 290 /* 291 * List of XML prefixed PI allowed by W3C specs 292 */ 293 294 static const char* const xmlW3CPIs[] = { 295 "xml-stylesheet", 296 "xml-model", 297 NULL 298 }; 299 300 301 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 302 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 303 const xmlChar **str); 304 305 static xmlParserErrors 306 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 307 xmlSAXHandlerPtr sax, 308 void *user_data, int depth, const xmlChar *URL, 309 const xmlChar *ID, xmlNodePtr *list); 310 311 static int 312 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 313 const char *encoding); 314 #ifdef LIBXML_LEGACY_ENABLED 315 static void 316 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 317 xmlNodePtr lastNode); 318 #endif /* LIBXML_LEGACY_ENABLED */ 319 320 static xmlParserErrors 321 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 322 const xmlChar *string, void *user_data, xmlNodePtr *lst); 323 324 static int 325 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 326 327 /************************************************************************ 328 * * 329 * Some factorized error routines * 330 * * 331 ************************************************************************/ 332 333 /** 334 * xmlErrAttributeDup: 335 * @ctxt: an XML parser context 336 * @prefix: the attribute prefix 337 * @localname: the attribute localname 338 * 339 * Handle a redefinition of attribute error 340 */ 341 static void 342 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 343 const xmlChar * localname) 344 { 345 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 346 (ctxt->instate 
== XML_PARSER_EOF)) 347 return; 348 if (ctxt != NULL) 349 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 350 351 if (prefix == NULL) 352 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 353 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 354 (const char *) localname, NULL, NULL, 0, 0, 355 "Attribute %s redefined\n", localname); 356 else 357 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 358 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 359 (const char *) prefix, (const char *) localname, 360 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 361 localname); 362 if (ctxt != NULL) { 363 ctxt->wellFormed = 0; 364 if (ctxt->recovery == 0) 365 ctxt->disableSAX = 1; 366 } 367 } 368 369 /** 370 * xmlFatalErr: 371 * @ctxt: an XML parser context 372 * @error: the error number 373 * @extra: extra information string 374 * 375 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 376 */ 377 static void 378 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 379 { 380 const char *errmsg; 381 382 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 383 (ctxt->instate == XML_PARSER_EOF)) 384 return; 385 switch (error) { 386 case XML_ERR_INVALID_HEX_CHARREF: 387 errmsg = "CharRef: invalid hexadecimal value"; 388 break; 389 case XML_ERR_INVALID_DEC_CHARREF: 390 errmsg = "CharRef: invalid decimal value"; 391 break; 392 case XML_ERR_INVALID_CHARREF: 393 errmsg = "CharRef: invalid value"; 394 break; 395 case XML_ERR_INTERNAL_ERROR: 396 errmsg = "internal error"; 397 break; 398 case XML_ERR_PEREF_AT_EOF: 399 errmsg = "PEReference at end of document"; 400 break; 401 case XML_ERR_PEREF_IN_PROLOG: 402 errmsg = "PEReference in prolog"; 403 break; 404 case XML_ERR_PEREF_IN_EPILOG: 405 errmsg = "PEReference in epilog"; 406 break; 407 case XML_ERR_PEREF_NO_NAME: 408 errmsg = "PEReference: no name"; 409 break; 410 case XML_ERR_PEREF_SEMICOL_MISSING: 411 errmsg = "PEReference: expecting ';'"; 412 break; 413 case 
XML_ERR_ENTITY_LOOP: 414 errmsg = "Detected an entity reference loop"; 415 break; 416 case XML_ERR_ENTITY_NOT_STARTED: 417 errmsg = "EntityValue: \" or ' expected"; 418 break; 419 case XML_ERR_ENTITY_PE_INTERNAL: 420 errmsg = "PEReferences forbidden in internal subset"; 421 break; 422 case XML_ERR_ENTITY_NOT_FINISHED: 423 errmsg = "EntityValue: \" or ' expected"; 424 break; 425 case XML_ERR_ATTRIBUTE_NOT_STARTED: 426 errmsg = "AttValue: \" or ' expected"; 427 break; 428 case XML_ERR_LT_IN_ATTRIBUTE: 429 errmsg = "Unescaped '<' not allowed in attributes values"; 430 break; 431 case XML_ERR_LITERAL_NOT_STARTED: 432 errmsg = "SystemLiteral \" or ' expected"; 433 break; 434 case XML_ERR_LITERAL_NOT_FINISHED: 435 errmsg = "Unfinished System or Public ID \" or ' expected"; 436 break; 437 case XML_ERR_MISPLACED_CDATA_END: 438 errmsg = "Sequence ']]>' not allowed in content"; 439 break; 440 case XML_ERR_URI_REQUIRED: 441 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 442 break; 443 case XML_ERR_PUBID_REQUIRED: 444 errmsg = "PUBLIC, the Public Identifier is missing"; 445 break; 446 case XML_ERR_HYPHEN_IN_COMMENT: 447 errmsg = "Comment must not contain '--' (double-hyphen)"; 448 break; 449 case XML_ERR_PI_NOT_STARTED: 450 errmsg = "xmlParsePI : no target name"; 451 break; 452 case XML_ERR_RESERVED_XML_NAME: 453 errmsg = "Invalid PI name"; 454 break; 455 case XML_ERR_NOTATION_NOT_STARTED: 456 errmsg = "NOTATION: Name expected here"; 457 break; 458 case XML_ERR_NOTATION_NOT_FINISHED: 459 errmsg = "'>' required to close NOTATION declaration"; 460 break; 461 case XML_ERR_VALUE_REQUIRED: 462 errmsg = "Entity value required"; 463 break; 464 case XML_ERR_URI_FRAGMENT: 465 errmsg = "Fragment not allowed"; 466 break; 467 case XML_ERR_ATTLIST_NOT_STARTED: 468 errmsg = "'(' required to start ATTLIST enumeration"; 469 break; 470 case XML_ERR_NMTOKEN_REQUIRED: 471 errmsg = "NmToken expected in ATTLIST enumeration"; 472 break; 473 case XML_ERR_ATTLIST_NOT_FINISHED: 474 errmsg = "')' 
required to finish ATTLIST enumeration"; 475 break; 476 case XML_ERR_MIXED_NOT_STARTED: 477 errmsg = "MixedContentDecl : '|' or ')*' expected"; 478 break; 479 case XML_ERR_PCDATA_REQUIRED: 480 errmsg = "MixedContentDecl : '#PCDATA' expected"; 481 break; 482 case XML_ERR_ELEMCONTENT_NOT_STARTED: 483 errmsg = "ContentDecl : Name or '(' expected"; 484 break; 485 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 486 errmsg = "ContentDecl : ',' '|' or ')' expected"; 487 break; 488 case XML_ERR_PEREF_IN_INT_SUBSET: 489 errmsg = 490 "PEReference: forbidden within markup decl in internal subset"; 491 break; 492 case XML_ERR_GT_REQUIRED: 493 errmsg = "expected '>'"; 494 break; 495 case XML_ERR_CONDSEC_INVALID: 496 errmsg = "XML conditional section '[' expected"; 497 break; 498 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 499 errmsg = "Content error in the external subset"; 500 break; 501 case XML_ERR_CONDSEC_INVALID_KEYWORD: 502 errmsg = 503 "conditional section INCLUDE or IGNORE keyword expected"; 504 break; 505 case XML_ERR_CONDSEC_NOT_FINISHED: 506 errmsg = "XML conditional section not closed"; 507 break; 508 case XML_ERR_XMLDECL_NOT_STARTED: 509 errmsg = "Text declaration '<?xml' required"; 510 break; 511 case XML_ERR_XMLDECL_NOT_FINISHED: 512 errmsg = "parsing XML declaration: '?>' expected"; 513 break; 514 case XML_ERR_EXT_ENTITY_STANDALONE: 515 errmsg = "external parsed entities cannot be standalone"; 516 break; 517 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 518 errmsg = "EntityRef: expecting ';'"; 519 break; 520 case XML_ERR_DOCTYPE_NOT_FINISHED: 521 errmsg = "DOCTYPE improperly terminated"; 522 break; 523 case XML_ERR_LTSLASH_REQUIRED: 524 errmsg = "EndTag: '</' not found"; 525 break; 526 case XML_ERR_EQUAL_REQUIRED: 527 errmsg = "expected '='"; 528 break; 529 case XML_ERR_STRING_NOT_CLOSED: 530 errmsg = "String not closed expecting \" or '"; 531 break; 532 case XML_ERR_STRING_NOT_STARTED: 533 errmsg = "String not started expecting ' or \""; 534 break; 535 case XML_ERR_ENCODING_NAME: 
536 errmsg = "Invalid XML encoding name"; 537 break; 538 case XML_ERR_STANDALONE_VALUE: 539 errmsg = "standalone accepts only 'yes' or 'no'"; 540 break; 541 case XML_ERR_DOCUMENT_EMPTY: 542 errmsg = "Document is empty"; 543 break; 544 case XML_ERR_DOCUMENT_END: 545 errmsg = "Extra content at the end of the document"; 546 break; 547 case XML_ERR_NOT_WELL_BALANCED: 548 errmsg = "chunk is not well balanced"; 549 break; 550 case XML_ERR_EXTRA_CONTENT: 551 errmsg = "extra content at the end of well balanced chunk"; 552 break; 553 case XML_ERR_VERSION_MISSING: 554 errmsg = "Malformed declaration expecting version"; 555 break; 556 case XML_ERR_NAME_TOO_LONG: 557 errmsg = "Name too long"; 558 break; 559 #if 0 560 case: 561 errmsg = ""; 562 break; 563 #endif 564 default: 565 errmsg = "Unregistered error message"; 566 } 567 if (ctxt != NULL) 568 ctxt->errNo = error; 569 if (info == NULL) { 570 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 571 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 572 errmsg); 573 } else { 574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 575 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 576 errmsg, info); 577 } 578 if (ctxt != NULL) { 579 ctxt->wellFormed = 0; 580 if (ctxt->recovery == 0) 581 ctxt->disableSAX = 1; 582 } 583 } 584 585 /** 586 * xmlFatalErrMsg: 587 * @ctxt: an XML parser context 588 * @error: the error number 589 * @msg: the error message 590 * 591 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 592 */ 593 static void LIBXML_ATTR_FORMAT(3,0) 594 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 595 const char *msg) 596 { 597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 598 (ctxt->instate == XML_PARSER_EOF)) 599 return; 600 if (ctxt != NULL) 601 ctxt->errNo = error; 602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 603 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 604 if (ctxt != NULL) { 605 ctxt->wellFormed = 0; 606 if (ctxt->recovery == 0) 607 ctxt->disableSAX = 1; 608 } 609 } 610 611 /** 612 * xmlWarningMsg: 613 * @ctxt: an XML parser context 614 * @error: the error number 615 * @msg: the error message 616 * @str1: extra data 617 * @str2: extra data 618 * 619 * Handle a warning. 620 */ 621 static void LIBXML_ATTR_FORMAT(3,0) 622 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 623 const char *msg, const xmlChar *str1, const xmlChar *str2) 624 { 625 xmlStructuredErrorFunc schannel = NULL; 626 627 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 628 (ctxt->instate == XML_PARSER_EOF)) 629 return; 630 if ((ctxt != NULL) && (ctxt->sax != NULL) && 631 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 632 schannel = ctxt->sax->serror; 633 if (ctxt != NULL) { 634 __xmlRaiseError(schannel, 635 (ctxt->sax) ? ctxt->sax->warning : NULL, 636 ctxt->userData, 637 ctxt, NULL, XML_FROM_PARSER, error, 638 XML_ERR_WARNING, NULL, 0, 639 (const char *) str1, (const char *) str2, NULL, 0, 0, 640 msg, (const char *) str1, (const char *) str2); 641 } else { 642 __xmlRaiseError(schannel, NULL, NULL, 643 ctxt, NULL, XML_FROM_PARSER, error, 644 XML_ERR_WARNING, NULL, 0, 645 (const char *) str1, (const char *) str2, NULL, 0, 0, 646 msg, (const char *) str1, (const char *) str2); 647 } 648 } 649 650 /** 651 * xmlValidityError: 652 * @ctxt: an XML parser context 653 * @error: the error number 654 * @msg: the error message 655 * @str1: extra data 656 * 657 * Handle a validity error. 
658 */ 659 static void LIBXML_ATTR_FORMAT(3,0) 660 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 661 const char *msg, const xmlChar *str1, const xmlChar *str2) 662 { 663 xmlStructuredErrorFunc schannel = NULL; 664 665 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 666 (ctxt->instate == XML_PARSER_EOF)) 667 return; 668 if (ctxt != NULL) { 669 ctxt->errNo = error; 670 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 671 schannel = ctxt->sax->serror; 672 } 673 if (ctxt != NULL) { 674 __xmlRaiseError(schannel, 675 ctxt->vctxt.error, ctxt->vctxt.userData, 676 ctxt, NULL, XML_FROM_DTD, error, 677 XML_ERR_ERROR, NULL, 0, (const char *) str1, 678 (const char *) str2, NULL, 0, 0, 679 msg, (const char *) str1, (const char *) str2); 680 ctxt->valid = 0; 681 } else { 682 __xmlRaiseError(schannel, NULL, NULL, 683 ctxt, NULL, XML_FROM_DTD, error, 684 XML_ERR_ERROR, NULL, 0, (const char *) str1, 685 (const char *) str2, NULL, 0, 0, 686 msg, (const char *) str1, (const char *) str2); 687 } 688 } 689 690 /** 691 * xmlFatalErrMsgInt: 692 * @ctxt: an XML parser context 693 * @error: the error number 694 * @msg: the error message 695 * @val: an integer value 696 * 697 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 698 */ 699 static void LIBXML_ATTR_FORMAT(3,0) 700 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 701 const char *msg, int val) 702 { 703 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 704 (ctxt->instate == XML_PARSER_EOF)) 705 return; 706 if (ctxt != NULL) 707 ctxt->errNo = error; 708 __xmlRaiseError(NULL, NULL, NULL, 709 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 710 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 711 if (ctxt != NULL) { 712 ctxt->wellFormed = 0; 713 if (ctxt->recovery == 0) 714 ctxt->disableSAX = 1; 715 } 716 } 717 718 /** 719 * xmlFatalErrMsgStrIntStr: 720 * @ctxt: an XML parser context 721 * @error: the error number 722 * @msg: the error message 723 * @str1: an string info 724 * @val: an integer value 725 * @str2: an string info 726 * 727 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 728 */ 729 static void LIBXML_ATTR_FORMAT(3,0) 730 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 731 const char *msg, const xmlChar *str1, int val, 732 const xmlChar *str2) 733 { 734 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 735 (ctxt->instate == XML_PARSER_EOF)) 736 return; 737 if (ctxt != NULL) 738 ctxt->errNo = error; 739 __xmlRaiseError(NULL, NULL, NULL, 740 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 741 NULL, 0, (const char *) str1, (const char *) str2, 742 NULL, val, 0, msg, str1, val, str2); 743 if (ctxt != NULL) { 744 ctxt->wellFormed = 0; 745 if (ctxt->recovery == 0) 746 ctxt->disableSAX = 1; 747 } 748 } 749 750 /** 751 * xmlFatalErrMsgStr: 752 * @ctxt: an XML parser context 753 * @error: the error number 754 * @msg: the error message 755 * @val: a string value 756 * 757 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 758 */ 759 static void LIBXML_ATTR_FORMAT(3,0) 760 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 761 const char *msg, const xmlChar * val) 762 { 763 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 764 (ctxt->instate == XML_PARSER_EOF)) 765 return; 766 if (ctxt != NULL) 767 ctxt->errNo = error; 768 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 769 XML_FROM_PARSER, error, XML_ERR_FATAL, 770 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 771 val); 772 if (ctxt != NULL) { 773 ctxt->wellFormed = 0; 774 if (ctxt->recovery == 0) 775 ctxt->disableSAX = 1; 776 } 777 } 778 779 /** 780 * xmlErrMsgStr: 781 * @ctxt: an XML parser context 782 * @error: the error number 783 * @msg: the error message 784 * @val: a string value 785 * 786 * Handle a non fatal parser error 787 */ 788 static void LIBXML_ATTR_FORMAT(3,0) 789 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 790 const char *msg, const xmlChar * val) 791 { 792 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 793 (ctxt->instate == XML_PARSER_EOF)) 794 return; 795 if (ctxt != NULL) 796 ctxt->errNo = error; 797 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 798 XML_FROM_PARSER, error, XML_ERR_ERROR, 799 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 800 val); 801 } 802 803 /** 804 * xmlNsErr: 805 * @ctxt: an XML parser context 806 * @error: the error number 807 * @msg: the message 808 * @info1: extra information string 809 * @info2: extra information string 810 * 811 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 812 */ 813 static void LIBXML_ATTR_FORMAT(3,0) 814 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 815 const char *msg, 816 const xmlChar * info1, const xmlChar * info2, 817 const xmlChar * info3) 818 { 819 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 820 (ctxt->instate == XML_PARSER_EOF)) 821 return; 822 if (ctxt != NULL) 823 ctxt->errNo = error; 824 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 825 XML_ERR_ERROR, NULL, 0, (const char *) info1, 826 (const char *) info2, (const char *) info3, 0, 0, msg, 827 info1, info2, info3); 828 if (ctxt != NULL) 829 ctxt->nsWellFormed = 0; 830 } 831 832 /** 833 * xmlNsWarn 834 * @ctxt: an XML parser context 835 * @error: the error number 836 * @msg: the message 837 * @info1: extra information string 838 * @info2: extra information string 839 * 840 * Handle a namespace warning error 841 */ 842 static void LIBXML_ATTR_FORMAT(3,0) 843 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 844 const char *msg, 845 const xmlChar * info1, const xmlChar * info2, 846 const xmlChar * info3) 847 { 848 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 849 (ctxt->instate == XML_PARSER_EOF)) 850 return; 851 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 852 XML_ERR_WARNING, NULL, 0, (const char *) info1, 853 (const char *) info2, (const char *) info3, 0, 0, msg, 854 info1, info2, info3); 855 } 856 857 /************************************************************************ 858 * * 859 * Library wide options * 860 * * 861 ************************************************************************/ 862 863 /** 864 * xmlHasFeature: 865 * @feature: the feature to be examined 866 * 867 * Examines if the library has been compiled with a given feature. 868 * 869 * Returns a non-zero value if the feature exist, otherwise zero. 870 * Returns zero (0) if the feature does not exist or an unknown 871 * unknown feature is requested, non-zero otherwise. 
872 */ 873 int 874 xmlHasFeature(xmlFeature feature) 875 { 876 switch (feature) { 877 case XML_WITH_THREAD: 878 #ifdef LIBXML_THREAD_ENABLED 879 return(1); 880 #else 881 return(0); 882 #endif 883 case XML_WITH_TREE: 884 #ifdef LIBXML_TREE_ENABLED 885 return(1); 886 #else 887 return(0); 888 #endif 889 case XML_WITH_OUTPUT: 890 #ifdef LIBXML_OUTPUT_ENABLED 891 return(1); 892 #else 893 return(0); 894 #endif 895 case XML_WITH_PUSH: 896 #ifdef LIBXML_PUSH_ENABLED 897 return(1); 898 #else 899 return(0); 900 #endif 901 case XML_WITH_READER: 902 #ifdef LIBXML_READER_ENABLED 903 return(1); 904 #else 905 return(0); 906 #endif 907 case XML_WITH_PATTERN: 908 #ifdef LIBXML_PATTERN_ENABLED 909 return(1); 910 #else 911 return(0); 912 #endif 913 case XML_WITH_WRITER: 914 #ifdef LIBXML_WRITER_ENABLED 915 return(1); 916 #else 917 return(0); 918 #endif 919 case XML_WITH_SAX1: 920 #ifdef LIBXML_SAX1_ENABLED 921 return(1); 922 #else 923 return(0); 924 #endif 925 case XML_WITH_FTP: 926 #ifdef LIBXML_FTP_ENABLED 927 return(1); 928 #else 929 return(0); 930 #endif 931 case XML_WITH_HTTP: 932 #ifdef LIBXML_HTTP_ENABLED 933 return(1); 934 #else 935 return(0); 936 #endif 937 case XML_WITH_VALID: 938 #ifdef LIBXML_VALID_ENABLED 939 return(1); 940 #else 941 return(0); 942 #endif 943 case XML_WITH_HTML: 944 #ifdef LIBXML_HTML_ENABLED 945 return(1); 946 #else 947 return(0); 948 #endif 949 case XML_WITH_LEGACY: 950 #ifdef LIBXML_LEGACY_ENABLED 951 return(1); 952 #else 953 return(0); 954 #endif 955 case XML_WITH_C14N: 956 #ifdef LIBXML_C14N_ENABLED 957 return(1); 958 #else 959 return(0); 960 #endif 961 case XML_WITH_CATALOG: 962 #ifdef LIBXML_CATALOG_ENABLED 963 return(1); 964 #else 965 return(0); 966 #endif 967 case XML_WITH_XPATH: 968 #ifdef LIBXML_XPATH_ENABLED 969 return(1); 970 #else 971 return(0); 972 #endif 973 case XML_WITH_XPTR: 974 #ifdef LIBXML_XPTR_ENABLED 975 return(1); 976 #else 977 return(0); 978 #endif 979 case XML_WITH_XINCLUDE: 980 #ifdef LIBXML_XINCLUDE_ENABLED 981 return(1); 982 
#else 983 return(0); 984 #endif 985 case XML_WITH_ICONV: 986 #ifdef LIBXML_ICONV_ENABLED 987 return(1); 988 #else 989 return(0); 990 #endif 991 case XML_WITH_ISO8859X: 992 #ifdef LIBXML_ISO8859X_ENABLED 993 return(1); 994 #else 995 return(0); 996 #endif 997 case XML_WITH_UNICODE: 998 #ifdef LIBXML_UNICODE_ENABLED 999 return(1); 1000 #else 1001 return(0); 1002 #endif 1003 case XML_WITH_REGEXP: 1004 #ifdef LIBXML_REGEXP_ENABLED 1005 return(1); 1006 #else 1007 return(0); 1008 #endif 1009 case XML_WITH_AUTOMATA: 1010 #ifdef LIBXML_AUTOMATA_ENABLED 1011 return(1); 1012 #else 1013 return(0); 1014 #endif 1015 case XML_WITH_EXPR: 1016 #ifdef LIBXML_EXPR_ENABLED 1017 return(1); 1018 #else 1019 return(0); 1020 #endif 1021 case XML_WITH_SCHEMAS: 1022 #ifdef LIBXML_SCHEMAS_ENABLED 1023 return(1); 1024 #else 1025 return(0); 1026 #endif 1027 case XML_WITH_SCHEMATRON: 1028 #ifdef LIBXML_SCHEMATRON_ENABLED 1029 return(1); 1030 #else 1031 return(0); 1032 #endif 1033 case XML_WITH_MODULES: 1034 #ifdef LIBXML_MODULES_ENABLED 1035 return(1); 1036 #else 1037 return(0); 1038 #endif 1039 case XML_WITH_DEBUG: 1040 #ifdef LIBXML_DEBUG_ENABLED 1041 return(1); 1042 #else 1043 return(0); 1044 #endif 1045 case XML_WITH_DEBUG_MEM: 1046 #ifdef DEBUG_MEMORY_LOCATION 1047 return(1); 1048 #else 1049 return(0); 1050 #endif 1051 case XML_WITH_DEBUG_RUN: 1052 #ifdef LIBXML_DEBUG_RUNTIME 1053 return(1); 1054 #else 1055 return(0); 1056 #endif 1057 case XML_WITH_ZLIB: 1058 #ifdef LIBXML_ZLIB_ENABLED 1059 return(1); 1060 #else 1061 return(0); 1062 #endif 1063 case XML_WITH_LZMA: 1064 #ifdef LIBXML_LZMA_ENABLED 1065 return(1); 1066 #else 1067 return(0); 1068 #endif 1069 case XML_WITH_ICU: 1070 #ifdef LIBXML_ICU_ENABLED 1071 return(1); 1072 #else 1073 return(0); 1074 #endif 1075 default: 1076 break; 1077 } 1078 return(0); 1079 } 1080 1081 /************************************************************************ 1082 * * 1083 * SAX2 defaulted attributes handling * 1084 * * 1085 
************************************************************************/ 1086 1087 /** 1088 * xmlDetectSAX2: 1089 * @ctxt: an XML parser context 1090 * 1091 * Do the SAX2 detection and specific initialization 1092 */ 1093 static void 1094 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1095 xmlSAXHandlerPtr sax; 1096 1097 /* Avoid unused variable warning if features are disabled. */ 1098 (void) sax; 1099 1100 if (ctxt == NULL) return; 1101 sax = ctxt->sax; 1102 #ifdef LIBXML_SAX1_ENABLED 1103 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) && 1104 ((sax->startElementNs != NULL) || 1105 (sax->endElementNs != NULL) || 1106 ((sax->startElement == NULL) && (sax->endElement == NULL)))) 1107 ctxt->sax2 = 1; 1108 #else 1109 ctxt->sax2 = 1; 1110 #endif /* LIBXML_SAX1_ENABLED */ 1111 1112 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1113 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1114 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1115 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1116 (ctxt->str_xml_ns == NULL)) { 1117 xmlErrMemory(ctxt, NULL); 1118 } 1119 } 1120 1121 typedef struct _xmlDefAttrs xmlDefAttrs; 1122 typedef xmlDefAttrs *xmlDefAttrsPtr; 1123 struct _xmlDefAttrs { 1124 int nbAttrs; /* number of defaulted attributes on that element */ 1125 int maxAttrs; /* the size of the array */ 1126 #if __STDC_VERSION__ >= 199901L 1127 /* Using a C99 flexible array member avoids UBSan errors. 
*/ 1128 const xmlChar *values[]; /* array of localname/prefix/values/external */ 1129 #else 1130 const xmlChar *values[5]; 1131 #endif 1132 }; 1133 1134 /** 1135 * xmlAttrNormalizeSpace: 1136 * @src: the source string 1137 * @dst: the target string 1138 * 1139 * Normalize the space in non CDATA attribute values: 1140 * If the attribute type is not CDATA, then the XML processor MUST further 1141 * process the normalized attribute value by discarding any leading and 1142 * trailing space (#x20) characters, and by replacing sequences of space 1143 * (#x20) characters by a single space (#x20) character. 1144 * Note that the size of dst need to be at least src, and if one doesn't need 1145 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1146 * passing src as dst is just fine. 1147 * 1148 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1149 * is needed. 1150 */ 1151 static xmlChar * 1152 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1153 { 1154 if ((src == NULL) || (dst == NULL)) 1155 return(NULL); 1156 1157 while (*src == 0x20) src++; 1158 while (*src != 0) { 1159 if (*src == 0x20) { 1160 while (*src == 0x20) src++; 1161 if (*src != 0) 1162 *dst++ = 0x20; 1163 } else { 1164 *dst++ = *src++; 1165 } 1166 } 1167 *dst = 0; 1168 if (dst == src) 1169 return(NULL); 1170 return(dst); 1171 } 1172 1173 /** 1174 * xmlAttrNormalizeSpace2: 1175 * @src: the source string 1176 * 1177 * Normalize the space in non CDATA attribute values, a slightly more complex 1178 * front end to avoid allocation problems when running on attribute values 1179 * coming from the input. 1180 * 1181 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1182 * is needed. 
1183 */ 1184 static const xmlChar * 1185 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1186 { 1187 int i; 1188 int remove_head = 0; 1189 int need_realloc = 0; 1190 const xmlChar *cur; 1191 1192 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1193 return(NULL); 1194 i = *len; 1195 if (i <= 0) 1196 return(NULL); 1197 1198 cur = src; 1199 while (*cur == 0x20) { 1200 cur++; 1201 remove_head++; 1202 } 1203 while (*cur != 0) { 1204 if (*cur == 0x20) { 1205 cur++; 1206 if ((*cur == 0x20) || (*cur == 0)) { 1207 need_realloc = 1; 1208 break; 1209 } 1210 } else 1211 cur++; 1212 } 1213 if (need_realloc) { 1214 xmlChar *ret; 1215 1216 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1217 if (ret == NULL) { 1218 xmlErrMemory(ctxt, NULL); 1219 return(NULL); 1220 } 1221 xmlAttrNormalizeSpace(ret, ret); 1222 *len = (int) strlen((const char *)ret); 1223 return(ret); 1224 } else if (remove_head) { 1225 *len -= remove_head; 1226 memmove(src, src + remove_head, 1 + *len); 1227 return(src); 1228 } 1229 return(NULL); 1230 } 1231 1232 /** 1233 * xmlAddDefAttrs: 1234 * @ctxt: an XML parser context 1235 * @fullname: the element fullname 1236 * @fullattr: the attribute fullname 1237 * @value: the attribute value 1238 * 1239 * Add a defaulted attribute for an element 1240 */ 1241 static void 1242 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1243 const xmlChar *fullname, 1244 const xmlChar *fullattr, 1245 const xmlChar *value) { 1246 xmlDefAttrsPtr defaults; 1247 int len; 1248 const xmlChar *name; 1249 const xmlChar *prefix; 1250 1251 /* 1252 * Allows to detect attribute redefinitions 1253 */ 1254 if (ctxt->attsSpecial != NULL) { 1255 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1256 return; 1257 } 1258 1259 if (ctxt->attsDefault == NULL) { 1260 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1261 if (ctxt->attsDefault == NULL) 1262 goto mem_error; 1263 } 1264 1265 /* 1266 * split the element name into prefix:localname , the string 
found 1267 * are within the DTD and then not associated to namespace names. 1268 */ 1269 name = xmlSplitQName3(fullname, &len); 1270 if (name == NULL) { 1271 name = xmlDictLookup(ctxt->dict, fullname, -1); 1272 prefix = NULL; 1273 } else { 1274 name = xmlDictLookup(ctxt->dict, name, -1); 1275 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1276 } 1277 1278 /* 1279 * make sure there is some storage 1280 */ 1281 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1282 if (defaults == NULL) { 1283 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1284 (4 * 5) * sizeof(const xmlChar *)); 1285 if (defaults == NULL) 1286 goto mem_error; 1287 defaults->nbAttrs = 0; 1288 defaults->maxAttrs = 4; 1289 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1290 defaults, NULL) < 0) { 1291 xmlFree(defaults); 1292 goto mem_error; 1293 } 1294 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1295 xmlDefAttrsPtr temp; 1296 1297 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1298 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1299 if (temp == NULL) 1300 goto mem_error; 1301 defaults = temp; 1302 defaults->maxAttrs *= 2; 1303 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1304 defaults, NULL) < 0) { 1305 xmlFree(defaults); 1306 goto mem_error; 1307 } 1308 } 1309 1310 /* 1311 * Split the element name into prefix:localname , the string found 1312 * are within the DTD and hen not associated to namespace names. 
1313 */ 1314 name = xmlSplitQName3(fullattr, &len); 1315 if (name == NULL) { 1316 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1317 prefix = NULL; 1318 } else { 1319 name = xmlDictLookup(ctxt->dict, name, -1); 1320 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1321 } 1322 1323 defaults->values[5 * defaults->nbAttrs] = name; 1324 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1325 /* intern the string and precompute the end */ 1326 len = xmlStrlen(value); 1327 value = xmlDictLookup(ctxt->dict, value, len); 1328 defaults->values[5 * defaults->nbAttrs + 2] = value; 1329 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1330 if (ctxt->external) 1331 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1332 else 1333 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1334 defaults->nbAttrs++; 1335 1336 return; 1337 1338 mem_error: 1339 xmlErrMemory(ctxt, NULL); 1340 return; 1341 } 1342 1343 /** 1344 * xmlAddSpecialAttr: 1345 * @ctxt: an XML parser context 1346 * @fullname: the element fullname 1347 * @fullattr: the attribute fullname 1348 * @type: the attribute type 1349 * 1350 * Register this attribute type 1351 */ 1352 static void 1353 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1354 const xmlChar *fullname, 1355 const xmlChar *fullattr, 1356 int type) 1357 { 1358 if (ctxt->attsSpecial == NULL) { 1359 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1360 if (ctxt->attsSpecial == NULL) 1361 goto mem_error; 1362 } 1363 1364 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1365 return; 1366 1367 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1368 (void *) (ptrdiff_t) type); 1369 return; 1370 1371 mem_error: 1372 xmlErrMemory(ctxt, NULL); 1373 return; 1374 } 1375 1376 /** 1377 * xmlCleanSpecialAttrCallback: 1378 * 1379 * Removes CDATA attributes from the special attribute table 1380 */ 1381 static void 1382 xmlCleanSpecialAttrCallback(void *payload, void *data, 1383 const xmlChar *fullname, const xmlChar 
*fullattr, 1384 const xmlChar *unused ATTRIBUTE_UNUSED) { 1385 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1386 1387 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { 1388 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1389 } 1390 } 1391 1392 /** 1393 * xmlCleanSpecialAttr: 1394 * @ctxt: an XML parser context 1395 * 1396 * Trim the list of attributes defined to remove all those of type 1397 * CDATA as they are not special. This call should be done when finishing 1398 * to parse the DTD and before starting to parse the document root. 1399 */ 1400 static void 1401 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1402 { 1403 if (ctxt->attsSpecial == NULL) 1404 return; 1405 1406 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1407 1408 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1409 xmlHashFree(ctxt->attsSpecial, NULL); 1410 ctxt->attsSpecial = NULL; 1411 } 1412 return; 1413 } 1414 1415 /** 1416 * xmlCheckLanguageID: 1417 * @lang: pointer to the string value 1418 * 1419 * Checks that the value conforms to the LanguageID production: 1420 * 1421 * NOTE: this is somewhat deprecated, those productions were removed from 1422 * the XML Second edition. 
1423 * 1424 * [33] LanguageID ::= Langcode ('-' Subcode)* 1425 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1426 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1427 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1428 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1429 * [38] Subcode ::= ([a-z] | [A-Z])+ 1430 * 1431 * The current REC reference the successors of RFC 1766, currently 5646 1432 * 1433 * http://www.rfc-editor.org/rfc/rfc5646.txt 1434 * langtag = language 1435 * ["-" script] 1436 * ["-" region] 1437 * *("-" variant) 1438 * *("-" extension) 1439 * ["-" privateuse] 1440 * language = 2*3ALPHA ; shortest ISO 639 code 1441 * ["-" extlang] ; sometimes followed by 1442 * ; extended language subtags 1443 * / 4ALPHA ; or reserved for future use 1444 * / 5*8ALPHA ; or registered language subtag 1445 * 1446 * extlang = 3ALPHA ; selected ISO 639 codes 1447 * *2("-" 3ALPHA) ; permanently reserved 1448 * 1449 * script = 4ALPHA ; ISO 15924 code 1450 * 1451 * region = 2ALPHA ; ISO 3166-1 code 1452 * / 3DIGIT ; UN M.49 code 1453 * 1454 * variant = 5*8alphanum ; registered variants 1455 * / (DIGIT 3alphanum) 1456 * 1457 * extension = singleton 1*("-" (2*8alphanum)) 1458 * 1459 * ; Single alphanumerics 1460 * ; "x" reserved for private use 1461 * singleton = DIGIT ; 0 - 9 1462 * / %x41-57 ; A - W 1463 * / %x59-5A ; Y - Z 1464 * / %x61-77 ; a - w 1465 * / %x79-7A ; y - z 1466 * 1467 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1468 * The parser below doesn't try to cope with extension or privateuse 1469 * that could be added but that's not interoperable anyway 1470 * 1471 * Returns 1 if correct 0 otherwise 1472 **/ 1473 int 1474 xmlCheckLanguageID(const xmlChar * lang) 1475 { 1476 const xmlChar *cur = lang, *nxt; 1477 1478 if (cur == NULL) 1479 return (0); 1480 if (((cur[0] == 'i') && (cur[1] == '-')) || 1481 ((cur[0] == 'I') && (cur[1] == '-')) || 1482 ((cur[0] == 'x') && (cur[1] == '-')) || 1483 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1484 /* 1485 * Still allow IANA code and user code which were coming 1486 * from the previous version of the XML-1.0 specification 1487 * it's deprecated but we should not fail 1488 */ 1489 cur += 2; 1490 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1491 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1492 cur++; 1493 return(cur[0] == 0); 1494 } 1495 nxt = cur; 1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1498 nxt++; 1499 if (nxt - cur >= 4) { 1500 /* 1501 * Reserved 1502 */ 1503 if ((nxt - cur > 8) || (nxt[0] != 0)) 1504 return(0); 1505 return(1); 1506 } 1507 if (nxt - cur < 2) 1508 return(0); 1509 /* we got an ISO 639 code */ 1510 if (nxt[0] == 0) 1511 return(1); 1512 if (nxt[0] != '-') 1513 return(0); 1514 1515 nxt++; 1516 cur = nxt; 1517 /* now we can have extlang or script or region or variant */ 1518 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1519 goto region_m49; 1520 1521 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1522 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1523 nxt++; 1524 if (nxt - cur == 4) 1525 goto script; 1526 if (nxt - cur == 2) 1527 goto region; 1528 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1529 goto variant; 1530 if (nxt - cur != 3) 1531 return(0); 1532 /* we parsed an extlang */ 1533 if (nxt[0] == 0) 1534 return(1); 1535 if (nxt[0] != '-') 1536 return(0); 1537 1538 nxt++; 1539 cur = nxt; 1540 /* now we can have script or region or variant */ 1541 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1542 goto region_m49; 1543 1544 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1545 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1546 nxt++; 1547 if (nxt - cur == 2) 1548 goto region; 1549 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1550 goto variant; 1551 if (nxt - cur != 4) 1552 return(0); 1553 /* we parsed a script */ 1554 script: 1555 if (nxt[0] == 0) 1556 return(1); 1557 if (nxt[0] != '-') 1558 return(0); 1559 1560 nxt++; 1561 cur = nxt; 1562 /* now we can have region or variant */ 1563 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1564 goto region_m49; 1565 1566 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1567 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1568 nxt++; 1569 1570 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1571 goto variant; 1572 if (nxt - cur != 2) 1573 return(0); 1574 /* we parsed a region */ 1575 region: 1576 if (nxt[0] == 0) 1577 return(1); 1578 if (nxt[0] != '-') 1579 return(0); 1580 1581 nxt++; 1582 cur = nxt; 1583 /* now we can just have a variant */ 1584 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1585 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1586 nxt++; 1587 1588 if ((nxt - cur < 5) || (nxt - cur > 8)) 1589 return(0); 1590 1591 /* we parsed a variant */ 1592 variant: 1593 if (nxt[0] == 0) 1594 return(1); 1595 if (nxt[0] != '-') 1596 return(0); 1597 /* extensions and private use subtags not checked */ 1598 return (1); 1599 1600 region_m49: 1601 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1602 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1603 nxt += 3; 1604 goto region; 1605 } 1606 return(0); 1607 } 1608 1609 /************************************************************************ 1610 * * 1611 * Parser stacks related functions and macros * 1612 * * 1613 ************************************************************************/ 1614 1615 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1616 const xmlChar ** str); 1617 1618 #ifdef SAX2 1619 /** 1620 * nsPush: 1621 * @ctxt: an XML parser context 1622 * @prefix: the namespace prefix or NULL 1623 * @URL: the namespace name 1624 * 1625 * Pushes a new parser namespace on top of the ns stack 1626 * 1627 * Returns -1 in case of error, -2 if the namespace should be discarded 1628 * and the index in the stack otherwise. 
1629 */ 1630 static int 1631 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1632 { 1633 if (ctxt->options & XML_PARSE_NSCLEAN) { 1634 int i; 1635 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1636 if (ctxt->nsTab[i] == prefix) { 1637 /* in scope */ 1638 if (ctxt->nsTab[i + 1] == URL) 1639 return(-2); 1640 /* out of scope keep it */ 1641 break; 1642 } 1643 } 1644 } 1645 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1646 ctxt->nsMax = 10; 1647 ctxt->nsNr = 0; 1648 ctxt->nsTab = (const xmlChar **) 1649 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1650 if (ctxt->nsTab == NULL) { 1651 xmlErrMemory(ctxt, NULL); 1652 ctxt->nsMax = 0; 1653 return (-1); 1654 } 1655 } else if (ctxt->nsNr >= ctxt->nsMax) { 1656 const xmlChar ** tmp; 1657 ctxt->nsMax *= 2; 1658 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1659 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1660 if (tmp == NULL) { 1661 xmlErrMemory(ctxt, NULL); 1662 ctxt->nsMax /= 2; 1663 return (-1); 1664 } 1665 ctxt->nsTab = tmp; 1666 } 1667 ctxt->nsTab[ctxt->nsNr++] = prefix; 1668 ctxt->nsTab[ctxt->nsNr++] = URL; 1669 return (ctxt->nsNr); 1670 } 1671 /** 1672 * nsPop: 1673 * @ctxt: an XML parser context 1674 * @nr: the number to pop 1675 * 1676 * Pops the top @nr parser prefix/namespace from the ns stack 1677 * 1678 * Returns the number of namespaces removed 1679 */ 1680 static int 1681 nsPop(xmlParserCtxtPtr ctxt, int nr) 1682 { 1683 int i; 1684 1685 if (ctxt->nsTab == NULL) return(0); 1686 if (ctxt->nsNr < nr) { 1687 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1688 nr = ctxt->nsNr; 1689 } 1690 if (ctxt->nsNr <= 0) 1691 return (0); 1692 1693 for (i = 0;i < nr;i++) { 1694 ctxt->nsNr--; 1695 ctxt->nsTab[ctxt->nsNr] = NULL; 1696 } 1697 return(nr); 1698 } 1699 #endif 1700 1701 static int 1702 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1703 const xmlChar **atts; 1704 int *attallocs; 1705 int maxatts; 1706 1707 if (ctxt->atts == NULL) { 1708 maxatts = 55; /* allow for 
10 attrs by default */ 1709 atts = (const xmlChar **) 1710 xmlMalloc(maxatts * sizeof(xmlChar *)); 1711 if (atts == NULL) goto mem_error; 1712 ctxt->atts = atts; 1713 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1714 if (attallocs == NULL) goto mem_error; 1715 ctxt->attallocs = attallocs; 1716 ctxt->maxatts = maxatts; 1717 } else if (nr + 5 > ctxt->maxatts) { 1718 maxatts = (nr + 5) * 2; 1719 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1720 maxatts * sizeof(const xmlChar *)); 1721 if (atts == NULL) goto mem_error; 1722 ctxt->atts = atts; 1723 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1724 (maxatts / 5) * sizeof(int)); 1725 if (attallocs == NULL) goto mem_error; 1726 ctxt->attallocs = attallocs; 1727 ctxt->maxatts = maxatts; 1728 } 1729 return(ctxt->maxatts); 1730 mem_error: 1731 xmlErrMemory(ctxt, NULL); 1732 return(-1); 1733 } 1734 1735 /** 1736 * inputPush: 1737 * @ctxt: an XML parser context 1738 * @value: the parser input 1739 * 1740 * Pushes a new parser input on top of the input stack 1741 * 1742 * Returns -1 in case of error, the index in the stack otherwise 1743 */ 1744 int 1745 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1746 { 1747 if ((ctxt == NULL) || (value == NULL)) 1748 return(-1); 1749 if (ctxt->inputNr >= ctxt->inputMax) { 1750 ctxt->inputMax *= 2; 1751 ctxt->inputTab = 1752 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1753 ctxt->inputMax * 1754 sizeof(ctxt->inputTab[0])); 1755 if (ctxt->inputTab == NULL) { 1756 xmlErrMemory(ctxt, NULL); 1757 ctxt->inputMax /= 2; 1758 return (-1); 1759 } 1760 } 1761 ctxt->inputTab[ctxt->inputNr] = value; 1762 ctxt->input = value; 1763 return (ctxt->inputNr++); 1764 } 1765 /** 1766 * inputPop: 1767 * @ctxt: an XML parser context 1768 * 1769 * Pops the top parser input from the input stack 1770 * 1771 * Returns the input just removed 1772 */ 1773 xmlParserInputPtr 1774 inputPop(xmlParserCtxtPtr ctxt) 1775 { 1776 xmlParserInputPtr ret; 1777 1778 if (ctxt == 
NULL) 1779 return(NULL); 1780 if (ctxt->inputNr <= 0) 1781 return (NULL); 1782 ctxt->inputNr--; 1783 if (ctxt->inputNr > 0) 1784 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1785 else 1786 ctxt->input = NULL; 1787 ret = ctxt->inputTab[ctxt->inputNr]; 1788 ctxt->inputTab[ctxt->inputNr] = NULL; 1789 return (ret); 1790 } 1791 /** 1792 * nodePush: 1793 * @ctxt: an XML parser context 1794 * @value: the element node 1795 * 1796 * Pushes a new element node on top of the node stack 1797 * 1798 * Returns -1 in case of error, the index in the stack otherwise 1799 */ 1800 int 1801 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1802 { 1803 if (ctxt == NULL) return(0); 1804 if (ctxt->nodeNr >= ctxt->nodeMax) { 1805 xmlNodePtr *tmp; 1806 1807 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1808 ctxt->nodeMax * 2 * 1809 sizeof(ctxt->nodeTab[0])); 1810 if (tmp == NULL) { 1811 xmlErrMemory(ctxt, NULL); 1812 return (-1); 1813 } 1814 ctxt->nodeTab = tmp; 1815 ctxt->nodeMax *= 2; 1816 } 1817 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1818 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1819 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1820 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1821 xmlParserMaxDepth); 1822 xmlHaltParser(ctxt); 1823 return(-1); 1824 } 1825 ctxt->nodeTab[ctxt->nodeNr] = value; 1826 ctxt->node = value; 1827 return (ctxt->nodeNr++); 1828 } 1829 1830 /** 1831 * nodePop: 1832 * @ctxt: an XML parser context 1833 * 1834 * Pops the top element node from the node stack 1835 * 1836 * Returns the node just removed 1837 */ 1838 xmlNodePtr 1839 nodePop(xmlParserCtxtPtr ctxt) 1840 { 1841 xmlNodePtr ret; 1842 1843 if (ctxt == NULL) return(NULL); 1844 if (ctxt->nodeNr <= 0) 1845 return (NULL); 1846 ctxt->nodeNr--; 1847 if (ctxt->nodeNr > 0) 1848 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1849 else 1850 ctxt->node = NULL; 1851 ret = ctxt->nodeTab[ctxt->nodeNr]; 1852 ctxt->nodeTab[ctxt->nodeNr] = NULL; 1853 return (ret); 1854 } 1855 1856 
/** 1857 * nameNsPush: 1858 * @ctxt: an XML parser context 1859 * @value: the element name 1860 * @prefix: the element prefix 1861 * @URI: the element namespace name 1862 * @line: the current line number for error messages 1863 * @nsNr: the number of namespaces pushed on the namespace table 1864 * 1865 * Pushes a new element name/prefix/URL on top of the name stack 1866 * 1867 * Returns -1 in case of error, the index in the stack otherwise 1868 */ 1869 static int 1870 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1871 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr) 1872 { 1873 xmlStartTag *tag; 1874 1875 if (ctxt->nameNr >= ctxt->nameMax) { 1876 const xmlChar * *tmp; 1877 xmlStartTag *tmp2; 1878 ctxt->nameMax *= 2; 1879 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1880 ctxt->nameMax * 1881 sizeof(ctxt->nameTab[0])); 1882 if (tmp == NULL) { 1883 ctxt->nameMax /= 2; 1884 goto mem_error; 1885 } 1886 ctxt->nameTab = tmp; 1887 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab, 1888 ctxt->nameMax * 1889 sizeof(ctxt->pushTab[0])); 1890 if (tmp2 == NULL) { 1891 ctxt->nameMax /= 2; 1892 goto mem_error; 1893 } 1894 ctxt->pushTab = tmp2; 1895 } else if (ctxt->pushTab == NULL) { 1896 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax * 1897 sizeof(ctxt->pushTab[0])); 1898 if (ctxt->pushTab == NULL) 1899 goto mem_error; 1900 } 1901 ctxt->nameTab[ctxt->nameNr] = value; 1902 ctxt->name = value; 1903 tag = &ctxt->pushTab[ctxt->nameNr]; 1904 tag->prefix = prefix; 1905 tag->URI = URI; 1906 tag->line = line; 1907 tag->nsNr = nsNr; 1908 return (ctxt->nameNr++); 1909 mem_error: 1910 xmlErrMemory(ctxt, NULL); 1911 return (-1); 1912 } 1913 #ifdef LIBXML_PUSH_ENABLED 1914 /** 1915 * nameNsPop: 1916 * @ctxt: an XML parser context 1917 * 1918 * Pops the top element/prefix/URI name from the name stack 1919 * 1920 * Returns the name just removed 1921 */ 1922 static const xmlChar * 1923 nameNsPop(xmlParserCtxtPtr ctxt) 1924 { 1925 
const xmlChar *ret; 1926 1927 if (ctxt->nameNr <= 0) 1928 return (NULL); 1929 ctxt->nameNr--; 1930 if (ctxt->nameNr > 0) 1931 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1932 else 1933 ctxt->name = NULL; 1934 ret = ctxt->nameTab[ctxt->nameNr]; 1935 ctxt->nameTab[ctxt->nameNr] = NULL; 1936 return (ret); 1937 } 1938 #endif /* LIBXML_PUSH_ENABLED */ 1939 1940 /** 1941 * namePush: 1942 * @ctxt: an XML parser context 1943 * @value: the element name 1944 * 1945 * Pushes a new element name on top of the name stack 1946 * 1947 * Returns -1 in case of error, the index in the stack otherwise 1948 */ 1949 int 1950 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1951 { 1952 if (ctxt == NULL) return (-1); 1953 1954 if (ctxt->nameNr >= ctxt->nameMax) { 1955 const xmlChar * *tmp; 1956 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1957 ctxt->nameMax * 2 * 1958 sizeof(ctxt->nameTab[0])); 1959 if (tmp == NULL) { 1960 goto mem_error; 1961 } 1962 ctxt->nameTab = tmp; 1963 ctxt->nameMax *= 2; 1964 } 1965 ctxt->nameTab[ctxt->nameNr] = value; 1966 ctxt->name = value; 1967 return (ctxt->nameNr++); 1968 mem_error: 1969 xmlErrMemory(ctxt, NULL); 1970 return (-1); 1971 } 1972 /** 1973 * namePop: 1974 * @ctxt: an XML parser context 1975 * 1976 * Pops the top element name from the name stack 1977 * 1978 * Returns the name just removed 1979 */ 1980 const xmlChar * 1981 namePop(xmlParserCtxtPtr ctxt) 1982 { 1983 const xmlChar *ret; 1984 1985 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1986 return (NULL); 1987 ctxt->nameNr--; 1988 if (ctxt->nameNr > 0) 1989 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1990 else 1991 ctxt->name = NULL; 1992 ret = ctxt->nameTab[ctxt->nameNr]; 1993 ctxt->nameTab[ctxt->nameNr] = NULL; 1994 return (ret); 1995 } 1996 1997 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1998 if (ctxt->spaceNr >= ctxt->spaceMax) { 1999 int *tmp; 2000 2001 ctxt->spaceMax *= 2; 2002 tmp = (int *) xmlRealloc(ctxt->spaceTab, 2003 ctxt->spaceMax * 
sizeof(ctxt->spaceTab[0])); 2004 if (tmp == NULL) { 2005 xmlErrMemory(ctxt, NULL); 2006 ctxt->spaceMax /=2; 2007 return(-1); 2008 } 2009 ctxt->spaceTab = tmp; 2010 } 2011 ctxt->spaceTab[ctxt->spaceNr] = val; 2012 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 2013 return(ctxt->spaceNr++); 2014 } 2015 2016 static int spacePop(xmlParserCtxtPtr ctxt) { 2017 int ret; 2018 if (ctxt->spaceNr <= 0) return(0); 2019 ctxt->spaceNr--; 2020 if (ctxt->spaceNr > 0) 2021 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 2022 else 2023 ctxt->space = &ctxt->spaceTab[0]; 2024 ret = ctxt->spaceTab[ctxt->spaceNr]; 2025 ctxt->spaceTab[ctxt->spaceNr] = -1; 2026 return(ret); 2027 } 2028 2029 /* 2030 * Macros for accessing the content. Those should be used only by the parser, 2031 * and not exported. 2032 * 2033 * Dirty macros, i.e. one often need to make assumption on the context to 2034 * use them 2035 * 2036 * CUR_PTR return the current pointer to the xmlChar to be parsed. 2037 * To be used with extreme caution since operations consuming 2038 * characters may move the input buffer to a different location ! 2039 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 2040 * This should be used internally by the parser 2041 * only to compare to ASCII values otherwise it would break when 2042 * running with UTF-8 encoding. 2043 * RAW same as CUR but in the input buffer, bypass any token 2044 * extraction that may have been done 2045 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 2046 * to compare on ASCII based substring. 2047 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 2048 * strings without newlines within the parser. 2049 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2050 * defined char within the parser. 
2051 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2052 * 2053 * NEXT Skip to the next character, this does the proper decoding 2054 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2055 * NEXTL(l) Skip the current unicode character of l xmlChars long. 2056 * CUR_CHAR(l) returns the current unicode character (int), set l 2057 * to the number of xmlChars used for the encoding [0-5]. 2058 * CUR_SCHAR same but operate on a string instead of the context 2059 * COPY_BUF copy the current unicode char to the target buffer, increment 2060 * the index 2061 * GROW, SHRINK handling of input buffers 2062 */ 2063 2064 #define RAW (*ctxt->input->cur) 2065 #define CUR (*ctxt->input->cur) 2066 #define NXT(val) ctxt->input->cur[(val)] 2067 #define CUR_PTR ctxt->input->cur 2068 #define BASE_PTR ctxt->input->base 2069 2070 #define CMP4( s, c1, c2, c3, c4 ) \ 2071 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2072 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2073 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 2074 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2075 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2076 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2077 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2078 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2079 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2080 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2081 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2082 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2083 ((unsigned char *) s)[ 8 ] == c9 ) 2084 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2085 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2086 ((unsigned char *) s)[ 9 ] == c10 ) 2087 2088 #define SKIP(val) do { \ 2089 ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2090 if 
(*ctxt->input->cur == 0)					\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the input cursor by val bytes, one byte at a time.
 * A '\n' under the cursor increments the line counter and resets col to 1,
 * any other byte increments col. After the loop, if the cursor sits on a
 * 0 byte, the input is asked for INPUT_CHUNK more bytes.
 * Expects a variable named ctxt in the expansion scope.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SHRINK: when not in progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie behind the cursor and fewer than
 * 2 * INPUT_CHUNK bytes remain ahead of it.
 * NOTE: this expands to a bare "if (...) call;" that already ends with a
 * semicolon, it is not wrapped in do { } while (0).
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
	(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);

/*
 * xmlSHRINK:
 * @ctxt:  the XML parser context
 *
 * Out-of-line body of the SHRINK macro: shrink the current input, then
 * request INPUT_CHUNK more bytes if the cursor ended up on a 0 byte.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}

/*
 * GROW: when not in progressive mode, call xmlGROW() if fewer than
 * INPUT_CHUNK bytes remain between the cursor and the end of the input.
 * NOTE: same shape as SHRINK, a bare "if (...) call;" ending with a
 * semicolon.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
	(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);

/*
 * xmlGROW:
 * @ctxt:  the XML parser context
 *
 * Out-of-line body of the GROW macro. Requests INPUT_CHUNK more bytes for
 * the current input, unless the amount of data before or after the cursor
 * already exceeds XML_MAX_LOOKUP_LIMIT for an input that has a buffer
 * whose read callback is not xmlInputReadCallbackNop and XML_PARSE_HUGE
 * is not set; in that case an error is raised and the parser is halted.
 * The parser is also halted if the cursor lies outside [base, end] after
 * growing.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    /* bytes available after the cursor / bytes already behind it */
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) &&
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
	return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /*
         * NOTE(review): unlike the branch above, the parser is halted
         * before the error is raised. Presumably this keeps error
         * reporting away from the out-of-range cursor; confirm before
         * reordering.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
	return;
    }
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}

/* SKIP_BLANKS: skip blanks at the cursor, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance by one character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor by exactly one byte, incrementing col (the
 * line counter is never touched), then request INPUT_CHUNK more bytes if
 * the cursor lands on a 0 byte.
 * NOTE: expands to a bare { } block, not do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance the cursor by l bytes, treated as one character for
 * line/col accounting: a '\n' under the cursor starts a new line, anything
 * else increments col by one. Unlike SKIPL/NEXT1 it never grows the input.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)

/* CUR_CHAR(l): current character of the input, its byte length goes to l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l == 1 the value is stored as a single byte, otherwise
 * it is encoded with xmlCopyCharMultiByte().
 * NOTE: expands to an unbraced if/else, so it must be used as a full
 * statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)

/*
 * CUR_CONSUMED: the input's consumed counter plus the offset of the
 * cursor from the base of the current buffer.
 */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Two strategies are used:
 *  - with a single input outside of the DTD state, or in the START state,
 *    the buffer is scanned directly and line/col are maintained by hand;
 *  - otherwise blanks are consumed with NEXT, parameter-entity references
 *    are expanded when allowed and exhausted inputs are popped.
 *
 * Returns the number of space chars skipped (saturating at INT_MAX); in
 * the second strategy each PE reference entered and each input popped is
 * counted as well.
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
        (ctxt->instate == XML_PARSER_START)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    } else {
		ctxt->input->col++;
	    }
	    cur++;
	    /* saturate instead of overflowing the counter */
	    if (res < INT_MAX)
	        res++;
	    if (*cur == 0) {
		/*
		 * Publish the position before growing and reload it
		 * afterwards: the grow call works on ctxt->input->cur.
		 */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
        /* PE references are expanded in external content or nested inputs */
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));

	while (1) {
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
		NEXT;
	    } else if (CUR == '%') {
                /*
                 * Need to handle support of entities branching here
                 */
	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
                    break;
	        xmlParsePEReference(ctxt);
            } else if (CUR == 0) {
                /* end of the current input: pop it unless it is the last */
                if (ctxt->inputNr <= 1)
                    break;
                xmlPopInput(ctxt);
            } else {
                break;
            }

            /*
             * Also increase the counter when entering or exiting a PERef.
             * The spec says: "When a parameter-entity reference is recognized
             * in the DTD and included, its replacement text MUST be enlarged
             * by the attachment of one leading and one following space (#x20)
             * character."
             */
	    if (res < INT_MAX)
	        res++;
        }
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
2261 * 2262 * Returns the current xmlChar in the parser context 2263 */ 2264 xmlChar 2265 xmlPopInput(xmlParserCtxtPtr ctxt) { 2266 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2267 if (xmlParserDebugEntities) 2268 xmlGenericError(xmlGenericErrorContext, 2269 "Popping input %d\n", ctxt->inputNr); 2270 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) && 2271 (ctxt->instate != XML_PARSER_EOF)) 2272 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2273 "Unfinished entity outside the DTD"); 2274 xmlFreeInputStream(inputPop(ctxt)); 2275 if (*ctxt->input->cur == 0) 2276 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2277 return(CUR); 2278 } 2279 2280 /** 2281 * xmlPushInput: 2282 * @ctxt: an XML parser context 2283 * @input: an XML parser input fragment (entity, XML fragment ...). 2284 * 2285 * xmlPushInput: switch to a new input stream which is stacked on top 2286 * of the previous one(s). 2287 * Returns -1 in case of error or the index in the input stack 2288 */ 2289 int 2290 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2291 int ret; 2292 if (input == NULL) return(-1); 2293 2294 if (xmlParserDebugEntities) { 2295 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2296 xmlGenericError(xmlGenericErrorContext, 2297 "%s(%d): ", ctxt->input->filename, 2298 ctxt->input->line); 2299 xmlGenericError(xmlGenericErrorContext, 2300 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2301 } 2302 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2303 (ctxt->inputNr > 1024)) { 2304 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2305 while (ctxt->inputNr > 1) 2306 xmlFreeInputStream(inputPop(ctxt)); 2307 return(-1); 2308 } 2309 ret = inputPush(ctxt, input); 2310 if (ctxt->instate == XML_PARSER_EOF) 2311 return(-1); 2312 GROW; 2313 return(ret); 2314 } 2315 2316 /** 2317 * xmlParseCharRef: 2318 * @ctxt: an XML parser context 2319 * 2320 * parse Reference declarations 2321 * 2322 * [66] CharRef ::= '&#' [0-9]+ ';' | 2323 * '&#x' 
[0-9a-fA-F]+ ';' 2324 * 2325 * [ WFC: Legal Character ] 2326 * Characters referred to using character references must match the 2327 * production for Char. 2328 * 2329 * Returns the value parsed (as an int), 0 in case of error 2330 */ 2331 int 2332 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2333 int val = 0; 2334 int count = 0; 2335 2336 /* 2337 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2338 */ 2339 if ((RAW == '&') && (NXT(1) == '#') && 2340 (NXT(2) == 'x')) { 2341 SKIP(3); 2342 GROW; 2343 while (RAW != ';') { /* loop blocked by count */ 2344 if (count++ > 20) { 2345 count = 0; 2346 GROW; 2347 if (ctxt->instate == XML_PARSER_EOF) 2348 return(0); 2349 } 2350 if ((RAW >= '0') && (RAW <= '9')) 2351 val = val * 16 + (CUR - '0'); 2352 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2353 val = val * 16 + (CUR - 'a') + 10; 2354 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2355 val = val * 16 + (CUR - 'A') + 10; 2356 else { 2357 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2358 val = 0; 2359 break; 2360 } 2361 if (val > 0x110000) 2362 val = 0x110000; 2363 2364 NEXT; 2365 count++; 2366 } 2367 if (RAW == ';') { 2368 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2369 ctxt->input->col++; 2370 ctxt->input->cur++; 2371 } 2372 } else if ((RAW == '&') && (NXT(1) == '#')) { 2373 SKIP(2); 2374 GROW; 2375 while (RAW != ';') { /* loop blocked by count */ 2376 if (count++ > 20) { 2377 count = 0; 2378 GROW; 2379 if (ctxt->instate == XML_PARSER_EOF) 2380 return(0); 2381 } 2382 if ((RAW >= '0') && (RAW <= '9')) 2383 val = val * 10 + (CUR - '0'); 2384 else { 2385 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2386 val = 0; 2387 break; 2388 } 2389 if (val > 0x110000) 2390 val = 0x110000; 2391 2392 NEXT; 2393 count++; 2394 } 2395 if (RAW == ';') { 2396 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2397 ctxt->input->col++; 2398 ctxt->input->cur++; 2399 } 2400 } else { 2401 xmlFatalErr(ctxt, 
XML_ERR_INVALID_CHARREF, NULL); 2402 } 2403 2404 /* 2405 * [ WFC: Legal Character ] 2406 * Characters referred to using character references must match the 2407 * production for Char. 2408 */ 2409 if (val >= 0x110000) { 2410 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2411 "xmlParseCharRef: character reference out of bounds\n", 2412 val); 2413 } else if (IS_CHAR(val)) { 2414 return(val); 2415 } else { 2416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2417 "xmlParseCharRef: invalid xmlChar value %d\n", 2418 val); 2419 } 2420 return(0); 2421 } 2422 2423 /** 2424 * xmlParseStringCharRef: 2425 * @ctxt: an XML parser context 2426 * @str: a pointer to an index in the string 2427 * 2428 * parse Reference declarations, variant parsing from a string rather 2429 * than an an input flow. 2430 * 2431 * [66] CharRef ::= '&#' [0-9]+ ';' | 2432 * '&#x' [0-9a-fA-F]+ ';' 2433 * 2434 * [ WFC: Legal Character ] 2435 * Characters referred to using character references must match the 2436 * production for Char. 
2437 * 2438 * Returns the value parsed (as an int), 0 in case of error, str will be 2439 * updated to the current value of the index 2440 */ 2441 static int 2442 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2443 const xmlChar *ptr; 2444 xmlChar cur; 2445 int val = 0; 2446 2447 if ((str == NULL) || (*str == NULL)) return(0); 2448 ptr = *str; 2449 cur = *ptr; 2450 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2451 ptr += 3; 2452 cur = *ptr; 2453 while (cur != ';') { /* Non input consuming loop */ 2454 if ((cur >= '0') && (cur <= '9')) 2455 val = val * 16 + (cur - '0'); 2456 else if ((cur >= 'a') && (cur <= 'f')) 2457 val = val * 16 + (cur - 'a') + 10; 2458 else if ((cur >= 'A') && (cur <= 'F')) 2459 val = val * 16 + (cur - 'A') + 10; 2460 else { 2461 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2462 val = 0; 2463 break; 2464 } 2465 if (val > 0x110000) 2466 val = 0x110000; 2467 2468 ptr++; 2469 cur = *ptr; 2470 } 2471 if (cur == ';') 2472 ptr++; 2473 } else if ((cur == '&') && (ptr[1] == '#')){ 2474 ptr += 2; 2475 cur = *ptr; 2476 while (cur != ';') { /* Non input consuming loops */ 2477 if ((cur >= '0') && (cur <= '9')) 2478 val = val * 10 + (cur - '0'); 2479 else { 2480 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2481 val = 0; 2482 break; 2483 } 2484 if (val > 0x110000) 2485 val = 0x110000; 2486 2487 ptr++; 2488 cur = *ptr; 2489 } 2490 if (cur == ';') 2491 ptr++; 2492 } else { 2493 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2494 return(0); 2495 } 2496 *str = ptr; 2497 2498 /* 2499 * [ WFC: Legal Character ] 2500 * Characters referred to using character references must match the 2501 * production for Char. 
2502 */ 2503 if (val >= 0x110000) { 2504 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2505 "xmlParseStringCharRef: character reference out of bounds\n", 2506 val); 2507 } else if (IS_CHAR(val)) { 2508 return(val); 2509 } else { 2510 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2511 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2512 val); 2513 } 2514 return(0); 2515 } 2516 2517 /** 2518 * xmlParserHandlePEReference: 2519 * @ctxt: the parser context 2520 * 2521 * [69] PEReference ::= '%' Name ';' 2522 * 2523 * [ WFC: No Recursion ] 2524 * A parsed entity must not contain a recursive 2525 * reference to itself, either directly or indirectly. 2526 * 2527 * [ WFC: Entity Declared ] 2528 * In a document without any DTD, a document with only an internal DTD 2529 * subset which contains no parameter entity references, or a document 2530 * with "standalone='yes'", ... ... The declaration of a parameter 2531 * entity must precede any reference to it... 2532 * 2533 * [ VC: Entity Declared ] 2534 * In a document with an external subset or external parameter entities 2535 * with "standalone='no'", ... ... The declaration of a parameter entity 2536 * must precede any reference to it... 2537 * 2538 * [ WFC: In DTD ] 2539 * Parameter-entity references may only appear in the DTD. 2540 * NOTE: misleading but this is handled. 2541 * 2542 * A PEReference may have been detected in the current input stream 2543 * the handling is done accordingly to 2544 * http://www.w3.org/TR/REC-xml#entproc 2545 * i.e. 
2546 * - Included in literal in entity values 2547 * - Included as Parameter Entity reference within DTDs 2548 */ 2549 void 2550 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2551 switch(ctxt->instate) { 2552 case XML_PARSER_CDATA_SECTION: 2553 return; 2554 case XML_PARSER_COMMENT: 2555 return; 2556 case XML_PARSER_START_TAG: 2557 return; 2558 case XML_PARSER_END_TAG: 2559 return; 2560 case XML_PARSER_EOF: 2561 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2562 return; 2563 case XML_PARSER_PROLOG: 2564 case XML_PARSER_START: 2565 case XML_PARSER_MISC: 2566 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2567 return; 2568 case XML_PARSER_ENTITY_DECL: 2569 case XML_PARSER_CONTENT: 2570 case XML_PARSER_ATTRIBUTE_VALUE: 2571 case XML_PARSER_PI: 2572 case XML_PARSER_SYSTEM_LITERAL: 2573 case XML_PARSER_PUBLIC_LITERAL: 2574 /* we just ignore it there */ 2575 return; 2576 case XML_PARSER_EPILOG: 2577 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2578 return; 2579 case XML_PARSER_ENTITY_VALUE: 2580 /* 2581 * NOTE: in the case of entity values, we don't do the 2582 * substitution here since we need the literal 2583 * entity value to be able to save the internal 2584 * subset of the document. 2585 * This will be handled by xmlStringDecodeEntities 2586 */ 2587 return; 2588 case XML_PARSER_DTD: 2589 /* 2590 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2591 * In the internal DTD subset, parameter-entity references 2592 * can occur only where markup declarations can occur, not 2593 * within markup declarations. 2594 * In that case this is handled in xmlParseMarkupDecl 2595 */ 2596 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2597 return; 2598 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2599 return; 2600 break; 2601 case XML_PARSER_IGNORE: 2602 return; 2603 } 2604 2605 xmlParsePEReference(ctxt); 2606 } 2607 2608 /* 2609 * Macro used to grow the current buffer. 
* buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus n. Control jumps to the
 * caller's mem_error label both when that computation wraps around and
 * when the reallocation fails; in the latter case the old buffer is left
 * untouched for the caller to free.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}

/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Character references are always decoded. General entity references are
 * substituted only if XML_SUBSTITUTE_REF is set in @what and parameter
 * entity references only if XML_SUBSTITUTE_PEREF is set. Entity content
 * is decoded recursively, with ctxt->depth tracking the nesting; the call
 * fails with XML_ERR_ENTITY_LOOP past a depth of 40 (1024 with
 * XML_PARSE_HUGE).
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments,
 *      allocation failure or decoding error.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;     /* output being built */
    size_t buffer_size = 0;     /* allocated size of buffer */
    size_t nbchars = 0;         /* bytes written to buffer so far */

    xmlChar *current = NULL;
    xmlChar *rep = NULL;        /* decoded entity content, owned here */
    const xmlChar *last;        /* one past the last byte to decode */
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
	return(NULL);
    last = str + len;

    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 1024)) {
	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     *
     * Every branch keeps at least XML_PARSER_BUFFER_SIZE bytes of slack
     * in the buffer after writing, which is what makes the unchecked
     * single stores below and the final terminator safe.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

	/* redundant with the loop condition, kept as is */
	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /*
	     * Character reference; the helper advances str itself.
	     * NOTE(review): str[1] and the helper are not bounded by
	     * last, this presumably relies on str being 0 terminated.
	     */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val == 0)
                goto int_error;
	    /* l is passed as 0 so the multi-byte encoder is always used */
	    COPY_BUF(0,buffer,nbchars,val);
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: emit the first content character */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
                    goto int_error;
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
		/* entity with content: decode it recursively and splice */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL) {
                    /* blank the content of an entity that failed to decode */
                    ent->content[0] = 0;
                    goto int_error;
                }

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    } else if (ent != NULL) {
		/* entity without content: copy the reference back as &name; */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEReferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}
		ctxt->depth++;
		/*
		 * If the content is still NULL here the callee returns
		 * NULL and the whole decoding fails through int_error.
		 */
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL) {
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    }
	} else {
	    /* ordinary character: copy it and step over its l bytes */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

/* allocation failures report first, then share the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions. This is xmlStringLenDecodeEntities() applied to the
 * whole string, its length being taken with xmlStrlen().
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL if @ctxt or @str is NULL or on failure.
 */
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
		        xmlChar end, xmlChar  end2, xmlChar end3) {
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
           end, end2, end3));
}

/************************************************************************
 *									*
 *		Commodity functions, cleanup needed ?
* 2849 * * 2850 ************************************************************************/ 2851 2852 /** 2853 * areBlanks: 2854 * @ctxt: an XML parser context 2855 * @str: a xmlChar * 2856 * @len: the size of @str 2857 * @blank_chars: we know the chars are blanks 2858 * 2859 * Is this a sequence of blank chars that one can ignore ? 2860 * 2861 * Returns 1 if ignorable 0 otherwise. 2862 */ 2863 2864 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2865 int blank_chars) { 2866 int i, ret; 2867 xmlNodePtr lastChild; 2868 2869 /* 2870 * Don't spend time trying to differentiate them, the same callback is 2871 * used ! 2872 */ 2873 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2874 return(0); 2875 2876 /* 2877 * Check for xml:space value. 2878 */ 2879 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2880 (*(ctxt->space) == -2)) 2881 return(0); 2882 2883 /* 2884 * Check that the string is made of blanks 2885 */ 2886 if (blank_chars == 0) { 2887 for (i = 0;i < len;i++) 2888 if (!(IS_BLANK_CH(str[i]))) return(0); 2889 } 2890 2891 /* 2892 * Look if the element is mixed content in the DTD if available 2893 */ 2894 if (ctxt->node == NULL) return(0); 2895 if (ctxt->myDoc != NULL) { 2896 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2897 if (ret == 0) return(1); 2898 if (ret == 1) return(0); 2899 } 2900 2901 /* 2902 * Otherwise, heuristic :-\ 2903 */ 2904 if ((RAW != '<') && (RAW != 0xD)) return(0); 2905 if ((ctxt->node->children == NULL) && 2906 (RAW == '<') && (NXT(1) == '/')) return(0); 2907 2908 lastChild = xmlGetLastChild(ctxt->node); 2909 if (lastChild == NULL) { 2910 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2911 (ctxt->node->content != NULL)) return(0); 2912 } else if (xmlNodeIsText(lastChild)) 2913 return(0); 2914 else if ((ctxt->node->children != NULL) && 2915 (xmlNodeIsText(ctxt->node->children))) 2916 return(0); 2917 return(1); 2918 } 2919 2920 
/************************************************************************
 *									*
 *		Extra stuff for namespace support			*
 *	Relates to http://www.w3.org/TR/WD-xml-names			*
 *									*
 ************************************************************************/

/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the qualified name to split
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * The name is split at the first ':'. Names starting with ':', names
 * ending right after their first ':' and (unless XML_XML_NAMESPACE is
 * defined) names starting with "xml:" are not split: a copy of the whole
 * name is returned and *@prefix is left NULL.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. Both strings are newly allocated. NULL is
 *   returned if @prefix or @name is NULL or if an allocation fails.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];   /* on-stack storage for short parts */
    xmlChar *buffer = NULL;             /* heap storage once buf is full */
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
	return(xmlStrdup(name));
#endif

    /* nasty but well-formed */
    if (cur[0] == ':')
	return(xmlStrdup(name));

    /*
     * First part: collect bytes up to the first ':' or the end of the
     * name. c always holds the byte just read, cur points past it.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
	buf[len++] = c;
	c = *cur++;
    }
    if (len >= max) {
	/*
	 * Okay someone managed to make a huge name, so he's ready to pay
	 * for the processing speed.
	 */
	max = len * 2;

	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	if (buffer == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    return(NULL);
	}
	memcpy(buffer, buf, len);
	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
	    /* keep some room ahead, the terminator is stored unchecked */
	    if (len + 10 > max) {
	        xmlChar *tmp;

		max *= 2;
		tmp = (xmlChar *) xmlRealloc(buffer,
						max * sizeof(xmlChar));
		if (tmp == NULL) {
		    xmlFree(buffer);
		    xmlErrMemory(ctxt, NULL);
		    return(NULL);
		}
		buffer = tmp;
	    }
	    buffer[len++] = c;
	    c = *cur++;
	}
	buffer[len] = 0;
    }

    /* a ':' with nothing after it: give the name back unsplit */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
	    xmlFree(buffer);
	*prefix = NULL;
	return(xmlStrdup(name));
    }

    if (buffer == NULL)
	ret = xmlStrndup(buf, len);
    else {
	/* hand the heap buffer over and reset the state for the second part */
	ret = buffer;
	buffer = NULL;
	max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
	/* what was collected so far is the prefix, the local part follows */
	c = *cur;
        *prefix = ret;
	/*
	 * NOTE(review): this looks unreachable, the "':' followed by the
	 * end of the name" case already returned above.
	 */
	if (c == 0) {
	    return(xmlStrndup(BAD_CAST "", 0));
	}
	len = 0;

	/*
	 * Check that the first character is proper to start
	 * a new name
	 */
	if (!(((c >= 0x61) && (c <= 0x7A)) ||
	      ((c >= 0x41) && (c <= 0x5A)) ||
	      (c == '_') || (c == ':'))) {
	    int l;
	    int first = CUR_SCHAR(cur, l);

	    /* only reported, the split goes on regardless */
	    if (!IS_LETTER(first) && (first != '_')) {
		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
			    "Name %s is not XML Namespace compliant\n",
				  name);
	    }
	}
	cur++;

	/* second part: everything up to the end of the name */
	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
	    buf[len++] = c;
	    c = *cur++;
	}
	if (len >= max) {
	    /*
	     * Okay someone managed to make a huge name, so he's ready to pay
	     * for the processing speed.
	     */
	    max = len * 2;

	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	    if (buffer == NULL) {
	        /* *prefix stays set here, it is left to the caller */
	        xmlErrMemory(ctxt, NULL);
		return(NULL);
	    }
	    memcpy(buffer, buf, len);
	    while (c != 0) { /* tested bigname2.xml */
		if (len + 10 > max) {
		    xmlChar *tmp;

		    max *= 2;
		    tmp = (xmlChar *) xmlRealloc(buffer,
						    max * sizeof(xmlChar));
		    if (tmp == NULL) {
			xmlErrMemory(ctxt, NULL);
			xmlFree(buffer);
			return(NULL);
		    }
		    buffer = tmp;
		}
		buffer[len++] = c;
		c = *cur++;
	    }
	    buffer[len] = 0;
	}

	if (buffer == NULL)
	    ret = xmlStrndup(buf, len);
	else {
	    ret = buffer;
	}
    }

    return(ret);
}

/************************************************************************
 *									*
 *			The parser itself				*
 *	Relates to http://www.w3.org/TR/REC-xml				*
 *									*
 ************************************************************************/

/************************************************************************
 *									*
 *	Routines to parse Name, NCName and NmToken			*
 *									*
 ************************************************************************/
#ifdef DEBUG
/* call counters of the name parsing routines, DEBUG builds only */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif

/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in the Revision 5 of XML-1.0
 * They correspond to the modified production [4] and the new production [4a]
 * changes in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
* We still keep compatibility to pre-revision5 parsing semantic if the
 * new XML_PARSE_OLD10 option is given to the parser.
 */
/*
 * xmlIsNameStartChar:
 * @ctxt:  the parser context, only its options are consulted
 * @c:  the character (code point) to test
 *
 * Returns 1 if @c may start a Name, 0 otherwise. The XML-1.0 revision 5
 * ranges are used unless XML_PARSE_OLD10 is set, in which case the test
 * is Letter | '_' | ':'.
 */
static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
	 * Use the new checks of production [4] [4a] and [5] of the
	 * Update 5 of XML-1.0
	 */
	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
	    (((c >= 'a') && (c <= 'z')) ||
	     ((c >= 'A') && (c <= 'Z')) ||
	     (c == '_') || (c == ':') ||
	     ((c >= 0xC0) && (c <= 0xD6)) ||
	     ((c >= 0xD8) && (c <= 0xF6)) ||
	     ((c >= 0xF8) && (c <= 0x2FF)) ||
	     ((c >= 0x370) && (c <= 0x37D)) ||
	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
	     ((c >= 0x200C) && (c <= 0x200D)) ||
	     ((c >= 0x2070) && (c <= 0x218F)) ||
	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
	     ((c >= 0x10000) && (c <= 0xEFFFF))))
	    return(1);
    } else {
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
	    return(1);
    }
    return(0);
}

/*
 * xmlIsNameChar:
 * @ctxt:  the parser context, only its options are consulted
 * @c:  the character (code point) to test
 *
 * Returns 1 if @c may appear inside a Name, 0 otherwise. Compared with
 * xmlIsNameStartChar() the revision 5 branch additionally accepts the
 * ranges tagged "!start" below; with XML_PARSE_OLD10 the test is
 * Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender.
 */
static int
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
	 * Use the new checks of production [4] [4a] and [5] of the
	 * Update 5 of XML-1.0
	 */
	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
	    (((c >= 'a') && (c <= 'z')) ||
	     ((c >= 'A') && (c <= 'Z')) ||
	     ((c >= '0') && (c <= '9')) || /* !start */
	     (c == '_') || (c == ':') ||
	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
	     ((c >= 0xC0) && (c <= 0xD6)) ||
	     ((c >= 0xD8) && (c <= 0xF6)) ||
	     ((c >= 0xF8) && (c <= 0x2FF)) ||
	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
	     ((c >= 0x370) && (c <= 0x37D)) ||
	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
	     ((c >= 0x200C) && (c <= 0x200D)) ||
	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
	     ((c >= 0x2070) && (c <= 0x218F)) ||
	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
	     ((c >= 0x10000) && (c <= 0xEFFFF))))
	     return(1);
    } else {
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
            (c == '.') || (c == '-') ||
	    (c == '_') || (c == ':') ||
	    (IS_COMBINING(c)) ||
	    (IS_EXTENDER(c)))
	    return(1);
    }
    return(0);
}

/* forward declaration, the definition is not in this part of the file */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);

/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * Parse a Name character by character from the current input, using the
 * same character classes as xmlIsNameStartChar() / xmlIsNameChar()
 * (written out inline here). The input is refilled with GROW every
 * XML_PARSER_CHUNK_SIZE characters.
 *
 * Returns the name, looked up in the context dictionary, or NULL if no
 * name starts at the cursor, if the parser reached the EOF state, if the
 * name is longer than XML_MAX_NAME_LENGTH (XML_MAX_TEXT_LENGTH with
 * XML_PARSE_HUGE) or if the input buffer no longer holds the name.
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* bytes in the name so far / bytes of the char */
    int c;
    int count = 0;      /* characters seen since the last GROW */
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                    XML_MAX_TEXT_LENGTH :
                    XML_MAX_NAME_LENGTH;

#ifdef DEBUG
    nbParseNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return(NULL);
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
	 * Use the new checks of production [4] [4a] and [5] of the
	 * Update 5 of XML-1.0
	 */
	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
	    (!(((c >= 'a') && (c <= 'z')) ||
	       ((c >= 'A') && (c <= 'Z')) ||
	       (c == '_') || (c == ':') ||
	       ((c >= 0xC0) && (c <= 0xD6)) ||
	       ((c >= 0xD8) && (c <= 0xF6)) ||
	       ((c >= 0xF8) && (c <= 0x2FF)) ||
	       ((c >= 0x370) && (c <= 0x37D)) ||
	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
	       ((c >= 0x200C) && (c <= 0x200D)) ||
	       ((c >= 0x2070) && (c <= 0x218F)) ||
	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
	    return(NULL);
	}
	len += l;
	NEXTL(l);
	c = CUR_CHAR(l);
	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
	       (((c >= 'a') && (c <= 'z')) ||
	        ((c >= 'A') && (c <= 'Z')) ||
	        ((c >= '0') && (c <= '9')) || /* !start */
	        (c == '_') || (c == ':') ||
	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
	        ((c >= 0xC0) && (c <= 0xD6)) ||
	        ((c >= 0xD8) && (c <= 0xF6)) ||
	        ((c >= 0xF8) && (c <= 0x2FF)) ||
	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
	        ((c >= 0x370) && (c <= 0x37D)) ||
	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
	        ((c >= 0x200C) && (c <= 0x200D)) ||
	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
	        ((c >= 0x2070) && (c <= 0x218F)) ||
	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
	        ((c >= 0x10000) && (c <= 0xEFFFF))
		)) {
	    if (count++ > XML_PARSER_CHUNK_SIZE) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
	    }
            /* saturate: len stops growing instead of overflowing */
            if (len <= INT_MAX - l)
	        len += l;
	    NEXTL(l);
	    c = CUR_CHAR(l);
	}
    } else {
	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
	    (!IS_LETTER(c) && (c != '_') &&
	     (c != ':'))) {
	    return(NULL);
	}
	len += l;
	NEXTL(l);
	c = CUR_CHAR(l);

	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
		(c == '.') || (c == '-') ||
		(c == '_') || (c == ':') ||
		(IS_COMBINING(c)) ||
		(IS_EXTENDER(c)))) {
	    if (count++ > XML_PARSER_CHUNK_SIZE) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
	    }
            /* saturate: len stops growing instead of overflowing */
            if (len <= INT_MAX - l)
	        len += l;
	    NEXTL(l);
	    c = CUR_CHAR(l);
	}
    }
    if (len > maxLength) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
        return(NULL);
    }
    if (ctxt->input->cur - ctxt->input->base < len) {
        /*
         * There were a couple of bugs where PERefs led to a change
         * of the buffer. Check the buffer size to avoid passing an invalid
         * pointer to xmlDictLookup.
         */
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "unexpected change of input buffer");
        return (NULL);
    }
    /*
     * The name is the len bytes ending at the cursor, except when the
     * cursor sits on a '\n' preceded by '\r': the name is then taken one
     * byte further back.
     * NOTE(review): presumably compensates for how the character reader
     * handles CR/LF pairs; confirm against xmlCurrentChar.
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}

/**
 * xmlParseName:
 * @ctxt:  an XML parser context
 *
 * parse an XML name.
 *
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
 *                  CombiningChar | Extender
 *
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 *
 * [6] Names ::= Name (#x20 Name)*
 *
 * Returns the Name parsed or NULL
 */

const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt) {
    const xmlChar *in;
    const xmlChar *ret;
    size_t count = 0;
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349 XML_MAX_TEXT_LENGTH : 3350 XML_MAX_NAME_LENGTH; 3351 3352 GROW; 3353 3354 #ifdef DEBUG 3355 nbParseName++; 3356 #endif 3357 3358 /* 3359 * Accelerator for simple ASCII names 3360 */ 3361 in = ctxt->input->cur; 3362 if (((*in >= 0x61) && (*in <= 0x7A)) || 3363 ((*in >= 0x41) && (*in <= 0x5A)) || 3364 (*in == '_') || (*in == ':')) { 3365 in++; 3366 while (((*in >= 0x61) && (*in <= 0x7A)) || 3367 ((*in >= 0x41) && (*in <= 0x5A)) || 3368 ((*in >= 0x30) && (*in <= 0x39)) || 3369 (*in == '_') || (*in == '-') || 3370 (*in == ':') || (*in == '.')) 3371 in++; 3372 if ((*in > 0) && (*in < 0x80)) { 3373 count = in - ctxt->input->cur; 3374 if (count > maxLength) { 3375 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3376 return(NULL); 3377 } 3378 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3379 ctxt->input->cur = in; 3380 ctxt->input->col += count; 3381 if (ret == NULL) 3382 xmlErrMemory(ctxt, NULL); 3383 return(ret); 3384 } 3385 } 3386 /* accelerator for special cases */ 3387 return(xmlParseNameComplex(ctxt)); 3388 } 3389 3390 static const xmlChar * 3391 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3392 int len = 0, l; 3393 int c; 3394 int count = 0; 3395 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
3396 XML_MAX_TEXT_LENGTH : 3397 XML_MAX_NAME_LENGTH; 3398 size_t startPosition = 0; 3399 3400 #ifdef DEBUG 3401 nbParseNCNameComplex++; 3402 #endif 3403 3404 /* 3405 * Handler for more complex cases 3406 */ 3407 GROW; 3408 startPosition = CUR_PTR - BASE_PTR; 3409 c = CUR_CHAR(l); 3410 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3411 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3412 return(NULL); 3413 } 3414 3415 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3416 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3417 if (count++ > XML_PARSER_CHUNK_SIZE) { 3418 count = 0; 3419 GROW; 3420 if (ctxt->instate == XML_PARSER_EOF) 3421 return(NULL); 3422 } 3423 if (len <= INT_MAX - l) 3424 len += l; 3425 NEXTL(l); 3426 c = CUR_CHAR(l); 3427 if (c == 0) { 3428 count = 0; 3429 /* 3430 * when shrinking to extend the buffer we really need to preserve 3431 * the part of the name we already parsed. Hence rolling back 3432 * by current length. 3433 */ 3434 ctxt->input->cur -= l; 3435 GROW; 3436 if (ctxt->instate == XML_PARSER_EOF) 3437 return(NULL); 3438 ctxt->input->cur += l; 3439 c = CUR_CHAR(l); 3440 } 3441 } 3442 if (len > maxLength) { 3443 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3444 return(NULL); 3445 } 3446 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); 3447 } 3448 3449 /** 3450 * xmlParseNCName: 3451 * @ctxt: an XML parser context 3452 * @len: length of the string parsed 3453 * 3454 * parse an XML name. 3455 * 3456 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3457 * CombiningChar | Extender 3458 * 3459 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3460 * 3461 * Returns the Name parsed or NULL 3462 */ 3463 3464 static const xmlChar * 3465 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3466 const xmlChar *in, *e; 3467 const xmlChar *ret; 3468 size_t count = 0; 3469 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
3470 XML_MAX_TEXT_LENGTH : 3471 XML_MAX_NAME_LENGTH; 3472 3473 #ifdef DEBUG 3474 nbParseNCName++; 3475 #endif 3476 3477 /* 3478 * Accelerator for simple ASCII names 3479 */ 3480 in = ctxt->input->cur; 3481 e = ctxt->input->end; 3482 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3483 ((*in >= 0x41) && (*in <= 0x5A)) || 3484 (*in == '_')) && (in < e)) { 3485 in++; 3486 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3487 ((*in >= 0x41) && (*in <= 0x5A)) || 3488 ((*in >= 0x30) && (*in <= 0x39)) || 3489 (*in == '_') || (*in == '-') || 3490 (*in == '.')) && (in < e)) 3491 in++; 3492 if (in >= e) 3493 goto complex; 3494 if ((*in > 0) && (*in < 0x80)) { 3495 count = in - ctxt->input->cur; 3496 if (count > maxLength) { 3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3498 return(NULL); 3499 } 3500 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3501 ctxt->input->cur = in; 3502 ctxt->input->col += count; 3503 if (ret == NULL) { 3504 xmlErrMemory(ctxt, NULL); 3505 } 3506 return(ret); 3507 } 3508 } 3509 complex: 3510 return(xmlParseNCNameComplex(ctxt)); 3511 } 3512 3513 /** 3514 * xmlParseNameAndCompare: 3515 * @ctxt: an XML parser context 3516 * 3517 * parse an XML name and compares for match 3518 * (specialized for endtag parsing) 3519 * 3520 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3521 * and the name for mismatch 3522 */ 3523 3524 static const xmlChar * 3525 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3526 register const xmlChar *cmp = other; 3527 register const xmlChar *in; 3528 const xmlChar *ret; 3529 3530 GROW; 3531 if (ctxt->instate == XML_PARSER_EOF) 3532 return(NULL); 3533 3534 in = ctxt->input->cur; 3535 while (*in != 0 && *in == *cmp) { 3536 ++in; 3537 ++cmp; 3538 } 3539 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3540 /* success */ 3541 ctxt->input->col += in - ctxt->input->cur; 3542 ctxt->input->cur = in; 3543 return (const xmlChar*) 1; 3544 } 3545 /* failure (or end of input buffer), check with 
full function */ 3546 ret = xmlParseName (ctxt); 3547 /* strings coming from the dictionary direct compare possible */ 3548 if (ret == other) { 3549 return (const xmlChar*) 1; 3550 } 3551 return ret; 3552 } 3553 3554 /** 3555 * xmlParseStringName: 3556 * @ctxt: an XML parser context 3557 * @str: a pointer to the string pointer (IN/OUT) 3558 * 3559 * parse an XML name. 3560 * 3561 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3562 * CombiningChar | Extender 3563 * 3564 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3565 * 3566 * [6] Names ::= Name (#x20 Name)* 3567 * 3568 * Returns the Name parsed or NULL. The @str pointer 3569 * is updated to the current location in the string. 3570 */ 3571 3572 static xmlChar * 3573 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3574 xmlChar buf[XML_MAX_NAMELEN + 5]; 3575 const xmlChar *cur = *str; 3576 int len = 0, l; 3577 int c; 3578 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 3579 XML_MAX_TEXT_LENGTH : 3580 XML_MAX_NAME_LENGTH; 3581 3582 #ifdef DEBUG 3583 nbParseStringName++; 3584 #endif 3585 3586 c = CUR_SCHAR(cur, l); 3587 if (!xmlIsNameStartChar(ctxt, c)) { 3588 return(NULL); 3589 } 3590 3591 COPY_BUF(l,buf,len,c); 3592 cur += l; 3593 c = CUR_SCHAR(cur, l); 3594 while (xmlIsNameChar(ctxt, c)) { 3595 COPY_BUF(l,buf,len,c); 3596 cur += l; 3597 c = CUR_SCHAR(cur, l); 3598 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3599 /* 3600 * Okay someone managed to make a huge name, so he's ready to pay 3601 * for the processing speed. 
3602 */ 3603 xmlChar *buffer; 3604 int max = len * 2; 3605 3606 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3607 if (buffer == NULL) { 3608 xmlErrMemory(ctxt, NULL); 3609 return(NULL); 3610 } 3611 memcpy(buffer, buf, len); 3612 while (xmlIsNameChar(ctxt, c)) { 3613 if (len + 10 > max) { 3614 xmlChar *tmp; 3615 3616 max *= 2; 3617 tmp = (xmlChar *) xmlRealloc(buffer, 3618 max * sizeof(xmlChar)); 3619 if (tmp == NULL) { 3620 xmlErrMemory(ctxt, NULL); 3621 xmlFree(buffer); 3622 return(NULL); 3623 } 3624 buffer = tmp; 3625 } 3626 COPY_BUF(l,buffer,len,c); 3627 cur += l; 3628 c = CUR_SCHAR(cur, l); 3629 if (len > maxLength) { 3630 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3631 xmlFree(buffer); 3632 return(NULL); 3633 } 3634 } 3635 buffer[len] = 0; 3636 *str = cur; 3637 return(buffer); 3638 } 3639 } 3640 if (len > maxLength) { 3641 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3642 return(NULL); 3643 } 3644 *str = cur; 3645 return(xmlStrndup(buf, len)); 3646 } 3647 3648 /** 3649 * xmlParseNmtoken: 3650 * @ctxt: an XML parser context 3651 * 3652 * parse an XML Nmtoken. 3653 * 3654 * [7] Nmtoken ::= (NameChar)+ 3655 * 3656 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3657 * 3658 * Returns the Nmtoken parsed or NULL 3659 */ 3660 3661 xmlChar * 3662 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3663 xmlChar buf[XML_MAX_NAMELEN + 5]; 3664 int len = 0, l; 3665 int c; 3666 int count = 0; 3667 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
3668 XML_MAX_TEXT_LENGTH : 3669 XML_MAX_NAME_LENGTH; 3670 3671 #ifdef DEBUG 3672 nbParseNmToken++; 3673 #endif 3674 3675 GROW; 3676 if (ctxt->instate == XML_PARSER_EOF) 3677 return(NULL); 3678 c = CUR_CHAR(l); 3679 3680 while (xmlIsNameChar(ctxt, c)) { 3681 if (count++ > XML_PARSER_CHUNK_SIZE) { 3682 count = 0; 3683 GROW; 3684 } 3685 COPY_BUF(l,buf,len,c); 3686 NEXTL(l); 3687 c = CUR_CHAR(l); 3688 if (c == 0) { 3689 count = 0; 3690 GROW; 3691 if (ctxt->instate == XML_PARSER_EOF) 3692 return(NULL); 3693 c = CUR_CHAR(l); 3694 } 3695 if (len >= XML_MAX_NAMELEN) { 3696 /* 3697 * Okay someone managed to make a huge token, so he's ready to pay 3698 * for the processing speed. 3699 */ 3700 xmlChar *buffer; 3701 int max = len * 2; 3702 3703 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3704 if (buffer == NULL) { 3705 xmlErrMemory(ctxt, NULL); 3706 return(NULL); 3707 } 3708 memcpy(buffer, buf, len); 3709 while (xmlIsNameChar(ctxt, c)) { 3710 if (count++ > XML_PARSER_CHUNK_SIZE) { 3711 count = 0; 3712 GROW; 3713 if (ctxt->instate == XML_PARSER_EOF) { 3714 xmlFree(buffer); 3715 return(NULL); 3716 } 3717 } 3718 if (len + 10 > max) { 3719 xmlChar *tmp; 3720 3721 max *= 2; 3722 tmp = (xmlChar *) xmlRealloc(buffer, 3723 max * sizeof(xmlChar)); 3724 if (tmp == NULL) { 3725 xmlErrMemory(ctxt, NULL); 3726 xmlFree(buffer); 3727 return(NULL); 3728 } 3729 buffer = tmp; 3730 } 3731 COPY_BUF(l,buffer,len,c); 3732 NEXTL(l); 3733 c = CUR_CHAR(l); 3734 if (len > maxLength) { 3735 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3736 xmlFree(buffer); 3737 return(NULL); 3738 } 3739 } 3740 buffer[len] = 0; 3741 return(buffer); 3742 } 3743 } 3744 if (len == 0) 3745 return(NULL); 3746 if (len > maxLength) { 3747 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3748 return(NULL); 3749 } 3750 return(xmlStrndup(buf, len)); 3751 } 3752 3753 /** 3754 * xmlParseEntityValue: 3755 * @ctxt: an XML parser context 3756 * @orig: if non-NULL store a copy of the original entity value 
3757 * 3758 * parse a value for ENTITY declarations 3759 * 3760 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3761 * "'" ([^%&'] | PEReference | Reference)* "'" 3762 * 3763 * Returns the EntityValue parsed with reference substituted or NULL 3764 */ 3765 3766 xmlChar * 3767 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3768 xmlChar *buf = NULL; 3769 int len = 0; 3770 int size = XML_PARSER_BUFFER_SIZE; 3771 int c, l; 3772 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 3773 XML_MAX_HUGE_LENGTH : 3774 XML_MAX_TEXT_LENGTH; 3775 xmlChar stop; 3776 xmlChar *ret = NULL; 3777 const xmlChar *cur = NULL; 3778 xmlParserInputPtr input; 3779 3780 if (RAW == '"') stop = '"'; 3781 else if (RAW == '\'') stop = '\''; 3782 else { 3783 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3784 return(NULL); 3785 } 3786 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3787 if (buf == NULL) { 3788 xmlErrMemory(ctxt, NULL); 3789 return(NULL); 3790 } 3791 3792 /* 3793 * The content of the entity definition is copied in a buffer. 3794 */ 3795 3796 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3797 input = ctxt->input; 3798 GROW; 3799 if (ctxt->instate == XML_PARSER_EOF) 3800 goto error; 3801 NEXT; 3802 c = CUR_CHAR(l); 3803 /* 3804 * NOTE: 4.4.5 Included in Literal 3805 * When a parameter entity reference appears in a literal entity 3806 * value, ... a single or double quote character in the replacement 3807 * text is always treated as a normal data character and will not 3808 * terminate the literal. 
3809 * In practice it means we stop the loop only when back at parsing 3810 * the initial entity and the quote is found 3811 */ 3812 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3813 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3814 if (len + 5 >= size) { 3815 xmlChar *tmp; 3816 3817 size *= 2; 3818 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3819 if (tmp == NULL) { 3820 xmlErrMemory(ctxt, NULL); 3821 goto error; 3822 } 3823 buf = tmp; 3824 } 3825 COPY_BUF(l,buf,len,c); 3826 NEXTL(l); 3827 3828 GROW; 3829 c = CUR_CHAR(l); 3830 if (c == 0) { 3831 GROW; 3832 c = CUR_CHAR(l); 3833 } 3834 3835 if (len > maxLength) { 3836 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 3837 "entity value too long\n"); 3838 goto error; 3839 } 3840 } 3841 buf[len] = 0; 3842 if (ctxt->instate == XML_PARSER_EOF) 3843 goto error; 3844 if (c != stop) { 3845 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3846 goto error; 3847 } 3848 NEXT; 3849 3850 /* 3851 * Raise problem w.r.t. '&' and '%' being used in non-entities 3852 * reference constructs. Note Charref will be handled in 3853 * xmlStringDecodeEntities() 3854 */ 3855 cur = buf; 3856 while (*cur != 0) { /* non input consuming */ 3857 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3858 xmlChar *name; 3859 xmlChar tmp = *cur; 3860 int nameOk = 0; 3861 3862 cur++; 3863 name = xmlParseStringName(ctxt, &cur); 3864 if (name != NULL) { 3865 nameOk = 1; 3866 xmlFree(name); 3867 } 3868 if ((nameOk == 0) || (*cur != ';')) { 3869 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3870 "EntityValue: '%c' forbidden except for entities references\n", 3871 tmp); 3872 goto error; 3873 } 3874 if ((tmp == '%') && (ctxt->inSubset == 1) && 3875 (ctxt->inputNr == 1)) { 3876 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3877 goto error; 3878 } 3879 if (*cur == 0) 3880 break; 3881 } 3882 cur++; 3883 } 3884 3885 /* 3886 * Then PEReference entities are substituted. 
3887 * 3888 * NOTE: 4.4.7 Bypassed 3889 * When a general entity reference appears in the EntityValue in 3890 * an entity declaration, it is bypassed and left as is. 3891 * so XML_SUBSTITUTE_REF is not set here. 3892 */ 3893 ++ctxt->depth; 3894 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3895 0, 0, 0); 3896 --ctxt->depth; 3897 if (orig != NULL) { 3898 *orig = buf; 3899 buf = NULL; 3900 } 3901 3902 error: 3903 if (buf != NULL) 3904 xmlFree(buf); 3905 return(ret); 3906 } 3907 3908 /** 3909 * xmlParseAttValueComplex: 3910 * @ctxt: an XML parser context 3911 * @len: the resulting attribute len 3912 * @normalize: whether to apply the inner normalization 3913 * 3914 * parse a value for an attribute, this is the fallback function 3915 * of xmlParseAttValue() when the attribute parsing requires handling 3916 * of non-ASCII characters, or normalization compaction. 3917 * 3918 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3919 */ 3920 static xmlChar * 3921 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3922 xmlChar limit = 0; 3923 xmlChar *buf = NULL; 3924 xmlChar *rep = NULL; 3925 size_t len = 0; 3926 size_t buf_size = 0; 3927 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 3928 XML_MAX_HUGE_LENGTH : 3929 XML_MAX_TEXT_LENGTH; 3930 int c, l, in_space = 0; 3931 xmlChar *current = NULL; 3932 xmlEntityPtr ent; 3933 3934 if (NXT(0) == '"') { 3935 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3936 limit = '"'; 3937 NEXT; 3938 } else if (NXT(0) == '\'') { 3939 limit = '\''; 3940 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3941 NEXT; 3942 } else { 3943 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3944 return(NULL); 3945 } 3946 3947 /* 3948 * allocate a translation buffer. 3949 */ 3950 buf_size = XML_PARSER_BUFFER_SIZE; 3951 buf = (xmlChar *) xmlMallocAtomic(buf_size); 3952 if (buf == NULL) goto mem_error; 3953 3954 /* 3955 * OK loop until we reach one of the ending char or a size limit. 
3956 */ 3957 c = CUR_CHAR(l); 3958 while (((NXT(0) != limit) && /* checked */ 3959 (IS_CHAR(c)) && (c != '<')) && 3960 (ctxt->instate != XML_PARSER_EOF)) { 3961 if (c == '&') { 3962 in_space = 0; 3963 if (NXT(1) == '#') { 3964 int val = xmlParseCharRef(ctxt); 3965 3966 if (val == '&') { 3967 if (ctxt->replaceEntities) { 3968 if (len + 10 > buf_size) { 3969 growBuffer(buf, 10); 3970 } 3971 buf[len++] = '&'; 3972 } else { 3973 /* 3974 * The reparsing will be done in xmlStringGetNodeList() 3975 * called by the attribute() function in SAX.c 3976 */ 3977 if (len + 10 > buf_size) { 3978 growBuffer(buf, 10); 3979 } 3980 buf[len++] = '&'; 3981 buf[len++] = '#'; 3982 buf[len++] = '3'; 3983 buf[len++] = '8'; 3984 buf[len++] = ';'; 3985 } 3986 } else if (val != 0) { 3987 if (len + 10 > buf_size) { 3988 growBuffer(buf, 10); 3989 } 3990 len += xmlCopyChar(0, &buf[len], val); 3991 } 3992 } else { 3993 ent = xmlParseEntityRef(ctxt); 3994 ctxt->nbentities++; 3995 if (ent != NULL) 3996 ctxt->nbentities += ent->owner; 3997 if ((ent != NULL) && 3998 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3999 if (len + 10 > buf_size) { 4000 growBuffer(buf, 10); 4001 } 4002 if ((ctxt->replaceEntities == 0) && 4003 (ent->content[0] == '&')) { 4004 buf[len++] = '&'; 4005 buf[len++] = '#'; 4006 buf[len++] = '3'; 4007 buf[len++] = '8'; 4008 buf[len++] = ';'; 4009 } else { 4010 buf[len++] = ent->content[0]; 4011 } 4012 } else if ((ent != NULL) && 4013 (ctxt->replaceEntities != 0)) { 4014 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4015 ++ctxt->depth; 4016 rep = xmlStringDecodeEntities(ctxt, ent->content, 4017 XML_SUBSTITUTE_REF, 4018 0, 0, 0); 4019 --ctxt->depth; 4020 if (rep != NULL) { 4021 current = rep; 4022 while (*current != 0) { /* non input consuming */ 4023 if ((*current == 0xD) || (*current == 0xA) || 4024 (*current == 0x9)) { 4025 buf[len++] = 0x20; 4026 current++; 4027 } else 4028 buf[len++] = *current++; 4029 if (len + 10 > buf_size) { 4030 growBuffer(buf, 10); 4031 } 4032 } 
4033 xmlFree(rep); 4034 rep = NULL; 4035 } 4036 } else { 4037 if (len + 10 > buf_size) { 4038 growBuffer(buf, 10); 4039 } 4040 if (ent->content != NULL) 4041 buf[len++] = ent->content[0]; 4042 } 4043 } else if (ent != NULL) { 4044 int i = xmlStrlen(ent->name); 4045 const xmlChar *cur = ent->name; 4046 4047 /* 4048 * This may look absurd but is needed to detect 4049 * entities problems 4050 */ 4051 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4052 (ent->content != NULL) && (ent->checked == 0)) { 4053 unsigned long oldnbent = ctxt->nbentities, diff; 4054 4055 ++ctxt->depth; 4056 rep = xmlStringDecodeEntities(ctxt, ent->content, 4057 XML_SUBSTITUTE_REF, 0, 0, 0); 4058 --ctxt->depth; 4059 4060 diff = ctxt->nbentities - oldnbent + 1; 4061 if (diff > INT_MAX / 2) 4062 diff = INT_MAX / 2; 4063 ent->checked = diff * 2; 4064 if (rep != NULL) { 4065 if (xmlStrchr(rep, '<')) 4066 ent->checked |= 1; 4067 xmlFree(rep); 4068 rep = NULL; 4069 } else { 4070 ent->content[0] = 0; 4071 } 4072 } 4073 4074 /* 4075 * Just output the reference 4076 */ 4077 buf[len++] = '&'; 4078 while (len + i + 10 > buf_size) { 4079 growBuffer(buf, i + 10); 4080 } 4081 for (;i > 0;i--) 4082 buf[len++] = *cur++; 4083 buf[len++] = ';'; 4084 } 4085 } 4086 } else { 4087 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4088 if ((len != 0) || (!normalize)) { 4089 if ((!normalize) || (!in_space)) { 4090 COPY_BUF(l,buf,len,0x20); 4091 while (len + 10 > buf_size) { 4092 growBuffer(buf, 10); 4093 } 4094 } 4095 in_space = 1; 4096 } 4097 } else { 4098 in_space = 0; 4099 COPY_BUF(l,buf,len,c); 4100 if (len + 10 > buf_size) { 4101 growBuffer(buf, 10); 4102 } 4103 } 4104 NEXTL(l); 4105 } 4106 GROW; 4107 c = CUR_CHAR(l); 4108 if (len > maxLength) { 4109 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4110 "AttValue length too long\n"); 4111 goto mem_error; 4112 } 4113 } 4114 if (ctxt->instate == XML_PARSER_EOF) 4115 goto error; 4116 4117 if ((in_space) && (normalize)) { 4118 while ((len > 0) 
&& (buf[len - 1] == 0x20)) len--; 4119 } 4120 buf[len] = 0; 4121 if (RAW == '<') { 4122 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4123 } else if (RAW != limit) { 4124 if ((c != 0) && (!IS_CHAR(c))) { 4125 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4126 "invalid character in attribute value\n"); 4127 } else { 4128 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4129 "AttValue: ' expected\n"); 4130 } 4131 } else 4132 NEXT; 4133 4134 if (attlen != NULL) *attlen = (int) len; 4135 return(buf); 4136 4137 mem_error: 4138 xmlErrMemory(ctxt, NULL); 4139 error: 4140 if (buf != NULL) 4141 xmlFree(buf); 4142 if (rep != NULL) 4143 xmlFree(rep); 4144 return(NULL); 4145 } 4146 4147 /** 4148 * xmlParseAttValue: 4149 * @ctxt: an XML parser context 4150 * 4151 * parse a value for an attribute 4152 * Note: the parser won't do substitution of entities here, this 4153 * will be handled later in xmlStringGetNodeList 4154 * 4155 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4156 * "'" ([^<&'] | Reference)* "'" 4157 * 4158 * 3.3.3 Attribute-Value Normalization: 4159 * Before the value of an attribute is passed to the application or 4160 * checked for validity, the XML processor must normalize it as follows: 4161 * - a character reference is processed by appending the referenced 4162 * character to the attribute value 4163 * - an entity reference is processed by recursively processing the 4164 * replacement text of the entity 4165 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4166 * appending #x20 to the normalized value, except that only a single 4167 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4168 * parsed entity or the literal entity value of an internal parsed entity 4169 * - other characters are processed by appending them to the normalized value 4170 * If the declared value is not CDATA, then the XML processor must further 4171 * process the normalized attribute value by discarding any leading and 4172 * trailing space 
(#x20) characters, and by replacing sequences of space 4173 * (#x20) characters by a single space (#x20) character. 4174 * All attributes for which no declaration has been read should be treated 4175 * by a non-validating parser as if declared CDATA. 4176 * 4177 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4178 */ 4179 4180 4181 xmlChar * 4182 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4183 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4184 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4185 } 4186 4187 /** 4188 * xmlParseSystemLiteral: 4189 * @ctxt: an XML parser context 4190 * 4191 * parse an XML Literal 4192 * 4193 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4194 * 4195 * Returns the SystemLiteral parsed or NULL 4196 */ 4197 4198 xmlChar * 4199 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4200 xmlChar *buf = NULL; 4201 int len = 0; 4202 int size = XML_PARSER_BUFFER_SIZE; 4203 int cur, l; 4204 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
4205 XML_MAX_TEXT_LENGTH : 4206 XML_MAX_NAME_LENGTH; 4207 xmlChar stop; 4208 int state = ctxt->instate; 4209 int count = 0; 4210 4211 SHRINK; 4212 if (RAW == '"') { 4213 NEXT; 4214 stop = '"'; 4215 } else if (RAW == '\'') { 4216 NEXT; 4217 stop = '\''; 4218 } else { 4219 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4220 return(NULL); 4221 } 4222 4223 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4224 if (buf == NULL) { 4225 xmlErrMemory(ctxt, NULL); 4226 return(NULL); 4227 } 4228 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4229 cur = CUR_CHAR(l); 4230 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4231 if (len + 5 >= size) { 4232 xmlChar *tmp; 4233 4234 size *= 2; 4235 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4236 if (tmp == NULL) { 4237 xmlFree(buf); 4238 xmlErrMemory(ctxt, NULL); 4239 ctxt->instate = (xmlParserInputState) state; 4240 return(NULL); 4241 } 4242 buf = tmp; 4243 } 4244 count++; 4245 if (count > 50) { 4246 SHRINK; 4247 GROW; 4248 count = 0; 4249 if (ctxt->instate == XML_PARSER_EOF) { 4250 xmlFree(buf); 4251 return(NULL); 4252 } 4253 } 4254 COPY_BUF(l,buf,len,cur); 4255 NEXTL(l); 4256 cur = CUR_CHAR(l); 4257 if (cur == 0) { 4258 GROW; 4259 SHRINK; 4260 cur = CUR_CHAR(l); 4261 } 4262 if (len > maxLength) { 4263 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4264 xmlFree(buf); 4265 ctxt->instate = (xmlParserInputState) state; 4266 return(NULL); 4267 } 4268 } 4269 buf[len] = 0; 4270 ctxt->instate = (xmlParserInputState) state; 4271 if (!IS_CHAR(cur)) { 4272 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4273 } else { 4274 NEXT; 4275 } 4276 return(buf); 4277 } 4278 4279 /** 4280 * xmlParsePubidLiteral: 4281 * @ctxt: an XML parser context 4282 * 4283 * parse an XML public literal 4284 * 4285 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4286 * 4287 * Returns the PubidLiteral parsed or NULL. 
4288 */ 4289 4290 xmlChar * 4291 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4292 xmlChar *buf = NULL; 4293 int len = 0; 4294 int size = XML_PARSER_BUFFER_SIZE; 4295 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 4296 XML_MAX_TEXT_LENGTH : 4297 XML_MAX_NAME_LENGTH; 4298 xmlChar cur; 4299 xmlChar stop; 4300 int count = 0; 4301 xmlParserInputState oldstate = ctxt->instate; 4302 4303 SHRINK; 4304 if (RAW == '"') { 4305 NEXT; 4306 stop = '"'; 4307 } else if (RAW == '\'') { 4308 NEXT; 4309 stop = '\''; 4310 } else { 4311 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4312 return(NULL); 4313 } 4314 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4315 if (buf == NULL) { 4316 xmlErrMemory(ctxt, NULL); 4317 return(NULL); 4318 } 4319 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4320 cur = CUR; 4321 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4322 if (len + 1 >= size) { 4323 xmlChar *tmp; 4324 4325 size *= 2; 4326 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4327 if (tmp == NULL) { 4328 xmlErrMemory(ctxt, NULL); 4329 xmlFree(buf); 4330 return(NULL); 4331 } 4332 buf = tmp; 4333 } 4334 buf[len++] = cur; 4335 count++; 4336 if (count > 50) { 4337 SHRINK; 4338 GROW; 4339 count = 0; 4340 if (ctxt->instate == XML_PARSER_EOF) { 4341 xmlFree(buf); 4342 return(NULL); 4343 } 4344 } 4345 NEXT; 4346 cur = CUR; 4347 if (cur == 0) { 4348 GROW; 4349 SHRINK; 4350 cur = CUR; 4351 } 4352 if (len > maxLength) { 4353 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4354 xmlFree(buf); 4355 return(NULL); 4356 } 4357 } 4358 buf[len] = 0; 4359 if (cur != stop) { 4360 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4361 } else { 4362 NEXT; 4363 } 4364 ctxt->instate = oldstate; 4365 return(buf); 4366 } 4367 4368 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4369 4370 /* 4371 * used for the test in the inner loop of the char data testing 4372 */ 4373 static const unsigned char test_char_data[256] = { 4374 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4375 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4376 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4377 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4378 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4379 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4380 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4381 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4382 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4383 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4384 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4385 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4386 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4387 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4388 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4389 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4391 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4392 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4393 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4394 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4395 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4396 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4397 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4398 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4399 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4400 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4401 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4402 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4403 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4404 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4405 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4406 }; 4407 4408 /** 4409 * xmlParseCharData: 4410 * @ctxt: an XML parser context 4411 * @cdata: int indicating whether we are within a CDATA section 4412 * 4413 * parse a CharData section. 
4414 * if we are within a CDATA section ']]>' marks an end of section. 4415 * 4416 * The right angle bracket (>) may be represented using the string ">", 4417 * and must, for compatibility, be escaped using ">" or a character 4418 * reference when it appears in the string "]]>" in content, when that 4419 * string is not marking the end of a CDATA section. 4420 * 4421 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4422 */ 4423 4424 void 4425 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4426 const xmlChar *in; 4427 int nbchar = 0; 4428 int line = ctxt->input->line; 4429 int col = ctxt->input->col; 4430 int ccol; 4431 4432 SHRINK; 4433 GROW; 4434 /* 4435 * Accelerated common case where input don't need to be 4436 * modified before passing it to the handler. 4437 */ 4438 if (!cdata) { 4439 in = ctxt->input->cur; 4440 do { 4441 get_more_space: 4442 while (*in == 0x20) { in++; ctxt->input->col++; } 4443 if (*in == 0xA) { 4444 do { 4445 ctxt->input->line++; ctxt->input->col = 1; 4446 in++; 4447 } while (*in == 0xA); 4448 goto get_more_space; 4449 } 4450 if (*in == '<') { 4451 nbchar = in - ctxt->input->cur; 4452 if (nbchar > 0) { 4453 const xmlChar *tmp = ctxt->input->cur; 4454 ctxt->input->cur = in; 4455 4456 if ((ctxt->sax != NULL) && 4457 (ctxt->sax->ignorableWhitespace != 4458 ctxt->sax->characters)) { 4459 if (areBlanks(ctxt, tmp, nbchar, 1)) { 4460 if (ctxt->sax->ignorableWhitespace != NULL) 4461 ctxt->sax->ignorableWhitespace(ctxt->userData, 4462 tmp, nbchar); 4463 } else { 4464 if (ctxt->sax->characters != NULL) 4465 ctxt->sax->characters(ctxt->userData, 4466 tmp, nbchar); 4467 if (*ctxt->space == -1) 4468 *ctxt->space = -2; 4469 } 4470 } else if ((ctxt->sax != NULL) && 4471 (ctxt->sax->characters != NULL)) { 4472 ctxt->sax->characters(ctxt->userData, 4473 tmp, nbchar); 4474 } 4475 } 4476 return; 4477 } 4478 4479 get_more: 4480 ccol = ctxt->input->col; 4481 while (test_char_data[*in]) { 4482 in++; 4483 ccol++; 4484 } 4485 ctxt->input->col = ccol; 4486 
if (*in == 0xA) { 4487 do { 4488 ctxt->input->line++; ctxt->input->col = 1; 4489 in++; 4490 } while (*in == 0xA); 4491 goto get_more; 4492 } 4493 if (*in == ']') { 4494 if ((in[1] == ']') && (in[2] == '>')) { 4495 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4496 ctxt->input->cur = in + 1; 4497 return; 4498 } 4499 in++; 4500 ctxt->input->col++; 4501 goto get_more; 4502 } 4503 nbchar = in - ctxt->input->cur; 4504 if (nbchar > 0) { 4505 if ((ctxt->sax != NULL) && 4506 (ctxt->sax->ignorableWhitespace != 4507 ctxt->sax->characters) && 4508 (IS_BLANK_CH(*ctxt->input->cur))) { 4509 const xmlChar *tmp = ctxt->input->cur; 4510 ctxt->input->cur = in; 4511 4512 if (areBlanks(ctxt, tmp, nbchar, 0)) { 4513 if (ctxt->sax->ignorableWhitespace != NULL) 4514 ctxt->sax->ignorableWhitespace(ctxt->userData, 4515 tmp, nbchar); 4516 } else { 4517 if (ctxt->sax->characters != NULL) 4518 ctxt->sax->characters(ctxt->userData, 4519 tmp, nbchar); 4520 if (*ctxt->space == -1) 4521 *ctxt->space = -2; 4522 } 4523 line = ctxt->input->line; 4524 col = ctxt->input->col; 4525 } else if (ctxt->sax != NULL) { 4526 if (ctxt->sax->characters != NULL) 4527 ctxt->sax->characters(ctxt->userData, 4528 ctxt->input->cur, nbchar); 4529 line = ctxt->input->line; 4530 col = ctxt->input->col; 4531 } 4532 /* something really bad happened in the SAX callback */ 4533 if (ctxt->instate != XML_PARSER_CONTENT) 4534 return; 4535 } 4536 ctxt->input->cur = in; 4537 if (*in == 0xD) { 4538 in++; 4539 if (*in == 0xA) { 4540 ctxt->input->cur = in; 4541 in++; 4542 ctxt->input->line++; ctxt->input->col = 1; 4543 continue; /* while */ 4544 } 4545 in--; 4546 } 4547 if (*in == '<') { 4548 return; 4549 } 4550 if (*in == '&') { 4551 return; 4552 } 4553 SHRINK; 4554 GROW; 4555 if (ctxt->instate == XML_PARSER_EOF) 4556 return; 4557 in = ctxt->input->cur; 4558 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); 4559 nbchar = 0; 4560 } 4561 ctxt->input->line = line; 4562 ctxt->input->col = col; 4563 
xmlParseCharDataComplex(ctxt, cdata); 4564 } 4565 4566 /** 4567 * xmlParseCharDataComplex: 4568 * @ctxt: an XML parser context 4569 * @cdata: int indicating whether we are within a CDATA section 4570 * 4571 * parse a CharData section.this is the fallback function 4572 * of xmlParseCharData() when the parsing requires handling 4573 * of non-ASCII characters. 4574 */ 4575 static void 4576 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4577 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4578 int nbchar = 0; 4579 int cur, l; 4580 int count = 0; 4581 4582 SHRINK; 4583 GROW; 4584 cur = CUR_CHAR(l); 4585 while ((cur != '<') && /* checked */ 4586 (cur != '&') && 4587 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4588 if ((cur == ']') && (NXT(1) == ']') && 4589 (NXT(2) == '>')) { 4590 if (cdata) break; 4591 else { 4592 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4593 } 4594 } 4595 COPY_BUF(l,buf,nbchar,cur); 4596 /* move current position before possible calling of ctxt->sax->characters */ 4597 NEXTL(l); 4598 cur = CUR_CHAR(l); 4599 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4600 buf[nbchar] = 0; 4601 4602 /* 4603 * OK the segment is to be consumed as chars. 
4604 */ 4605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4606 if (areBlanks(ctxt, buf, nbchar, 0)) { 4607 if (ctxt->sax->ignorableWhitespace != NULL) 4608 ctxt->sax->ignorableWhitespace(ctxt->userData, 4609 buf, nbchar); 4610 } else { 4611 if (ctxt->sax->characters != NULL) 4612 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4613 if ((ctxt->sax->characters != 4614 ctxt->sax->ignorableWhitespace) && 4615 (*ctxt->space == -1)) 4616 *ctxt->space = -2; 4617 } 4618 } 4619 nbchar = 0; 4620 /* something really bad happened in the SAX callback */ 4621 if (ctxt->instate != XML_PARSER_CONTENT) 4622 return; 4623 } 4624 count++; 4625 if (count > 50) { 4626 SHRINK; 4627 GROW; 4628 count = 0; 4629 if (ctxt->instate == XML_PARSER_EOF) 4630 return; 4631 } 4632 } 4633 if (nbchar != 0) { 4634 buf[nbchar] = 0; 4635 /* 4636 * OK the segment is to be consumed as chars. 4637 */ 4638 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4639 if (areBlanks(ctxt, buf, nbchar, 0)) { 4640 if (ctxt->sax->ignorableWhitespace != NULL) 4641 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4642 } else { 4643 if (ctxt->sax->characters != NULL) 4644 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4645 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4646 (*ctxt->space == -1)) 4647 *ctxt->space = -2; 4648 } 4649 } 4650 } 4651 if ((cur != 0) && (!IS_CHAR(cur))) { 4652 /* Generate the error and skip the offending character */ 4653 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4654 "PCDATA invalid Char value %d\n", 4655 cur); 4656 NEXTL(l); 4657 } 4658 } 4659 4660 /** 4661 * xmlParseExternalID: 4662 * @ctxt: an XML parser context 4663 * @publicID: a xmlChar** receiving PubidLiteral 4664 * @strict: indicate whether we should restrict parsing to only 4665 * production [75], see NOTE below 4666 * 4667 * Parse an External ID or a Public ID 4668 * 4669 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4670 * 'PUBLIC' S PubidLiteral S 
SystemLiteral 4671 * 4672 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4673 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4674 * 4675 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4676 * 4677 * Returns the function returns SystemLiteral and in the second 4678 * case publicID receives PubidLiteral, is strict is off 4679 * it is possible to return NULL and have publicID set. 4680 */ 4681 4682 xmlChar * 4683 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4684 xmlChar *URI = NULL; 4685 4686 SHRINK; 4687 4688 *publicID = NULL; 4689 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4690 SKIP(6); 4691 if (SKIP_BLANKS == 0) { 4692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4693 "Space required after 'SYSTEM'\n"); 4694 } 4695 URI = xmlParseSystemLiteral(ctxt); 4696 if (URI == NULL) { 4697 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4698 } 4699 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4700 SKIP(6); 4701 if (SKIP_BLANKS == 0) { 4702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4703 "Space required after 'PUBLIC'\n"); 4704 } 4705 *publicID = xmlParsePubidLiteral(ctxt); 4706 if (*publicID == NULL) { 4707 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4708 } 4709 if (strict) { 4710 /* 4711 * We don't handle [83] so "S SystemLiteral" is required. 4712 */ 4713 if (SKIP_BLANKS == 0) { 4714 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4715 "Space required after the Public Identifier\n"); 4716 } 4717 } else { 4718 /* 4719 * We handle [83] so we return immediately, if 4720 * "S SystemLiteral" is not detected. We skip blanks if no 4721 * system literal was found, but this is harmless since we must 4722 * be at the end of a NotationDecl. 
4723 */ 4724 if (SKIP_BLANKS == 0) return(NULL); 4725 if ((CUR != '\'') && (CUR != '"')) return(NULL); 4726 } 4727 URI = xmlParseSystemLiteral(ctxt); 4728 if (URI == NULL) { 4729 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4730 } 4731 } 4732 return(URI); 4733 } 4734 4735 /** 4736 * xmlParseCommentComplex: 4737 * @ctxt: an XML parser context 4738 * @buf: the already parsed part of the buffer 4739 * @len: number of bytes in the buffer 4740 * @size: allocated size of the buffer 4741 * 4742 * Skip an XML (SGML) comment <!-- .... --> 4743 * The spec says that "For compatibility, the string "--" (double-hyphen) 4744 * must not occur within comments. " 4745 * This is the slow routine in case the accelerator for ascii didn't work 4746 * 4747 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4748 */ 4749 static void 4750 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4751 size_t len, size_t size) { 4752 int q, ql; 4753 int r, rl; 4754 int cur, l; 4755 size_t count = 0; 4756 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
4757 XML_MAX_HUGE_LENGTH : 4758 XML_MAX_TEXT_LENGTH; 4759 int inputid; 4760 4761 inputid = ctxt->input->id; 4762 4763 if (buf == NULL) { 4764 len = 0; 4765 size = XML_PARSER_BUFFER_SIZE; 4766 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4767 if (buf == NULL) { 4768 xmlErrMemory(ctxt, NULL); 4769 return; 4770 } 4771 } 4772 GROW; /* Assure there's enough input data */ 4773 q = CUR_CHAR(ql); 4774 if (q == 0) 4775 goto not_terminated; 4776 if (!IS_CHAR(q)) { 4777 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4778 "xmlParseComment: invalid xmlChar value %d\n", 4779 q); 4780 xmlFree (buf); 4781 return; 4782 } 4783 NEXTL(ql); 4784 r = CUR_CHAR(rl); 4785 if (r == 0) 4786 goto not_terminated; 4787 if (!IS_CHAR(r)) { 4788 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4789 "xmlParseComment: invalid xmlChar value %d\n", 4790 q); 4791 xmlFree (buf); 4792 return; 4793 } 4794 NEXTL(rl); 4795 cur = CUR_CHAR(l); 4796 if (cur == 0) 4797 goto not_terminated; 4798 while (IS_CHAR(cur) && /* checked */ 4799 ((cur != '>') || 4800 (r != '-') || (q != '-'))) { 4801 if ((r == '-') && (q == '-')) { 4802 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4803 } 4804 if (len + 5 >= size) { 4805 xmlChar *new_buf; 4806 size_t new_size; 4807 4808 new_size = size * 2; 4809 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4810 if (new_buf == NULL) { 4811 xmlFree (buf); 4812 xmlErrMemory(ctxt, NULL); 4813 return; 4814 } 4815 buf = new_buf; 4816 size = new_size; 4817 } 4818 COPY_BUF(ql,buf,len,q); 4819 q = r; 4820 ql = rl; 4821 r = cur; 4822 rl = l; 4823 4824 count++; 4825 if (count > 50) { 4826 SHRINK; 4827 GROW; 4828 count = 0; 4829 if (ctxt->instate == XML_PARSER_EOF) { 4830 xmlFree(buf); 4831 return; 4832 } 4833 } 4834 NEXTL(l); 4835 cur = CUR_CHAR(l); 4836 if (cur == 0) { 4837 SHRINK; 4838 GROW; 4839 cur = CUR_CHAR(l); 4840 } 4841 4842 if (len > maxLength) { 4843 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4844 "Comment too big found", NULL); 4845 xmlFree (buf); 4846 
return; 4847 } 4848 } 4849 buf[len] = 0; 4850 if (cur == 0) { 4851 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4852 "Comment not terminated \n<!--%.50s\n", buf); 4853 } else if (!IS_CHAR(cur)) { 4854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4855 "xmlParseComment: invalid xmlChar value %d\n", 4856 cur); 4857 } else { 4858 if (inputid != ctxt->input->id) { 4859 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4860 "Comment doesn't start and stop in the same" 4861 " entity\n"); 4862 } 4863 NEXT; 4864 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4865 (!ctxt->disableSAX)) 4866 ctxt->sax->comment(ctxt->userData, buf); 4867 } 4868 xmlFree(buf); 4869 return; 4870 not_terminated: 4871 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4872 "Comment not terminated\n", NULL); 4873 xmlFree(buf); 4874 return; 4875 } 4876 4877 /** 4878 * xmlParseComment: 4879 * @ctxt: an XML parser context 4880 * 4881 * Skip an XML (SGML) comment <!-- .... --> 4882 * The spec says that "For compatibility, the string "--" (double-hyphen) 4883 * must not occur within comments. " 4884 * 4885 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4886 */ 4887 void 4888 xmlParseComment(xmlParserCtxtPtr ctxt) { 4889 xmlChar *buf = NULL; 4890 size_t size = XML_PARSER_BUFFER_SIZE; 4891 size_t len = 0; 4892 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 4893 XML_MAX_HUGE_LENGTH : 4894 XML_MAX_TEXT_LENGTH; 4895 xmlParserInputState state; 4896 const xmlChar *in; 4897 size_t nbchar = 0; 4898 int ccol; 4899 int inputid; 4900 4901 /* 4902 * Check that there is a comment right here. 4903 */ 4904 if ((RAW != '<') || (NXT(1) != '!') || 4905 (NXT(2) != '-') || (NXT(3) != '-')) return; 4906 state = ctxt->instate; 4907 ctxt->instate = XML_PARSER_COMMENT; 4908 inputid = ctxt->input->id; 4909 SKIP(4); 4910 SHRINK; 4911 GROW; 4912 4913 /* 4914 * Accelerated common case where input don't need to be 4915 * modified before passing it to the handler. 
4916 */ 4917 in = ctxt->input->cur; 4918 do { 4919 if (*in == 0xA) { 4920 do { 4921 ctxt->input->line++; ctxt->input->col = 1; 4922 in++; 4923 } while (*in == 0xA); 4924 } 4925 get_more: 4926 ccol = ctxt->input->col; 4927 while (((*in > '-') && (*in <= 0x7F)) || 4928 ((*in >= 0x20) && (*in < '-')) || 4929 (*in == 0x09)) { 4930 in++; 4931 ccol++; 4932 } 4933 ctxt->input->col = ccol; 4934 if (*in == 0xA) { 4935 do { 4936 ctxt->input->line++; ctxt->input->col = 1; 4937 in++; 4938 } while (*in == 0xA); 4939 goto get_more; 4940 } 4941 nbchar = in - ctxt->input->cur; 4942 /* 4943 * save current set of data 4944 */ 4945 if (nbchar > 0) { 4946 if ((ctxt->sax != NULL) && 4947 (ctxt->sax->comment != NULL)) { 4948 if (buf == NULL) { 4949 if ((*in == '-') && (in[1] == '-')) 4950 size = nbchar + 1; 4951 else 4952 size = XML_PARSER_BUFFER_SIZE + nbchar; 4953 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4954 if (buf == NULL) { 4955 xmlErrMemory(ctxt, NULL); 4956 ctxt->instate = state; 4957 return; 4958 } 4959 len = 0; 4960 } else if (len + nbchar + 1 >= size) { 4961 xmlChar *new_buf; 4962 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4963 new_buf = (xmlChar *) xmlRealloc(buf, 4964 size * sizeof(xmlChar)); 4965 if (new_buf == NULL) { 4966 xmlFree (buf); 4967 xmlErrMemory(ctxt, NULL); 4968 ctxt->instate = state; 4969 return; 4970 } 4971 buf = new_buf; 4972 } 4973 memcpy(&buf[len], ctxt->input->cur, nbchar); 4974 len += nbchar; 4975 buf[len] = 0; 4976 } 4977 } 4978 if (len > maxLength) { 4979 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4980 "Comment too big found", NULL); 4981 xmlFree (buf); 4982 return; 4983 } 4984 ctxt->input->cur = in; 4985 if (*in == 0xA) { 4986 in++; 4987 ctxt->input->line++; ctxt->input->col = 1; 4988 } 4989 if (*in == 0xD) { 4990 in++; 4991 if (*in == 0xA) { 4992 ctxt->input->cur = in; 4993 in++; 4994 ctxt->input->line++; ctxt->input->col = 1; 4995 goto get_more; 4996 } 4997 in--; 4998 } 4999 SHRINK; 5000 GROW; 5001 if (ctxt->instate 
== XML_PARSER_EOF) { 5002 xmlFree(buf); 5003 return; 5004 } 5005 in = ctxt->input->cur; 5006 if (*in == '-') { 5007 if (in[1] == '-') { 5008 if (in[2] == '>') { 5009 if (ctxt->input->id != inputid) { 5010 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5011 "comment doesn't start and stop in the" 5012 " same entity\n"); 5013 } 5014 SKIP(3); 5015 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5016 (!ctxt->disableSAX)) { 5017 if (buf != NULL) 5018 ctxt->sax->comment(ctxt->userData, buf); 5019 else 5020 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5021 } 5022 if (buf != NULL) 5023 xmlFree(buf); 5024 if (ctxt->instate != XML_PARSER_EOF) 5025 ctxt->instate = state; 5026 return; 5027 } 5028 if (buf != NULL) { 5029 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5030 "Double hyphen within comment: " 5031 "<!--%.50s\n", 5032 buf); 5033 } else 5034 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5035 "Double hyphen within comment\n", NULL); 5036 if (ctxt->instate == XML_PARSER_EOF) { 5037 xmlFree(buf); 5038 return; 5039 } 5040 in++; 5041 ctxt->input->col++; 5042 } 5043 in++; 5044 ctxt->input->col++; 5045 goto get_more; 5046 } 5047 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); 5048 xmlParseCommentComplex(ctxt, buf, len, size); 5049 ctxt->instate = state; 5050 return; 5051 } 5052 5053 5054 /** 5055 * xmlParsePITarget: 5056 * @ctxt: an XML parser context 5057 * 5058 * parse the name of a PI 5059 * 5060 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5061 * 5062 * Returns the PITarget name or NULL 5063 */ 5064 5065 const xmlChar * 5066 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5067 const xmlChar *name; 5068 5069 name = xmlParseName(ctxt); 5070 if ((name != NULL) && 5071 ((name[0] == 'x') || (name[0] == 'X')) && 5072 ((name[1] == 'm') || (name[1] == 'M')) && 5073 ((name[2] == 'l') || (name[2] == 'L'))) { 5074 int i; 5075 if ((name[0] == 'x') && (name[1] == 'm') && 5076 (name[2] == 'l') && (name[3] == 0)) { 5077 
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5078 "XML declaration allowed only at the start of the document\n"); 5079 return(name); 5080 } else if (name[3] == 0) { 5081 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5082 return(name); 5083 } 5084 for (i = 0;;i++) { 5085 if (xmlW3CPIs[i] == NULL) break; 5086 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5087 return(name); 5088 } 5089 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5090 "xmlParsePITarget: invalid name prefix 'xml'\n", 5091 NULL, NULL); 5092 } 5093 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5094 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5095 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5096 } 5097 return(name); 5098 } 5099 5100 #ifdef LIBXML_CATALOG_ENABLED 5101 /** 5102 * xmlParseCatalogPI: 5103 * @ctxt: an XML parser context 5104 * @catalog: the PI value string 5105 * 5106 * parse an XML Catalog Processing Instruction. 5107 * 5108 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5109 * 5110 * Occurs only if allowed by the user and if happening in the Misc 5111 * part of the document before any doctype information 5112 * This will add the given catalog to the parsing context in order 5113 * to be used if there is a resolution need further down in the document 5114 */ 5115 5116 static void 5117 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5118 xmlChar *URL = NULL; 5119 const xmlChar *tmp, *base; 5120 xmlChar marker; 5121 5122 tmp = catalog; 5123 while (IS_BLANK_CH(*tmp)) tmp++; 5124 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5125 goto error; 5126 tmp += 7; 5127 while (IS_BLANK_CH(*tmp)) tmp++; 5128 if (*tmp != '=') { 5129 return; 5130 } 5131 tmp++; 5132 while (IS_BLANK_CH(*tmp)) tmp++; 5133 marker = *tmp; 5134 if ((marker != '\'') && (marker != '"')) 5135 goto error; 5136 tmp++; 5137 base = tmp; 5138 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5139 if (*tmp == 0) 5140 goto error; 5141 URL = xmlStrndup(base, tmp - base); 
5142 tmp++; 5143 while (IS_BLANK_CH(*tmp)) tmp++; 5144 if (*tmp != 0) 5145 goto error; 5146 5147 if (URL != NULL) { 5148 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5149 xmlFree(URL); 5150 } 5151 return; 5152 5153 error: 5154 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5155 "Catalog PI syntax error: %s\n", 5156 catalog, NULL); 5157 if (URL != NULL) 5158 xmlFree(URL); 5159 } 5160 #endif 5161 5162 /** 5163 * xmlParsePI: 5164 * @ctxt: an XML parser context 5165 * 5166 * parse an XML Processing Instruction. 5167 * 5168 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5169 * 5170 * The processing is transferred to SAX once parsed. 5171 */ 5172 5173 void 5174 xmlParsePI(xmlParserCtxtPtr ctxt) { 5175 xmlChar *buf = NULL; 5176 size_t len = 0; 5177 size_t size = XML_PARSER_BUFFER_SIZE; 5178 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 5179 XML_MAX_HUGE_LENGTH : 5180 XML_MAX_TEXT_LENGTH; 5181 int cur, l; 5182 const xmlChar *target; 5183 xmlParserInputState state; 5184 int count = 0; 5185 5186 if ((RAW == '<') && (NXT(1) == '?')) { 5187 int inputid = ctxt->input->id; 5188 state = ctxt->instate; 5189 ctxt->instate = XML_PARSER_PI; 5190 /* 5191 * this is a Processing Instruction. 5192 */ 5193 SKIP(2); 5194 SHRINK; 5195 5196 /* 5197 * Parse the target name and check for special support like 5198 * namespace. 5199 */ 5200 target = xmlParsePITarget(ctxt); 5201 if (target != NULL) { 5202 if ((RAW == '?') && (NXT(1) == '>')) { 5203 if (inputid != ctxt->input->id) { 5204 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5205 "PI declaration doesn't start and stop in" 5206 " the same entity\n"); 5207 } 5208 SKIP(2); 5209 5210 /* 5211 * SAX: PI detected. 
5212 */ 5213 if ((ctxt->sax) && (!ctxt->disableSAX) && 5214 (ctxt->sax->processingInstruction != NULL)) 5215 ctxt->sax->processingInstruction(ctxt->userData, 5216 target, NULL); 5217 if (ctxt->instate != XML_PARSER_EOF) 5218 ctxt->instate = state; 5219 return; 5220 } 5221 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5222 if (buf == NULL) { 5223 xmlErrMemory(ctxt, NULL); 5224 ctxt->instate = state; 5225 return; 5226 } 5227 if (SKIP_BLANKS == 0) { 5228 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5229 "ParsePI: PI %s space expected\n", target); 5230 } 5231 cur = CUR_CHAR(l); 5232 while (IS_CHAR(cur) && /* checked */ 5233 ((cur != '?') || (NXT(1) != '>'))) { 5234 if (len + 5 >= size) { 5235 xmlChar *tmp; 5236 size_t new_size = size * 2; 5237 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5238 if (tmp == NULL) { 5239 xmlErrMemory(ctxt, NULL); 5240 xmlFree(buf); 5241 ctxt->instate = state; 5242 return; 5243 } 5244 buf = tmp; 5245 size = new_size; 5246 } 5247 count++; 5248 if (count > 50) { 5249 SHRINK; 5250 GROW; 5251 if (ctxt->instate == XML_PARSER_EOF) { 5252 xmlFree(buf); 5253 return; 5254 } 5255 count = 0; 5256 } 5257 COPY_BUF(l,buf,len,cur); 5258 NEXTL(l); 5259 cur = CUR_CHAR(l); 5260 if (cur == 0) { 5261 SHRINK; 5262 GROW; 5263 cur = CUR_CHAR(l); 5264 } 5265 if (len > maxLength) { 5266 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5267 "PI %s too big found", target); 5268 xmlFree(buf); 5269 ctxt->instate = state; 5270 return; 5271 } 5272 } 5273 buf[len] = 0; 5274 if (cur != '?') { 5275 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5276 "ParsePI: PI %s never end ...\n", target); 5277 } else { 5278 if (inputid != ctxt->input->id) { 5279 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5280 "PI declaration doesn't start and stop in" 5281 " the same entity\n"); 5282 } 5283 SKIP(2); 5284 5285 #ifdef LIBXML_CATALOG_ENABLED 5286 if (((state == XML_PARSER_MISC) || 5287 (state == XML_PARSER_START)) && 5288 (xmlStrEqual(target, XML_CATALOG_PI))) { 5289 
xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5290 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5291 (allow == XML_CATA_ALLOW_ALL)) 5292 xmlParseCatalogPI(ctxt, buf); 5293 } 5294 #endif 5295 5296 5297 /* 5298 * SAX: PI detected. 5299 */ 5300 if ((ctxt->sax) && (!ctxt->disableSAX) && 5301 (ctxt->sax->processingInstruction != NULL)) 5302 ctxt->sax->processingInstruction(ctxt->userData, 5303 target, buf); 5304 } 5305 xmlFree(buf); 5306 } else { 5307 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5308 } 5309 if (ctxt->instate != XML_PARSER_EOF) 5310 ctxt->instate = state; 5311 } 5312 } 5313 5314 /** 5315 * xmlParseNotationDecl: 5316 * @ctxt: an XML parser context 5317 * 5318 * parse a notation declaration 5319 * 5320 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5321 * 5322 * Hence there is actually 3 choices: 5323 * 'PUBLIC' S PubidLiteral 5324 * 'PUBLIC' S PubidLiteral S SystemLiteral 5325 * and 'SYSTEM' S SystemLiteral 5326 * 5327 * See the NOTE on xmlParseExternalID(). 5328 */ 5329 5330 void 5331 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5332 const xmlChar *name; 5333 xmlChar *Pubid; 5334 xmlChar *Systemid; 5335 5336 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5337 int inputid = ctxt->input->id; 5338 SHRINK; 5339 SKIP(10); 5340 if (SKIP_BLANKS == 0) { 5341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5342 "Space required after '<!NOTATION'\n"); 5343 return; 5344 } 5345 5346 name = xmlParseName(ctxt); 5347 if (name == NULL) { 5348 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5349 return; 5350 } 5351 if (xmlStrchr(name, ':') != NULL) { 5352 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5353 "colons are forbidden from notation names '%s'\n", 5354 name, NULL, NULL); 5355 } 5356 if (SKIP_BLANKS == 0) { 5357 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5358 "Space required after the NOTATION name'\n"); 5359 return; 5360 } 5361 5362 /* 5363 * Parse the IDs. 
5364 */ 5365 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5366 SKIP_BLANKS; 5367 5368 if (RAW == '>') { 5369 if (inputid != ctxt->input->id) { 5370 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5371 "Notation declaration doesn't start and stop" 5372 " in the same entity\n"); 5373 } 5374 NEXT; 5375 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5376 (ctxt->sax->notationDecl != NULL)) 5377 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5378 } else { 5379 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5380 } 5381 if (Systemid != NULL) xmlFree(Systemid); 5382 if (Pubid != NULL) xmlFree(Pubid); 5383 } 5384 } 5385 5386 /** 5387 * xmlParseEntityDecl: 5388 * @ctxt: an XML parser context 5389 * 5390 * parse <!ENTITY declarations 5391 * 5392 * [70] EntityDecl ::= GEDecl | PEDecl 5393 * 5394 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5395 * 5396 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5397 * 5398 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5399 * 5400 * [74] PEDef ::= EntityValue | ExternalID 5401 * 5402 * [76] NDataDecl ::= S 'NDATA' S Name 5403 * 5404 * [ VC: Notation Declared ] 5405 * The Name must match the declared name of a notation. 
5406 */ 5407 5408 void 5409 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5410 const xmlChar *name = NULL; 5411 xmlChar *value = NULL; 5412 xmlChar *URI = NULL, *literal = NULL; 5413 const xmlChar *ndata = NULL; 5414 int isParameter = 0; 5415 xmlChar *orig = NULL; 5416 5417 /* GROW; done in the caller */ 5418 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5419 int inputid = ctxt->input->id; 5420 SHRINK; 5421 SKIP(8); 5422 if (SKIP_BLANKS == 0) { 5423 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5424 "Space required after '<!ENTITY'\n"); 5425 } 5426 5427 if (RAW == '%') { 5428 NEXT; 5429 if (SKIP_BLANKS == 0) { 5430 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5431 "Space required after '%%'\n"); 5432 } 5433 isParameter = 1; 5434 } 5435 5436 name = xmlParseName(ctxt); 5437 if (name == NULL) { 5438 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5439 "xmlParseEntityDecl: no name\n"); 5440 return; 5441 } 5442 if (xmlStrchr(name, ':') != NULL) { 5443 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5444 "colons are forbidden from entities names '%s'\n", 5445 name, NULL, NULL); 5446 } 5447 if (SKIP_BLANKS == 0) { 5448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5449 "Space required after the entity name\n"); 5450 } 5451 5452 ctxt->instate = XML_PARSER_ENTITY_DECL; 5453 /* 5454 * handle the various case of definitions... 
5455 */ 5456 if (isParameter) { 5457 if ((RAW == '"') || (RAW == '\'')) { 5458 value = xmlParseEntityValue(ctxt, &orig); 5459 if (value) { 5460 if ((ctxt->sax != NULL) && 5461 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5462 ctxt->sax->entityDecl(ctxt->userData, name, 5463 XML_INTERNAL_PARAMETER_ENTITY, 5464 NULL, NULL, value); 5465 } 5466 } else { 5467 URI = xmlParseExternalID(ctxt, &literal, 1); 5468 if ((URI == NULL) && (literal == NULL)) { 5469 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5470 } 5471 if (URI) { 5472 xmlURIPtr uri; 5473 5474 uri = xmlParseURI((const char *) URI); 5475 if (uri == NULL) { 5476 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5477 "Invalid URI: %s\n", URI); 5478 /* 5479 * This really ought to be a well formedness error 5480 * but the XML Core WG decided otherwise c.f. issue 5481 * E26 of the XML erratas. 5482 */ 5483 } else { 5484 if (uri->fragment != NULL) { 5485 /* 5486 * Okay this is foolish to block those but not 5487 * invalid URIs. 5488 */ 5489 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5490 } else { 5491 if ((ctxt->sax != NULL) && 5492 (!ctxt->disableSAX) && 5493 (ctxt->sax->entityDecl != NULL)) 5494 ctxt->sax->entityDecl(ctxt->userData, name, 5495 XML_EXTERNAL_PARAMETER_ENTITY, 5496 literal, URI, NULL); 5497 } 5498 xmlFreeURI(uri); 5499 } 5500 } 5501 } 5502 } else { 5503 if ((RAW == '"') || (RAW == '\'')) { 5504 value = xmlParseEntityValue(ctxt, &orig); 5505 if ((ctxt->sax != NULL) && 5506 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5507 ctxt->sax->entityDecl(ctxt->userData, name, 5508 XML_INTERNAL_GENERAL_ENTITY, 5509 NULL, NULL, value); 5510 /* 5511 * For expat compatibility in SAX mode. 
5512 */ 5513 if ((ctxt->myDoc == NULL) || 5514 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5515 if (ctxt->myDoc == NULL) { 5516 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5517 if (ctxt->myDoc == NULL) { 5518 xmlErrMemory(ctxt, "New Doc failed"); 5519 return; 5520 } 5521 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5522 } 5523 if (ctxt->myDoc->intSubset == NULL) 5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5525 BAD_CAST "fake", NULL, NULL); 5526 5527 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5528 NULL, NULL, value); 5529 } 5530 } else { 5531 URI = xmlParseExternalID(ctxt, &literal, 1); 5532 if ((URI == NULL) && (literal == NULL)) { 5533 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5534 } 5535 if (URI) { 5536 xmlURIPtr uri; 5537 5538 uri = xmlParseURI((const char *)URI); 5539 if (uri == NULL) { 5540 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5541 "Invalid URI: %s\n", URI); 5542 /* 5543 * This really ought to be a well formedness error 5544 * but the XML Core WG decided otherwise c.f. issue 5545 * E26 of the XML erratas. 5546 */ 5547 } else { 5548 if (uri->fragment != NULL) { 5549 /* 5550 * Okay this is foolish to block those but not 5551 * invalid URIs. 
5552 */ 5553 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5554 } 5555 xmlFreeURI(uri); 5556 } 5557 } 5558 if ((RAW != '>') && (SKIP_BLANKS == 0)) { 5559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5560 "Space required before 'NDATA'\n"); 5561 } 5562 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5563 SKIP(5); 5564 if (SKIP_BLANKS == 0) { 5565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5566 "Space required after 'NDATA'\n"); 5567 } 5568 ndata = xmlParseName(ctxt); 5569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5570 (ctxt->sax->unparsedEntityDecl != NULL)) 5571 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5572 literal, URI, ndata); 5573 } else { 5574 if ((ctxt->sax != NULL) && 5575 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5576 ctxt->sax->entityDecl(ctxt->userData, name, 5577 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5578 literal, URI, NULL); 5579 /* 5580 * For expat compatibility in SAX mode. 5581 * assuming the entity replacement was asked for 5582 */ 5583 if ((ctxt->replaceEntities != 0) && 5584 ((ctxt->myDoc == NULL) || 5585 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5586 if (ctxt->myDoc == NULL) { 5587 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5588 if (ctxt->myDoc == NULL) { 5589 xmlErrMemory(ctxt, "New Doc failed"); 5590 return; 5591 } 5592 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5593 } 5594 5595 if (ctxt->myDoc->intSubset == NULL) 5596 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5597 BAD_CAST "fake", NULL, NULL); 5598 xmlSAX2EntityDecl(ctxt, name, 5599 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5600 literal, URI, NULL); 5601 } 5602 } 5603 } 5604 } 5605 if (ctxt->instate == XML_PARSER_EOF) 5606 goto done; 5607 SKIP_BLANKS; 5608 if (RAW != '>') { 5609 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5610 "xmlParseEntityDecl: entity %s not terminated\n", name); 5611 xmlHaltParser(ctxt); 5612 } else { 5613 if (inputid != ctxt->input->id) { 5614 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5615 "Entity declaration 
doesn't start and stop in" 5616 " the same entity\n"); 5617 } 5618 NEXT; 5619 } 5620 if (orig != NULL) { 5621 /* 5622 * Ugly mechanism to save the raw entity value. 5623 */ 5624 xmlEntityPtr cur = NULL; 5625 5626 if (isParameter) { 5627 if ((ctxt->sax != NULL) && 5628 (ctxt->sax->getParameterEntity != NULL)) 5629 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5630 } else { 5631 if ((ctxt->sax != NULL) && 5632 (ctxt->sax->getEntity != NULL)) 5633 cur = ctxt->sax->getEntity(ctxt->userData, name); 5634 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5635 cur = xmlSAX2GetEntity(ctxt, name); 5636 } 5637 } 5638 if ((cur != NULL) && (cur->orig == NULL)) { 5639 cur->orig = orig; 5640 orig = NULL; 5641 } 5642 } 5643 5644 done: 5645 if (value != NULL) xmlFree(value); 5646 if (URI != NULL) xmlFree(URI); 5647 if (literal != NULL) xmlFree(literal); 5648 if (orig != NULL) xmlFree(orig); 5649 } 5650 } 5651 5652 /** 5653 * xmlParseDefaultDecl: 5654 * @ctxt: an XML parser context 5655 * @value: Receive a possible fixed default value for the attribute 5656 * 5657 * Parse an attribute default declaration 5658 * 5659 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5660 * 5661 * [ VC: Required Attribute ] 5662 * if the default declaration is the keyword #REQUIRED, then the 5663 * attribute must be specified for all elements of the type in the 5664 * attribute-list declaration. 5665 * 5666 * [ VC: Attribute Default Legal ] 5667 * The declared default value must meet the lexical constraints of 5668 * the declared attribute type c.f. xmlValidateAttributeDecl() 5669 * 5670 * [ VC: Fixed Attribute Default ] 5671 * if an attribute has a default value declared with the #FIXED 5672 * keyword, instances of that attribute must match the default value. 5673 * 5674 * [ WFC: No < in Attribute Values ] 5675 * handled in xmlParseAttValue() 5676 * 5677 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5678 * or XML_ATTRIBUTE_FIXED. 
5679 */ 5680 5681 int 5682 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5683 int val; 5684 xmlChar *ret; 5685 5686 *value = NULL; 5687 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5688 SKIP(9); 5689 return(XML_ATTRIBUTE_REQUIRED); 5690 } 5691 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5692 SKIP(8); 5693 return(XML_ATTRIBUTE_IMPLIED); 5694 } 5695 val = XML_ATTRIBUTE_NONE; 5696 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5697 SKIP(6); 5698 val = XML_ATTRIBUTE_FIXED; 5699 if (SKIP_BLANKS == 0) { 5700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5701 "Space required after '#FIXED'\n"); 5702 } 5703 } 5704 ret = xmlParseAttValue(ctxt); 5705 ctxt->instate = XML_PARSER_DTD; 5706 if (ret == NULL) { 5707 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5708 "Attribute default value declaration error\n"); 5709 } else 5710 *value = ret; 5711 return(val); 5712 } 5713 5714 /** 5715 * xmlParseNotationType: 5716 * @ctxt: an XML parser context 5717 * 5718 * parse an Notation attribute type. 5719 * 5720 * Note: the leading 'NOTATION' S part has already being parsed... 5721 * 5722 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5723 * 5724 * [ VC: Notation Attributes ] 5725 * Values of this type must match one of the notation names included 5726 * in the declaration; all notation names in the declaration must be declared. 
5727 * 5728 * Returns: the notation attribute tree built while parsing 5729 */ 5730 5731 xmlEnumerationPtr 5732 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5733 const xmlChar *name; 5734 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5735 5736 if (RAW != '(') { 5737 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5738 return(NULL); 5739 } 5740 SHRINK; 5741 do { 5742 NEXT; 5743 SKIP_BLANKS; 5744 name = xmlParseName(ctxt); 5745 if (name == NULL) { 5746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5747 "Name expected in NOTATION declaration\n"); 5748 xmlFreeEnumeration(ret); 5749 return(NULL); 5750 } 5751 tmp = ret; 5752 while (tmp != NULL) { 5753 if (xmlStrEqual(name, tmp->name)) { 5754 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5755 "standalone: attribute notation value token %s duplicated\n", 5756 name, NULL); 5757 if (!xmlDictOwns(ctxt->dict, name)) 5758 xmlFree((xmlChar *) name); 5759 break; 5760 } 5761 tmp = tmp->next; 5762 } 5763 if (tmp == NULL) { 5764 cur = xmlCreateEnumeration(name); 5765 if (cur == NULL) { 5766 xmlFreeEnumeration(ret); 5767 return(NULL); 5768 } 5769 if (last == NULL) ret = last = cur; 5770 else { 5771 last->next = cur; 5772 last = cur; 5773 } 5774 } 5775 SKIP_BLANKS; 5776 } while (RAW == '|'); 5777 if (RAW != ')') { 5778 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5779 xmlFreeEnumeration(ret); 5780 return(NULL); 5781 } 5782 NEXT; 5783 return(ret); 5784 } 5785 5786 /** 5787 * xmlParseEnumerationType: 5788 * @ctxt: an XML parser context 5789 * 5790 * parse an Enumeration attribute type. 5791 * 5792 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5793 * 5794 * [ VC: Enumeration ] 5795 * Values of this type must match one of the Nmtoken tokens in 5796 * the declaration 5797 * 5798 * Returns: the enumeration attribute tree built while parsing 5799 */ 5800 5801 xmlEnumerationPtr 5802 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5803 xmlChar *name; 5804 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5805 5806 if (RAW != '(') { 5807 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5808 return(NULL); 5809 } 5810 SHRINK; 5811 do { 5812 NEXT; 5813 SKIP_BLANKS; 5814 name = xmlParseNmtoken(ctxt); 5815 if (name == NULL) { 5816 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5817 return(ret); 5818 } 5819 tmp = ret; 5820 while (tmp != NULL) { 5821 if (xmlStrEqual(name, tmp->name)) { 5822 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5823 "standalone: attribute enumeration value token %s duplicated\n", 5824 name, NULL); 5825 if (!xmlDictOwns(ctxt->dict, name)) 5826 xmlFree(name); 5827 break; 5828 } 5829 tmp = tmp->next; 5830 } 5831 if (tmp == NULL) { 5832 cur = xmlCreateEnumeration(name); 5833 if (!xmlDictOwns(ctxt->dict, name)) 5834 xmlFree(name); 5835 if (cur == NULL) { 5836 xmlFreeEnumeration(ret); 5837 return(NULL); 5838 } 5839 if (last == NULL) ret = last = cur; 5840 else { 5841 last->next = cur; 5842 last = cur; 5843 } 5844 } 5845 SKIP_BLANKS; 5846 } while (RAW == '|'); 5847 if (RAW != ')') { 5848 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5849 return(ret); 5850 } 5851 NEXT; 5852 return(ret); 5853 } 5854 5855 /** 5856 * xmlParseEnumeratedType: 5857 * @ctxt: an XML parser context 5858 * @tree: the enumeration tree built while parsing 5859 * 5860 * parse an Enumerated attribute type. 5861 * 5862 * [57] EnumeratedType ::= NotationType | Enumeration 5863 * 5864 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5865 * 5866 * 5867 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5868 */ 5869 5870 int 5871 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5872 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5873 SKIP(8); 5874 if (SKIP_BLANKS == 0) { 5875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5876 "Space required after 'NOTATION'\n"); 5877 return(0); 5878 } 5879 *tree = xmlParseNotationType(ctxt); 5880 if (*tree == NULL) return(0); 5881 return(XML_ATTRIBUTE_NOTATION); 5882 } 5883 *tree = xmlParseEnumerationType(ctxt); 5884 if (*tree == NULL) return(0); 5885 return(XML_ATTRIBUTE_ENUMERATION); 5886 } 5887 5888 /** 5889 * xmlParseAttributeType: 5890 * @ctxt: an XML parser context 5891 * @tree: the enumeration tree built while parsing 5892 * 5893 * parse the Attribute list def for an element 5894 * 5895 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5896 * 5897 * [55] StringType ::= 'CDATA' 5898 * 5899 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5900 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5901 * 5902 * Validity constraints for attribute values syntax are checked in 5903 * xmlValidateAttributeValue() 5904 * 5905 * [ VC: ID ] 5906 * Values of type ID must match the Name production. A name must not 5907 * appear more than once in an XML document as a value of this type; 5908 * i.e., ID values must uniquely identify the elements which bear them. 5909 * 5910 * [ VC: One ID per Element Type ] 5911 * No element type may have more than one ID attribute specified. 5912 * 5913 * [ VC: ID Attribute Default ] 5914 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5915 * 5916 * [ VC: IDREF ] 5917 * Values of type IDREF must match the Name production, and values 5918 * of type IDREFS must match Names; each IDREF Name must match the value 5919 * of an ID attribute on some element in the XML document; i.e. IDREF 5920 * values must match the value of some ID attribute. 
5921 * 5922 * [ VC: Entity Name ] 5923 * Values of type ENTITY must match the Name production, values 5924 * of type ENTITIES must match Names; each Entity Name must match the 5925 * name of an unparsed entity declared in the DTD. 5926 * 5927 * [ VC: Name Token ] 5928 * Values of type NMTOKEN must match the Nmtoken production; values 5929 * of type NMTOKENS must match Nmtokens. 5930 * 5931 * Returns the attribute type 5932 */ 5933 int 5934 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5935 SHRINK; 5936 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5937 SKIP(5); 5938 return(XML_ATTRIBUTE_CDATA); 5939 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5940 SKIP(6); 5941 return(XML_ATTRIBUTE_IDREFS); 5942 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5943 SKIP(5); 5944 return(XML_ATTRIBUTE_IDREF); 5945 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5946 SKIP(2); 5947 return(XML_ATTRIBUTE_ID); 5948 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5949 SKIP(6); 5950 return(XML_ATTRIBUTE_ENTITY); 5951 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5952 SKIP(8); 5953 return(XML_ATTRIBUTE_ENTITIES); 5954 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5955 SKIP(8); 5956 return(XML_ATTRIBUTE_NMTOKENS); 5957 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5958 SKIP(7); 5959 return(XML_ATTRIBUTE_NMTOKEN); 5960 } 5961 return(xmlParseEnumeratedType(ctxt, tree)); 5962 } 5963 5964 /** 5965 * xmlParseAttributeListDecl: 5966 * @ctxt: an XML parser context 5967 * 5968 * : parse the Attribute list def for an element 5969 * 5970 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5971 * 5972 * [53] AttDef ::= S Name S AttType S DefaultDecl 5973 * 5974 */ 5975 void 5976 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5977 const xmlChar *elemName; 5978 const xmlChar *attrName; 5979 xmlEnumerationPtr tree; 5980 5981 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5982 int inputid = ctxt->input->id; 5983 5984 SKIP(9); 5985 if (SKIP_BLANKS == 0) { 5986 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5987 "Space required after '<!ATTLIST'\n"); 5988 } 5989 elemName = xmlParseName(ctxt); 5990 if (elemName == NULL) { 5991 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5992 "ATTLIST: no name for Element\n"); 5993 return; 5994 } 5995 SKIP_BLANKS; 5996 GROW; 5997 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 5998 int type; 5999 int def; 6000 xmlChar *defaultValue = NULL; 6001 6002 GROW; 6003 tree = NULL; 6004 attrName = xmlParseName(ctxt); 6005 if (attrName == NULL) { 6006 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6007 "ATTLIST: no name for Attribute\n"); 6008 break; 6009 } 6010 GROW; 6011 if (SKIP_BLANKS == 0) { 6012 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6013 "Space required after the attribute name\n"); 6014 break; 6015 } 6016 6017 type = xmlParseAttributeType(ctxt, &tree); 6018 if (type <= 0) { 6019 break; 6020 } 6021 6022 GROW; 6023 if (SKIP_BLANKS == 0) { 6024 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6025 "Space required after the attribute type\n"); 6026 if (tree != NULL) 6027 xmlFreeEnumeration(tree); 6028 break; 6029 } 6030 6031 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6032 if (def <= 0) { 6033 if (defaultValue != NULL) 6034 xmlFree(defaultValue); 6035 if (tree != NULL) 6036 xmlFreeEnumeration(tree); 6037 break; 6038 } 6039 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6040 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6041 6042 GROW; 6043 if (RAW != '>') { 6044 if (SKIP_BLANKS == 0) { 6045 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6046 "Space required after the 
attribute default value\n"); 6047 if (defaultValue != NULL) 6048 xmlFree(defaultValue); 6049 if (tree != NULL) 6050 xmlFreeEnumeration(tree); 6051 break; 6052 } 6053 } 6054 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6055 (ctxt->sax->attributeDecl != NULL)) 6056 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6057 type, def, defaultValue, tree); 6058 else if (tree != NULL) 6059 xmlFreeEnumeration(tree); 6060 6061 if ((ctxt->sax2) && (defaultValue != NULL) && 6062 (def != XML_ATTRIBUTE_IMPLIED) && 6063 (def != XML_ATTRIBUTE_REQUIRED)) { 6064 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6065 } 6066 if (ctxt->sax2) { 6067 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6068 } 6069 if (defaultValue != NULL) 6070 xmlFree(defaultValue); 6071 GROW; 6072 } 6073 if (RAW == '>') { 6074 if (inputid != ctxt->input->id) { 6075 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6076 "Attribute list declaration doesn't start and" 6077 " stop in the same entity\n"); 6078 } 6079 NEXT; 6080 } 6081 } 6082 } 6083 6084 /** 6085 * xmlParseElementMixedContentDecl: 6086 * @ctxt: an XML parser context 6087 * @inputchk: the input used for the current entity, needed for boundary checks 6088 * 6089 * parse the declaration for a Mixed Element content 6090 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6091 * 6092 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6093 * '(' S? '#PCDATA' S? ')' 6094 * 6095 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6096 * 6097 * [ VC: No Duplicate Types ] 6098 * The same name must not appear more than once in a single 6099 * mixed-content declaration. 
6100 * 6101 * returns: the list of the xmlElementContentPtr describing the element choices 6102 */ 6103 xmlElementContentPtr 6104 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6105 xmlElementContentPtr ret = NULL, cur = NULL, n; 6106 const xmlChar *elem = NULL; 6107 6108 GROW; 6109 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6110 SKIP(7); 6111 SKIP_BLANKS; 6112 SHRINK; 6113 if (RAW == ')') { 6114 if (ctxt->input->id != inputchk) { 6115 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6116 "Element content declaration doesn't start and" 6117 " stop in the same entity\n"); 6118 } 6119 NEXT; 6120 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6121 if (ret == NULL) 6122 return(NULL); 6123 if (RAW == '*') { 6124 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6125 NEXT; 6126 } 6127 return(ret); 6128 } 6129 if ((RAW == '(') || (RAW == '|')) { 6130 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6131 if (ret == NULL) return(NULL); 6132 } 6133 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6134 NEXT; 6135 if (elem == NULL) { 6136 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6137 if (ret == NULL) { 6138 xmlFreeDocElementContent(ctxt->myDoc, cur); 6139 return(NULL); 6140 } 6141 ret->c1 = cur; 6142 if (cur != NULL) 6143 cur->parent = ret; 6144 cur = ret; 6145 } else { 6146 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6147 if (n == NULL) { 6148 xmlFreeDocElementContent(ctxt->myDoc, ret); 6149 return(NULL); 6150 } 6151 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6152 if (n->c1 != NULL) 6153 n->c1->parent = n; 6154 cur->c2 = n; 6155 if (n != NULL) 6156 n->parent = cur; 6157 cur = n; 6158 } 6159 SKIP_BLANKS; 6160 elem = xmlParseName(ctxt); 6161 if (elem == NULL) { 6162 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6163 "xmlParseElementMixedContentDecl : Name expected\n"); 6164 
xmlFreeDocElementContent(ctxt->myDoc, ret); 6165 return(NULL); 6166 } 6167 SKIP_BLANKS; 6168 GROW; 6169 } 6170 if ((RAW == ')') && (NXT(1) == '*')) { 6171 if (elem != NULL) { 6172 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6173 XML_ELEMENT_CONTENT_ELEMENT); 6174 if (cur->c2 != NULL) 6175 cur->c2->parent = cur; 6176 } 6177 if (ret != NULL) 6178 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6179 if (ctxt->input->id != inputchk) { 6180 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6181 "Element content declaration doesn't start and" 6182 " stop in the same entity\n"); 6183 } 6184 SKIP(2); 6185 } else { 6186 xmlFreeDocElementContent(ctxt->myDoc, ret); 6187 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6188 return(NULL); 6189 } 6190 6191 } else { 6192 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6193 } 6194 return(ret); 6195 } 6196 6197 /** 6198 * xmlParseElementChildrenContentDeclPriv: 6199 * @ctxt: an XML parser context 6200 * @inputchk: the input used for the current entity, needed for boundary checks 6201 * @depth: the level of recursion 6202 * 6203 * parse the declaration for a Mixed Element content 6204 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6205 * 6206 * 6207 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6208 * 6209 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6210 * 6211 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6212 * 6213 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6214 * 6215 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6216 * TODO Parameter-entity replacement text must be properly nested 6217 * with parenthesized groups. That is to say, if either of the 6218 * opening or closing parentheses in a choice, seq, or Mixed 6219 * construct is contained in the replacement text for a parameter 6220 * entity, both must be contained in the same replacement text. 
For 6221 * interoperability, if a parameter-entity reference appears in a 6222 * choice, seq, or Mixed construct, its replacement text should not 6223 * be empty, and neither the first nor last non-blank character of 6224 * the replacement text should be a connector (| or ,). 6225 * 6226 * Returns the tree of xmlElementContentPtr describing the element 6227 * hierarchy. 6228 */ 6229 static xmlElementContentPtr 6230 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6231 int depth) { 6232 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6233 const xmlChar *elem; 6234 xmlChar type = 0; 6235 6236 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6237 (depth > 2048)) { 6238 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6239 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6240 depth); 6241 return(NULL); 6242 } 6243 SKIP_BLANKS; 6244 GROW; 6245 if (RAW == '(') { 6246 int inputid = ctxt->input->id; 6247 6248 /* Recurse on first child */ 6249 NEXT; 6250 SKIP_BLANKS; 6251 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6252 depth + 1); 6253 if (cur == NULL) 6254 return(NULL); 6255 SKIP_BLANKS; 6256 GROW; 6257 } else { 6258 elem = xmlParseName(ctxt); 6259 if (elem == NULL) { 6260 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6261 return(NULL); 6262 } 6263 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6264 if (cur == NULL) { 6265 xmlErrMemory(ctxt, NULL); 6266 return(NULL); 6267 } 6268 GROW; 6269 if (RAW == '?') { 6270 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6271 NEXT; 6272 } else if (RAW == '*') { 6273 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6274 NEXT; 6275 } else if (RAW == '+') { 6276 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 6277 NEXT; 6278 } else { 6279 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6280 } 6281 GROW; 6282 } 6283 SKIP_BLANKS; 6284 SHRINK; 6285 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 
6286 /* 6287 * Each loop we parse one separator and one element. 6288 */ 6289 if (RAW == ',') { 6290 if (type == 0) type = CUR; 6291 6292 /* 6293 * Detect "Name | Name , Name" error 6294 */ 6295 else if (type != CUR) { 6296 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6297 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6298 type); 6299 if ((last != NULL) && (last != ret)) 6300 xmlFreeDocElementContent(ctxt->myDoc, last); 6301 if (ret != NULL) 6302 xmlFreeDocElementContent(ctxt->myDoc, ret); 6303 return(NULL); 6304 } 6305 NEXT; 6306 6307 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6308 if (op == NULL) { 6309 if ((last != NULL) && (last != ret)) 6310 xmlFreeDocElementContent(ctxt->myDoc, last); 6311 xmlFreeDocElementContent(ctxt->myDoc, ret); 6312 return(NULL); 6313 } 6314 if (last == NULL) { 6315 op->c1 = ret; 6316 if (ret != NULL) 6317 ret->parent = op; 6318 ret = cur = op; 6319 } else { 6320 cur->c2 = op; 6321 if (op != NULL) 6322 op->parent = cur; 6323 op->c1 = last; 6324 if (last != NULL) 6325 last->parent = op; 6326 cur =op; 6327 last = NULL; 6328 } 6329 } else if (RAW == '|') { 6330 if (type == 0) type = CUR; 6331 6332 /* 6333 * Detect "Name , Name | Name" error 6334 */ 6335 else if (type != CUR) { 6336 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6337 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6338 type); 6339 if ((last != NULL) && (last != ret)) 6340 xmlFreeDocElementContent(ctxt->myDoc, last); 6341 if (ret != NULL) 6342 xmlFreeDocElementContent(ctxt->myDoc, ret); 6343 return(NULL); 6344 } 6345 NEXT; 6346 6347 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6348 if (op == NULL) { 6349 if ((last != NULL) && (last != ret)) 6350 xmlFreeDocElementContent(ctxt->myDoc, last); 6351 if (ret != NULL) 6352 xmlFreeDocElementContent(ctxt->myDoc, ret); 6353 return(NULL); 6354 } 6355 if (last == NULL) { 6356 op->c1 = ret; 6357 if (ret != NULL) 6358 ret->parent = op; 6359 ret = cur = 
op; 6360 } else { 6361 cur->c2 = op; 6362 if (op != NULL) 6363 op->parent = cur; 6364 op->c1 = last; 6365 if (last != NULL) 6366 last->parent = op; 6367 cur =op; 6368 last = NULL; 6369 } 6370 } else { 6371 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6372 if ((last != NULL) && (last != ret)) 6373 xmlFreeDocElementContent(ctxt->myDoc, last); 6374 if (ret != NULL) 6375 xmlFreeDocElementContent(ctxt->myDoc, ret); 6376 return(NULL); 6377 } 6378 GROW; 6379 SKIP_BLANKS; 6380 GROW; 6381 if (RAW == '(') { 6382 int inputid = ctxt->input->id; 6383 /* Recurse on second child */ 6384 NEXT; 6385 SKIP_BLANKS; 6386 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6387 depth + 1); 6388 if (last == NULL) { 6389 if (ret != NULL) 6390 xmlFreeDocElementContent(ctxt->myDoc, ret); 6391 return(NULL); 6392 } 6393 SKIP_BLANKS; 6394 } else { 6395 elem = xmlParseName(ctxt); 6396 if (elem == NULL) { 6397 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6398 if (ret != NULL) 6399 xmlFreeDocElementContent(ctxt->myDoc, ret); 6400 return(NULL); 6401 } 6402 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6403 if (last == NULL) { 6404 if (ret != NULL) 6405 xmlFreeDocElementContent(ctxt->myDoc, ret); 6406 return(NULL); 6407 } 6408 if (RAW == '?') { 6409 last->ocur = XML_ELEMENT_CONTENT_OPT; 6410 NEXT; 6411 } else if (RAW == '*') { 6412 last->ocur = XML_ELEMENT_CONTENT_MULT; 6413 NEXT; 6414 } else if (RAW == '+') { 6415 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6416 NEXT; 6417 } else { 6418 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6419 } 6420 } 6421 SKIP_BLANKS; 6422 GROW; 6423 } 6424 if ((cur != NULL) && (last != NULL)) { 6425 cur->c2 = last; 6426 if (last != NULL) 6427 last->parent = cur; 6428 } 6429 if (ctxt->input->id != inputchk) { 6430 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6431 "Element content declaration doesn't start and stop in" 6432 " the same entity\n"); 6433 } 6434 NEXT; 6435 if (RAW == '?') { 6436 if (ret != NULL) { 
6437 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6438 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6439 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6440 else 6441 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6442 } 6443 NEXT; 6444 } else if (RAW == '*') { 6445 if (ret != NULL) { 6446 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6447 cur = ret; 6448 /* 6449 * Some normalization: 6450 * (a | b* | c?)* == (a | b | c)* 6451 */ 6452 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6453 if ((cur->c1 != NULL) && 6454 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6455 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6456 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6457 if ((cur->c2 != NULL) && 6458 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6459 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6460 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6461 cur = cur->c2; 6462 } 6463 } 6464 NEXT; 6465 } else if (RAW == '+') { 6466 if (ret != NULL) { 6467 int found = 0; 6468 6469 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6470 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6471 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6472 else 6473 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6474 /* 6475 * Some normalization: 6476 * (a | b*)+ == (a | b)* 6477 * (a | b?)+ == (a | b)* 6478 */ 6479 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6480 if ((cur->c1 != NULL) && 6481 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6482 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6483 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6484 found = 1; 6485 } 6486 if ((cur->c2 != NULL) && 6487 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6488 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6489 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6490 found = 1; 6491 } 6492 cur = cur->c2; 6493 } 6494 if (found) 6495 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6496 } 6497 NEXT; 6498 } 6499 return(ret); 6500 } 6501 6502 /** 6503 * xmlParseElementChildrenContentDecl: 6504 * @ctxt: an XML parser context 6505 * @inputchk: the input used for 
the current entity, needed for boundary checks 6506 * 6507 * parse the declaration for a Mixed Element content 6508 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6509 * 6510 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6511 * 6512 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6513 * 6514 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6515 * 6516 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6517 * 6518 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6519 * TODO Parameter-entity replacement text must be properly nested 6520 * with parenthesized groups. That is to say, if either of the 6521 * opening or closing parentheses in a choice, seq, or Mixed 6522 * construct is contained in the replacement text for a parameter 6523 * entity, both must be contained in the same replacement text. For 6524 * interoperability, if a parameter-entity reference appears in a 6525 * choice, seq, or Mixed construct, its replacement text should not 6526 * be empty, and neither the first nor last non-blank character of 6527 * the replacement text should be a connector (| or ,). 6528 * 6529 * Returns the tree of xmlElementContentPtr describing the element 6530 * hierarchy. 6531 */ 6532 xmlElementContentPtr 6533 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6534 /* stub left for API/ABI compat */ 6535 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6536 } 6537 6538 /** 6539 * xmlParseElementContentDecl: 6540 * @ctxt: an XML parser context 6541 * @name: the name of the element being defined. 
6542 * @result: the Element Content pointer will be stored here if any 6543 * 6544 * parse the declaration for an Element content either Mixed or Children, 6545 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6546 * 6547 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6548 * 6549 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6550 */ 6551 6552 int 6553 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6554 xmlElementContentPtr *result) { 6555 6556 xmlElementContentPtr tree = NULL; 6557 int inputid = ctxt->input->id; 6558 int res; 6559 6560 *result = NULL; 6561 6562 if (RAW != '(') { 6563 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6564 "xmlParseElementContentDecl : %s '(' expected\n", name); 6565 return(-1); 6566 } 6567 NEXT; 6568 GROW; 6569 if (ctxt->instate == XML_PARSER_EOF) 6570 return(-1); 6571 SKIP_BLANKS; 6572 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6573 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6574 res = XML_ELEMENT_TYPE_MIXED; 6575 } else { 6576 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6577 res = XML_ELEMENT_TYPE_ELEMENT; 6578 } 6579 SKIP_BLANKS; 6580 *result = tree; 6581 return(res); 6582 } 6583 6584 /** 6585 * xmlParseElementDecl: 6586 * @ctxt: an XML parser context 6587 * 6588 * parse an Element declaration. 6589 * 6590 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6591 * 6592 * [ VC: Unique Element Type Declaration ] 6593 * No element type may be declared more than once 6594 * 6595 * Returns the type of the element, or -1 in case of error 6596 */ 6597 int 6598 xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6599 const xmlChar *name; 6600 int ret = -1; 6601 xmlElementContentPtr content = NULL; 6602 6603 /* GROW; done in the caller */ 6604 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6605 int inputid = ctxt->input->id; 6606 6607 SKIP(9); 6608 if (SKIP_BLANKS == 0) { 6609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6610 "Space required after 'ELEMENT'\n"); 6611 return(-1); 6612 } 6613 name = xmlParseName(ctxt); 6614 if (name == NULL) { 6615 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6616 "xmlParseElementDecl: no name for Element\n"); 6617 return(-1); 6618 } 6619 if (SKIP_BLANKS == 0) { 6620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6621 "Space required after the element name\n"); 6622 } 6623 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6624 SKIP(5); 6625 /* 6626 * Element must always be empty. 6627 */ 6628 ret = XML_ELEMENT_TYPE_EMPTY; 6629 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6630 (NXT(2) == 'Y')) { 6631 SKIP(3); 6632 /* 6633 * Element is a generic container. 6634 */ 6635 ret = XML_ELEMENT_TYPE_ANY; 6636 } else if (RAW == '(') { 6637 ret = xmlParseElementContentDecl(ctxt, name, &content); 6638 } else { 6639 /* 6640 * [ WFC: PEs in Internal Subset ] error handling. 
6641 */ 6642 if ((RAW == '%') && (ctxt->external == 0) && 6643 (ctxt->inputNr == 1)) { 6644 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6645 "PEReference: forbidden within markup decl in internal subset\n"); 6646 } else { 6647 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6648 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6649 } 6650 return(-1); 6651 } 6652 6653 SKIP_BLANKS; 6654 6655 if (RAW != '>') { 6656 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6657 if (content != NULL) { 6658 xmlFreeDocElementContent(ctxt->myDoc, content); 6659 } 6660 } else { 6661 if (inputid != ctxt->input->id) { 6662 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6663 "Element declaration doesn't start and stop in" 6664 " the same entity\n"); 6665 } 6666 6667 NEXT; 6668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6669 (ctxt->sax->elementDecl != NULL)) { 6670 if (content != NULL) 6671 content->parent = NULL; 6672 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6673 content); 6674 if ((content != NULL) && (content->parent == NULL)) { 6675 /* 6676 * this is a trick: if xmlAddElementDecl is called, 6677 * instead of copying the full tree it is plugged directly 6678 * if called from the parser. Avoid duplicating the 6679 * interfaces or change the API/ABI 6680 */ 6681 xmlFreeDocElementContent(ctxt->myDoc, content); 6682 } 6683 } else if (content != NULL) { 6684 xmlFreeDocElementContent(ctxt->myDoc, content); 6685 } 6686 } 6687 } 6688 return(ret); 6689 } 6690 6691 /** 6692 * xmlParseConditionalSections 6693 * @ctxt: an XML parser context 6694 * 6695 * [61] conditionalSect ::= includeSect | ignoreSect 6696 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6697 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6698 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6699 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6700 */ 6701 6702 static void 6703 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6704 int *inputIds = NULL; 6705 size_t inputIdsSize = 0; 6706 size_t depth = 0; 6707 6708 while (ctxt->instate != XML_PARSER_EOF) { 6709 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6710 int id = ctxt->input->id; 6711 6712 SKIP(3); 6713 SKIP_BLANKS; 6714 6715 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6716 SKIP(7); 6717 SKIP_BLANKS; 6718 if (RAW != '[') { 6719 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6720 xmlHaltParser(ctxt); 6721 goto error; 6722 } 6723 if (ctxt->input->id != id) { 6724 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6725 "All markup of the conditional section is" 6726 " not in the same entity\n"); 6727 } 6728 NEXT; 6729 6730 if (inputIdsSize <= depth) { 6731 int *tmp; 6732 6733 inputIdsSize = (inputIdsSize == 0 ? 
4 : inputIdsSize * 2); 6734 tmp = (int *) xmlRealloc(inputIds, 6735 inputIdsSize * sizeof(int)); 6736 if (tmp == NULL) { 6737 xmlErrMemory(ctxt, NULL); 6738 goto error; 6739 } 6740 inputIds = tmp; 6741 } 6742 inputIds[depth] = id; 6743 depth++; 6744 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6745 int state; 6746 xmlParserInputState instate; 6747 size_t ignoreDepth = 0; 6748 6749 SKIP(6); 6750 SKIP_BLANKS; 6751 if (RAW != '[') { 6752 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6753 xmlHaltParser(ctxt); 6754 goto error; 6755 } 6756 if (ctxt->input->id != id) { 6757 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6758 "All markup of the conditional section is" 6759 " not in the same entity\n"); 6760 } 6761 NEXT; 6762 6763 /* 6764 * Parse up to the end of the conditional section but disable 6765 * SAX event generating DTD building in the meantime 6766 */ 6767 state = ctxt->disableSAX; 6768 instate = ctxt->instate; 6769 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6770 ctxt->instate = XML_PARSER_IGNORE; 6771 6772 while (RAW != 0) { 6773 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6774 SKIP(3); 6775 ignoreDepth++; 6776 /* Check for integer overflow */ 6777 if (ignoreDepth == 0) { 6778 xmlErrMemory(ctxt, NULL); 6779 goto error; 6780 } 6781 } else if ((RAW == ']') && (NXT(1) == ']') && 6782 (NXT(2) == '>')) { 6783 if (ignoreDepth == 0) 6784 break; 6785 SKIP(3); 6786 ignoreDepth--; 6787 } else { 6788 NEXT; 6789 } 6790 } 6791 6792 ctxt->disableSAX = state; 6793 ctxt->instate = instate; 6794 6795 if (RAW == 0) { 6796 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6797 goto error; 6798 } 6799 if (ctxt->input->id != id) { 6800 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6801 "All markup of the conditional section is" 6802 " not in the same entity\n"); 6803 } 6804 SKIP(3); 6805 } else { 6806 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6807 xmlHaltParser(ctxt); 6808 goto error; 6809 } 6810 } else if ((depth > 0) && 6811 
(RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6812 depth--; 6813 if (ctxt->input->id != inputIds[depth]) { 6814 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6815 "All markup of the conditional section is not" 6816 " in the same entity\n"); 6817 } 6818 SKIP(3); 6819 } else { 6820 int id = ctxt->input->id; 6821 unsigned long cons = CUR_CONSUMED; 6822 6823 xmlParseMarkupDecl(ctxt); 6824 6825 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) { 6826 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6827 xmlHaltParser(ctxt); 6828 goto error; 6829 } 6830 } 6831 6832 if (depth == 0) 6833 break; 6834 6835 SKIP_BLANKS; 6836 GROW; 6837 } 6838 6839 error: 6840 xmlFree(inputIds); 6841 } 6842 6843 /** 6844 * xmlParseMarkupDecl: 6845 * @ctxt: an XML parser context 6846 * 6847 * parse Markup declarations 6848 * 6849 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6850 * NotationDecl | PI | Comment 6851 * 6852 * [ VC: Proper Declaration/PE Nesting ] 6853 * Parameter-entity replacement text must be properly nested with 6854 * markup declarations. That is to say, if either the first character 6855 * or the last character of a markup declaration (markupdecl above) is 6856 * contained in the replacement text for a parameter-entity reference, 6857 * both must be contained in the same replacement text. 6858 * 6859 * [ WFC: PEs in Internal Subset ] 6860 * In the internal DTD subset, parameter-entity references can occur 6861 * only where markup declarations can occur, not within markup declarations. 6862 * (This does not apply to references that occur in external parameter 6863 * entities or to the external subset.) 
6864 */ 6865 void 6866 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6867 GROW; 6868 if (CUR == '<') { 6869 if (NXT(1) == '!') { 6870 switch (NXT(2)) { 6871 case 'E': 6872 if (NXT(3) == 'L') 6873 xmlParseElementDecl(ctxt); 6874 else if (NXT(3) == 'N') 6875 xmlParseEntityDecl(ctxt); 6876 break; 6877 case 'A': 6878 xmlParseAttributeListDecl(ctxt); 6879 break; 6880 case 'N': 6881 xmlParseNotationDecl(ctxt); 6882 break; 6883 case '-': 6884 xmlParseComment(ctxt); 6885 break; 6886 default: 6887 /* there is an error but it will be detected later */ 6888 break; 6889 } 6890 } else if (NXT(1) == '?') { 6891 xmlParsePI(ctxt); 6892 } 6893 } 6894 6895 /* 6896 * detect requirement to exit there and act accordingly 6897 * and avoid having instate overridden later on 6898 */ 6899 if (ctxt->instate == XML_PARSER_EOF) 6900 return; 6901 6902 ctxt->instate = XML_PARSER_DTD; 6903 } 6904 6905 /** 6906 * xmlParseTextDecl: 6907 * @ctxt: an XML parser context 6908 * 6909 * parse an XML declaration header for external entities 6910 * 6911 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6912 */ 6913 6914 void 6915 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6916 xmlChar *version; 6917 const xmlChar *encoding; 6918 int oldstate; 6919 6920 /* 6921 * We know that '<?xml' is here. 6922 */ 6923 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6924 SKIP(5); 6925 } else { 6926 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6927 return; 6928 } 6929 6930 /* Avoid expansion of parameter entities when skipping blanks. */ 6931 oldstate = ctxt->instate; 6932 ctxt->instate = XML_PARSER_START; 6933 6934 if (SKIP_BLANKS == 0) { 6935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6936 "Space needed after '<?xml'\n"); 6937 } 6938 6939 /* 6940 * We may have the VersionInfo here. 
6941 */ 6942 version = xmlParseVersionInfo(ctxt); 6943 if (version == NULL) 6944 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6945 else { 6946 if (SKIP_BLANKS == 0) { 6947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6948 "Space needed here\n"); 6949 } 6950 } 6951 ctxt->input->version = version; 6952 6953 /* 6954 * We must have the encoding declaration 6955 */ 6956 encoding = xmlParseEncodingDecl(ctxt); 6957 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6958 /* 6959 * The XML REC instructs us to stop parsing right here 6960 */ 6961 ctxt->instate = oldstate; 6962 return; 6963 } 6964 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6965 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6966 "Missing encoding in text declaration\n"); 6967 } 6968 6969 SKIP_BLANKS; 6970 if ((RAW == '?') && (NXT(1) == '>')) { 6971 SKIP(2); 6972 } else if (RAW == '>') { 6973 /* Deprecated old WD ... */ 6974 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6975 NEXT; 6976 } else { 6977 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6978 MOVETO_ENDTAG(CUR_PTR); 6979 NEXT; 6980 } 6981 6982 ctxt->instate = oldstate; 6983 } 6984 6985 /** 6986 * xmlParseExternalSubset: 6987 * @ctxt: an XML parser context 6988 * @ExternalID: the external identifier 6989 * @SystemID: the system identifier (or URL) 6990 * 6991 * parse Markup declarations from an external subset 6992 * 6993 * [30] extSubset ::= textDecl? 
extSubsetDecl 6994 * 6995 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6996 */ 6997 void 6998 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6999 const xmlChar *SystemID) { 7000 xmlDetectSAX2(ctxt); 7001 GROW; 7002 7003 if ((ctxt->encoding == NULL) && 7004 (ctxt->input->end - ctxt->input->cur >= 4)) { 7005 xmlChar start[4]; 7006 xmlCharEncoding enc; 7007 7008 start[0] = RAW; 7009 start[1] = NXT(1); 7010 start[2] = NXT(2); 7011 start[3] = NXT(3); 7012 enc = xmlDetectCharEncoding(start, 4); 7013 if (enc != XML_CHAR_ENCODING_NONE) 7014 xmlSwitchEncoding(ctxt, enc); 7015 } 7016 7017 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7018 xmlParseTextDecl(ctxt); 7019 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7020 /* 7021 * The XML REC instructs us to stop parsing right here 7022 */ 7023 xmlHaltParser(ctxt); 7024 return; 7025 } 7026 } 7027 if (ctxt->myDoc == NULL) { 7028 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7029 if (ctxt->myDoc == NULL) { 7030 xmlErrMemory(ctxt, "New Doc failed"); 7031 return; 7032 } 7033 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7034 } 7035 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7036 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7037 7038 ctxt->instate = XML_PARSER_DTD; 7039 ctxt->external = 1; 7040 SKIP_BLANKS; 7041 while (((RAW == '<') && (NXT(1) == '?')) || 7042 ((RAW == '<') && (NXT(1) == '!')) || 7043 (RAW == '%')) { 7044 int id = ctxt->input->id; 7045 unsigned long cons = CUR_CONSUMED; 7046 7047 GROW; 7048 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7049 xmlParseConditionalSections(ctxt); 7050 } else 7051 xmlParseMarkupDecl(ctxt); 7052 SKIP_BLANKS; 7053 7054 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) { 7055 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7056 break; 7057 } 7058 } 7059 7060 if (RAW != 0) { 7061 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7062 } 7063 7064 } 7065 7066 /** 7067 * 
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content, depending on the SAX
 * interface, this may end-up in a call to character() if this is a
 * CharRef, a predefined entity, if there is no reference() callback.
 * or if the parser was asked to switch to that mode.
 *
 * Outline of the processing done below:
 *  - nothing happens unless the current character is '&';
 *  - a CharRef is decoded and delivered through characters(), or through
 *    reference() when the context is not UTF-8 and the value is > 0xFF;
 *  - a predefined entity has its content delivered through characters();
 *  - any other entity may first be parsed (filling ent->children and
 *    ent->checked), then it is either reported through reference() or
 *    its node list is attached/copied under ctxt->node.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[16];
        /* Remember the character after "&#" to pick hex or decimal below. */
        int hex = NXT(2);
        int value = xmlParseCharRef(ctxt);

        /* A zero value is treated as "nothing to deliver". */
        if (value == 0)
            return;
        if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
            /*
             * So we are using non-UTF-8 buffers
             * Check that the char fit on 8bits, if not
             * generate a CharRef.
             */
            if (value <= 0xFF) {
                out[0] = value;
                out[1] = 0;
                if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->characters(ctxt->userData, out, 1);
            } else {
                /* Rebuild the reference text in the notation it used. */
                if ((hex == 'x') || (hex == 'X'))
                    snprintf((char *)out, sizeof(out), "#x%X", value);
                else
                    snprintf((char *)out, sizeof(out), "#%d", value);
                if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->reference(ctxt->userData, out);
            }
        } else {
            /*
             * Just encode the value in UTF-8
             */
            COPY_BUF(0 ,out, i, value);
            out[i] = 0;
            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->characters(ctxt->userData, out, i);
        }
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    if (!ctxt->wellFormed)
        return;
    /* Snapshot taken before the first-reference parsing below updates it. */
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity trigger a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if (((ent->checked == 0) ||
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
        unsigned long oldnbent = ctxt->nbentities, diff;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                           user_data, ctxt->depth, ent->URI,
                                           ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Store the number of entities needing parsing for this entity
         * content and do checkings.
         * ent->checked ends up holding twice that count (capped so the
         * doubling stays within INT_MAX), with the low bit set when the
         * entity content contains a '<'.
         */
        diff = ctxt->nbentities - oldnbent + 1;
        if (diff > INT_MAX / 2)
            diff = INT_MAX / 2;
        ent->checked = diff * 2;
        if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
            ent->checked |= 1;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlHaltParser(ctxt);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
             (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
                (ent->children == NULL)) {
                ent->children = list;
                /*
                 * Prune it directly in the generated document
                 * except for single text nodes.
                 */
                if ((ctxt->replaceEntities == 0) ||
                    (ctxt->parseMode == XML_PARSE_READER) ||
                    ((list->type == XML_TEXT_NODE) &&
                     (list->next == NULL))) {
                    /* The entity keeps the nodes: reparent them to it. */
                    ent->owner = 1;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ent;
                        if (list->doc != ent->doc)
                            xmlSetTreeDoc(list, ent->doc);
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                    list = NULL;
                } else {
                    /*
                     * The nodes are marked as belonging to the current
                     * node and document; list is left pointing at the
                     * first of them for the insertion code further down.
                     */
                    ent->owner = 0;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ctxt->node;
                        list->doc = ctxt->myDoc;
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                    list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                        xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
                }
            } else {
                xmlFreeNodeList(list);
                list = NULL;
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                     "Entity '%s' failed to parse\n", ent->name);
            /* Truncate the content of the entity that failed to parse. */
            if (ent->content != NULL)
                ent->content[0] = 0;
            xmlParserEntityCheck(ctxt, 0, ent, 0);
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }
        /* Never leave checked at 0 once the parsing phase has run. */
        if (ent->checked == 0)
            ent->checked = 2;

        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
        was_checked = 0;
    } else if (ent->checked != 1) {
        /* Already checked: account for the references it contains. */
        ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * though parsing for first checking go though the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * The entity has children but entities are not being substituted:
     * report the reference itself and stop here.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL)) {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if ((ctxt->node != NULL) && (ent->children != NULL)) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                if (ctxt->parseMode == XML_PARSE_READER)
                    ent->owner = 1;
                 */

                /* Copy each child up to ent->last under ctxt->node. */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if ((list == NULL) || (ctxt->inputNr > 0)) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* Detach cur before it is moved into the document. */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        /* The copy goes to the entity, the original
                         * node goes to the document. */
                        xmlAddChild((xmlNodePtr) ent, nw);
                        xmlAddChild(ctxt->node, cur);
                    }
                    if (cur == last)
                        break;
                    cur = next;
                }
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}

/**
 * xmlParseEntityRef:
 * @ctxt:  an XML parser context
 *
 * parse ENTITY references declarations
 *
 * [68] EntityRef ::= '&' Name ';'
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", the Name given in the entity reference
 * must match that in an entity declaration, except that
well-formed 7515 * documents need not declare any of the following entities: amp, lt, 7516 * gt, apos, quot. The declaration of a parameter entity must precede 7517 * any reference to it. Similarly, the declaration of a general entity 7518 * must precede any reference to it which appears in a default value in an 7519 * attribute-list declaration. Note that if entities are declared in the 7520 * external subset or in external parameter entities, a non-validating 7521 * processor is not obligated to read and process their declarations; 7522 * for such documents, the rule that an entity must be declared is a 7523 * well-formedness constraint only if standalone='yes'. 7524 * 7525 * [ WFC: Parsed Entity ] 7526 * An entity reference must not contain the name of an unparsed entity 7527 * 7528 * Returns the xmlEntityPtr if found, or NULL otherwise. 7529 */ 7530 xmlEntityPtr 7531 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7532 const xmlChar *name; 7533 xmlEntityPtr ent = NULL; 7534 7535 GROW; 7536 if (ctxt->instate == XML_PARSER_EOF) 7537 return(NULL); 7538 7539 if (RAW != '&') 7540 return(NULL); 7541 NEXT; 7542 name = xmlParseName(ctxt); 7543 if (name == NULL) { 7544 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7545 "xmlParseEntityRef: no name\n"); 7546 return(NULL); 7547 } 7548 if (RAW != ';') { 7549 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7550 return(NULL); 7551 } 7552 NEXT; 7553 7554 /* 7555 * Predefined entities override any extra definition 7556 */ 7557 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7558 ent = xmlGetPredefinedEntity(name); 7559 if (ent != NULL) 7560 return(ent); 7561 } 7562 7563 /* 7564 * Increase the number of entity references parsed 7565 */ 7566 ctxt->nbentities++; 7567 7568 /* 7569 * Ask first SAX for entity resolution, otherwise try the 7570 * entities which may have stored in the parser context. 
7571 */ 7572 if (ctxt->sax != NULL) { 7573 if (ctxt->sax->getEntity != NULL) 7574 ent = ctxt->sax->getEntity(ctxt->userData, name); 7575 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7576 (ctxt->options & XML_PARSE_OLDSAX)) 7577 ent = xmlGetPredefinedEntity(name); 7578 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7579 (ctxt->userData==ctxt)) { 7580 ent = xmlSAX2GetEntity(ctxt, name); 7581 } 7582 } 7583 if (ctxt->instate == XML_PARSER_EOF) 7584 return(NULL); 7585 /* 7586 * [ WFC: Entity Declared ] 7587 * In a document without any DTD, a document with only an 7588 * internal DTD subset which contains no parameter entity 7589 * references, or a document with "standalone='yes'", the 7590 * Name given in the entity reference must match that in an 7591 * entity declaration, except that well-formed documents 7592 * need not declare any of the following entities: amp, lt, 7593 * gt, apos, quot. 7594 * The declaration of a parameter entity must precede any 7595 * reference to it. 7596 * Similarly, the declaration of a general entity must 7597 * precede any reference to it which appears in a default 7598 * value in an attribute-list declaration. Note that if 7599 * entities are declared in the external subset or in 7600 * external parameter entities, a non-validating processor 7601 * is not obligated to read and process their declarations; 7602 * for such documents, the rule that an entity must be 7603 * declared is a well-formedness constraint only if 7604 * standalone='yes'. 
7605 */ 7606 if (ent == NULL) { 7607 if ((ctxt->standalone == 1) || 7608 ((ctxt->hasExternalSubset == 0) && 7609 (ctxt->hasPErefs == 0))) { 7610 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7611 "Entity '%s' not defined\n", name); 7612 } else { 7613 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7614 "Entity '%s' not defined\n", name); 7615 if ((ctxt->inSubset == 0) && 7616 (ctxt->sax != NULL) && 7617 (ctxt->sax->reference != NULL)) { 7618 ctxt->sax->reference(ctxt->userData, name); 7619 } 7620 } 7621 xmlParserEntityCheck(ctxt, 0, ent, 0); 7622 ctxt->valid = 0; 7623 } 7624 7625 /* 7626 * [ WFC: Parsed Entity ] 7627 * An entity reference must not contain the name of an 7628 * unparsed entity 7629 */ 7630 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7631 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7632 "Entity reference to unparsed entity %s\n", name); 7633 } 7634 7635 /* 7636 * [ WFC: No External Entity References ] 7637 * Attribute values cannot contain direct or indirect 7638 * entity references to external entities. 7639 */ 7640 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7641 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7642 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7643 "Attribute references external entity '%s'\n", name); 7644 } 7645 /* 7646 * [ WFC: No < in Attribute Values ] 7647 * The replacement text of any entity referred to directly or 7648 * indirectly in an attribute value (other than "<") must 7649 * not contain a <. 7650 */ 7651 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7652 (ent != NULL) && 7653 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7654 if (((ent->checked & 1) || (ent->checked == 0)) && 7655 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7656 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7657 "'<' in entity '%s' is not allowed in attributes values\n", name); 7658 } 7659 } 7660 7661 /* 7662 * Internal check, no parameter entities here ... 
7663 */ 7664 else { 7665 switch (ent->etype) { 7666 case XML_INTERNAL_PARAMETER_ENTITY: 7667 case XML_EXTERNAL_PARAMETER_ENTITY: 7668 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7669 "Attempt to reference the parameter entity '%s'\n", 7670 name); 7671 break; 7672 default: 7673 break; 7674 } 7675 } 7676 7677 /* 7678 * [ WFC: No Recursion ] 7679 * A parsed entity must not contain a recursive reference 7680 * to itself, either directly or indirectly. 7681 * Done somewhere else 7682 */ 7683 return(ent); 7684 } 7685 7686 /** 7687 * xmlParseStringEntityRef: 7688 * @ctxt: an XML parser context 7689 * @str: a pointer to an index in the string 7690 * 7691 * parse ENTITY references declarations, but this version parses it from 7692 * a string value. 7693 * 7694 * [68] EntityRef ::= '&' Name ';' 7695 * 7696 * [ WFC: Entity Declared ] 7697 * In a document without any DTD, a document with only an internal DTD 7698 * subset which contains no parameter entity references, or a document 7699 * with "standalone='yes'", the Name given in the entity reference 7700 * must match that in an entity declaration, except that well-formed 7701 * documents need not declare any of the following entities: amp, lt, 7702 * gt, apos, quot. The declaration of a parameter entity must precede 7703 * any reference to it. Similarly, the declaration of a general entity 7704 * must precede any reference to it which appears in a default value in an 7705 * attribute-list declaration. Note that if entities are declared in the 7706 * external subset or in external parameter entities, a non-validating 7707 * processor is not obligated to read and process their declarations; 7708 * for such documents, the rule that an entity must be declared is a 7709 * well-formedness constraint only if standalone='yes'. 7710 * 7711 * [ WFC: Parsed Entity ] 7712 * An entity reference must not contain the name of an unparsed entity 7713 * 7714 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7715 * is updated to the current location in the string. 7716 */ 7717 static xmlEntityPtr 7718 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7719 xmlChar *name; 7720 const xmlChar *ptr; 7721 xmlChar cur; 7722 xmlEntityPtr ent = NULL; 7723 7724 if ((str == NULL) || (*str == NULL)) 7725 return(NULL); 7726 ptr = *str; 7727 cur = *ptr; 7728 if (cur != '&') 7729 return(NULL); 7730 7731 ptr++; 7732 name = xmlParseStringName(ctxt, &ptr); 7733 if (name == NULL) { 7734 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7735 "xmlParseStringEntityRef: no name\n"); 7736 *str = ptr; 7737 return(NULL); 7738 } 7739 if (*ptr != ';') { 7740 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7741 xmlFree(name); 7742 *str = ptr; 7743 return(NULL); 7744 } 7745 ptr++; 7746 7747 7748 /* 7749 * Predefined entities override any extra definition 7750 */ 7751 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7752 ent = xmlGetPredefinedEntity(name); 7753 if (ent != NULL) { 7754 xmlFree(name); 7755 *str = ptr; 7756 return(ent); 7757 } 7758 } 7759 7760 /* 7761 * Increase the number of entity references parsed 7762 */ 7763 ctxt->nbentities++; 7764 7765 /* 7766 * Ask first SAX for entity resolution, otherwise try the 7767 * entities which may have stored in the parser context. 
7768 */ 7769 if (ctxt->sax != NULL) { 7770 if (ctxt->sax->getEntity != NULL) 7771 ent = ctxt->sax->getEntity(ctxt->userData, name); 7772 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7773 ent = xmlGetPredefinedEntity(name); 7774 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7775 ent = xmlSAX2GetEntity(ctxt, name); 7776 } 7777 } 7778 if (ctxt->instate == XML_PARSER_EOF) { 7779 xmlFree(name); 7780 return(NULL); 7781 } 7782 7783 /* 7784 * [ WFC: Entity Declared ] 7785 * In a document without any DTD, a document with only an 7786 * internal DTD subset which contains no parameter entity 7787 * references, or a document with "standalone='yes'", the 7788 * Name given in the entity reference must match that in an 7789 * entity declaration, except that well-formed documents 7790 * need not declare any of the following entities: amp, lt, 7791 * gt, apos, quot. 7792 * The declaration of a parameter entity must precede any 7793 * reference to it. 7794 * Similarly, the declaration of a general entity must 7795 * precede any reference to it which appears in a default 7796 * value in an attribute-list declaration. Note that if 7797 * entities are declared in the external subset or in 7798 * external parameter entities, a non-validating processor 7799 * is not obligated to read and process their declarations; 7800 * for such documents, the rule that an entity must be 7801 * declared is a well-formedness constraint only if 7802 * standalone='yes'. 7803 */ 7804 if (ent == NULL) { 7805 if ((ctxt->standalone == 1) || 7806 ((ctxt->hasExternalSubset == 0) && 7807 (ctxt->hasPErefs == 0))) { 7808 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7809 "Entity '%s' not defined\n", name); 7810 } else { 7811 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7812 "Entity '%s' not defined\n", 7813 name); 7814 } 7815 xmlParserEntityCheck(ctxt, 0, ent, 0); 7816 /* TODO ? 
check regressions ctxt->valid = 0; */ 7817 } 7818 7819 /* 7820 * [ WFC: Parsed Entity ] 7821 * An entity reference must not contain the name of an 7822 * unparsed entity 7823 */ 7824 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7825 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7826 "Entity reference to unparsed entity %s\n", name); 7827 } 7828 7829 /* 7830 * [ WFC: No External Entity References ] 7831 * Attribute values cannot contain direct or indirect 7832 * entity references to external entities. 7833 */ 7834 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7835 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7836 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7837 "Attribute references external entity '%s'\n", name); 7838 } 7839 /* 7840 * [ WFC: No < in Attribute Values ] 7841 * The replacement text of any entity referred to directly or 7842 * indirectly in an attribute value (other than "<") must 7843 * not contain a <. 7844 */ 7845 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7846 (ent != NULL) && (ent->content != NULL) && 7847 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7848 (xmlStrchr(ent->content, '<'))) { 7849 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7850 "'<' in entity '%s' is not allowed in attributes values\n", 7851 name); 7852 } 7853 7854 /* 7855 * Internal check, no parameter entities here ... 7856 */ 7857 else { 7858 switch (ent->etype) { 7859 case XML_INTERNAL_PARAMETER_ENTITY: 7860 case XML_EXTERNAL_PARAMETER_ENTITY: 7861 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7862 "Attempt to reference the parameter entity '%s'\n", 7863 name); 7864 break; 7865 default: 7866 break; 7867 } 7868 } 7869 7870 /* 7871 * [ WFC: No Recursion ] 7872 * A parsed entity must not contain a recursive reference 7873 * to itself, either directly or indirectly. 
7874 * Done somewhere else 7875 */ 7876 7877 xmlFree(name); 7878 *str = ptr; 7879 return(ent); 7880 } 7881 7882 /** 7883 * xmlParsePEReference: 7884 * @ctxt: an XML parser context 7885 * 7886 * parse PEReference declarations 7887 * The entity content is handled directly by pushing it's content as 7888 * a new input stream. 7889 * 7890 * [69] PEReference ::= '%' Name ';' 7891 * 7892 * [ WFC: No Recursion ] 7893 * A parsed entity must not contain a recursive 7894 * reference to itself, either directly or indirectly. 7895 * 7896 * [ WFC: Entity Declared ] 7897 * In a document without any DTD, a document with only an internal DTD 7898 * subset which contains no parameter entity references, or a document 7899 * with "standalone='yes'", ... ... The declaration of a parameter 7900 * entity must precede any reference to it... 7901 * 7902 * [ VC: Entity Declared ] 7903 * In a document with an external subset or external parameter entities 7904 * with "standalone='no'", ... ... The declaration of a parameter entity 7905 * must precede any reference to it... 7906 * 7907 * [ WFC: In DTD ] 7908 * Parameter-entity references may only appear in the DTD. 7909 * NOTE: misleading but this is handled. 
7910 */ 7911 void 7912 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7913 { 7914 const xmlChar *name; 7915 xmlEntityPtr entity = NULL; 7916 xmlParserInputPtr input; 7917 7918 if (RAW != '%') 7919 return; 7920 NEXT; 7921 name = xmlParseName(ctxt); 7922 if (name == NULL) { 7923 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n"); 7924 return; 7925 } 7926 if (xmlParserDebugEntities) 7927 xmlGenericError(xmlGenericErrorContext, 7928 "PEReference: %s\n", name); 7929 if (RAW != ';') { 7930 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 7931 return; 7932 } 7933 7934 NEXT; 7935 7936 /* 7937 * Increase the number of entity references parsed 7938 */ 7939 ctxt->nbentities++; 7940 7941 /* 7942 * Request the entity from SAX 7943 */ 7944 if ((ctxt->sax != NULL) && 7945 (ctxt->sax->getParameterEntity != NULL)) 7946 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7947 if (ctxt->instate == XML_PARSER_EOF) 7948 return; 7949 if (entity == NULL) { 7950 /* 7951 * [ WFC: Entity Declared ] 7952 * In a document without any DTD, a document with only an 7953 * internal DTD subset which contains no parameter entity 7954 * references, or a document with "standalone='yes'", ... 7955 * ... The declaration of a parameter entity must precede 7956 * any reference to it... 7957 */ 7958 if ((ctxt->standalone == 1) || 7959 ((ctxt->hasExternalSubset == 0) && 7960 (ctxt->hasPErefs == 0))) { 7961 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7962 "PEReference: %%%s; not found\n", 7963 name); 7964 } else { 7965 /* 7966 * [ VC: Entity Declared ] 7967 * In a document with an external subset or external 7968 * parameter entities with "standalone='no'", ... 7969 * ... The declaration of a parameter entity must 7970 * precede any reference to it... 
7971 */ 7972 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 7973 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 7974 "PEReference: %%%s; not found\n", 7975 name, NULL); 7976 } else 7977 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7978 "PEReference: %%%s; not found\n", 7979 name, NULL); 7980 ctxt->valid = 0; 7981 } 7982 xmlParserEntityCheck(ctxt, 0, NULL, 0); 7983 } else { 7984 /* 7985 * Internal checking in case the entity quest barfed 7986 */ 7987 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7988 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7989 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7990 "Internal: %%%s; is not a parameter entity\n", 7991 name, NULL); 7992 } else { 7993 xmlChar start[4]; 7994 xmlCharEncoding enc; 7995 7996 if (xmlParserEntityCheck(ctxt, 0, entity, 0)) 7997 return; 7998 7999 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8000 ((ctxt->options & XML_PARSE_NOENT) == 0) && 8001 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 8002 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 8003 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 8004 (ctxt->replaceEntities == 0) && 8005 (ctxt->validate == 0)) 8006 return; 8007 8008 input = xmlNewEntityInputStream(ctxt, entity); 8009 if (xmlPushInput(ctxt, input) < 0) { 8010 xmlFreeInputStream(input); 8011 return; 8012 } 8013 8014 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { 8015 /* 8016 * Get the 4 first bytes and decode the charset 8017 * if enc != XML_CHAR_ENCODING_NONE 8018 * plug some encoding conversion routines. 8019 * Note that, since we may have some non-UTF8 8020 * encoding (like UTF16, bug 135229), the 'length' 8021 * is not known, but we can calculate based upon 8022 * the amount of data in the buffer. 
8023 */ 8024 GROW 8025 if (ctxt->instate == XML_PARSER_EOF) 8026 return; 8027 if ((ctxt->input->end - ctxt->input->cur)>=4) { 8028 start[0] = RAW; 8029 start[1] = NXT(1); 8030 start[2] = NXT(2); 8031 start[3] = NXT(3); 8032 enc = xmlDetectCharEncoding(start, 4); 8033 if (enc != XML_CHAR_ENCODING_NONE) { 8034 xmlSwitchEncoding(ctxt, enc); 8035 } 8036 } 8037 8038 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8039 (IS_BLANK_CH(NXT(5)))) { 8040 xmlParseTextDecl(ctxt); 8041 } 8042 } 8043 } 8044 } 8045 ctxt->hasPErefs = 1; 8046 } 8047 8048 /** 8049 * xmlLoadEntityContent: 8050 * @ctxt: an XML parser context 8051 * @entity: an unloaded system entity 8052 * 8053 * Load the original content of the given system entity from the 8054 * ExternalID/SystemID given. This is to be used for Included in Literal 8055 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8056 * 8057 * Returns 0 in case of success and -1 in case of failure 8058 */ 8059 static int 8060 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8061 xmlParserInputPtr input; 8062 xmlBufferPtr buf; 8063 int l, c; 8064 int count = 0; 8065 8066 if ((ctxt == NULL) || (entity == NULL) || 8067 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8068 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8069 (entity->content != NULL)) { 8070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8071 "xmlLoadEntityContent parameter error"); 8072 return(-1); 8073 } 8074 8075 if (xmlParserDebugEntities) 8076 xmlGenericError(xmlGenericErrorContext, 8077 "Reading %s entity content input\n", entity->name); 8078 8079 buf = xmlBufferCreate(); 8080 if (buf == NULL) { 8081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8082 "xmlLoadEntityContent parameter error"); 8083 return(-1); 8084 } 8085 xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); 8086 8087 input = xmlNewEntityInputStream(ctxt, entity); 8088 if (input == NULL) { 8089 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8090 
"xmlLoadEntityContent input error"); 8091 xmlBufferFree(buf); 8092 return(-1); 8093 } 8094 8095 /* 8096 * Push the entity as the current input, read char by char 8097 * saving to the buffer until the end of the entity or an error 8098 */ 8099 if (xmlPushInput(ctxt, input) < 0) { 8100 xmlBufferFree(buf); 8101 xmlFreeInputStream(input); 8102 return(-1); 8103 } 8104 8105 GROW; 8106 c = CUR_CHAR(l); 8107 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8108 (IS_CHAR(c))) { 8109 xmlBufferAdd(buf, ctxt->input->cur, l); 8110 if (count++ > XML_PARSER_CHUNK_SIZE) { 8111 count = 0; 8112 GROW; 8113 if (ctxt->instate == XML_PARSER_EOF) { 8114 xmlBufferFree(buf); 8115 return(-1); 8116 } 8117 } 8118 NEXTL(l); 8119 c = CUR_CHAR(l); 8120 if (c == 0) { 8121 count = 0; 8122 GROW; 8123 if (ctxt->instate == XML_PARSER_EOF) { 8124 xmlBufferFree(buf); 8125 return(-1); 8126 } 8127 c = CUR_CHAR(l); 8128 } 8129 } 8130 8131 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8132 xmlPopInput(ctxt); 8133 } else if (!IS_CHAR(c)) { 8134 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8135 "xmlLoadEntityContent: invalid char value %d\n", 8136 c); 8137 xmlBufferFree(buf); 8138 return(-1); 8139 } 8140 entity->content = buf->content; 8141 buf->content = NULL; 8142 xmlBufferFree(buf); 8143 8144 return(0); 8145 } 8146 8147 /** 8148 * xmlParseStringPEReference: 8149 * @ctxt: an XML parser context 8150 * @str: a pointer to an index in the string 8151 * 8152 * parse PEReference declarations 8153 * 8154 * [69] PEReference ::= '%' Name ';' 8155 * 8156 * [ WFC: No Recursion ] 8157 * A parsed entity must not contain a recursive 8158 * reference to itself, either directly or indirectly. 8159 * 8160 * [ WFC: Entity Declared ] 8161 * In a document without any DTD, a document with only an internal DTD 8162 * subset which contains no parameter entity references, or a document 8163 * with "standalone='yes'", ... ... 
The declaration of a parameter 8164 * entity must precede any reference to it... 8165 * 8166 * [ VC: Entity Declared ] 8167 * In a document with an external subset or external parameter entities 8168 * with "standalone='no'", ... ... The declaration of a parameter entity 8169 * must precede any reference to it... 8170 * 8171 * [ WFC: In DTD ] 8172 * Parameter-entity references may only appear in the DTD. 8173 * NOTE: misleading but this is handled. 8174 * 8175 * Returns the string of the entity content. 8176 * str is updated to the current value of the index 8177 */ 8178 static xmlEntityPtr 8179 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8180 const xmlChar *ptr; 8181 xmlChar cur; 8182 xmlChar *name; 8183 xmlEntityPtr entity = NULL; 8184 8185 if ((str == NULL) || (*str == NULL)) return(NULL); 8186 ptr = *str; 8187 cur = *ptr; 8188 if (cur != '%') 8189 return(NULL); 8190 ptr++; 8191 name = xmlParseStringName(ctxt, &ptr); 8192 if (name == NULL) { 8193 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8194 "xmlParseStringPEReference: no name\n"); 8195 *str = ptr; 8196 return(NULL); 8197 } 8198 cur = *ptr; 8199 if (cur != ';') { 8200 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8201 xmlFree(name); 8202 *str = ptr; 8203 return(NULL); 8204 } 8205 ptr++; 8206 8207 /* 8208 * Increase the number of entity references parsed 8209 */ 8210 ctxt->nbentities++; 8211 8212 /* 8213 * Request the entity from SAX 8214 */ 8215 if ((ctxt->sax != NULL) && 8216 (ctxt->sax->getParameterEntity != NULL)) 8217 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8218 if (ctxt->instate == XML_PARSER_EOF) { 8219 xmlFree(name); 8220 *str = ptr; 8221 return(NULL); 8222 } 8223 if (entity == NULL) { 8224 /* 8225 * [ WFC: Entity Declared ] 8226 * In a document without any DTD, a document with only an 8227 * internal DTD subset which contains no parameter entity 8228 * references, or a document with "standalone='yes'", ... 8229 * ... 
The declaration of a parameter entity must precede 8230 * any reference to it... 8231 */ 8232 if ((ctxt->standalone == 1) || 8233 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8234 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8235 "PEReference: %%%s; not found\n", name); 8236 } else { 8237 /* 8238 * [ VC: Entity Declared ] 8239 * In a document with an external subset or external 8240 * parameter entities with "standalone='no'", ... 8241 * ... The declaration of a parameter entity must 8242 * precede any reference to it... 8243 */ 8244 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8245 "PEReference: %%%s; not found\n", 8246 name, NULL); 8247 ctxt->valid = 0; 8248 } 8249 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8250 } else { 8251 /* 8252 * Internal checking in case the entity quest barfed 8253 */ 8254 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8255 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8256 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8257 "%%%s; is not a parameter entity\n", 8258 name, NULL); 8259 } 8260 } 8261 ctxt->hasPErefs = 1; 8262 xmlFree(name); 8263 *str = ptr; 8264 return(entity); 8265 } 8266 8267 /** 8268 * xmlParseDocTypeDecl: 8269 * @ctxt: an XML parser context 8270 * 8271 * parse a DOCTYPE declaration 8272 * 8273 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8274 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8275 * 8276 * [ VC: Root Element Type ] 8277 * The Name in the document type declaration must match the element 8278 * type of the root element. 8279 */ 8280 8281 void 8282 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8283 const xmlChar *name = NULL; 8284 xmlChar *ExternalID = NULL; 8285 xmlChar *URI = NULL; 8286 8287 /* 8288 * We know that '<!DOCTYPE' has been detected. 8289 */ 8290 SKIP(9); 8291 8292 SKIP_BLANKS; 8293 8294 /* 8295 * Parse the DOCTYPE name. 
8296 */ 8297 name = xmlParseName(ctxt); 8298 if (name == NULL) { 8299 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8300 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8301 } 8302 ctxt->intSubName = name; 8303 8304 SKIP_BLANKS; 8305 8306 /* 8307 * Check for SystemID and ExternalID 8308 */ 8309 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8310 8311 if ((URI != NULL) || (ExternalID != NULL)) { 8312 ctxt->hasExternalSubset = 1; 8313 } 8314 ctxt->extSubURI = URI; 8315 ctxt->extSubSystem = ExternalID; 8316 8317 SKIP_BLANKS; 8318 8319 /* 8320 * Create and update the internal subset. 8321 */ 8322 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8323 (!ctxt->disableSAX)) 8324 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8325 if (ctxt->instate == XML_PARSER_EOF) 8326 return; 8327 8328 /* 8329 * Is there any internal subset declarations ? 8330 * they are handled separately in xmlParseInternalSubset() 8331 */ 8332 if (RAW == '[') 8333 return; 8334 8335 /* 8336 * We should be at the end of the DOCTYPE declaration. 8337 */ 8338 if (RAW != '>') { 8339 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8340 } 8341 NEXT; 8342 } 8343 8344 /** 8345 * xmlParseInternalSubset: 8346 * @ctxt: an XML parser context 8347 * 8348 * parse the internal subset declaration 8349 * 8350 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8351 */ 8352 8353 static void 8354 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8355 /* 8356 * Is there any DTD definition ? 8357 */ 8358 if (RAW == '[') { 8359 int baseInputNr = ctxt->inputNr; 8360 ctxt->instate = XML_PARSER_DTD; 8361 NEXT; 8362 /* 8363 * Parse the succession of Markup declarations and 8364 * PEReferences. 
8365 * Subsequence (markupdecl | PEReference | S)* 8366 */ 8367 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && 8368 (ctxt->instate != XML_PARSER_EOF)) { 8369 int id = ctxt->input->id; 8370 unsigned long cons = CUR_CONSUMED; 8371 8372 SKIP_BLANKS; 8373 xmlParseMarkupDecl(ctxt); 8374 xmlParsePEReference(ctxt); 8375 8376 /* 8377 * Conditional sections are allowed from external entities included 8378 * by PE References in the internal subset. 8379 */ 8380 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) && 8381 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 8382 xmlParseConditionalSections(ctxt); 8383 } 8384 8385 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) { 8386 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8387 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8388 if (ctxt->inputNr > baseInputNr) 8389 xmlPopInput(ctxt); 8390 else 8391 break; 8392 } 8393 } 8394 if (RAW == ']') { 8395 NEXT; 8396 SKIP_BLANKS; 8397 } 8398 } 8399 8400 /* 8401 * We should be at the end of the DOCTYPE declaration. 8402 */ 8403 if (RAW != '>') { 8404 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8405 return; 8406 } 8407 NEXT; 8408 } 8409 8410 #ifdef LIBXML_SAX1_ENABLED 8411 /** 8412 * xmlParseAttribute: 8413 * @ctxt: an XML parser context 8414 * @value: a xmlChar ** used to store the value of the attribute 8415 * 8416 * parse an attribute 8417 * 8418 * [41] Attribute ::= Name Eq AttValue 8419 * 8420 * [ WFC: No External Entity References ] 8421 * Attribute values cannot contain direct or indirect entity references 8422 * to external entities. 8423 * 8424 * [ WFC: No < in Attribute Values ] 8425 * The replacement text of any entity referred to directly or indirectly in 8426 * an attribute value (other than "<") must not contain a <. 8427 * 8428 * [ VC: Attribute Value Type ] 8429 * The attribute must have been declared; the value must be of the type 8430 * declared for it. 8431 * 8432 * [25] Eq ::= S? '=' S? 
8433 * 8434 * With namespace: 8435 * 8436 * [NS 11] Attribute ::= QName Eq AttValue 8437 * 8438 * Also the case QName == xmlns:??? is handled independently as a namespace 8439 * definition. 8440 * 8441 * Returns the attribute name, and the value in *value. 8442 */ 8443 8444 const xmlChar * 8445 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8446 const xmlChar *name; 8447 xmlChar *val; 8448 8449 *value = NULL; 8450 GROW; 8451 name = xmlParseName(ctxt); 8452 if (name == NULL) { 8453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8454 "error parsing attribute name\n"); 8455 return(NULL); 8456 } 8457 8458 /* 8459 * read the value 8460 */ 8461 SKIP_BLANKS; 8462 if (RAW == '=') { 8463 NEXT; 8464 SKIP_BLANKS; 8465 val = xmlParseAttValue(ctxt); 8466 ctxt->instate = XML_PARSER_CONTENT; 8467 } else { 8468 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8469 "Specification mandates value for attribute %s\n", name); 8470 return(NULL); 8471 } 8472 8473 /* 8474 * Check that xml:lang conforms to the specification 8475 * No more registered as an error, just generate a warning now 8476 * since this was deprecated in XML second edition 8477 */ 8478 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8479 if (!xmlCheckLanguageID(val)) { 8480 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8481 "Malformed value for xml:lang : %s\n", 8482 val, NULL); 8483 } 8484 } 8485 8486 /* 8487 * Check that xml:space conforms to the specification 8488 */ 8489 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8490 if (xmlStrEqual(val, BAD_CAST "default")) 8491 *(ctxt->space) = 0; 8492 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8493 *(ctxt->space) = 1; 8494 else { 8495 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8496 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8497 val, NULL); 8498 } 8499 } 8500 8501 *value = val; 8502 return(name); 8503 } 8504 8505 /** 8506 * xmlParseStartTag: 8507 * @ctxt: an XML parser context 8508 * 8509 * parse a start 
of tag either for rule element or 8510 * EmptyElement. In both case we don't parse the tag closing chars. 8511 * 8512 * [40] STag ::= '<' Name (S Attribute)* S? '>' 8513 * 8514 * [ WFC: Unique Att Spec ] 8515 * No attribute name may appear more than once in the same start-tag or 8516 * empty-element tag. 8517 * 8518 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8519 * 8520 * [ WFC: Unique Att Spec ] 8521 * No attribute name may appear more than once in the same start-tag or 8522 * empty-element tag. 8523 * 8524 * With namespace: 8525 * 8526 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8527 * 8528 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8529 * 8530 * Returns the element name parsed 8531 */ 8532 8533 const xmlChar * 8534 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8535 const xmlChar *name; 8536 const xmlChar *attname; 8537 xmlChar *attvalue; 8538 const xmlChar **atts = ctxt->atts; 8539 int nbatts = 0; 8540 int maxatts = ctxt->maxatts; 8541 int i; 8542 8543 if (RAW != '<') return(NULL); 8544 NEXT1; 8545 8546 name = xmlParseName(ctxt); 8547 if (name == NULL) { 8548 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8549 "xmlParseStartTag: invalid element name\n"); 8550 return(NULL); 8551 } 8552 8553 /* 8554 * Now parse the attributes, it ends up with the ending 8555 * 8556 * (S Attribute)* S? 8557 */ 8558 SKIP_BLANKS; 8559 GROW; 8560 8561 while (((RAW != '>') && 8562 ((RAW != '/') || (NXT(1) != '>')) && 8563 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8564 int id = ctxt->input->id; 8565 unsigned long cons = CUR_CONSUMED; 8566 8567 attname = xmlParseAttribute(ctxt, &attvalue); 8568 if ((attname != NULL) && (attvalue != NULL)) { 8569 /* 8570 * [ WFC: Unique Att Spec ] 8571 * No attribute name may appear more than once in the same 8572 * start-tag or empty-element tag. 
8573 */ 8574 for (i = 0; i < nbatts;i += 2) { 8575 if (xmlStrEqual(atts[i], attname)) { 8576 xmlErrAttributeDup(ctxt, NULL, attname); 8577 xmlFree(attvalue); 8578 goto failed; 8579 } 8580 } 8581 /* 8582 * Add the pair to atts 8583 */ 8584 if (atts == NULL) { 8585 maxatts = 22; /* allow for 10 attrs by default */ 8586 atts = (const xmlChar **) 8587 xmlMalloc(maxatts * sizeof(xmlChar *)); 8588 if (atts == NULL) { 8589 xmlErrMemory(ctxt, NULL); 8590 if (attvalue != NULL) 8591 xmlFree(attvalue); 8592 goto failed; 8593 } 8594 ctxt->atts = atts; 8595 ctxt->maxatts = maxatts; 8596 } else if (nbatts + 4 > maxatts) { 8597 const xmlChar **n; 8598 8599 maxatts *= 2; 8600 n = (const xmlChar **) xmlRealloc((void *) atts, 8601 maxatts * sizeof(const xmlChar *)); 8602 if (n == NULL) { 8603 xmlErrMemory(ctxt, NULL); 8604 if (attvalue != NULL) 8605 xmlFree(attvalue); 8606 goto failed; 8607 } 8608 atts = n; 8609 ctxt->atts = atts; 8610 ctxt->maxatts = maxatts; 8611 } 8612 atts[nbatts++] = attname; 8613 atts[nbatts++] = attvalue; 8614 atts[nbatts] = NULL; 8615 atts[nbatts + 1] = NULL; 8616 } else { 8617 if (attvalue != NULL) 8618 xmlFree(attvalue); 8619 } 8620 8621 failed: 8622 8623 GROW 8624 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8625 break; 8626 if (SKIP_BLANKS == 0) { 8627 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8628 "attributes construct error\n"); 8629 } 8630 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) && 8631 (attname == NULL) && (attvalue == NULL)) { 8632 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8633 "xmlParseStartTag: problem parsing attributes\n"); 8634 break; 8635 } 8636 SHRINK; 8637 GROW; 8638 } 8639 8640 /* 8641 * SAX: Start of Element ! 
8642 */ 8643 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8644 (!ctxt->disableSAX)) { 8645 if (nbatts > 0) 8646 ctxt->sax->startElement(ctxt->userData, name, atts); 8647 else 8648 ctxt->sax->startElement(ctxt->userData, name, NULL); 8649 } 8650 8651 if (atts != NULL) { 8652 /* Free only the content strings */ 8653 for (i = 1;i < nbatts;i+=2) 8654 if (atts[i] != NULL) 8655 xmlFree((xmlChar *) atts[i]); 8656 } 8657 return(name); 8658 } 8659 8660 /** 8661 * xmlParseEndTag1: 8662 * @ctxt: an XML parser context 8663 * @line: line of the start tag 8664 * @nsNr: number of namespaces on the start tag 8665 * 8666 * parse an end of tag 8667 * 8668 * [42] ETag ::= '</' Name S? '>' 8669 * 8670 * With namespace 8671 * 8672 * [NS 9] ETag ::= '</' QName S? '>' 8673 */ 8674 8675 static void 8676 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8677 const xmlChar *name; 8678 8679 GROW; 8680 if ((RAW != '<') || (NXT(1) != '/')) { 8681 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8682 "xmlParseEndTag: '</' not found\n"); 8683 return; 8684 } 8685 SKIP(2); 8686 8687 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8688 8689 /* 8690 * We should definitely be at the ending "S? '>'" part 8691 */ 8692 GROW; 8693 SKIP_BLANKS; 8694 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8695 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8696 } else 8697 NEXT1; 8698 8699 /* 8700 * [ WFC: Element Type Match ] 8701 * The Name in an element's end-tag must match the element type in the 8702 * start-tag. 
8703 * 8704 */ 8705 if (name != (xmlChar*)1) { 8706 if (name == NULL) name = BAD_CAST "unparsable"; 8707 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8708 "Opening and ending tag mismatch: %s line %d and %s\n", 8709 ctxt->name, line, name); 8710 } 8711 8712 /* 8713 * SAX: End of Tag 8714 */ 8715 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8716 (!ctxt->disableSAX)) 8717 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8718 8719 namePop(ctxt); 8720 spacePop(ctxt); 8721 return; 8722 } 8723 8724 /** 8725 * xmlParseEndTag: 8726 * @ctxt: an XML parser context 8727 * 8728 * parse an end of tag 8729 * 8730 * [42] ETag ::= '</' Name S? '>' 8731 * 8732 * With namespace 8733 * 8734 * [NS 9] ETag ::= '</' QName S? '>' 8735 */ 8736 8737 void 8738 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8739 xmlParseEndTag1(ctxt, 0); 8740 } 8741 #endif /* LIBXML_SAX1_ENABLED */ 8742 8743 /************************************************************************ 8744 * * 8745 * SAX 2 specific operations * 8746 * * 8747 ************************************************************************/ 8748 8749 /* 8750 * xmlGetNamespace: 8751 * @ctxt: an XML parser context 8752 * @prefix: the prefix to lookup 8753 * 8754 * Lookup the namespace name for the @prefix (which ca be NULL) 8755 * The prefix must come from the @ctxt->dict dictionary 8756 * 8757 * Returns the namespace name or NULL if not bound 8758 */ 8759 static const xmlChar * 8760 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8761 int i; 8762 8763 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8764 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8765 if (ctxt->nsTab[i] == prefix) { 8766 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8767 return(NULL); 8768 return(ctxt->nsTab[i + 1]); 8769 } 8770 return(NULL); 8771 } 8772 8773 /** 8774 * xmlParseQName: 8775 * @ctxt: an XML parser context 8776 * @prefix: pointer to store the prefix part 8777 * 8778 * parse an XML Namespace QName 8779 * 8780 * [6] 
QName ::= (Prefix ':')? LocalPart 8781 * [7] Prefix ::= NCName 8782 * [8] LocalPart ::= NCName 8783 * 8784 * Returns the Name parsed or NULL 8785 */ 8786 8787 static const xmlChar * 8788 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8789 const xmlChar *l, *p; 8790 8791 GROW; 8792 8793 l = xmlParseNCName(ctxt); 8794 if (l == NULL) { 8795 if (CUR == ':') { 8796 l = xmlParseName(ctxt); 8797 if (l != NULL) { 8798 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8799 "Failed to parse QName '%s'\n", l, NULL, NULL); 8800 *prefix = NULL; 8801 return(l); 8802 } 8803 } 8804 return(NULL); 8805 } 8806 if (CUR == ':') { 8807 NEXT; 8808 p = l; 8809 l = xmlParseNCName(ctxt); 8810 if (l == NULL) { 8811 xmlChar *tmp; 8812 8813 if (ctxt->instate == XML_PARSER_EOF) 8814 return(NULL); 8815 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8816 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8817 l = xmlParseNmtoken(ctxt); 8818 if (l == NULL) { 8819 if (ctxt->instate == XML_PARSER_EOF) 8820 return(NULL); 8821 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8822 } else { 8823 tmp = xmlBuildQName(l, p, NULL, 0); 8824 xmlFree((char *)l); 8825 } 8826 p = xmlDictLookup(ctxt->dict, tmp, -1); 8827 if (tmp != NULL) xmlFree(tmp); 8828 *prefix = NULL; 8829 return(p); 8830 } 8831 if (CUR == ':') { 8832 xmlChar *tmp; 8833 8834 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8835 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8836 NEXT; 8837 tmp = (xmlChar *) xmlParseName(ctxt); 8838 if (tmp != NULL) { 8839 tmp = xmlBuildQName(tmp, l, NULL, 0); 8840 l = xmlDictLookup(ctxt->dict, tmp, -1); 8841 if (tmp != NULL) xmlFree(tmp); 8842 *prefix = p; 8843 return(l); 8844 } 8845 if (ctxt->instate == XML_PARSER_EOF) 8846 return(NULL); 8847 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8848 l = xmlDictLookup(ctxt->dict, tmp, -1); 8849 if (tmp != NULL) xmlFree(tmp); 8850 *prefix = p; 8851 return(l); 8852 } 8853 *prefix = p; 8854 } else 8855 *prefix = NULL; 8856 return(l); 8857 } 8858 8859 /** 8860 * xmlParseQNameAndCompare: 8861 * @ctxt: an 
XML parser context 8862 * @name: the localname 8863 * @prefix: the prefix, if any. 8864 * 8865 * parse an XML name and compares for match 8866 * (specialized for endtag parsing) 8867 * 8868 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8869 * and the name for mismatch 8870 */ 8871 8872 static const xmlChar * 8873 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8874 xmlChar const *prefix) { 8875 const xmlChar *cmp; 8876 const xmlChar *in; 8877 const xmlChar *ret; 8878 const xmlChar *prefix2; 8879 8880 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8881 8882 GROW; 8883 in = ctxt->input->cur; 8884 8885 cmp = prefix; 8886 while (*in != 0 && *in == *cmp) { 8887 ++in; 8888 ++cmp; 8889 } 8890 if ((*cmp == 0) && (*in == ':')) { 8891 in++; 8892 cmp = name; 8893 while (*in != 0 && *in == *cmp) { 8894 ++in; 8895 ++cmp; 8896 } 8897 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8898 /* success */ 8899 ctxt->input->col += in - ctxt->input->cur; 8900 ctxt->input->cur = in; 8901 return((const xmlChar*) 1); 8902 } 8903 } 8904 /* 8905 * all strings coms from the dictionary, equality can be done directly 8906 */ 8907 ret = xmlParseQName (ctxt, &prefix2); 8908 if ((ret == name) && (prefix == prefix2)) 8909 return((const xmlChar*) 1); 8910 return ret; 8911 } 8912 8913 /** 8914 * xmlParseAttValueInternal: 8915 * @ctxt: an XML parser context 8916 * @len: attribute len result 8917 * @alloc: whether the attribute was reallocated as a new string 8918 * @normalize: if 1 then further non-CDATA normalization must be done 8919 * 8920 * parse a value for an attribute. 8921 * NOTE: if no normalization is needed, the routine will return pointers 8922 * directly from the data buffer. 
8923 * 8924 * 3.3.3 Attribute-Value Normalization: 8925 * Before the value of an attribute is passed to the application or 8926 * checked for validity, the XML processor must normalize it as follows: 8927 * - a character reference is processed by appending the referenced 8928 * character to the attribute value 8929 * - an entity reference is processed by recursively processing the 8930 * replacement text of the entity 8931 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8932 * appending #x20 to the normalized value, except that only a single 8933 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8934 * parsed entity or the literal entity value of an internal parsed entity 8935 * - other characters are processed by appending them to the normalized value 8936 * If the declared value is not CDATA, then the XML processor must further 8937 * process the normalized attribute value by discarding any leading and 8938 * trailing space (#x20) characters, and by replacing sequences of space 8939 * (#x20) characters by a single space (#x20) character. 8940 * All attributes for which no declaration has been read should be treated 8941 * by a non-validating parser as if declared CDATA. 8942 * 8943 * Returns the AttValue parsed or NULL. The value has to be freed by the 8944 * caller if it was copied, this can be detected by val[*len] == 0. 
8945 */ 8946 8947 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \ 8948 const xmlChar *oldbase = ctxt->input->base;\ 8949 GROW;\ 8950 if (ctxt->instate == XML_PARSER_EOF)\ 8951 return(NULL);\ 8952 if (oldbase != ctxt->input->base) {\ 8953 ptrdiff_t delta = ctxt->input->base - oldbase;\ 8954 start = start + delta;\ 8955 in = in + delta;\ 8956 }\ 8957 end = ctxt->input->end; 8958 8959 static xmlChar * 8960 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8961 int normalize) 8962 { 8963 xmlChar limit = 0; 8964 const xmlChar *in = NULL, *start, *end, *last; 8965 xmlChar *ret = NULL; 8966 int line, col; 8967 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 8968 XML_MAX_HUGE_LENGTH : 8969 XML_MAX_TEXT_LENGTH; 8970 8971 GROW; 8972 in = (xmlChar *) CUR_PTR; 8973 line = ctxt->input->line; 8974 col = ctxt->input->col; 8975 if (*in != '"' && *in != '\'') { 8976 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8977 return (NULL); 8978 } 8979 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8980 8981 /* 8982 * try to handle in this routine the most common case where no 8983 * allocation of a new string is required and where content is 8984 * pure ASCII. 
8985 */ 8986 limit = *in++; 8987 col++; 8988 end = ctxt->input->end; 8989 start = in; 8990 if (in >= end) { 8991 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 8992 } 8993 if (normalize) { 8994 /* 8995 * Skip any leading spaces 8996 */ 8997 while ((in < end) && (*in != limit) && 8998 ((*in == 0x20) || (*in == 0x9) || 8999 (*in == 0xA) || (*in == 0xD))) { 9000 if (*in == 0xA) { 9001 line++; col = 1; 9002 } else { 9003 col++; 9004 } 9005 in++; 9006 start = in; 9007 if (in >= end) { 9008 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9009 if ((in - start) > maxLength) { 9010 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9011 "AttValue length too long\n"); 9012 return(NULL); 9013 } 9014 } 9015 } 9016 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9017 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9018 col++; 9019 if ((*in++ == 0x20) && (*in == 0x20)) break; 9020 if (in >= end) { 9021 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9022 if ((in - start) > maxLength) { 9023 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9024 "AttValue length too long\n"); 9025 return(NULL); 9026 } 9027 } 9028 } 9029 last = in; 9030 /* 9031 * skip the trailing blanks 9032 */ 9033 while ((last[-1] == 0x20) && (last > start)) last--; 9034 while ((in < end) && (*in != limit) && 9035 ((*in == 0x20) || (*in == 0x9) || 9036 (*in == 0xA) || (*in == 0xD))) { 9037 if (*in == 0xA) { 9038 line++, col = 1; 9039 } else { 9040 col++; 9041 } 9042 in++; 9043 if (in >= end) { 9044 const xmlChar *oldbase = ctxt->input->base; 9045 GROW; 9046 if (ctxt->instate == XML_PARSER_EOF) 9047 return(NULL); 9048 if (oldbase != ctxt->input->base) { 9049 ptrdiff_t delta = ctxt->input->base - oldbase; 9050 start = start + delta; 9051 in = in + delta; 9052 last = last + delta; 9053 } 9054 end = ctxt->input->end; 9055 if ((in - start) > maxLength) { 9056 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9057 "AttValue length too long\n"); 9058 return(NULL); 9059 } 9060 } 9061 } 
9062 if ((in - start) > maxLength) { 9063 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9064 "AttValue length too long\n"); 9065 return(NULL); 9066 } 9067 if (*in != limit) goto need_complex; 9068 } else { 9069 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9070 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9071 in++; 9072 col++; 9073 if (in >= end) { 9074 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9075 if ((in - start) > maxLength) { 9076 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9077 "AttValue length too long\n"); 9078 return(NULL); 9079 } 9080 } 9081 } 9082 last = in; 9083 if ((in - start) > maxLength) { 9084 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9085 "AttValue length too long\n"); 9086 return(NULL); 9087 } 9088 if (*in != limit) goto need_complex; 9089 } 9090 in++; 9091 col++; 9092 if (len != NULL) { 9093 *len = last - start; 9094 ret = (xmlChar *) start; 9095 } else { 9096 if (alloc) *alloc = 1; 9097 ret = xmlStrndup(start, last - start); 9098 } 9099 CUR_PTR = in; 9100 ctxt->input->line = line; 9101 ctxt->input->col = col; 9102 if (alloc) *alloc = 0; 9103 return ret; 9104 need_complex: 9105 if (alloc) *alloc = 1; 9106 return xmlParseAttValueComplex(ctxt, len, normalize); 9107 } 9108 9109 /** 9110 * xmlParseAttribute2: 9111 * @ctxt: an XML parser context 9112 * @pref: the element prefix 9113 * @elem: the element name 9114 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9115 * @value: a xmlChar ** used to store the value of the attribute 9116 * @len: an int * to save the length of the attribute 9117 * @alloc: an int * to indicate if the attribute was allocated 9118 * 9119 * parse an attribute in the new SAX2 framework. 9120 * 9121 * Returns the attribute name, and the value in *value, . 
9122 */ 9123 9124 static const xmlChar * 9125 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9126 const xmlChar * pref, const xmlChar * elem, 9127 const xmlChar ** prefix, xmlChar ** value, 9128 int *len, int *alloc) 9129 { 9130 const xmlChar *name; 9131 xmlChar *val, *internal_val = NULL; 9132 int normalize = 0; 9133 9134 *value = NULL; 9135 GROW; 9136 name = xmlParseQName(ctxt, prefix); 9137 if (name == NULL) { 9138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9139 "error parsing attribute name\n"); 9140 return (NULL); 9141 } 9142 9143 /* 9144 * get the type if needed 9145 */ 9146 if (ctxt->attsSpecial != NULL) { 9147 int type; 9148 9149 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial, 9150 pref, elem, *prefix, name); 9151 if (type != 0) 9152 normalize = 1; 9153 } 9154 9155 /* 9156 * read the value 9157 */ 9158 SKIP_BLANKS; 9159 if (RAW == '=') { 9160 NEXT; 9161 SKIP_BLANKS; 9162 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9163 if (normalize) { 9164 /* 9165 * Sometimes a second normalisation pass for spaces is needed 9166 * but that only happens if charrefs or entities references 9167 * have been used in the attribute value, i.e. the attribute 9168 * value have been extracted in an allocated string already. 
9169 */ 9170 if (*alloc) { 9171 const xmlChar *val2; 9172 9173 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9174 if ((val2 != NULL) && (val2 != val)) { 9175 xmlFree(val); 9176 val = (xmlChar *) val2; 9177 } 9178 } 9179 } 9180 ctxt->instate = XML_PARSER_CONTENT; 9181 } else { 9182 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9183 "Specification mandates value for attribute %s\n", 9184 name); 9185 return (NULL); 9186 } 9187 9188 if (*prefix == ctxt->str_xml) { 9189 /* 9190 * Check that xml:lang conforms to the specification 9191 * No more registered as an error, just generate a warning now 9192 * since this was deprecated in XML second edition 9193 */ 9194 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9195 internal_val = xmlStrndup(val, *len); 9196 if (!xmlCheckLanguageID(internal_val)) { 9197 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9198 "Malformed value for xml:lang : %s\n", 9199 internal_val, NULL); 9200 } 9201 } 9202 9203 /* 9204 * Check that xml:space conforms to the specification 9205 */ 9206 if (xmlStrEqual(name, BAD_CAST "space")) { 9207 internal_val = xmlStrndup(val, *len); 9208 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9209 *(ctxt->space) = 0; 9210 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9211 *(ctxt->space) = 1; 9212 else { 9213 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9214 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9215 internal_val, NULL); 9216 } 9217 } 9218 if (internal_val) { 9219 xmlFree(internal_val); 9220 } 9221 } 9222 9223 *value = val; 9224 return (name); 9225 } 9226 /** 9227 * xmlParseStartTag2: 9228 * @ctxt: an XML parser context 9229 * 9230 * parse a start of tag either for rule element or 9231 * EmptyElement. In both case we don't parse the tag closing chars. 9232 * This routine is called when running SAX2 parsing 9233 * 9234 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9235 * 9236 * [ WFC: Unique Att Spec ] 9237 * No attribute name may appear more than once in the same start-tag or 9238 * empty-element tag. 9239 * 9240 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9241 * 9242 * [ WFC: Unique Att Spec ] 9243 * No attribute name may appear more than once in the same start-tag or 9244 * empty-element tag. 9245 * 9246 * With namespace: 9247 * 9248 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9249 * 9250 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9251 * 9252 * Returns the element name parsed 9253 */ 9254 9255 static const xmlChar * 9256 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9257 const xmlChar **URI, int *tlen) { 9258 const xmlChar *localname; 9259 const xmlChar *prefix; 9260 const xmlChar *attname; 9261 const xmlChar *aprefix; 9262 const xmlChar *nsname; 9263 xmlChar *attvalue; 9264 const xmlChar **atts = ctxt->atts; 9265 int maxatts = ctxt->maxatts; 9266 int nratts, nbatts, nbdef, inputid; 9267 int i, j, nbNs, attval; 9268 unsigned long cur; 9269 int nsNr = ctxt->nsNr; 9270 9271 if (RAW != '<') return(NULL); 9272 NEXT1; 9273 9274 /* 9275 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9276 * point since the attribute values may be stored as pointers to 9277 * the buffer and calling SHRINK would destroy them ! 9278 * The Shrinking is only possible once the full set of attribute 9279 * callbacks have been done. 9280 */ 9281 SHRINK; 9282 cur = ctxt->input->cur - ctxt->input->base; 9283 inputid = ctxt->input->id; 9284 nbatts = 0; 9285 nratts = 0; 9286 nbdef = 0; 9287 nbNs = 0; 9288 attval = 0; 9289 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9290 ctxt->nsNr = nsNr; 9291 9292 localname = xmlParseQName(ctxt, &prefix); 9293 if (localname == NULL) { 9294 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9295 "StartTag: invalid element name\n"); 9296 return(NULL); 9297 } 9298 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9299 9300 /* 9301 * Now parse the attributes, it ends up with the ending 9302 * 9303 * (S Attribute)* S? 9304 */ 9305 SKIP_BLANKS; 9306 GROW; 9307 9308 while (((RAW != '>') && 9309 ((RAW != '/') || (NXT(1) != '>')) && 9310 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9311 int id = ctxt->input->id; 9312 unsigned long cons = CUR_CONSUMED; 9313 int len = -1, alloc = 0; 9314 9315 attname = xmlParseAttribute2(ctxt, prefix, localname, 9316 &aprefix, &attvalue, &len, &alloc); 9317 if ((attname == NULL) || (attvalue == NULL)) 9318 goto next_attr; 9319 if (len < 0) len = xmlStrlen(attvalue); 9320 9321 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9322 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9323 xmlURIPtr uri; 9324 9325 if (URL == NULL) { 9326 xmlErrMemory(ctxt, "dictionary allocation failure"); 9327 if ((attvalue != NULL) && (alloc != 0)) 9328 xmlFree(attvalue); 9329 localname = NULL; 9330 goto done; 9331 } 9332 if (*URL != 0) { 9333 uri = xmlParseURI((const char *) URL); 9334 if (uri == NULL) { 9335 xmlNsErr(ctxt, XML_WAR_NS_URI, 9336 "xmlns: '%s' is not a valid URI\n", 9337 URL, NULL, NULL); 9338 } else { 9339 if (uri->scheme == NULL) { 9340 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9341 "xmlns: URI %s is not absolute\n", 9342 URL, NULL, NULL); 9343 } 9344 xmlFreeURI(uri); 9345 } 9346 if (URL == ctxt->str_xml_ns) { 9347 if (attname != ctxt->str_xml) { 9348 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9349 "xml namespace URI cannot be the default namespace\n", 9350 NULL, NULL, NULL); 9351 } 9352 goto next_attr; 9353 } 9354 if ((len == 29) && 9355 (xmlStrEqual(URL, 9356 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9357 xmlNsErr(ctxt, 
XML_NS_ERR_XML_NAMESPACE, 9358 "reuse of the xmlns namespace name is forbidden\n", 9359 NULL, NULL, NULL); 9360 goto next_attr; 9361 } 9362 } 9363 /* 9364 * check that it's not a defined namespace 9365 */ 9366 for (j = 1;j <= nbNs;j++) 9367 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9368 break; 9369 if (j <= nbNs) 9370 xmlErrAttributeDup(ctxt, NULL, attname); 9371 else 9372 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9373 9374 } else if (aprefix == ctxt->str_xmlns) { 9375 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9376 xmlURIPtr uri; 9377 9378 if (attname == ctxt->str_xml) { 9379 if (URL != ctxt->str_xml_ns) { 9380 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9381 "xml namespace prefix mapped to wrong URI\n", 9382 NULL, NULL, NULL); 9383 } 9384 /* 9385 * Do not keep a namespace definition node 9386 */ 9387 goto next_attr; 9388 } 9389 if (URL == ctxt->str_xml_ns) { 9390 if (attname != ctxt->str_xml) { 9391 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9392 "xml namespace URI mapped to wrong prefix\n", 9393 NULL, NULL, NULL); 9394 } 9395 goto next_attr; 9396 } 9397 if (attname == ctxt->str_xmlns) { 9398 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9399 "redefinition of the xmlns prefix is forbidden\n", 9400 NULL, NULL, NULL); 9401 goto next_attr; 9402 } 9403 if ((len == 29) && 9404 (xmlStrEqual(URL, 9405 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9407 "reuse of the xmlns namespace name is forbidden\n", 9408 NULL, NULL, NULL); 9409 goto next_attr; 9410 } 9411 if ((URL == NULL) || (URL[0] == 0)) { 9412 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9413 "xmlns:%s: Empty XML namespace is not allowed\n", 9414 attname, NULL, NULL); 9415 goto next_attr; 9416 } else { 9417 uri = xmlParseURI((const char *) URL); 9418 if (uri == NULL) { 9419 xmlNsErr(ctxt, XML_WAR_NS_URI, 9420 "xmlns:%s: '%s' is not a valid URI\n", 9421 attname, URL, NULL); 9422 } else { 9423 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9424 xmlNsWarn(ctxt, 
XML_WAR_NS_URI_RELATIVE, 9425 "xmlns:%s: URI %s is not absolute\n", 9426 attname, URL, NULL); 9427 } 9428 xmlFreeURI(uri); 9429 } 9430 } 9431 9432 /* 9433 * check that it's not a defined namespace 9434 */ 9435 for (j = 1;j <= nbNs;j++) 9436 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9437 break; 9438 if (j <= nbNs) 9439 xmlErrAttributeDup(ctxt, aprefix, attname); 9440 else 9441 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9442 9443 } else { 9444 /* 9445 * Add the pair to atts 9446 */ 9447 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9448 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9449 goto next_attr; 9450 } 9451 maxatts = ctxt->maxatts; 9452 atts = ctxt->atts; 9453 } 9454 ctxt->attallocs[nratts++] = alloc; 9455 atts[nbatts++] = attname; 9456 atts[nbatts++] = aprefix; 9457 /* 9458 * The namespace URI field is used temporarily to point at the 9459 * base of the current input buffer for non-alloced attributes. 9460 * When the input buffer is reallocated, all the pointers become 9461 * invalid, but they can be reconstructed later. 
9462 */ 9463 if (alloc) 9464 atts[nbatts++] = NULL; 9465 else 9466 atts[nbatts++] = ctxt->input->base; 9467 atts[nbatts++] = attvalue; 9468 attvalue += len; 9469 atts[nbatts++] = attvalue; 9470 /* 9471 * tag if some deallocation is needed 9472 */ 9473 if (alloc != 0) attval = 1; 9474 attvalue = NULL; /* moved into atts */ 9475 } 9476 9477 next_attr: 9478 if ((attvalue != NULL) && (alloc != 0)) { 9479 xmlFree(attvalue); 9480 attvalue = NULL; 9481 } 9482 9483 GROW 9484 if (ctxt->instate == XML_PARSER_EOF) 9485 break; 9486 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9487 break; 9488 if (SKIP_BLANKS == 0) { 9489 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9490 "attributes construct error\n"); 9491 break; 9492 } 9493 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) && 9494 (attname == NULL) && (attvalue == NULL)) { 9495 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9496 "xmlParseStartTag: problem parsing attributes\n"); 9497 break; 9498 } 9499 GROW; 9500 } 9501 9502 if (ctxt->input->id != inputid) { 9503 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9504 "Unexpected change of input\n"); 9505 localname = NULL; 9506 goto done; 9507 } 9508 9509 /* Reconstruct attribute value pointers. */ 9510 for (i = 0, j = 0; j < nratts; i += 5, j++) { 9511 if (atts[i+2] != NULL) { 9512 /* 9513 * Arithmetic on dangling pointers is technically undefined 9514 * behavior, but well... 
9515 */ 9516 ptrdiff_t offset = ctxt->input->base - atts[i+2]; 9517 atts[i+2] = NULL; /* Reset repurposed namespace URI */ 9518 atts[i+3] += offset; /* value */ 9519 atts[i+4] += offset; /* valuend */ 9520 } 9521 } 9522 9523 /* 9524 * The attributes defaulting 9525 */ 9526 if (ctxt->attsDefault != NULL) { 9527 xmlDefAttrsPtr defaults; 9528 9529 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9530 if (defaults != NULL) { 9531 for (i = 0;i < defaults->nbAttrs;i++) { 9532 attname = defaults->values[5 * i]; 9533 aprefix = defaults->values[5 * i + 1]; 9534 9535 /* 9536 * special work for namespaces defaulted defs 9537 */ 9538 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9539 /* 9540 * check that it's not a defined namespace 9541 */ 9542 for (j = 1;j <= nbNs;j++) 9543 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9544 break; 9545 if (j <= nbNs) continue; 9546 9547 nsname = xmlGetNamespace(ctxt, NULL); 9548 if (nsname != defaults->values[5 * i + 2]) { 9549 if (nsPush(ctxt, NULL, 9550 defaults->values[5 * i + 2]) > 0) 9551 nbNs++; 9552 } 9553 } else if (aprefix == ctxt->str_xmlns) { 9554 /* 9555 * check that it's not a defined namespace 9556 */ 9557 for (j = 1;j <= nbNs;j++) 9558 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9559 break; 9560 if (j <= nbNs) continue; 9561 9562 nsname = xmlGetNamespace(ctxt, attname); 9563 if (nsname != defaults->values[2]) { 9564 if (nsPush(ctxt, attname, 9565 defaults->values[5 * i + 2]) > 0) 9566 nbNs++; 9567 } 9568 } else { 9569 /* 9570 * check that it's not a defined attribute 9571 */ 9572 for (j = 0;j < nbatts;j+=5) { 9573 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9574 break; 9575 } 9576 if (j < nbatts) continue; 9577 9578 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9579 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9580 localname = NULL; 9581 goto done; 9582 } 9583 maxatts = ctxt->maxatts; 9584 atts = ctxt->atts; 9585 } 9586 atts[nbatts++] = attname; 9587 atts[nbatts++] = aprefix; 9588 if 
(aprefix == NULL) 9589 atts[nbatts++] = NULL; 9590 else 9591 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9592 atts[nbatts++] = defaults->values[5 * i + 2]; 9593 atts[nbatts++] = defaults->values[5 * i + 3]; 9594 if ((ctxt->standalone == 1) && 9595 (defaults->values[5 * i + 4] != NULL)) { 9596 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9597 "standalone: attribute %s on %s defaulted from external subset\n", 9598 attname, localname); 9599 } 9600 nbdef++; 9601 } 9602 } 9603 } 9604 } 9605 9606 /* 9607 * The attributes checkings 9608 */ 9609 for (i = 0; i < nbatts;i += 5) { 9610 /* 9611 * The default namespace does not apply to attribute names. 9612 */ 9613 if (atts[i + 1] != NULL) { 9614 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9615 if (nsname == NULL) { 9616 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9617 "Namespace prefix %s for %s on %s is not defined\n", 9618 atts[i + 1], atts[i], localname); 9619 } 9620 atts[i + 2] = nsname; 9621 } else 9622 nsname = NULL; 9623 /* 9624 * [ WFC: Unique Att Spec ] 9625 * No attribute name may appear more than once in the same 9626 * start-tag or empty-element tag. 9627 * As extended by the Namespace in XML REC. 9628 */ 9629 for (j = 0; j < i;j += 5) { 9630 if (atts[i] == atts[j]) { 9631 if (atts[i+1] == atts[j+1]) { 9632 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9633 break; 9634 } 9635 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9636 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9637 "Namespaced Attribute %s in '%s' redefined\n", 9638 atts[i], nsname, NULL); 9639 break; 9640 } 9641 } 9642 } 9643 } 9644 9645 nsname = xmlGetNamespace(ctxt, prefix); 9646 if ((prefix != NULL) && (nsname == NULL)) { 9647 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9648 "Namespace prefix %s on %s is not defined\n", 9649 prefix, localname, NULL); 9650 } 9651 *pref = prefix; 9652 *URI = nsname; 9653 9654 /* 9655 * SAX: Start of Element ! 
9656 */ 9657 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9658 (!ctxt->disableSAX)) { 9659 if (nbNs > 0) 9660 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9661 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9662 nbatts / 5, nbdef, atts); 9663 else 9664 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9665 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9666 } 9667 9668 done: 9669 /* 9670 * Free up attribute allocated strings if needed 9671 */ 9672 if (attval != 0) { 9673 for (i = 3,j = 0; j < nratts;i += 5,j++) 9674 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9675 xmlFree((xmlChar *) atts[i]); 9676 } 9677 9678 return(localname); 9679 } 9680 9681 /** 9682 * xmlParseEndTag2: 9683 * @ctxt: an XML parser context 9684 * @line: line of the start tag 9685 * @nsNr: number of namespaces on the start tag 9686 * 9687 * parse an end of tag 9688 * 9689 * [42] ETag ::= '</' Name S? '>' 9690 * 9691 * With namespace 9692 * 9693 * [NS 9] ETag ::= '</' QName S? '>' 9694 */ 9695 9696 static void 9697 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) { 9698 const xmlChar *name; 9699 9700 GROW; 9701 if ((RAW != '<') || (NXT(1) != '/')) { 9702 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9703 return; 9704 } 9705 SKIP(2); 9706 9707 if (tag->prefix == NULL) 9708 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9709 else 9710 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix); 9711 9712 /* 9713 * We should definitely be at the ending "S? '>'" part 9714 */ 9715 GROW; 9716 if (ctxt->instate == XML_PARSER_EOF) 9717 return; 9718 SKIP_BLANKS; 9719 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9720 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9721 } else 9722 NEXT1; 9723 9724 /* 9725 * [ WFC: Element Type Match ] 9726 * The Name in an element's end-tag must match the element type in the 9727 * start-tag. 
9728 * 9729 */ 9730 if (name != (xmlChar*)1) { 9731 if (name == NULL) name = BAD_CAST "unparsable"; 9732 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9733 "Opening and ending tag mismatch: %s line %d and %s\n", 9734 ctxt->name, tag->line, name); 9735 } 9736 9737 /* 9738 * SAX: End of Tag 9739 */ 9740 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9741 (!ctxt->disableSAX)) 9742 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix, 9743 tag->URI); 9744 9745 spacePop(ctxt); 9746 if (tag->nsNr != 0) 9747 nsPop(ctxt, tag->nsNr); 9748 } 9749 9750 /** 9751 * xmlParseCDSect: 9752 * @ctxt: an XML parser context 9753 * 9754 * Parse escaped pure raw content. 9755 * 9756 * [18] CDSect ::= CDStart CData CDEnd 9757 * 9758 * [19] CDStart ::= '<![CDATA[' 9759 * 9760 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9761 * 9762 * [21] CDEnd ::= ']]>' 9763 */ 9764 void 9765 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9766 xmlChar *buf = NULL; 9767 int len = 0; 9768 int size = XML_PARSER_BUFFER_SIZE; 9769 int r, rl; 9770 int s, sl; 9771 int cur, l; 9772 int count = 0; 9773 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
9774 XML_MAX_HUGE_LENGTH : 9775 XML_MAX_TEXT_LENGTH; 9776 9777 /* Check 2.6.0 was NXT(0) not RAW */ 9778 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9779 SKIP(9); 9780 } else 9781 return; 9782 9783 ctxt->instate = XML_PARSER_CDATA_SECTION; 9784 r = CUR_CHAR(rl); 9785 if (!IS_CHAR(r)) { 9786 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9787 ctxt->instate = XML_PARSER_CONTENT; 9788 return; 9789 } 9790 NEXTL(rl); 9791 s = CUR_CHAR(sl); 9792 if (!IS_CHAR(s)) { 9793 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9794 ctxt->instate = XML_PARSER_CONTENT; 9795 return; 9796 } 9797 NEXTL(sl); 9798 cur = CUR_CHAR(l); 9799 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9800 if (buf == NULL) { 9801 xmlErrMemory(ctxt, NULL); 9802 return; 9803 } 9804 while (IS_CHAR(cur) && 9805 ((r != ']') || (s != ']') || (cur != '>'))) { 9806 if (len + 5 >= size) { 9807 xmlChar *tmp; 9808 9809 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9810 if (tmp == NULL) { 9811 xmlFree(buf); 9812 xmlErrMemory(ctxt, NULL); 9813 return; 9814 } 9815 buf = tmp; 9816 size *= 2; 9817 } 9818 COPY_BUF(rl,buf,len,r); 9819 r = s; 9820 rl = sl; 9821 s = cur; 9822 sl = l; 9823 count++; 9824 if (count > 50) { 9825 SHRINK; 9826 GROW; 9827 if (ctxt->instate == XML_PARSER_EOF) { 9828 xmlFree(buf); 9829 return; 9830 } 9831 count = 0; 9832 } 9833 NEXTL(l); 9834 cur = CUR_CHAR(l); 9835 if (len > maxLength) { 9836 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9837 "CData section too big found\n"); 9838 xmlFree(buf); 9839 return; 9840 } 9841 } 9842 buf[len] = 0; 9843 ctxt->instate = XML_PARSER_CONTENT; 9844 if (cur != '>') { 9845 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9846 "CData section not finished\n%.50s\n", buf); 9847 xmlFree(buf); 9848 return; 9849 } 9850 NEXTL(l); 9851 9852 /* 9853 * OK the buffer is to be consumed as cdata. 
9854 */ 9855 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9856 if (ctxt->sax->cdataBlock != NULL) 9857 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9858 else if (ctxt->sax->characters != NULL) 9859 ctxt->sax->characters(ctxt->userData, buf, len); 9860 } 9861 xmlFree(buf); 9862 } 9863 9864 /** 9865 * xmlParseContentInternal: 9866 * @ctxt: an XML parser context 9867 * 9868 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of 9869 * unexpected EOF to the caller. 9870 */ 9871 9872 static void 9873 xmlParseContentInternal(xmlParserCtxtPtr ctxt) { 9874 int nameNr = ctxt->nameNr; 9875 9876 GROW; 9877 while ((RAW != 0) && 9878 (ctxt->instate != XML_PARSER_EOF)) { 9879 int id = ctxt->input->id; 9880 unsigned long cons = CUR_CONSUMED; 9881 const xmlChar *cur = ctxt->input->cur; 9882 9883 /* 9884 * First case : a Processing Instruction. 9885 */ 9886 if ((*cur == '<') && (cur[1] == '?')) { 9887 xmlParsePI(ctxt); 9888 } 9889 9890 /* 9891 * Second case : a CDSection 9892 */ 9893 /* 2.6.0 test was *cur not RAW */ 9894 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9895 xmlParseCDSect(ctxt); 9896 } 9897 9898 /* 9899 * Third case : a comment 9900 */ 9901 else if ((*cur == '<') && (NXT(1) == '!') && 9902 (NXT(2) == '-') && (NXT(3) == '-')) { 9903 xmlParseComment(ctxt); 9904 ctxt->instate = XML_PARSER_CONTENT; 9905 } 9906 9907 /* 9908 * Fourth case : a sub-element. 9909 */ 9910 else if (*cur == '<') { 9911 if (NXT(1) == '/') { 9912 if (ctxt->nameNr <= nameNr) 9913 break; 9914 xmlParseElementEnd(ctxt); 9915 } else { 9916 xmlParseElementStart(ctxt); 9917 } 9918 } 9919 9920 /* 9921 * Fifth case : a reference. If if has not been resolved, 9922 * parsing returns it's Name, create the node 9923 */ 9924 9925 else if (*cur == '&') { 9926 xmlParseReference(ctxt); 9927 } 9928 9929 /* 9930 * Last case, text. Note that References are handled directly. 
9931 */ 9932 else { 9933 xmlParseCharData(ctxt, 0); 9934 } 9935 9936 GROW; 9937 SHRINK; 9938 9939 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) { 9940 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9941 "detected an error in element content\n"); 9942 xmlHaltParser(ctxt); 9943 break; 9944 } 9945 } 9946 } 9947 9948 /** 9949 * xmlParseContent: 9950 * @ctxt: an XML parser context 9951 * 9952 * Parse a content sequence. Stops at EOF or '</'. 9953 * 9954 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9955 */ 9956 9957 void 9958 xmlParseContent(xmlParserCtxtPtr ctxt) { 9959 int nameNr = ctxt->nameNr; 9960 9961 xmlParseContentInternal(ctxt); 9962 9963 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) { 9964 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 9965 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 9966 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9967 "Premature end of data in tag %s line %d\n", 9968 name, line, NULL); 9969 } 9970 } 9971 9972 /** 9973 * xmlParseElement: 9974 * @ctxt: an XML parser context 9975 * 9976 * parse an XML element 9977 * 9978 * [39] element ::= EmptyElemTag | STag content ETag 9979 * 9980 * [ WFC: Element Type Match ] 9981 * The Name in an element's end-tag must match the element type in the 9982 * start-tag. 
9983 * 9984 */ 9985 9986 void 9987 xmlParseElement(xmlParserCtxtPtr ctxt) { 9988 if (xmlParseElementStart(ctxt) != 0) 9989 return; 9990 9991 xmlParseContentInternal(ctxt); 9992 if (ctxt->instate == XML_PARSER_EOF) 9993 return; 9994 9995 if (CUR == 0) { 9996 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 9997 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 9998 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9999 "Premature end of data in tag %s line %d\n", 10000 name, line, NULL); 10001 return; 10002 } 10003 10004 xmlParseElementEnd(ctxt); 10005 } 10006 10007 /** 10008 * xmlParseElementStart: 10009 * @ctxt: an XML parser context 10010 * 10011 * Parse the start of an XML element. Returns -1 in case of error, 0 if an 10012 * opening tag was parsed, 1 if an empty element was parsed. 10013 */ 10014 static int 10015 xmlParseElementStart(xmlParserCtxtPtr ctxt) { 10016 const xmlChar *name; 10017 const xmlChar *prefix = NULL; 10018 const xmlChar *URI = NULL; 10019 xmlParserNodeInfo node_info; 10020 int line, tlen = 0; 10021 xmlNodePtr ret; 10022 int nsNr = ctxt->nsNr; 10023 10024 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10025 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10026 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10027 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10028 xmlParserMaxDepth); 10029 xmlHaltParser(ctxt); 10030 return(-1); 10031 } 10032 10033 /* Capture start position */ 10034 if (ctxt->record_info) { 10035 node_info.begin_pos = ctxt->input->consumed + 10036 (CUR_PTR - ctxt->input->base); 10037 node_info.begin_line = ctxt->input->line; 10038 } 10039 10040 if (ctxt->spaceNr == 0) 10041 spacePush(ctxt, -1); 10042 else if (*ctxt->space == -2) 10043 spacePush(ctxt, -1); 10044 else 10045 spacePush(ctxt, *ctxt->space); 10046 10047 line = ctxt->input->line; 10048 #ifdef LIBXML_SAX1_ENABLED 10049 if (ctxt->sax2) 10050 #endif /* LIBXML_SAX1_ENABLED */ 10051 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 
10052 #ifdef LIBXML_SAX1_ENABLED 10053 else 10054 name = xmlParseStartTag(ctxt); 10055 #endif /* LIBXML_SAX1_ENABLED */ 10056 if (ctxt->instate == XML_PARSER_EOF) 10057 return(-1); 10058 if (name == NULL) { 10059 spacePop(ctxt); 10060 return(-1); 10061 } 10062 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); 10063 ret = ctxt->node; 10064 10065 #ifdef LIBXML_VALID_ENABLED 10066 /* 10067 * [ VC: Root Element Type ] 10068 * The Name in the document type declaration must match the element 10069 * type of the root element. 10070 */ 10071 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10072 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10073 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10074 #endif /* LIBXML_VALID_ENABLED */ 10075 10076 /* 10077 * Check for an Empty Element. 10078 */ 10079 if ((RAW == '/') && (NXT(1) == '>')) { 10080 SKIP(2); 10081 if (ctxt->sax2) { 10082 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10083 (!ctxt->disableSAX)) 10084 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10085 #ifdef LIBXML_SAX1_ENABLED 10086 } else { 10087 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10088 (!ctxt->disableSAX)) 10089 ctxt->sax->endElement(ctxt->userData, name); 10090 #endif /* LIBXML_SAX1_ENABLED */ 10091 } 10092 namePop(ctxt); 10093 spacePop(ctxt); 10094 if (nsNr != ctxt->nsNr) 10095 nsPop(ctxt, ctxt->nsNr - nsNr); 10096 if ( ret != NULL && ctxt->record_info ) { 10097 node_info.end_pos = ctxt->input->consumed + 10098 (CUR_PTR - ctxt->input->base); 10099 node_info.end_line = ctxt->input->line; 10100 node_info.node = ret; 10101 xmlParserAddNodeInfo(ctxt, &node_info); 10102 } 10103 return(1); 10104 } 10105 if (RAW == '>') { 10106 NEXT1; 10107 } else { 10108 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10109 "Couldn't find end of Start Tag %s line %d\n", 10110 name, line, NULL); 10111 10112 /* 10113 * end of parsing of this node. 
10114 */ 10115 nodePop(ctxt); 10116 namePop(ctxt); 10117 spacePop(ctxt); 10118 if (nsNr != ctxt->nsNr) 10119 nsPop(ctxt, ctxt->nsNr - nsNr); 10120 10121 /* 10122 * Capture end position and add node 10123 */ 10124 if ( ret != NULL && ctxt->record_info ) { 10125 node_info.end_pos = ctxt->input->consumed + 10126 (CUR_PTR - ctxt->input->base); 10127 node_info.end_line = ctxt->input->line; 10128 node_info.node = ret; 10129 xmlParserAddNodeInfo(ctxt, &node_info); 10130 } 10131 return(-1); 10132 } 10133 10134 return(0); 10135 } 10136 10137 /** 10138 * xmlParseElementEnd: 10139 * @ctxt: an XML parser context 10140 * 10141 * Parse the end of an XML element. 10142 */ 10143 static void 10144 xmlParseElementEnd(xmlParserCtxtPtr ctxt) { 10145 xmlParserNodeInfo node_info; 10146 xmlNodePtr ret = ctxt->node; 10147 10148 if (ctxt->nameNr <= 0) 10149 return; 10150 10151 /* 10152 * parse the end of tag: '</' should be here. 10153 */ 10154 if (ctxt->sax2) { 10155 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); 10156 namePop(ctxt); 10157 } 10158 #ifdef LIBXML_SAX1_ENABLED 10159 else 10160 xmlParseEndTag1(ctxt, 0); 10161 #endif /* LIBXML_SAX1_ENABLED */ 10162 10163 /* 10164 * Capture end position and add node 10165 */ 10166 if ( ret != NULL && ctxt->record_info ) { 10167 node_info.end_pos = ctxt->input->consumed + 10168 (CUR_PTR - ctxt->input->base); 10169 node_info.end_line = ctxt->input->line; 10170 node_info.node = ret; 10171 xmlParserAddNodeInfo(ctxt, &node_info); 10172 } 10173 } 10174 10175 /** 10176 * xmlParseVersionNum: 10177 * @ctxt: an XML parser context 10178 * 10179 * parse the XML version value. 10180 * 10181 * [26] VersionNum ::= '1.' 
[0-9]+ 10182 * 10183 * In practice allow [0-9].[0-9]+ at that level 10184 * 10185 * Returns the string giving the XML version number, or NULL 10186 */ 10187 xmlChar * 10188 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10189 xmlChar *buf = NULL; 10190 int len = 0; 10191 int size = 10; 10192 xmlChar cur; 10193 10194 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10195 if (buf == NULL) { 10196 xmlErrMemory(ctxt, NULL); 10197 return(NULL); 10198 } 10199 cur = CUR; 10200 if (!((cur >= '0') && (cur <= '9'))) { 10201 xmlFree(buf); 10202 return(NULL); 10203 } 10204 buf[len++] = cur; 10205 NEXT; 10206 cur=CUR; 10207 if (cur != '.') { 10208 xmlFree(buf); 10209 return(NULL); 10210 } 10211 buf[len++] = cur; 10212 NEXT; 10213 cur=CUR; 10214 while ((cur >= '0') && (cur <= '9')) { 10215 if (len + 1 >= size) { 10216 xmlChar *tmp; 10217 10218 size *= 2; 10219 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10220 if (tmp == NULL) { 10221 xmlFree(buf); 10222 xmlErrMemory(ctxt, NULL); 10223 return(NULL); 10224 } 10225 buf = tmp; 10226 } 10227 buf[len++] = cur; 10228 NEXT; 10229 cur=CUR; 10230 } 10231 buf[len] = 0; 10232 return(buf); 10233 } 10234 10235 /** 10236 * xmlParseVersionInfo: 10237 * @ctxt: an XML parser context 10238 * 10239 * parse the XML version. 10240 * 10241 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10242 * 10243 * [25] Eq ::= S? '=' S? 10244 * 10245 * Returns the version string, e.g. 
"1.0" 10246 */ 10247 10248 xmlChar * 10249 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10250 xmlChar *version = NULL; 10251 10252 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10253 SKIP(7); 10254 SKIP_BLANKS; 10255 if (RAW != '=') { 10256 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10257 return(NULL); 10258 } 10259 NEXT; 10260 SKIP_BLANKS; 10261 if (RAW == '"') { 10262 NEXT; 10263 version = xmlParseVersionNum(ctxt); 10264 if (RAW != '"') { 10265 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10266 } else 10267 NEXT; 10268 } else if (RAW == '\''){ 10269 NEXT; 10270 version = xmlParseVersionNum(ctxt); 10271 if (RAW != '\'') { 10272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10273 } else 10274 NEXT; 10275 } else { 10276 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10277 } 10278 } 10279 return(version); 10280 } 10281 10282 /** 10283 * xmlParseEncName: 10284 * @ctxt: an XML parser context 10285 * 10286 * parse the XML encoding name 10287 * 10288 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10289 * 10290 * Returns the encoding name value or NULL 10291 */ 10292 xmlChar * 10293 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10294 xmlChar *buf = NULL; 10295 int len = 0; 10296 int size = 10; 10297 xmlChar cur; 10298 10299 cur = CUR; 10300 if (((cur >= 'a') && (cur <= 'z')) || 10301 ((cur >= 'A') && (cur <= 'Z'))) { 10302 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10303 if (buf == NULL) { 10304 xmlErrMemory(ctxt, NULL); 10305 return(NULL); 10306 } 10307 10308 buf[len++] = cur; 10309 NEXT; 10310 cur = CUR; 10311 while (((cur >= 'a') && (cur <= 'z')) || 10312 ((cur >= 'A') && (cur <= 'Z')) || 10313 ((cur >= '0') && (cur <= '9')) || 10314 (cur == '.') || (cur == '_') || 10315 (cur == '-')) { 10316 if (len + 1 >= size) { 10317 xmlChar *tmp; 10318 10319 size *= 2; 10320 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10321 if (tmp == NULL) { 10322 xmlErrMemory(ctxt, NULL); 10323 xmlFree(buf); 10324 return(NULL); 
10325 } 10326 buf = tmp; 10327 } 10328 buf[len++] = cur; 10329 NEXT; 10330 cur = CUR; 10331 if (cur == 0) { 10332 SHRINK; 10333 GROW; 10334 cur = CUR; 10335 } 10336 } 10337 buf[len] = 0; 10338 } else { 10339 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10340 } 10341 return(buf); 10342 } 10343 10344 /** 10345 * xmlParseEncodingDecl: 10346 * @ctxt: an XML parser context 10347 * 10348 * parse the XML encoding declaration 10349 * 10350 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10351 * 10352 * this setups the conversion filters. 10353 * 10354 * Returns the encoding value or NULL 10355 */ 10356 10357 const xmlChar * 10358 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10359 xmlChar *encoding = NULL; 10360 10361 SKIP_BLANKS; 10362 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10363 SKIP(8); 10364 SKIP_BLANKS; 10365 if (RAW != '=') { 10366 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10367 return(NULL); 10368 } 10369 NEXT; 10370 SKIP_BLANKS; 10371 if (RAW == '"') { 10372 NEXT; 10373 encoding = xmlParseEncName(ctxt); 10374 if (RAW != '"') { 10375 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10376 xmlFree((xmlChar *) encoding); 10377 return(NULL); 10378 } else 10379 NEXT; 10380 } else if (RAW == '\''){ 10381 NEXT; 10382 encoding = xmlParseEncName(ctxt); 10383 if (RAW != '\'') { 10384 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10385 xmlFree((xmlChar *) encoding); 10386 return(NULL); 10387 } else 10388 NEXT; 10389 } else { 10390 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10391 } 10392 10393 /* 10394 * Non standard parsing, allowing the user to ignore encoding 10395 */ 10396 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10397 xmlFree((xmlChar *) encoding); 10398 return(NULL); 10399 } 10400 10401 /* 10402 * UTF-16 encoding switch has already taken place at this stage, 10403 * more over the little-endian/big-endian selection is already done 10404 */ 10405 if ((encoding != NULL) && 10406 
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10407 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10408 /* 10409 * If no encoding was passed to the parser, that we are 10410 * using UTF-16 and no decoder is present i.e. the 10411 * document is apparently UTF-8 compatible, then raise an 10412 * encoding mismatch fatal error 10413 */ 10414 if ((ctxt->encoding == NULL) && 10415 (ctxt->input->buf != NULL) && 10416 (ctxt->input->buf->encoder == NULL)) { 10417 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10418 "Document labelled UTF-16 but has UTF-8 content\n"); 10419 } 10420 if (ctxt->encoding != NULL) 10421 xmlFree((xmlChar *) ctxt->encoding); 10422 ctxt->encoding = encoding; 10423 } 10424 /* 10425 * UTF-8 encoding is handled natively 10426 */ 10427 else if ((encoding != NULL) && 10428 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10429 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10430 if (ctxt->encoding != NULL) 10431 xmlFree((xmlChar *) ctxt->encoding); 10432 ctxt->encoding = encoding; 10433 } 10434 else if (encoding != NULL) { 10435 xmlCharEncodingHandlerPtr handler; 10436 10437 if (ctxt->input->encoding != NULL) 10438 xmlFree((xmlChar *) ctxt->input->encoding); 10439 ctxt->input->encoding = encoding; 10440 10441 handler = xmlFindCharEncodingHandler((const char *) encoding); 10442 if (handler != NULL) { 10443 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10444 /* failed to convert */ 10445 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10446 return(NULL); 10447 } 10448 } else { 10449 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10450 "Unsupported encoding %s\n", encoding); 10451 return(NULL); 10452 } 10453 } 10454 } 10455 return(encoding); 10456 } 10457 10458 /** 10459 * xmlParseSDDecl: 10460 * @ctxt: an XML parser context 10461 * 10462 * parse the XML standalone declaration 10463 * 10464 * [32] SDDecl ::= S 'standalone' Eq 10465 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10466 * 10467 * [ VC: Standalone Document Declaration ] 10468 
* TODO The standalone document declaration must have the value "no" 10469 * if any external markup declarations contain declarations of: 10470 * - attributes with default values, if elements to which these 10471 * attributes apply appear in the document without specifications 10472 * of values for these attributes, or 10473 * - entities (other than amp, lt, gt, apos, quot), if references 10474 * to those entities appear in the document, or 10475 * - attributes with values subject to normalization, where the 10476 * attribute appears in the document with a value which will change 10477 * as a result of normalization, or 10478 * - element types with element content, if white space occurs directly 10479 * within any instance of those types. 10480 * 10481 * Returns: 10482 * 1 if standalone="yes" 10483 * 0 if standalone="no" 10484 * -2 if standalone attribute is missing or invalid 10485 * (A standalone value of -2 means that the XML declaration was found, 10486 * but no value was specified for the standalone attribute). 
10487 */ 10488 10489 int 10490 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10491 int standalone = -2; 10492 10493 SKIP_BLANKS; 10494 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10495 SKIP(10); 10496 SKIP_BLANKS; 10497 if (RAW != '=') { 10498 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10499 return(standalone); 10500 } 10501 NEXT; 10502 SKIP_BLANKS; 10503 if (RAW == '\''){ 10504 NEXT; 10505 if ((RAW == 'n') && (NXT(1) == 'o')) { 10506 standalone = 0; 10507 SKIP(2); 10508 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10509 (NXT(2) == 's')) { 10510 standalone = 1; 10511 SKIP(3); 10512 } else { 10513 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10514 } 10515 if (RAW != '\'') { 10516 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10517 } else 10518 NEXT; 10519 } else if (RAW == '"'){ 10520 NEXT; 10521 if ((RAW == 'n') && (NXT(1) == 'o')) { 10522 standalone = 0; 10523 SKIP(2); 10524 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10525 (NXT(2) == 's')) { 10526 standalone = 1; 10527 SKIP(3); 10528 } else { 10529 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10530 } 10531 if (RAW != '"') { 10532 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10533 } else 10534 NEXT; 10535 } else { 10536 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10537 } 10538 } 10539 return(standalone); 10540 } 10541 10542 /** 10543 * xmlParseXMLDecl: 10544 * @ctxt: an XML parser context 10545 * 10546 * parse an XML declaration header 10547 * 10548 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10549 */ 10550 10551 void 10552 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10553 xmlChar *version; 10554 10555 /* 10556 * This value for standalone indicates that the document has an 10557 * XML declaration but it does not have a standalone attribute. 10558 * It will be overwritten later if a standalone attribute is found. 10559 */ 10560 ctxt->input->standalone = -2; 10561 10562 /* 10563 * We know that '<?xml' is here. 
10564 */ 10565 SKIP(5); 10566 10567 if (!IS_BLANK_CH(RAW)) { 10568 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10569 "Blank needed after '<?xml'\n"); 10570 } 10571 SKIP_BLANKS; 10572 10573 /* 10574 * We must have the VersionInfo here. 10575 */ 10576 version = xmlParseVersionInfo(ctxt); 10577 if (version == NULL) { 10578 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10579 } else { 10580 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10581 /* 10582 * Changed here for XML-1.0 5th edition 10583 */ 10584 if (ctxt->options & XML_PARSE_OLD10) { 10585 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10586 "Unsupported version '%s'\n", 10587 version); 10588 } else { 10589 if ((version[0] == '1') && ((version[1] == '.'))) { 10590 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10591 "Unsupported version '%s'\n", 10592 version, NULL); 10593 } else { 10594 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10595 "Unsupported version '%s'\n", 10596 version); 10597 } 10598 } 10599 } 10600 if (ctxt->version != NULL) 10601 xmlFree((void *) ctxt->version); 10602 ctxt->version = version; 10603 } 10604 10605 /* 10606 * We may have the encoding declaration 10607 */ 10608 if (!IS_BLANK_CH(RAW)) { 10609 if ((RAW == '?') && (NXT(1) == '>')) { 10610 SKIP(2); 10611 return; 10612 } 10613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10614 } 10615 xmlParseEncodingDecl(ctxt); 10616 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10617 (ctxt->instate == XML_PARSER_EOF)) { 10618 /* 10619 * The XML REC instructs us to stop parsing right here 10620 */ 10621 return; 10622 } 10623 10624 /* 10625 * We may have the standalone status. 
10626 */ 10627 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10628 if ((RAW == '?') && (NXT(1) == '>')) { 10629 SKIP(2); 10630 return; 10631 } 10632 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10633 } 10634 10635 /* 10636 * We can grow the input buffer freely at that point 10637 */ 10638 GROW; 10639 10640 SKIP_BLANKS; 10641 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10642 10643 SKIP_BLANKS; 10644 if ((RAW == '?') && (NXT(1) == '>')) { 10645 SKIP(2); 10646 } else if (RAW == '>') { 10647 /* Deprecated old WD ... */ 10648 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10649 NEXT; 10650 } else { 10651 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10652 MOVETO_ENDTAG(CUR_PTR); 10653 NEXT; 10654 } 10655 } 10656 10657 /** 10658 * xmlParseMisc: 10659 * @ctxt: an XML parser context 10660 * 10661 * parse an XML Misc* optional field. 10662 * 10663 * [27] Misc ::= Comment | PI | S 10664 */ 10665 10666 void 10667 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10668 while (ctxt->instate != XML_PARSER_EOF) { 10669 SKIP_BLANKS; 10670 GROW; 10671 if ((RAW == '<') && (NXT(1) == '?')) { 10672 xmlParsePI(ctxt); 10673 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) { 10674 xmlParseComment(ctxt); 10675 } else { 10676 break; 10677 } 10678 } 10679 } 10680 10681 /** 10682 * xmlParseDocument: 10683 * @ctxt: an XML parser context 10684 * 10685 * parse an XML document (and build a tree if using the standard SAX 10686 * interface). 10687 * 10688 * [1] document ::= prolog element Misc* 10689 * 10690 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10691 * 10692 * Returns 0, -1 in case of error. the parser context is augmented 10693 * as a result of the parsing. 
10694 */ 10695 10696 int 10697 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10698 xmlChar start[4]; 10699 xmlCharEncoding enc; 10700 10701 xmlInitParser(); 10702 10703 if ((ctxt == NULL) || (ctxt->input == NULL)) 10704 return(-1); 10705 10706 GROW; 10707 10708 /* 10709 * SAX: detecting the level. 10710 */ 10711 xmlDetectSAX2(ctxt); 10712 10713 /* 10714 * SAX: beginning of the document processing. 10715 */ 10716 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10717 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10718 if (ctxt->instate == XML_PARSER_EOF) 10719 return(-1); 10720 10721 if ((ctxt->encoding == NULL) && 10722 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10723 /* 10724 * Get the 4 first bytes and decode the charset 10725 * if enc != XML_CHAR_ENCODING_NONE 10726 * plug some encoding conversion routines. 10727 */ 10728 start[0] = RAW; 10729 start[1] = NXT(1); 10730 start[2] = NXT(2); 10731 start[3] = NXT(3); 10732 enc = xmlDetectCharEncoding(&start[0], 4); 10733 if (enc != XML_CHAR_ENCODING_NONE) { 10734 xmlSwitchEncoding(ctxt, enc); 10735 } 10736 } 10737 10738 10739 if (CUR == 0) { 10740 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10741 return(-1); 10742 } 10743 10744 /* 10745 * Check for the XMLDecl in the Prolog. 10746 * do not GROW here to avoid the detected encoder to decode more 10747 * than just the first line, unless the amount of data is really 10748 * too small to hold "<?xml version="1.0" encoding="foo" 10749 */ 10750 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10751 GROW; 10752 } 10753 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10754 10755 /* 10756 * Note that we will switch encoding on the fly. 
10757 */ 10758 xmlParseXMLDecl(ctxt); 10759 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10760 (ctxt->instate == XML_PARSER_EOF)) { 10761 /* 10762 * The XML REC instructs us to stop parsing right here 10763 */ 10764 return(-1); 10765 } 10766 ctxt->standalone = ctxt->input->standalone; 10767 SKIP_BLANKS; 10768 } else { 10769 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10770 } 10771 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10772 ctxt->sax->startDocument(ctxt->userData); 10773 if (ctxt->instate == XML_PARSER_EOF) 10774 return(-1); 10775 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10776 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10777 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10778 } 10779 10780 /* 10781 * The Misc part of the Prolog 10782 */ 10783 xmlParseMisc(ctxt); 10784 10785 /* 10786 * Then possibly doc type declaration(s) and more Misc 10787 * (doctypedecl Misc*)? 10788 */ 10789 GROW; 10790 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10791 10792 ctxt->inSubset = 1; 10793 xmlParseDocTypeDecl(ctxt); 10794 if (RAW == '[') { 10795 ctxt->instate = XML_PARSER_DTD; 10796 xmlParseInternalSubset(ctxt); 10797 if (ctxt->instate == XML_PARSER_EOF) 10798 return(-1); 10799 } 10800 10801 /* 10802 * Create and update the external subset. 
10803 */ 10804 ctxt->inSubset = 2; 10805 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10806 (!ctxt->disableSAX)) 10807 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10808 ctxt->extSubSystem, ctxt->extSubURI); 10809 if (ctxt->instate == XML_PARSER_EOF) 10810 return(-1); 10811 ctxt->inSubset = 0; 10812 10813 xmlCleanSpecialAttr(ctxt); 10814 10815 ctxt->instate = XML_PARSER_PROLOG; 10816 xmlParseMisc(ctxt); 10817 } 10818 10819 /* 10820 * Time to start parsing the tree itself 10821 */ 10822 GROW; 10823 if (RAW != '<') { 10824 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10825 "Start tag expected, '<' not found\n"); 10826 } else { 10827 ctxt->instate = XML_PARSER_CONTENT; 10828 xmlParseElement(ctxt); 10829 ctxt->instate = XML_PARSER_EPILOG; 10830 10831 10832 /* 10833 * The Misc part at the end 10834 */ 10835 xmlParseMisc(ctxt); 10836 10837 if (RAW != 0) { 10838 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10839 } 10840 ctxt->instate = XML_PARSER_EOF; 10841 } 10842 10843 /* 10844 * SAX: end of the document processing. 10845 */ 10846 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10847 ctxt->sax->endDocument(ctxt->userData); 10848 10849 /* 10850 * Remove locally kept entity definitions if the tree was not built 10851 */ 10852 if ((ctxt->myDoc != NULL) && 10853 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10854 xmlFreeDoc(ctxt->myDoc); 10855 ctxt->myDoc = NULL; 10856 } 10857 10858 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10859 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10860 if (ctxt->valid) 10861 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10862 if (ctxt->nsWellFormed) 10863 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10864 if (ctxt->options & XML_PARSE_OLD10) 10865 ctxt->myDoc->properties |= XML_DOC_OLD10; 10866 } 10867 if (! 
ctxt->wellFormed) { 10868 ctxt->valid = 0; 10869 return(-1); 10870 } 10871 return(0); 10872 } 10873 10874 /** 10875 * xmlParseExtParsedEnt: 10876 * @ctxt: an XML parser context 10877 * 10878 * parse a general parsed entity 10879 * An external general parsed entity is well-formed if it matches the 10880 * production labeled extParsedEnt. 10881 * 10882 * [78] extParsedEnt ::= TextDecl? content 10883 * 10884 * Returns 0, -1 in case of error. the parser context is augmented 10885 * as a result of the parsing. 10886 */ 10887 10888 int 10889 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10890 xmlChar start[4]; 10891 xmlCharEncoding enc; 10892 10893 if ((ctxt == NULL) || (ctxt->input == NULL)) 10894 return(-1); 10895 10896 xmlDetectSAX2(ctxt); 10897 10898 GROW; 10899 10900 /* 10901 * SAX: beginning of the document processing. 10902 */ 10903 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10904 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10905 10906 /* 10907 * Get the 4 first bytes and decode the charset 10908 * if enc != XML_CHAR_ENCODING_NONE 10909 * plug some encoding conversion routines. 10910 */ 10911 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10912 start[0] = RAW; 10913 start[1] = NXT(1); 10914 start[2] = NXT(2); 10915 start[3] = NXT(3); 10916 enc = xmlDetectCharEncoding(start, 4); 10917 if (enc != XML_CHAR_ENCODING_NONE) { 10918 xmlSwitchEncoding(ctxt, enc); 10919 } 10920 } 10921 10922 10923 if (CUR == 0) { 10924 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10925 } 10926 10927 /* 10928 * Check for the XMLDecl in the Prolog. 10929 */ 10930 GROW; 10931 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10932 10933 /* 10934 * Note that we will switch encoding on the fly. 
10935 */ 10936 xmlParseXMLDecl(ctxt); 10937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10938 /* 10939 * The XML REC instructs us to stop parsing right here 10940 */ 10941 return(-1); 10942 } 10943 SKIP_BLANKS; 10944 } else { 10945 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10946 } 10947 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10948 ctxt->sax->startDocument(ctxt->userData); 10949 if (ctxt->instate == XML_PARSER_EOF) 10950 return(-1); 10951 10952 /* 10953 * Doing validity checking on chunk doesn't make sense 10954 */ 10955 ctxt->instate = XML_PARSER_CONTENT; 10956 ctxt->validate = 0; 10957 ctxt->loadsubset = 0; 10958 ctxt->depth = 0; 10959 10960 xmlParseContent(ctxt); 10961 if (ctxt->instate == XML_PARSER_EOF) 10962 return(-1); 10963 10964 if ((RAW == '<') && (NXT(1) == '/')) { 10965 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10966 } else if (RAW != 0) { 10967 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10968 } 10969 10970 /* 10971 * SAX: end of the document processing. 10972 */ 10973 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10974 ctxt->sax->endDocument(ctxt->userData); 10975 10976 if (! ctxt->wellFormed) return(-1); 10977 return(0); 10978 } 10979 10980 #ifdef LIBXML_PUSH_ENABLED 10981 /************************************************************************ 10982 * * 10983 * Progressive parsing interfaces * 10984 * * 10985 ************************************************************************/ 10986 10987 /** 10988 * xmlParseLookupSequence: 10989 * @ctxt: an XML parser context 10990 * @first: the first char to lookup 10991 * @next: the next char to lookup or zero 10992 * @third: the next char to lookup or zero 10993 * 10994 * Try to find if a sequence (first, next, third) or just (first next) or 10995 * (first) is available in the input stream. 
10996 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10997 * to avoid rescanning sequences of bytes, it DOES change the state of the 10998 * parser, do not use liberally. 10999 * 11000 * Returns the index to the current parsing point if the full sequence 11001 * is available, -1 otherwise. 11002 */ 11003 static int 11004 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 11005 xmlChar next, xmlChar third) { 11006 int base, len; 11007 xmlParserInputPtr in; 11008 const xmlChar *buf; 11009 11010 in = ctxt->input; 11011 if (in == NULL) return(-1); 11012 base = in->cur - in->base; 11013 if (base < 0) return(-1); 11014 if (ctxt->checkIndex > base) 11015 base = ctxt->checkIndex; 11016 if (in->buf == NULL) { 11017 buf = in->base; 11018 len = in->length; 11019 } else { 11020 buf = xmlBufContent(in->buf->buffer); 11021 len = xmlBufUse(in->buf->buffer); 11022 } 11023 /* take into account the sequence length */ 11024 if (third) len -= 2; 11025 else if (next) len --; 11026 for (;base < len;base++) { 11027 if (buf[base] == first) { 11028 if (third != 0) { 11029 if ((buf[base + 1] != next) || 11030 (buf[base + 2] != third)) continue; 11031 } else if (next != 0) { 11032 if (buf[base + 1] != next) continue; 11033 } 11034 ctxt->checkIndex = 0; 11035 #ifdef DEBUG_PUSH 11036 if (next == 0) 11037 xmlGenericError(xmlGenericErrorContext, 11038 "PP: lookup '%c' found at %d\n", 11039 first, base); 11040 else if (third == 0) 11041 xmlGenericError(xmlGenericErrorContext, 11042 "PP: lookup '%c%c' found at %d\n", 11043 first, next, base); 11044 else 11045 xmlGenericError(xmlGenericErrorContext, 11046 "PP: lookup '%c%c%c' found at %d\n", 11047 first, next, third, base); 11048 #endif 11049 return(base - (in->cur - in->base)); 11050 } 11051 } 11052 ctxt->checkIndex = base; 11053 #ifdef DEBUG_PUSH 11054 if (next == 0) 11055 xmlGenericError(xmlGenericErrorContext, 11056 "PP: lookup '%c' failed\n", first); 11057 else if (third == 0) 11058 
xmlGenericError(xmlGenericErrorContext, 11059 "PP: lookup '%c%c' failed\n", first, next); 11060 else 11061 xmlGenericError(xmlGenericErrorContext, 11062 "PP: lookup '%c%c%c' failed\n", first, next, third); 11063 #endif 11064 return(-1); 11065 } 11066 11067 /** 11068 * xmlParseGetLasts: 11069 * @ctxt: an XML parser context 11070 * @lastlt: pointer to store the last '<' from the input 11071 * @lastgt: pointer to store the last '>' from the input 11072 * 11073 * Lookup the last < and > in the current chunk 11074 */ 11075 static void 11076 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 11077 const xmlChar **lastgt) { 11078 const xmlChar *tmp; 11079 11080 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11081 xmlGenericError(xmlGenericErrorContext, 11082 "Internal error: xmlParseGetLasts\n"); 11083 return; 11084 } 11085 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11086 tmp = ctxt->input->end; 11087 tmp--; 11088 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11089 if (tmp < ctxt->input->base) { 11090 *lastlt = NULL; 11091 *lastgt = NULL; 11092 } else { 11093 *lastlt = tmp; 11094 tmp++; 11095 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11096 if (*tmp == '\'') { 11097 tmp++; 11098 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11099 if (tmp < ctxt->input->end) tmp++; 11100 } else if (*tmp == '"') { 11101 tmp++; 11102 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11103 if (tmp < ctxt->input->end) tmp++; 11104 } else 11105 tmp++; 11106 } 11107 if (tmp < ctxt->input->end) 11108 *lastgt = tmp; 11109 else { 11110 tmp = *lastlt; 11111 tmp--; 11112 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11113 if (tmp >= ctxt->input->base) 11114 *lastgt = tmp; 11115 else 11116 *lastgt = NULL; 11117 } 11118 } 11119 } else { 11120 *lastlt = NULL; 11121 *lastgt = NULL; 11122 } 11123 } 11124 /** 11125 * xmlCheckCdataPush: 11126 * @cur: pointer to the block of characters 11127 * @len: length of the 
block in bytes 11128 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11129 * 11130 * Check that the block of characters is okay as SCdata content [20] 11131 * 11132 * Returns the number of bytes to pass if okay, a negative index where an 11133 * UTF-8 error occurred otherwise 11134 */ 11135 static int 11136 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11137 int ix; 11138 unsigned char c; 11139 int codepoint; 11140 11141 if ((utf == NULL) || (len <= 0)) 11142 return(0); 11143 11144 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11145 c = utf[ix]; 11146 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11147 if (c >= 0x20) 11148 ix++; 11149 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11150 ix++; 11151 else 11152 return(-ix); 11153 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11154 if (ix + 2 > len) return(complete ? -ix : ix); 11155 if ((utf[ix+1] & 0xc0 ) != 0x80) 11156 return(-ix); 11157 codepoint = (utf[ix] & 0x1f) << 6; 11158 codepoint |= utf[ix+1] & 0x3f; 11159 if (!xmlIsCharQ(codepoint)) 11160 return(-ix); 11161 ix += 2; 11162 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11163 if (ix + 3 > len) return(complete ? -ix : ix); 11164 if (((utf[ix+1] & 0xc0) != 0x80) || 11165 ((utf[ix+2] & 0xc0) != 0x80)) 11166 return(-ix); 11167 codepoint = (utf[ix] & 0xf) << 12; 11168 codepoint |= (utf[ix+1] & 0x3f) << 6; 11169 codepoint |= utf[ix+2] & 0x3f; 11170 if (!xmlIsCharQ(codepoint)) 11171 return(-ix); 11172 ix += 3; 11173 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11174 if (ix + 4 > len) return(complete ? 
-ix : ix); 11175 if (((utf[ix+1] & 0xc0) != 0x80) || 11176 ((utf[ix+2] & 0xc0) != 0x80) || 11177 ((utf[ix+3] & 0xc0) != 0x80)) 11178 return(-ix); 11179 codepoint = (utf[ix] & 0x7) << 18; 11180 codepoint |= (utf[ix+1] & 0x3f) << 12; 11181 codepoint |= (utf[ix+2] & 0x3f) << 6; 11182 codepoint |= utf[ix+3] & 0x3f; 11183 if (!xmlIsCharQ(codepoint)) 11184 return(-ix); 11185 ix += 4; 11186 } else /* unknown encoding */ 11187 return(-ix); 11188 } 11189 return(ix); 11190 } 11191 11192 /** 11193 * xmlParseTryOrFinish: 11194 * @ctxt: an XML parser context 11195 * @terminate: last chunk indicator 11196 * 11197 * Try to progress on parsing 11198 * 11199 * Returns zero if no parsing was possible 11200 */ 11201 static int 11202 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11203 int ret = 0; 11204 int avail, tlen; 11205 xmlChar cur, next; 11206 const xmlChar *lastlt, *lastgt; 11207 11208 if (ctxt->input == NULL) 11209 return(0); 11210 11211 #ifdef DEBUG_PUSH 11212 switch (ctxt->instate) { 11213 case XML_PARSER_EOF: 11214 xmlGenericError(xmlGenericErrorContext, 11215 "PP: try EOF\n"); break; 11216 case XML_PARSER_START: 11217 xmlGenericError(xmlGenericErrorContext, 11218 "PP: try START\n"); break; 11219 case XML_PARSER_MISC: 11220 xmlGenericError(xmlGenericErrorContext, 11221 "PP: try MISC\n");break; 11222 case XML_PARSER_COMMENT: 11223 xmlGenericError(xmlGenericErrorContext, 11224 "PP: try COMMENT\n");break; 11225 case XML_PARSER_PROLOG: 11226 xmlGenericError(xmlGenericErrorContext, 11227 "PP: try PROLOG\n");break; 11228 case XML_PARSER_START_TAG: 11229 xmlGenericError(xmlGenericErrorContext, 11230 "PP: try START_TAG\n");break; 11231 case XML_PARSER_CONTENT: 11232 xmlGenericError(xmlGenericErrorContext, 11233 "PP: try CONTENT\n");break; 11234 case XML_PARSER_CDATA_SECTION: 11235 xmlGenericError(xmlGenericErrorContext, 11236 "PP: try CDATA_SECTION\n");break; 11237 case XML_PARSER_END_TAG: 11238 xmlGenericError(xmlGenericErrorContext, 11239 "PP: try 
END_TAG\n");break; 11240 case XML_PARSER_ENTITY_DECL: 11241 xmlGenericError(xmlGenericErrorContext, 11242 "PP: try ENTITY_DECL\n");break; 11243 case XML_PARSER_ENTITY_VALUE: 11244 xmlGenericError(xmlGenericErrorContext, 11245 "PP: try ENTITY_VALUE\n");break; 11246 case XML_PARSER_ATTRIBUTE_VALUE: 11247 xmlGenericError(xmlGenericErrorContext, 11248 "PP: try ATTRIBUTE_VALUE\n");break; 11249 case XML_PARSER_DTD: 11250 xmlGenericError(xmlGenericErrorContext, 11251 "PP: try DTD\n");break; 11252 case XML_PARSER_EPILOG: 11253 xmlGenericError(xmlGenericErrorContext, 11254 "PP: try EPILOG\n");break; 11255 case XML_PARSER_PI: 11256 xmlGenericError(xmlGenericErrorContext, 11257 "PP: try PI\n");break; 11258 case XML_PARSER_IGNORE: 11259 xmlGenericError(xmlGenericErrorContext, 11260 "PP: try IGNORE\n");break; 11261 } 11262 #endif 11263 11264 if ((ctxt->input != NULL) && 11265 (ctxt->input->cur - ctxt->input->base > 4096)) { 11266 xmlSHRINK(ctxt); 11267 ctxt->checkIndex = 0; 11268 } 11269 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11270 11271 while (ctxt->instate != XML_PARSER_EOF) { 11272 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11273 return(0); 11274 11275 if (ctxt->input == NULL) break; 11276 if (ctxt->input->buf == NULL) 11277 avail = ctxt->input->length - 11278 (ctxt->input->cur - ctxt->input->base); 11279 else { 11280 /* 11281 * If we are operating on converted input, try to flush 11282 * remaining chars to avoid them stalling in the non-converted 11283 * buffer. But do not do this in document start where 11284 * encoding="..." may not have been read and we work on a 11285 * guessed encoding. 
11286 */ 11287 if ((ctxt->instate != XML_PARSER_START) && 11288 (ctxt->input->buf->raw != NULL) && 11289 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11290 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11291 ctxt->input); 11292 size_t current = ctxt->input->cur - ctxt->input->base; 11293 11294 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11295 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11296 base, current); 11297 } 11298 avail = xmlBufUse(ctxt->input->buf->buffer) - 11299 (ctxt->input->cur - ctxt->input->base); 11300 } 11301 if (avail < 1) 11302 goto done; 11303 switch (ctxt->instate) { 11304 case XML_PARSER_EOF: 11305 /* 11306 * Document parsing is done ! 11307 */ 11308 goto done; 11309 case XML_PARSER_START: 11310 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11311 xmlChar start[4]; 11312 xmlCharEncoding enc; 11313 11314 /* 11315 * Very first chars read from the document flow. 11316 */ 11317 if (avail < 4) 11318 goto done; 11319 11320 /* 11321 * Get the 4 first bytes and decode the charset 11322 * if enc != XML_CHAR_ENCODING_NONE 11323 * plug some encoding conversion routines, 11324 * else xmlSwitchEncoding will set to (default) 11325 * UTF8. 
11326 */ 11327 start[0] = RAW; 11328 start[1] = NXT(1); 11329 start[2] = NXT(2); 11330 start[3] = NXT(3); 11331 enc = xmlDetectCharEncoding(start, 4); 11332 xmlSwitchEncoding(ctxt, enc); 11333 break; 11334 } 11335 11336 if (avail < 2) 11337 goto done; 11338 cur = ctxt->input->cur[0]; 11339 next = ctxt->input->cur[1]; 11340 if (cur == 0) { 11341 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11342 ctxt->sax->setDocumentLocator(ctxt->userData, 11343 &xmlDefaultSAXLocator); 11344 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11345 xmlHaltParser(ctxt); 11346 #ifdef DEBUG_PUSH 11347 xmlGenericError(xmlGenericErrorContext, 11348 "PP: entering EOF\n"); 11349 #endif 11350 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11351 ctxt->sax->endDocument(ctxt->userData); 11352 goto done; 11353 } 11354 if ((cur == '<') && (next == '?')) { 11355 /* PI or XML decl */ 11356 if (avail < 5) return(ret); 11357 if ((!terminate) && 11358 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11359 return(ret); 11360 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11361 ctxt->sax->setDocumentLocator(ctxt->userData, 11362 &xmlDefaultSAXLocator); 11363 if ((ctxt->input->cur[2] == 'x') && 11364 (ctxt->input->cur[3] == 'm') && 11365 (ctxt->input->cur[4] == 'l') && 11366 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11367 ret += 5; 11368 #ifdef DEBUG_PUSH 11369 xmlGenericError(xmlGenericErrorContext, 11370 "PP: Parsing XML Decl\n"); 11371 #endif 11372 xmlParseXMLDecl(ctxt); 11373 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11374 /* 11375 * The XML REC instructs us to stop parsing right 11376 * here 11377 */ 11378 xmlHaltParser(ctxt); 11379 return(0); 11380 } 11381 ctxt->standalone = ctxt->input->standalone; 11382 if ((ctxt->encoding == NULL) && 11383 (ctxt->input->encoding != NULL)) 11384 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11385 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11386 (!ctxt->disableSAX)) 11387 ctxt->sax->startDocument(ctxt->userData); 11388 
ctxt->instate = XML_PARSER_MISC; 11389 #ifdef DEBUG_PUSH 11390 xmlGenericError(xmlGenericErrorContext, 11391 "PP: entering MISC\n"); 11392 #endif 11393 } else { 11394 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11395 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11396 (!ctxt->disableSAX)) 11397 ctxt->sax->startDocument(ctxt->userData); 11398 ctxt->instate = XML_PARSER_MISC; 11399 #ifdef DEBUG_PUSH 11400 xmlGenericError(xmlGenericErrorContext, 11401 "PP: entering MISC\n"); 11402 #endif 11403 } 11404 } else { 11405 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11406 ctxt->sax->setDocumentLocator(ctxt->userData, 11407 &xmlDefaultSAXLocator); 11408 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11409 if (ctxt->version == NULL) { 11410 xmlErrMemory(ctxt, NULL); 11411 break; 11412 } 11413 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11414 (!ctxt->disableSAX)) 11415 ctxt->sax->startDocument(ctxt->userData); 11416 ctxt->instate = XML_PARSER_MISC; 11417 #ifdef DEBUG_PUSH 11418 xmlGenericError(xmlGenericErrorContext, 11419 "PP: entering MISC\n"); 11420 #endif 11421 } 11422 break; 11423 case XML_PARSER_START_TAG: { 11424 const xmlChar *name; 11425 const xmlChar *prefix = NULL; 11426 const xmlChar *URI = NULL; 11427 int line = ctxt->input->line; 11428 int nsNr = ctxt->nsNr; 11429 11430 if ((avail < 2) && (ctxt->inputNr == 1)) 11431 goto done; 11432 cur = ctxt->input->cur[0]; 11433 if (cur != '<') { 11434 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11435 xmlHaltParser(ctxt); 11436 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11437 ctxt->sax->endDocument(ctxt->userData); 11438 goto done; 11439 } 11440 if (!terminate) { 11441 if (ctxt->progressive) { 11442 /* > can be found unescaped in attribute values */ 11443 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11444 goto done; 11445 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11446 goto done; 11447 } 11448 } 11449 if (ctxt->spaceNr == 0) 11450 spacePush(ctxt, -1); 11451 
else if (*ctxt->space == -2) 11452 spacePush(ctxt, -1); 11453 else 11454 spacePush(ctxt, *ctxt->space); 11455 #ifdef LIBXML_SAX1_ENABLED 11456 if (ctxt->sax2) 11457 #endif /* LIBXML_SAX1_ENABLED */ 11458 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11459 #ifdef LIBXML_SAX1_ENABLED 11460 else 11461 name = xmlParseStartTag(ctxt); 11462 #endif /* LIBXML_SAX1_ENABLED */ 11463 if (ctxt->instate == XML_PARSER_EOF) 11464 goto done; 11465 if (name == NULL) { 11466 spacePop(ctxt); 11467 xmlHaltParser(ctxt); 11468 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11469 ctxt->sax->endDocument(ctxt->userData); 11470 goto done; 11471 } 11472 #ifdef LIBXML_VALID_ENABLED 11473 /* 11474 * [ VC: Root Element Type ] 11475 * The Name in the document type declaration must match 11476 * the element type of the root element. 11477 */ 11478 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11479 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11480 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11481 #endif /* LIBXML_VALID_ENABLED */ 11482 11483 /* 11484 * Check for an Empty Element. 
11485 */ 11486 if ((RAW == '/') && (NXT(1) == '>')) { 11487 SKIP(2); 11488 11489 if (ctxt->sax2) { 11490 if ((ctxt->sax != NULL) && 11491 (ctxt->sax->endElementNs != NULL) && 11492 (!ctxt->disableSAX)) 11493 ctxt->sax->endElementNs(ctxt->userData, name, 11494 prefix, URI); 11495 if (ctxt->nsNr - nsNr > 0) 11496 nsPop(ctxt, ctxt->nsNr - nsNr); 11497 #ifdef LIBXML_SAX1_ENABLED 11498 } else { 11499 if ((ctxt->sax != NULL) && 11500 (ctxt->sax->endElement != NULL) && 11501 (!ctxt->disableSAX)) 11502 ctxt->sax->endElement(ctxt->userData, name); 11503 #endif /* LIBXML_SAX1_ENABLED */ 11504 } 11505 if (ctxt->instate == XML_PARSER_EOF) 11506 goto done; 11507 spacePop(ctxt); 11508 if (ctxt->nameNr == 0) { 11509 ctxt->instate = XML_PARSER_EPILOG; 11510 } else { 11511 ctxt->instate = XML_PARSER_CONTENT; 11512 } 11513 ctxt->progressive = 1; 11514 break; 11515 } 11516 if (RAW == '>') { 11517 NEXT; 11518 } else { 11519 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11520 "Couldn't find end of Start Tag %s\n", 11521 name); 11522 nodePop(ctxt); 11523 spacePop(ctxt); 11524 } 11525 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); 11526 11527 ctxt->instate = XML_PARSER_CONTENT; 11528 ctxt->progressive = 1; 11529 break; 11530 } 11531 case XML_PARSER_CONTENT: { 11532 int id; 11533 unsigned long cons; 11534 if ((avail < 2) && (ctxt->inputNr == 1)) 11535 goto done; 11536 cur = ctxt->input->cur[0]; 11537 next = ctxt->input->cur[1]; 11538 11539 id = ctxt->input->id; 11540 cons = CUR_CONSUMED; 11541 if ((cur == '<') && (next == '/')) { 11542 ctxt->instate = XML_PARSER_END_TAG; 11543 break; 11544 } else if ((cur == '<') && (next == '?')) { 11545 if ((!terminate) && 11546 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11547 ctxt->progressive = XML_PARSER_PI; 11548 goto done; 11549 } 11550 xmlParsePI(ctxt); 11551 ctxt->instate = XML_PARSER_CONTENT; 11552 ctxt->progressive = 1; 11553 } else if ((cur == '<') && (next != '!')) { 11554 ctxt->instate = XML_PARSER_START_TAG; 11555 
break; 11556 } else if ((cur == '<') && (next == '!') && 11557 (ctxt->input->cur[2] == '-') && 11558 (ctxt->input->cur[3] == '-')) { 11559 int term; 11560 11561 if (avail < 4) 11562 goto done; 11563 ctxt->input->cur += 4; 11564 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11565 ctxt->input->cur -= 4; 11566 if ((!terminate) && (term < 0)) { 11567 ctxt->progressive = XML_PARSER_COMMENT; 11568 goto done; 11569 } 11570 xmlParseComment(ctxt); 11571 ctxt->instate = XML_PARSER_CONTENT; 11572 ctxt->progressive = 1; 11573 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11574 (ctxt->input->cur[2] == '[') && 11575 (ctxt->input->cur[3] == 'C') && 11576 (ctxt->input->cur[4] == 'D') && 11577 (ctxt->input->cur[5] == 'A') && 11578 (ctxt->input->cur[6] == 'T') && 11579 (ctxt->input->cur[7] == 'A') && 11580 (ctxt->input->cur[8] == '[')) { 11581 SKIP(9); 11582 ctxt->instate = XML_PARSER_CDATA_SECTION; 11583 break; 11584 } else if ((cur == '<') && (next == '!') && 11585 (avail < 9)) { 11586 goto done; 11587 } else if (cur == '&') { 11588 if ((!terminate) && 11589 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11590 goto done; 11591 xmlParseReference(ctxt); 11592 } else { 11593 /* TODO Avoid the extra copy, handle directly !!! */ 11594 /* 11595 * Goal of the following test is: 11596 * - minimize calls to the SAX 'character' callback 11597 * when they are mergeable 11598 * - handle an problem for isBlank when we only parse 11599 * a sequence of blank chars and the next one is 11600 * not available to check against '<' presence. 11601 * - tries to homogenize the differences in SAX 11602 * callbacks between the push and pull versions 11603 * of the parser. 
11604 */ 11605 if ((ctxt->inputNr == 1) && 11606 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11607 if (!terminate) { 11608 if (ctxt->progressive) { 11609 if ((lastlt == NULL) || 11610 (ctxt->input->cur > lastlt)) 11611 goto done; 11612 } else if (xmlParseLookupSequence(ctxt, 11613 '<', 0, 0) < 0) { 11614 goto done; 11615 } 11616 } 11617 } 11618 ctxt->checkIndex = 0; 11619 xmlParseCharData(ctxt, 0); 11620 } 11621 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) { 11622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11623 "detected an error in element content\n"); 11624 xmlHaltParser(ctxt); 11625 break; 11626 } 11627 break; 11628 } 11629 case XML_PARSER_END_TAG: 11630 if (avail < 2) 11631 goto done; 11632 if (!terminate) { 11633 if (ctxt->progressive) { 11634 /* > can be found unescaped in attribute values */ 11635 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11636 goto done; 11637 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11638 goto done; 11639 } 11640 } 11641 if (ctxt->sax2) { 11642 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); 11643 nameNsPop(ctxt); 11644 } 11645 #ifdef LIBXML_SAX1_ENABLED 11646 else 11647 xmlParseEndTag1(ctxt, 0); 11648 #endif /* LIBXML_SAX1_ENABLED */ 11649 if (ctxt->instate == XML_PARSER_EOF) { 11650 /* Nothing */ 11651 } else if (ctxt->nameNr == 0) { 11652 ctxt->instate = XML_PARSER_EPILOG; 11653 } else { 11654 ctxt->instate = XML_PARSER_CONTENT; 11655 } 11656 break; 11657 case XML_PARSER_CDATA_SECTION: { 11658 /* 11659 * The Push mode need to have the SAX callback for 11660 * cdataBlock merge back contiguous callbacks. 
11661 */ 11662 int base; 11663 11664 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11665 if (base < 0) { 11666 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11667 int tmp; 11668 11669 tmp = xmlCheckCdataPush(ctxt->input->cur, 11670 XML_PARSER_BIG_BUFFER_SIZE, 0); 11671 if (tmp < 0) { 11672 tmp = -tmp; 11673 ctxt->input->cur += tmp; 11674 goto encoding_error; 11675 } 11676 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11677 if (ctxt->sax->cdataBlock != NULL) 11678 ctxt->sax->cdataBlock(ctxt->userData, 11679 ctxt->input->cur, tmp); 11680 else if (ctxt->sax->characters != NULL) 11681 ctxt->sax->characters(ctxt->userData, 11682 ctxt->input->cur, tmp); 11683 } 11684 if (ctxt->instate == XML_PARSER_EOF) 11685 goto done; 11686 SKIPL(tmp); 11687 ctxt->checkIndex = 0; 11688 } 11689 goto done; 11690 } else { 11691 int tmp; 11692 11693 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11694 if ((tmp < 0) || (tmp != base)) { 11695 tmp = -tmp; 11696 ctxt->input->cur += tmp; 11697 goto encoding_error; 11698 } 11699 if ((ctxt->sax != NULL) && (base == 0) && 11700 (ctxt->sax->cdataBlock != NULL) && 11701 (!ctxt->disableSAX)) { 11702 /* 11703 * Special case to provide identical behaviour 11704 * between pull and push parsers on enpty CDATA 11705 * sections 11706 */ 11707 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11708 (!strncmp((const char *)&ctxt->input->cur[-9], 11709 "<![CDATA[", 9))) 11710 ctxt->sax->cdataBlock(ctxt->userData, 11711 BAD_CAST "", 0); 11712 } else if ((ctxt->sax != NULL) && (base > 0) && 11713 (!ctxt->disableSAX)) { 11714 if (ctxt->sax->cdataBlock != NULL) 11715 ctxt->sax->cdataBlock(ctxt->userData, 11716 ctxt->input->cur, base); 11717 else if (ctxt->sax->characters != NULL) 11718 ctxt->sax->characters(ctxt->userData, 11719 ctxt->input->cur, base); 11720 } 11721 if (ctxt->instate == XML_PARSER_EOF) 11722 goto done; 11723 SKIPL(base + 3); 11724 ctxt->checkIndex = 0; 11725 ctxt->instate = XML_PARSER_CONTENT; 11726 #ifdef DEBUG_PUSH 11727 
xmlGenericError(xmlGenericErrorContext, 11728 "PP: entering CONTENT\n"); 11729 #endif 11730 } 11731 break; 11732 } 11733 case XML_PARSER_MISC: 11734 SKIP_BLANKS; 11735 if (ctxt->input->buf == NULL) 11736 avail = ctxt->input->length - 11737 (ctxt->input->cur - ctxt->input->base); 11738 else 11739 avail = xmlBufUse(ctxt->input->buf->buffer) - 11740 (ctxt->input->cur - ctxt->input->base); 11741 if (avail < 2) 11742 goto done; 11743 cur = ctxt->input->cur[0]; 11744 next = ctxt->input->cur[1]; 11745 if ((cur == '<') && (next == '?')) { 11746 if ((!terminate) && 11747 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11748 ctxt->progressive = XML_PARSER_PI; 11749 goto done; 11750 } 11751 #ifdef DEBUG_PUSH 11752 xmlGenericError(xmlGenericErrorContext, 11753 "PP: Parsing PI\n"); 11754 #endif 11755 xmlParsePI(ctxt); 11756 if (ctxt->instate == XML_PARSER_EOF) 11757 goto done; 11758 ctxt->instate = XML_PARSER_MISC; 11759 ctxt->progressive = 1; 11760 ctxt->checkIndex = 0; 11761 } else if ((cur == '<') && (next == '!') && 11762 (ctxt->input->cur[2] == '-') && 11763 (ctxt->input->cur[3] == '-')) { 11764 if ((!terminate) && 11765 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11766 ctxt->progressive = XML_PARSER_COMMENT; 11767 goto done; 11768 } 11769 #ifdef DEBUG_PUSH 11770 xmlGenericError(xmlGenericErrorContext, 11771 "PP: Parsing Comment\n"); 11772 #endif 11773 xmlParseComment(ctxt); 11774 if (ctxt->instate == XML_PARSER_EOF) 11775 goto done; 11776 ctxt->instate = XML_PARSER_MISC; 11777 ctxt->progressive = 1; 11778 ctxt->checkIndex = 0; 11779 } else if ((cur == '<') && (next == '!') && 11780 (ctxt->input->cur[2] == 'D') && 11781 (ctxt->input->cur[3] == 'O') && 11782 (ctxt->input->cur[4] == 'C') && 11783 (ctxt->input->cur[5] == 'T') && 11784 (ctxt->input->cur[6] == 'Y') && 11785 (ctxt->input->cur[7] == 'P') && 11786 (ctxt->input->cur[8] == 'E')) { 11787 if ((!terminate) && 11788 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11789 ctxt->progressive = XML_PARSER_DTD; 
11790 goto done; 11791 } 11792 #ifdef DEBUG_PUSH 11793 xmlGenericError(xmlGenericErrorContext, 11794 "PP: Parsing internal subset\n"); 11795 #endif 11796 ctxt->inSubset = 1; 11797 ctxt->progressive = 0; 11798 ctxt->checkIndex = 0; 11799 xmlParseDocTypeDecl(ctxt); 11800 if (ctxt->instate == XML_PARSER_EOF) 11801 goto done; 11802 if (RAW == '[') { 11803 ctxt->instate = XML_PARSER_DTD; 11804 #ifdef DEBUG_PUSH 11805 xmlGenericError(xmlGenericErrorContext, 11806 "PP: entering DTD\n"); 11807 #endif 11808 } else { 11809 /* 11810 * Create and update the external subset. 11811 */ 11812 ctxt->inSubset = 2; 11813 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11814 (ctxt->sax->externalSubset != NULL)) 11815 ctxt->sax->externalSubset(ctxt->userData, 11816 ctxt->intSubName, ctxt->extSubSystem, 11817 ctxt->extSubURI); 11818 ctxt->inSubset = 0; 11819 xmlCleanSpecialAttr(ctxt); 11820 ctxt->instate = XML_PARSER_PROLOG; 11821 #ifdef DEBUG_PUSH 11822 xmlGenericError(xmlGenericErrorContext, 11823 "PP: entering PROLOG\n"); 11824 #endif 11825 } 11826 } else if ((cur == '<') && (next == '!') && 11827 (avail < 9)) { 11828 goto done; 11829 } else { 11830 ctxt->instate = XML_PARSER_START_TAG; 11831 ctxt->progressive = XML_PARSER_START_TAG; 11832 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11833 #ifdef DEBUG_PUSH 11834 xmlGenericError(xmlGenericErrorContext, 11835 "PP: entering START_TAG\n"); 11836 #endif 11837 } 11838 break; 11839 case XML_PARSER_PROLOG: 11840 SKIP_BLANKS; 11841 if (ctxt->input->buf == NULL) 11842 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11843 else 11844 avail = xmlBufUse(ctxt->input->buf->buffer) - 11845 (ctxt->input->cur - ctxt->input->base); 11846 if (avail < 2) 11847 goto done; 11848 cur = ctxt->input->cur[0]; 11849 next = ctxt->input->cur[1]; 11850 if ((cur == '<') && (next == '?')) { 11851 if ((!terminate) && 11852 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11853 ctxt->progressive = XML_PARSER_PI; 11854 goto done; 11855 } 11856 
#ifdef DEBUG_PUSH 11857 xmlGenericError(xmlGenericErrorContext, 11858 "PP: Parsing PI\n"); 11859 #endif 11860 xmlParsePI(ctxt); 11861 if (ctxt->instate == XML_PARSER_EOF) 11862 goto done; 11863 ctxt->instate = XML_PARSER_PROLOG; 11864 ctxt->progressive = 1; 11865 } else if ((cur == '<') && (next == '!') && 11866 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11867 if ((!terminate) && 11868 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11869 ctxt->progressive = XML_PARSER_COMMENT; 11870 goto done; 11871 } 11872 #ifdef DEBUG_PUSH 11873 xmlGenericError(xmlGenericErrorContext, 11874 "PP: Parsing Comment\n"); 11875 #endif 11876 xmlParseComment(ctxt); 11877 if (ctxt->instate == XML_PARSER_EOF) 11878 goto done; 11879 ctxt->instate = XML_PARSER_PROLOG; 11880 ctxt->progressive = 1; 11881 } else if ((cur == '<') && (next == '!') && 11882 (avail < 4)) { 11883 goto done; 11884 } else { 11885 ctxt->instate = XML_PARSER_START_TAG; 11886 if (ctxt->progressive == 0) 11887 ctxt->progressive = XML_PARSER_START_TAG; 11888 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11889 #ifdef DEBUG_PUSH 11890 xmlGenericError(xmlGenericErrorContext, 11891 "PP: entering START_TAG\n"); 11892 #endif 11893 } 11894 break; 11895 case XML_PARSER_EPILOG: 11896 SKIP_BLANKS; 11897 if (ctxt->input->buf == NULL) 11898 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11899 else 11900 avail = xmlBufUse(ctxt->input->buf->buffer) - 11901 (ctxt->input->cur - ctxt->input->base); 11902 if (avail < 2) 11903 goto done; 11904 cur = ctxt->input->cur[0]; 11905 next = ctxt->input->cur[1]; 11906 if ((cur == '<') && (next == '?')) { 11907 if ((!terminate) && 11908 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11909 ctxt->progressive = XML_PARSER_PI; 11910 goto done; 11911 } 11912 #ifdef DEBUG_PUSH 11913 xmlGenericError(xmlGenericErrorContext, 11914 "PP: Parsing PI\n"); 11915 #endif 11916 xmlParsePI(ctxt); 11917 if (ctxt->instate == XML_PARSER_EOF) 11918 goto done; 11919 
ctxt->instate = XML_PARSER_EPILOG; 11920 ctxt->progressive = 1; 11921 } else if ((cur == '<') && (next == '!') && 11922 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11923 if ((!terminate) && 11924 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11925 ctxt->progressive = XML_PARSER_COMMENT; 11926 goto done; 11927 } 11928 #ifdef DEBUG_PUSH 11929 xmlGenericError(xmlGenericErrorContext, 11930 "PP: Parsing Comment\n"); 11931 #endif 11932 xmlParseComment(ctxt); 11933 if (ctxt->instate == XML_PARSER_EOF) 11934 goto done; 11935 ctxt->instate = XML_PARSER_EPILOG; 11936 ctxt->progressive = 1; 11937 } else if ((cur == '<') && (next == '!') && 11938 (avail < 4)) { 11939 goto done; 11940 } else { 11941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11942 xmlHaltParser(ctxt); 11943 #ifdef DEBUG_PUSH 11944 xmlGenericError(xmlGenericErrorContext, 11945 "PP: entering EOF\n"); 11946 #endif 11947 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11948 ctxt->sax->endDocument(ctxt->userData); 11949 goto done; 11950 } 11951 break; 11952 case XML_PARSER_DTD: { 11953 /* 11954 * Sorry but progressive parsing of the internal subset 11955 * is not expected to be supported. We first check that 11956 * the full content of the internal subset is available and 11957 * the parsing is launched only at that point. 11958 * Internal subset ends up with "']' S? '>'" in an unescaped 11959 * section and not in a ']]>' sequence which are conditional 11960 * sections (whoever argued to keep that crap in XML deserve 11961 * a place in hell !). 
11962 */ 11963 int base, i; 11964 xmlChar *buf; 11965 xmlChar quote = 0; 11966 size_t use; 11967 11968 base = ctxt->input->cur - ctxt->input->base; 11969 if (base < 0) return(0); 11970 if (ctxt->checkIndex > base) 11971 base = ctxt->checkIndex; 11972 buf = xmlBufContent(ctxt->input->buf->buffer); 11973 use = xmlBufUse(ctxt->input->buf->buffer); 11974 for (;(unsigned int) base < use; base++) { 11975 if (quote != 0) { 11976 if (buf[base] == quote) 11977 quote = 0; 11978 continue; 11979 } 11980 if ((quote == 0) && (buf[base] == '<')) { 11981 int found = 0; 11982 /* special handling of comments */ 11983 if (((unsigned int) base + 4 < use) && 11984 (buf[base + 1] == '!') && 11985 (buf[base + 2] == '-') && 11986 (buf[base + 3] == '-')) { 11987 for (;(unsigned int) base + 3 < use; base++) { 11988 if ((buf[base] == '-') && 11989 (buf[base + 1] == '-') && 11990 (buf[base + 2] == '>')) { 11991 found = 1; 11992 base += 2; 11993 break; 11994 } 11995 } 11996 if (!found) { 11997 #if 0 11998 fprintf(stderr, "unfinished comment\n"); 11999 #endif 12000 break; /* for */ 12001 } 12002 continue; 12003 } 12004 } 12005 if (buf[base] == '"') { 12006 quote = '"'; 12007 continue; 12008 } 12009 if (buf[base] == '\'') { 12010 quote = '\''; 12011 continue; 12012 } 12013 if (buf[base] == ']') { 12014 #if 0 12015 fprintf(stderr, "%c%c%c%c: ", buf[base], 12016 buf[base + 1], buf[base + 2], buf[base + 3]); 12017 #endif 12018 if ((unsigned int) base +1 >= use) 12019 break; 12020 if (buf[base + 1] == ']') { 12021 /* conditional crap, skip both ']' ! 
*/ 12022 base++; 12023 continue; 12024 } 12025 for (i = 1; (unsigned int) base + i < use; i++) { 12026 if (buf[base + i] == '>') { 12027 #if 0 12028 fprintf(stderr, "found\n"); 12029 #endif 12030 goto found_end_int_subset; 12031 } 12032 if (!IS_BLANK_CH(buf[base + i])) { 12033 #if 0 12034 fprintf(stderr, "not found\n"); 12035 #endif 12036 goto not_end_of_int_subset; 12037 } 12038 } 12039 #if 0 12040 fprintf(stderr, "end of stream\n"); 12041 #endif 12042 break; 12043 12044 } 12045 not_end_of_int_subset: 12046 continue; /* for */ 12047 } 12048 /* 12049 * We didn't found the end of the Internal subset 12050 */ 12051 if (quote == 0) 12052 ctxt->checkIndex = base; 12053 else 12054 ctxt->checkIndex = 0; 12055 #ifdef DEBUG_PUSH 12056 if (next == 0) 12057 xmlGenericError(xmlGenericErrorContext, 12058 "PP: lookup of int subset end filed\n"); 12059 #endif 12060 goto done; 12061 12062 found_end_int_subset: 12063 ctxt->checkIndex = 0; 12064 xmlParseInternalSubset(ctxt); 12065 if (ctxt->instate == XML_PARSER_EOF) 12066 goto done; 12067 ctxt->inSubset = 2; 12068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12069 (ctxt->sax->externalSubset != NULL)) 12070 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12071 ctxt->extSubSystem, ctxt->extSubURI); 12072 ctxt->inSubset = 0; 12073 xmlCleanSpecialAttr(ctxt); 12074 if (ctxt->instate == XML_PARSER_EOF) 12075 goto done; 12076 ctxt->instate = XML_PARSER_PROLOG; 12077 ctxt->checkIndex = 0; 12078 #ifdef DEBUG_PUSH 12079 xmlGenericError(xmlGenericErrorContext, 12080 "PP: entering PROLOG\n"); 12081 #endif 12082 break; 12083 } 12084 case XML_PARSER_COMMENT: 12085 xmlGenericError(xmlGenericErrorContext, 12086 "PP: internal error, state == COMMENT\n"); 12087 ctxt->instate = XML_PARSER_CONTENT; 12088 #ifdef DEBUG_PUSH 12089 xmlGenericError(xmlGenericErrorContext, 12090 "PP: entering CONTENT\n"); 12091 #endif 12092 break; 12093 case XML_PARSER_IGNORE: 12094 xmlGenericError(xmlGenericErrorContext, 12095 "PP: internal error, state 
== IGNORE"); 12096 ctxt->instate = XML_PARSER_DTD; 12097 #ifdef DEBUG_PUSH 12098 xmlGenericError(xmlGenericErrorContext, 12099 "PP: entering DTD\n"); 12100 #endif 12101 break; 12102 case XML_PARSER_PI: 12103 xmlGenericError(xmlGenericErrorContext, 12104 "PP: internal error, state == PI\n"); 12105 ctxt->instate = XML_PARSER_CONTENT; 12106 #ifdef DEBUG_PUSH 12107 xmlGenericError(xmlGenericErrorContext, 12108 "PP: entering CONTENT\n"); 12109 #endif 12110 break; 12111 case XML_PARSER_ENTITY_DECL: 12112 xmlGenericError(xmlGenericErrorContext, 12113 "PP: internal error, state == ENTITY_DECL\n"); 12114 ctxt->instate = XML_PARSER_DTD; 12115 #ifdef DEBUG_PUSH 12116 xmlGenericError(xmlGenericErrorContext, 12117 "PP: entering DTD\n"); 12118 #endif 12119 break; 12120 case XML_PARSER_ENTITY_VALUE: 12121 xmlGenericError(xmlGenericErrorContext, 12122 "PP: internal error, state == ENTITY_VALUE\n"); 12123 ctxt->instate = XML_PARSER_CONTENT; 12124 #ifdef DEBUG_PUSH 12125 xmlGenericError(xmlGenericErrorContext, 12126 "PP: entering DTD\n"); 12127 #endif 12128 break; 12129 case XML_PARSER_ATTRIBUTE_VALUE: 12130 xmlGenericError(xmlGenericErrorContext, 12131 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12132 ctxt->instate = XML_PARSER_START_TAG; 12133 #ifdef DEBUG_PUSH 12134 xmlGenericError(xmlGenericErrorContext, 12135 "PP: entering START_TAG\n"); 12136 #endif 12137 break; 12138 case XML_PARSER_SYSTEM_LITERAL: 12139 xmlGenericError(xmlGenericErrorContext, 12140 "PP: internal error, state == SYSTEM_LITERAL\n"); 12141 ctxt->instate = XML_PARSER_START_TAG; 12142 #ifdef DEBUG_PUSH 12143 xmlGenericError(xmlGenericErrorContext, 12144 "PP: entering START_TAG\n"); 12145 #endif 12146 break; 12147 case XML_PARSER_PUBLIC_LITERAL: 12148 xmlGenericError(xmlGenericErrorContext, 12149 "PP: internal error, state == PUBLIC_LITERAL\n"); 12150 ctxt->instate = XML_PARSER_START_TAG; 12151 #ifdef DEBUG_PUSH 12152 xmlGenericError(xmlGenericErrorContext, 12153 "PP: entering START_TAG\n"); 12154 #endif 
12155 break; 12156 } 12157 } 12158 done: 12159 #ifdef DEBUG_PUSH 12160 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12161 #endif 12162 return(ret); 12163 encoding_error: 12164 { 12165 char buffer[150]; 12166 12167 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12168 ctxt->input->cur[0], ctxt->input->cur[1], 12169 ctxt->input->cur[2], ctxt->input->cur[3]); 12170 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12171 "Input is not proper UTF-8, indicate encoding !\n%s", 12172 BAD_CAST buffer, NULL); 12173 } 12174 return(0); 12175 } 12176 12177 /** 12178 * xmlParseCheckTransition: 12179 * @ctxt: an XML parser context 12180 * @chunk: a char array 12181 * @size: the size in byte of the chunk 12182 * 12183 * Check depending on the current parser state if the chunk given must be 12184 * processed immediately or one need more data to advance on parsing. 12185 * 12186 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12187 */ 12188 static int 12189 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12190 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12191 return(-1); 12192 if (ctxt->instate == XML_PARSER_START_TAG) { 12193 if (memchr(chunk, '>', size) != NULL) 12194 return(1); 12195 return(0); 12196 } 12197 if (ctxt->progressive == XML_PARSER_COMMENT) { 12198 if (memchr(chunk, '>', size) != NULL) 12199 return(1); 12200 return(0); 12201 } 12202 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12203 if (memchr(chunk, '>', size) != NULL) 12204 return(1); 12205 return(0); 12206 } 12207 if (ctxt->progressive == XML_PARSER_PI) { 12208 if (memchr(chunk, '>', size) != NULL) 12209 return(1); 12210 return(0); 12211 } 12212 if (ctxt->instate == XML_PARSER_END_TAG) { 12213 if (memchr(chunk, '>', size) != NULL) 12214 return(1); 12215 return(0); 12216 } 12217 if ((ctxt->progressive == XML_PARSER_DTD) || 12218 (ctxt->instate == XML_PARSER_DTD)) { 12219 if (memchr(chunk, '>', size) != NULL) 12220 
return(1); 12221 return(0); 12222 } 12223 return(1); 12224 } 12225 12226 /** 12227 * xmlParseChunk: 12228 * @ctxt: an XML parser context 12229 * @chunk: an char array 12230 * @size: the size in byte of the chunk 12231 * @terminate: last chunk indicator 12232 * 12233 * Parse a Chunk of memory 12234 * 12235 * Returns zero if no error, the xmlParserErrors otherwise. 12236 */ 12237 int 12238 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12239 int terminate) { 12240 int end_in_lf = 0; 12241 int remain = 0; 12242 size_t old_avail = 0; 12243 size_t avail = 0; 12244 12245 if (ctxt == NULL) 12246 return(XML_ERR_INTERNAL_ERROR); 12247 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12248 return(ctxt->errNo); 12249 if (ctxt->instate == XML_PARSER_EOF) 12250 return(-1); 12251 if (ctxt->instate == XML_PARSER_START) 12252 xmlDetectSAX2(ctxt); 12253 if ((size > 0) && (chunk != NULL) && (!terminate) && 12254 (chunk[size - 1] == '\r')) { 12255 end_in_lf = 1; 12256 size--; 12257 } 12258 12259 xmldecl_done: 12260 12261 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12262 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12263 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12264 size_t cur = ctxt->input->cur - ctxt->input->base; 12265 int res; 12266 12267 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12268 /* 12269 * Specific handling if we autodetected an encoding, we should not 12270 * push more than the first line ... 
which depend on the encoding 12271 * And only push the rest once the final encoding was detected 12272 */ 12273 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12274 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12275 unsigned int len = 45; 12276 12277 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12278 BAD_CAST "UTF-16")) || 12279 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12280 BAD_CAST "UTF16"))) 12281 len = 90; 12282 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12283 BAD_CAST "UCS-4")) || 12284 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12285 BAD_CAST "UCS4"))) 12286 len = 180; 12287 12288 if (ctxt->input->buf->rawconsumed < len) 12289 len -= ctxt->input->buf->rawconsumed; 12290 12291 /* 12292 * Change size for reading the initial declaration only 12293 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12294 * will blindly copy extra bytes from memory. 12295 */ 12296 if ((unsigned int) size > len) { 12297 remain = size - len; 12298 size = len; 12299 } else { 12300 remain = 0; 12301 } 12302 } 12303 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12304 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12305 if (res < 0) { 12306 ctxt->errNo = XML_PARSER_EOF; 12307 xmlHaltParser(ctxt); 12308 return (XML_PARSER_EOF); 12309 } 12310 #ifdef DEBUG_PUSH 12311 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12312 #endif 12313 12314 } else if (ctxt->instate != XML_PARSER_EOF) { 12315 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12316 xmlParserInputBufferPtr in = ctxt->input->buf; 12317 if ((in->encoder != NULL) && (in->buffer != NULL) && 12318 (in->raw != NULL)) { 12319 int nbchars; 12320 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12321 size_t current = ctxt->input->cur - ctxt->input->base; 12322 12323 nbchars = xmlCharEncInput(in, terminate); 12324 
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12325 if (nbchars < 0) { 12326 /* TODO 2.6.0 */ 12327 xmlGenericError(xmlGenericErrorContext, 12328 "xmlParseChunk: encoder error\n"); 12329 xmlHaltParser(ctxt); 12330 return(XML_ERR_INVALID_ENCODING); 12331 } 12332 } 12333 } 12334 } 12335 if (remain != 0) { 12336 xmlParseTryOrFinish(ctxt, 0); 12337 } else { 12338 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12339 avail = xmlBufUse(ctxt->input->buf->buffer); 12340 /* 12341 * Depending on the current state it may not be such 12342 * a good idea to try parsing if there is nothing in the chunk 12343 * which would be worth doing a parser state transition and we 12344 * need to wait for more data 12345 */ 12346 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12347 (old_avail == 0) || (avail == 0) || 12348 (xmlParseCheckTransition(ctxt, 12349 (const char *)&ctxt->input->base[old_avail], 12350 avail - old_avail))) 12351 xmlParseTryOrFinish(ctxt, terminate); 12352 } 12353 if (ctxt->instate == XML_PARSER_EOF) 12354 return(ctxt->errNo); 12355 12356 if ((ctxt->input != NULL) && 12357 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12358 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12359 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12360 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12361 xmlHaltParser(ctxt); 12362 } 12363 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12364 return(ctxt->errNo); 12365 12366 if (remain != 0) { 12367 chunk += size; 12368 size = remain; 12369 remain = 0; 12370 goto xmldecl_done; 12371 } 12372 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12373 (ctxt->input->buf != NULL)) { 12374 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12375 ctxt->input); 12376 size_t current = ctxt->input->cur - ctxt->input->base; 12377 12378 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12379 12380 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, 
ctxt->input, 12381 base, current); 12382 } 12383 if (terminate) { 12384 /* 12385 * Check for termination 12386 */ 12387 int cur_avail = 0; 12388 12389 if (ctxt->input != NULL) { 12390 if (ctxt->input->buf == NULL) 12391 cur_avail = ctxt->input->length - 12392 (ctxt->input->cur - ctxt->input->base); 12393 else 12394 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12395 (ctxt->input->cur - ctxt->input->base); 12396 } 12397 12398 if ((ctxt->instate != XML_PARSER_EOF) && 12399 (ctxt->instate != XML_PARSER_EPILOG)) { 12400 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12401 } 12402 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12403 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12404 } 12405 if (ctxt->instate != XML_PARSER_EOF) { 12406 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12407 ctxt->sax->endDocument(ctxt->userData); 12408 } 12409 ctxt->instate = XML_PARSER_EOF; 12410 } 12411 if (ctxt->wellFormed == 0) 12412 return((xmlParserErrors) ctxt->errNo); 12413 else 12414 return(0); 12415 } 12416 12417 /************************************************************************ 12418 * * 12419 * I/O front end functions to the parser * 12420 * * 12421 ************************************************************************/ 12422 12423 /** 12424 * xmlCreatePushParserCtxt: 12425 * @sax: a SAX handler 12426 * @user_data: The user data returned on SAX callbacks 12427 * @chunk: a pointer to an array of chars 12428 * @size: number of chars in the array 12429 * @filename: an optional file name or URI 12430 * 12431 * Create a parser context for using the XML parser in push mode. 12432 * If @buffer and @size are non-NULL, the data is used to detect 12433 * the encoding. The remaining characters will be parsed so they 12434 * don't need to be fed in again through xmlParseChunk. 12435 * To allow content encoding detection, @size should be >= 4 12436 * The value of @filename is used for fetching external entities 12437 * and error/warning reports. 
12438 * 12439 * Returns the new parser context or NULL 12440 */ 12441 12442 xmlParserCtxtPtr 12443 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12444 const char *chunk, int size, const char *filename) { 12445 xmlParserCtxtPtr ctxt; 12446 xmlParserInputPtr inputStream; 12447 xmlParserInputBufferPtr buf; 12448 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12449 12450 /* 12451 * plug some encoding conversion routines 12452 */ 12453 if ((chunk != NULL) && (size >= 4)) 12454 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12455 12456 buf = xmlAllocParserInputBuffer(enc); 12457 if (buf == NULL) return(NULL); 12458 12459 ctxt = xmlNewParserCtxt(); 12460 if (ctxt == NULL) { 12461 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12462 xmlFreeParserInputBuffer(buf); 12463 return(NULL); 12464 } 12465 ctxt->dictNames = 1; 12466 if (sax != NULL) { 12467 #ifdef LIBXML_SAX1_ENABLED 12468 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12469 #endif /* LIBXML_SAX1_ENABLED */ 12470 xmlFree(ctxt->sax); 12471 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12472 if (ctxt->sax == NULL) { 12473 xmlErrMemory(ctxt, NULL); 12474 xmlFreeParserInputBuffer(buf); 12475 xmlFreeParserCtxt(ctxt); 12476 return(NULL); 12477 } 12478 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12479 if (sax->initialized == XML_SAX2_MAGIC) 12480 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12481 else 12482 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12483 if (user_data != NULL) 12484 ctxt->userData = user_data; 12485 } 12486 if (filename == NULL) { 12487 ctxt->directory = NULL; 12488 } else { 12489 ctxt->directory = xmlParserGetDirectory(filename); 12490 } 12491 12492 inputStream = xmlNewInputStream(ctxt); 12493 if (inputStream == NULL) { 12494 xmlFreeParserCtxt(ctxt); 12495 xmlFreeParserInputBuffer(buf); 12496 return(NULL); 12497 } 12498 12499 if (filename == NULL) 12500 inputStream->filename = NULL; 12501 else { 12502 inputStream->filename = 
(char *) 12503 xmlCanonicPath((const xmlChar *) filename); 12504 if (inputStream->filename == NULL) { 12505 xmlFreeParserCtxt(ctxt); 12506 xmlFreeParserInputBuffer(buf); 12507 return(NULL); 12508 } 12509 } 12510 inputStream->buf = buf; 12511 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12512 inputPush(ctxt, inputStream); 12513 12514 /* 12515 * If the caller didn't provide an initial 'chunk' for determining 12516 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12517 * that it can be automatically determined later 12518 */ 12519 if ((size == 0) || (chunk == NULL)) { 12520 ctxt->charset = XML_CHAR_ENCODING_NONE; 12521 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12522 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12523 size_t cur = ctxt->input->cur - ctxt->input->base; 12524 12525 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12526 12527 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12528 #ifdef DEBUG_PUSH 12529 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12530 #endif 12531 } 12532 12533 if (enc != XML_CHAR_ENCODING_NONE) { 12534 xmlSwitchEncoding(ctxt, enc); 12535 } 12536 12537 return(ctxt); 12538 } 12539 #endif /* LIBXML_PUSH_ENABLED */ 12540 12541 /** 12542 * xmlHaltParser: 12543 * @ctxt: an XML parser context 12544 * 12545 * Blocks further parser processing don't override error 12546 * for internal use 12547 */ 12548 static void 12549 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12550 if (ctxt == NULL) 12551 return; 12552 ctxt->instate = XML_PARSER_EOF; 12553 ctxt->disableSAX = 1; 12554 while (ctxt->inputNr > 1) 12555 xmlFreeInputStream(inputPop(ctxt)); 12556 if (ctxt->input != NULL) { 12557 /* 12558 * in case there was a specific allocation deallocate before 12559 * overriding base 12560 */ 12561 if (ctxt->input->free != NULL) { 12562 ctxt->input->free((xmlChar *) ctxt->input->base); 12563 ctxt->input->free = NULL; 12564 } 12565 if 
(ctxt->input->buf != NULL) { 12566 xmlFreeParserInputBuffer(ctxt->input->buf); 12567 ctxt->input->buf = NULL; 12568 } 12569 ctxt->input->cur = BAD_CAST""; 12570 ctxt->input->length = 0; 12571 ctxt->input->base = ctxt->input->cur; 12572 ctxt->input->end = ctxt->input->cur; 12573 } 12574 } 12575 12576 /** 12577 * xmlStopParser: 12578 * @ctxt: an XML parser context 12579 * 12580 * Blocks further parser processing 12581 */ 12582 void 12583 xmlStopParser(xmlParserCtxtPtr ctxt) { 12584 if (ctxt == NULL) 12585 return; 12586 xmlHaltParser(ctxt); 12587 ctxt->errNo = XML_ERR_USER_STOP; 12588 } 12589 12590 /** 12591 * xmlCreateIOParserCtxt: 12592 * @sax: a SAX handler 12593 * @user_data: The user data returned on SAX callbacks 12594 * @ioread: an I/O read function 12595 * @ioclose: an I/O close function 12596 * @ioctx: an I/O handler 12597 * @enc: the charset encoding if known 12598 * 12599 * Create a parser context for using the XML parser with an existing 12600 * I/O stream 12601 * 12602 * Returns the new parser context or NULL 12603 */ 12604 xmlParserCtxtPtr 12605 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12606 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12607 void *ioctx, xmlCharEncoding enc) { 12608 xmlParserCtxtPtr ctxt; 12609 xmlParserInputPtr inputStream; 12610 xmlParserInputBufferPtr buf; 12611 12612 if (ioread == NULL) return(NULL); 12613 12614 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12615 if (buf == NULL) { 12616 if (ioclose != NULL) 12617 ioclose(ioctx); 12618 return (NULL); 12619 } 12620 12621 ctxt = xmlNewParserCtxt(); 12622 if (ctxt == NULL) { 12623 xmlFreeParserInputBuffer(buf); 12624 return(NULL); 12625 } 12626 if (sax != NULL) { 12627 #ifdef LIBXML_SAX1_ENABLED 12628 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12629 #endif /* LIBXML_SAX1_ENABLED */ 12630 xmlFree(ctxt->sax); 12631 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12632 if (ctxt->sax == NULL) { 12633 
xmlFreeParserInputBuffer(buf); 12634 xmlErrMemory(ctxt, NULL); 12635 xmlFreeParserCtxt(ctxt); 12636 return(NULL); 12637 } 12638 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12639 if (sax->initialized == XML_SAX2_MAGIC) 12640 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12641 else 12642 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12643 if (user_data != NULL) 12644 ctxt->userData = user_data; 12645 } 12646 12647 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12648 if (inputStream == NULL) { 12649 xmlFreeParserCtxt(ctxt); 12650 return(NULL); 12651 } 12652 inputPush(ctxt, inputStream); 12653 12654 return(ctxt); 12655 } 12656 12657 #ifdef LIBXML_VALID_ENABLED 12658 /************************************************************************ 12659 * * 12660 * Front ends when parsing a DTD * 12661 * * 12662 ************************************************************************/ 12663 12664 /** 12665 * xmlIOParseDTD: 12666 * @sax: the SAX handler block or NULL 12667 * @input: an Input Buffer 12668 * @enc: the charset encoding if known 12669 * 12670 * Load and parse a DTD 12671 * 12672 * Returns the resulting xmlDtdPtr or NULL in case of error. 12673 * @input will be freed by the function in any case. 
12674 */ 12675 12676 xmlDtdPtr 12677 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12678 xmlCharEncoding enc) { 12679 xmlDtdPtr ret = NULL; 12680 xmlParserCtxtPtr ctxt; 12681 xmlParserInputPtr pinput = NULL; 12682 xmlChar start[4]; 12683 12684 if (input == NULL) 12685 return(NULL); 12686 12687 ctxt = xmlNewParserCtxt(); 12688 if (ctxt == NULL) { 12689 xmlFreeParserInputBuffer(input); 12690 return(NULL); 12691 } 12692 12693 /* We are loading a DTD */ 12694 ctxt->options |= XML_PARSE_DTDLOAD; 12695 12696 /* 12697 * Set-up the SAX context 12698 */ 12699 if (sax != NULL) { 12700 if (ctxt->sax != NULL) 12701 xmlFree(ctxt->sax); 12702 ctxt->sax = sax; 12703 ctxt->userData = ctxt; 12704 } 12705 xmlDetectSAX2(ctxt); 12706 12707 /* 12708 * generate a parser input from the I/O handler 12709 */ 12710 12711 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12712 if (pinput == NULL) { 12713 if (sax != NULL) ctxt->sax = NULL; 12714 xmlFreeParserInputBuffer(input); 12715 xmlFreeParserCtxt(ctxt); 12716 return(NULL); 12717 } 12718 12719 /* 12720 * plug some encoding conversion routines here. 12721 */ 12722 if (xmlPushInput(ctxt, pinput) < 0) { 12723 if (sax != NULL) ctxt->sax = NULL; 12724 xmlFreeParserCtxt(ctxt); 12725 return(NULL); 12726 } 12727 if (enc != XML_CHAR_ENCODING_NONE) { 12728 xmlSwitchEncoding(ctxt, enc); 12729 } 12730 12731 pinput->filename = NULL; 12732 pinput->line = 1; 12733 pinput->col = 1; 12734 pinput->base = ctxt->input->cur; 12735 pinput->cur = ctxt->input->cur; 12736 pinput->free = NULL; 12737 12738 /* 12739 * let's parse that entity knowing it's an external subset. 
12740 */ 12741 ctxt->inSubset = 2; 12742 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12743 if (ctxt->myDoc == NULL) { 12744 xmlErrMemory(ctxt, "New Doc failed"); 12745 return(NULL); 12746 } 12747 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12748 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12749 BAD_CAST "none", BAD_CAST "none"); 12750 12751 if ((enc == XML_CHAR_ENCODING_NONE) && 12752 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12753 /* 12754 * Get the 4 first bytes and decode the charset 12755 * if enc != XML_CHAR_ENCODING_NONE 12756 * plug some encoding conversion routines. 12757 */ 12758 start[0] = RAW; 12759 start[1] = NXT(1); 12760 start[2] = NXT(2); 12761 start[3] = NXT(3); 12762 enc = xmlDetectCharEncoding(start, 4); 12763 if (enc != XML_CHAR_ENCODING_NONE) { 12764 xmlSwitchEncoding(ctxt, enc); 12765 } 12766 } 12767 12768 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12769 12770 if (ctxt->myDoc != NULL) { 12771 if (ctxt->wellFormed) { 12772 ret = ctxt->myDoc->extSubset; 12773 ctxt->myDoc->extSubset = NULL; 12774 if (ret != NULL) { 12775 xmlNodePtr tmp; 12776 12777 ret->doc = NULL; 12778 tmp = ret->children; 12779 while (tmp != NULL) { 12780 tmp->doc = NULL; 12781 tmp = tmp->next; 12782 } 12783 } 12784 } else { 12785 ret = NULL; 12786 } 12787 xmlFreeDoc(ctxt->myDoc); 12788 ctxt->myDoc = NULL; 12789 } 12790 if (sax != NULL) ctxt->sax = NULL; 12791 xmlFreeParserCtxt(ctxt); 12792 12793 return(ret); 12794 } 12795 12796 /** 12797 * xmlSAXParseDTD: 12798 * @sax: the SAX handler block 12799 * @ExternalID: a NAME* containing the External ID of the DTD 12800 * @SystemID: a NAME* containing the URL to the DTD 12801 * 12802 * Load and parse an external subset. 12803 * 12804 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12805 */ 12806 12807 xmlDtdPtr 12808 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12809 const xmlChar *SystemID) { 12810 xmlDtdPtr ret = NULL; 12811 xmlParserCtxtPtr ctxt; 12812 xmlParserInputPtr input = NULL; 12813 xmlCharEncoding enc; 12814 xmlChar* systemIdCanonic; 12815 12816 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12817 12818 ctxt = xmlNewParserCtxt(); 12819 if (ctxt == NULL) { 12820 return(NULL); 12821 } 12822 12823 /* We are loading a DTD */ 12824 ctxt->options |= XML_PARSE_DTDLOAD; 12825 12826 /* 12827 * Set-up the SAX context 12828 */ 12829 if (sax != NULL) { 12830 if (ctxt->sax != NULL) 12831 xmlFree(ctxt->sax); 12832 ctxt->sax = sax; 12833 ctxt->userData = ctxt; 12834 } 12835 12836 /* 12837 * Canonicalise the system ID 12838 */ 12839 systemIdCanonic = xmlCanonicPath(SystemID); 12840 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12841 xmlFreeParserCtxt(ctxt); 12842 return(NULL); 12843 } 12844 12845 /* 12846 * Ask the Entity resolver to load the damn thing 12847 */ 12848 12849 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12850 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12851 systemIdCanonic); 12852 if (input == NULL) { 12853 if (sax != NULL) ctxt->sax = NULL; 12854 xmlFreeParserCtxt(ctxt); 12855 if (systemIdCanonic != NULL) 12856 xmlFree(systemIdCanonic); 12857 return(NULL); 12858 } 12859 12860 /* 12861 * plug some encoding conversion routines here. 
12862 */ 12863 if (xmlPushInput(ctxt, input) < 0) { 12864 if (sax != NULL) ctxt->sax = NULL; 12865 xmlFreeParserCtxt(ctxt); 12866 if (systemIdCanonic != NULL) 12867 xmlFree(systemIdCanonic); 12868 return(NULL); 12869 } 12870 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12871 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12872 xmlSwitchEncoding(ctxt, enc); 12873 } 12874 12875 if (input->filename == NULL) 12876 input->filename = (char *) systemIdCanonic; 12877 else 12878 xmlFree(systemIdCanonic); 12879 input->line = 1; 12880 input->col = 1; 12881 input->base = ctxt->input->cur; 12882 input->cur = ctxt->input->cur; 12883 input->free = NULL; 12884 12885 /* 12886 * let's parse that entity knowing it's an external subset. 12887 */ 12888 ctxt->inSubset = 2; 12889 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12890 if (ctxt->myDoc == NULL) { 12891 xmlErrMemory(ctxt, "New Doc failed"); 12892 if (sax != NULL) ctxt->sax = NULL; 12893 xmlFreeParserCtxt(ctxt); 12894 return(NULL); 12895 } 12896 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12897 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12898 ExternalID, SystemID); 12899 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12900 12901 if (ctxt->myDoc != NULL) { 12902 if (ctxt->wellFormed) { 12903 ret = ctxt->myDoc->extSubset; 12904 ctxt->myDoc->extSubset = NULL; 12905 if (ret != NULL) { 12906 xmlNodePtr tmp; 12907 12908 ret->doc = NULL; 12909 tmp = ret->children; 12910 while (tmp != NULL) { 12911 tmp->doc = NULL; 12912 tmp = tmp->next; 12913 } 12914 } 12915 } else { 12916 ret = NULL; 12917 } 12918 xmlFreeDoc(ctxt->myDoc); 12919 ctxt->myDoc = NULL; 12920 } 12921 if (sax != NULL) ctxt->sax = NULL; 12922 xmlFreeParserCtxt(ctxt); 12923 12924 return(ret); 12925 } 12926 12927 12928 /** 12929 * xmlParseDTD: 12930 * @ExternalID: a NAME* containing the External ID of the DTD 12931 * @SystemID: a NAME* containing the URL to the DTD 12932 * 12933 * Load and parse an external subset. 
12934 * 12935 * Returns the resulting xmlDtdPtr or NULL in case of error. 12936 */ 12937 12938 xmlDtdPtr 12939 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12940 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12941 } 12942 #endif /* LIBXML_VALID_ENABLED */ 12943 12944 /************************************************************************ 12945 * * 12946 * Front ends when parsing an Entity * 12947 * * 12948 ************************************************************************/ 12949 12950 /** 12951 * xmlParseCtxtExternalEntity: 12952 * @ctx: the existing parsing context 12953 * @URL: the URL for the entity to load 12954 * @ID: the System ID for the entity to load 12955 * @lst: the return value for the set of parsed nodes 12956 * 12957 * Parse an external general entity within an existing parsing context 12958 * An external general parsed entity is well-formed if it matches the 12959 * production labeled extParsedEnt. 12960 * 12961 * [78] extParsedEnt ::= TextDecl? 
content 12962 * 12963 * Returns 0 if the entity is well formed, -1 in case of args problem and 12964 * the parser error code otherwise 12965 */ 12966 12967 int 12968 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12969 const xmlChar *ID, xmlNodePtr *lst) { 12970 void *userData; 12971 12972 if (ctx == NULL) return(-1); 12973 /* 12974 * If the user provided their own SAX callbacks, then reuse the 12975 * userData callback field, otherwise the expected setup in a 12976 * DOM builder is to have userData == ctxt 12977 */ 12978 if (ctx->userData == ctx) 12979 userData = NULL; 12980 else 12981 userData = ctx->userData; 12982 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax, 12983 userData, ctx->depth + 1, 12984 URL, ID, lst); 12985 } 12986 12987 /** 12988 * xmlParseExternalEntityPrivate: 12989 * @doc: the document the chunk pertains to 12990 * @oldctxt: the previous parser context if available 12991 * @sax: the SAX handler block (possibly NULL) 12992 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12993 * @depth: Used for loop detection, use 0 12994 * @URL: the URL for the entity to load 12995 * @ID: the System ID for the entity to load 12996 * @list: the return value for the set of parsed nodes 12997 * 12998 * Private version of xmlParseExternalEntity() 12999 * 13000 * Returns 0 if the entity is well formed, -1 in case of args problem and 13001 * the parser error code otherwise 13002 */ 13003 13004 static xmlParserErrors 13005 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13006 xmlSAXHandlerPtr sax, 13007 void *user_data, int depth, const xmlChar *URL, 13008 const xmlChar *ID, xmlNodePtr *list) { 13009 xmlParserCtxtPtr ctxt; 13010 xmlDocPtr newDoc; 13011 xmlNodePtr newRoot; 13012 xmlSAXHandlerPtr oldsax = NULL; 13013 xmlParserErrors ret = XML_ERR_OK; 13014 xmlChar start[4]; 13015 xmlCharEncoding enc; 13016 13017 if (((depth > 40) && 13018 ((oldctxt == NULL) || (oldctxt->options & 
XML_PARSE_HUGE) == 0)) || 13019 (depth > 1024)) { 13020 return(XML_ERR_ENTITY_LOOP); 13021 } 13022 13023 if (list != NULL) 13024 *list = NULL; 13025 if ((URL == NULL) && (ID == NULL)) 13026 return(XML_ERR_INTERNAL_ERROR); 13027 if (doc == NULL) 13028 return(XML_ERR_INTERNAL_ERROR); 13029 13030 13031 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13032 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13033 ctxt->userData = ctxt; 13034 if (sax != NULL) { 13035 oldsax = ctxt->sax; 13036 ctxt->sax = sax; 13037 if (user_data != NULL) 13038 ctxt->userData = user_data; 13039 } 13040 xmlDetectSAX2(ctxt); 13041 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13042 if (newDoc == NULL) { 13043 xmlFreeParserCtxt(ctxt); 13044 return(XML_ERR_INTERNAL_ERROR); 13045 } 13046 newDoc->properties = XML_DOC_INTERNAL; 13047 if (doc) { 13048 newDoc->intSubset = doc->intSubset; 13049 newDoc->extSubset = doc->extSubset; 13050 if (doc->dict) { 13051 newDoc->dict = doc->dict; 13052 xmlDictReference(newDoc->dict); 13053 } 13054 if (doc->URL != NULL) { 13055 newDoc->URL = xmlStrdup(doc->URL); 13056 } 13057 } 13058 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13059 if (newRoot == NULL) { 13060 if (sax != NULL) 13061 ctxt->sax = oldsax; 13062 xmlFreeParserCtxt(ctxt); 13063 newDoc->intSubset = NULL; 13064 newDoc->extSubset = NULL; 13065 xmlFreeDoc(newDoc); 13066 return(XML_ERR_INTERNAL_ERROR); 13067 } 13068 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13069 nodePush(ctxt, newDoc->children); 13070 if (doc == NULL) { 13071 ctxt->myDoc = newDoc; 13072 } else { 13073 ctxt->myDoc = doc; 13074 newRoot->doc = doc; 13075 } 13076 13077 /* 13078 * Get the 4 first bytes and decode the charset 13079 * if enc != XML_CHAR_ENCODING_NONE 13080 * plug some encoding conversion routines. 
13081 */ 13082 GROW; 13083 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13084 start[0] = RAW; 13085 start[1] = NXT(1); 13086 start[2] = NXT(2); 13087 start[3] = NXT(3); 13088 enc = xmlDetectCharEncoding(start, 4); 13089 if (enc != XML_CHAR_ENCODING_NONE) { 13090 xmlSwitchEncoding(ctxt, enc); 13091 } 13092 } 13093 13094 /* 13095 * Parse a possible text declaration first 13096 */ 13097 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13098 xmlParseTextDecl(ctxt); 13099 /* 13100 * An XML-1.0 document can't reference an entity not XML-1.0 13101 */ 13102 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) && 13103 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 13104 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 13105 "Version mismatch between document and entity\n"); 13106 } 13107 } 13108 13109 ctxt->instate = XML_PARSER_CONTENT; 13110 ctxt->depth = depth; 13111 if (oldctxt != NULL) { 13112 ctxt->_private = oldctxt->_private; 13113 ctxt->loadsubset = oldctxt->loadsubset; 13114 ctxt->validate = oldctxt->validate; 13115 ctxt->valid = oldctxt->valid; 13116 ctxt->replaceEntities = oldctxt->replaceEntities; 13117 if (oldctxt->validate) { 13118 ctxt->vctxt.error = oldctxt->vctxt.error; 13119 ctxt->vctxt.warning = oldctxt->vctxt.warning; 13120 ctxt->vctxt.userData = oldctxt->vctxt.userData; 13121 } 13122 ctxt->external = oldctxt->external; 13123 if (ctxt->dict) xmlDictFree(ctxt->dict); 13124 ctxt->dict = oldctxt->dict; 13125 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13126 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13127 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13128 ctxt->dictNames = oldctxt->dictNames; 13129 ctxt->attsDefault = oldctxt->attsDefault; 13130 ctxt->attsSpecial = oldctxt->attsSpecial; 13131 ctxt->linenumbers = oldctxt->linenumbers; 13132 ctxt->record_info = oldctxt->record_info; 13133 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13134 
ctxt->node_seq.length = oldctxt->node_seq.length; 13135 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13136 } else { 13137 /* 13138 * Doing validity checking on chunk without context 13139 * doesn't make sense 13140 */ 13141 ctxt->_private = NULL; 13142 ctxt->validate = 0; 13143 ctxt->external = 2; 13144 ctxt->loadsubset = 0; 13145 } 13146 13147 xmlParseContent(ctxt); 13148 13149 if ((RAW == '<') && (NXT(1) == '/')) { 13150 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13151 } else if (RAW != 0) { 13152 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13153 } 13154 if (ctxt->node != newDoc->children) { 13155 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13156 } 13157 13158 if (!ctxt->wellFormed) { 13159 if (ctxt->errNo == 0) 13160 ret = XML_ERR_INTERNAL_ERROR; 13161 else 13162 ret = (xmlParserErrors)ctxt->errNo; 13163 } else { 13164 if (list != NULL) { 13165 xmlNodePtr cur; 13166 13167 /* 13168 * Return the newly created nodeset after unlinking it from 13169 * they pseudo parent. 13170 */ 13171 cur = newDoc->children->children; 13172 *list = cur; 13173 while (cur != NULL) { 13174 cur->parent = NULL; 13175 cur = cur->next; 13176 } 13177 newDoc->children->children = NULL; 13178 } 13179 ret = XML_ERR_OK; 13180 } 13181 13182 /* 13183 * Record in the parent context the number of entities replacement 13184 * done when parsing that reference. 
13185 */ 13186 if (oldctxt != NULL) 13187 oldctxt->nbentities += ctxt->nbentities; 13188 13189 /* 13190 * Also record the size of the entity parsed 13191 */ 13192 if (ctxt->input != NULL && oldctxt != NULL) { 13193 oldctxt->sizeentities += ctxt->input->consumed; 13194 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13195 } 13196 /* 13197 * And record the last error if any 13198 */ 13199 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK)) 13200 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13201 13202 if (sax != NULL) 13203 ctxt->sax = oldsax; 13204 if (oldctxt != NULL) { 13205 ctxt->dict = NULL; 13206 ctxt->attsDefault = NULL; 13207 ctxt->attsSpecial = NULL; 13208 oldctxt->validate = ctxt->validate; 13209 oldctxt->valid = ctxt->valid; 13210 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13211 oldctxt->node_seq.length = ctxt->node_seq.length; 13212 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13213 } 13214 ctxt->node_seq.maximum = 0; 13215 ctxt->node_seq.length = 0; 13216 ctxt->node_seq.buffer = NULL; 13217 xmlFreeParserCtxt(ctxt); 13218 newDoc->intSubset = NULL; 13219 newDoc->extSubset = NULL; 13220 xmlFreeDoc(newDoc); 13221 13222 return(ret); 13223 } 13224 13225 #ifdef LIBXML_SAX1_ENABLED 13226 /** 13227 * xmlParseExternalEntity: 13228 * @doc: the document the chunk pertains to 13229 * @sax: the SAX handler block (possibly NULL) 13230 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13231 * @depth: Used for loop detection, use 0 13232 * @URL: the URL for the entity to load 13233 * @ID: the System ID for the entity to load 13234 * @lst: the return value for the set of parsed nodes 13235 * 13236 * Parse an external general entity 13237 * An external general parsed entity is well-formed if it matches the 13238 * production labeled extParsedEnt. 13239 * 13240 * [78] extParsedEnt ::= TextDecl? 
content 13241 * 13242 * Returns 0 if the entity is well formed, -1 in case of args problem and 13243 * the parser error code otherwise 13244 */ 13245 13246 int 13247 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13248 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13249 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13250 ID, lst)); 13251 } 13252 13253 /** 13254 * xmlParseBalancedChunkMemory: 13255 * @doc: the document the chunk pertains to (must not be NULL) 13256 * @sax: the SAX handler block (possibly NULL) 13257 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13258 * @depth: Used for loop detection, use 0 13259 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13260 * @lst: the return value for the set of parsed nodes 13261 * 13262 * Parse a well-balanced chunk of an XML document 13263 * called by the parser 13264 * The allowed sequence for the Well Balanced Chunk is the one defined by 13265 * the content production in the XML grammar: 13266 * 13267 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13268 * 13269 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13270 * the parser error code otherwise 13271 */ 13272 13273 int 13274 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13275 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13276 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13277 depth, string, lst, 0 ); 13278 } 13279 #endif /* LIBXML_SAX1_ENABLED */ 13280 13281 /** 13282 * xmlParseBalancedChunkMemoryInternal: 13283 * @oldctxt: the existing parsing context 13284 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13285 * @user_data: the user data field for the parser context 13286 * @lst: the return value for the set of parsed nodes 13287 * 13288 * 13289 * Parse a well-balanced chunk of an XML document 13290 * 
called by the parser 13291 * The allowed sequence for the Well Balanced Chunk is the one defined by 13292 * the content production in the XML grammar: 13293 * 13294 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13295 * 13296 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13297 * error code otherwise 13298 * 13299 * In case recover is set to 1, the nodelist will not be empty even if 13300 * the parsed chunk is not well balanced. 13301 */ 13302 static xmlParserErrors 13303 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13304 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13305 xmlParserCtxtPtr ctxt; 13306 xmlDocPtr newDoc = NULL; 13307 xmlNodePtr newRoot; 13308 xmlSAXHandlerPtr oldsax = NULL; 13309 xmlNodePtr content = NULL; 13310 xmlNodePtr last = NULL; 13311 int size; 13312 xmlParserErrors ret = XML_ERR_OK; 13313 #ifdef SAX2 13314 int i; 13315 #endif 13316 13317 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13318 (oldctxt->depth > 1024)) { 13319 return(XML_ERR_ENTITY_LOOP); 13320 } 13321 13322 13323 if (lst != NULL) 13324 *lst = NULL; 13325 if (string == NULL) 13326 return(XML_ERR_INTERNAL_ERROR); 13327 13328 size = xmlStrlen(string); 13329 13330 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13331 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13332 if (user_data != NULL) 13333 ctxt->userData = user_data; 13334 else 13335 ctxt->userData = ctxt; 13336 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13337 ctxt->dict = oldctxt->dict; 13338 ctxt->input_id = oldctxt->input_id + 1; 13339 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13340 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13341 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13342 13343 #ifdef SAX2 13344 /* propagate namespaces down the entity */ 13345 for (i = 0;i < oldctxt->nsNr;i += 2) { 13346 nsPush(ctxt, oldctxt->nsTab[i], 
oldctxt->nsTab[i+1]); 13347 } 13348 #endif 13349 13350 oldsax = ctxt->sax; 13351 ctxt->sax = oldctxt->sax; 13352 xmlDetectSAX2(ctxt); 13353 ctxt->replaceEntities = oldctxt->replaceEntities; 13354 ctxt->options = oldctxt->options; 13355 13356 ctxt->_private = oldctxt->_private; 13357 if (oldctxt->myDoc == NULL) { 13358 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13359 if (newDoc == NULL) { 13360 ctxt->sax = oldsax; 13361 ctxt->dict = NULL; 13362 xmlFreeParserCtxt(ctxt); 13363 return(XML_ERR_INTERNAL_ERROR); 13364 } 13365 newDoc->properties = XML_DOC_INTERNAL; 13366 newDoc->dict = ctxt->dict; 13367 xmlDictReference(newDoc->dict); 13368 ctxt->myDoc = newDoc; 13369 } else { 13370 ctxt->myDoc = oldctxt->myDoc; 13371 content = ctxt->myDoc->children; 13372 last = ctxt->myDoc->last; 13373 } 13374 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13375 if (newRoot == NULL) { 13376 ctxt->sax = oldsax; 13377 ctxt->dict = NULL; 13378 xmlFreeParserCtxt(ctxt); 13379 if (newDoc != NULL) { 13380 xmlFreeDoc(newDoc); 13381 } 13382 return(XML_ERR_INTERNAL_ERROR); 13383 } 13384 ctxt->myDoc->children = NULL; 13385 ctxt->myDoc->last = NULL; 13386 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13387 nodePush(ctxt, ctxt->myDoc->children); 13388 ctxt->instate = XML_PARSER_CONTENT; 13389 ctxt->depth = oldctxt->depth + 1; 13390 13391 ctxt->validate = 0; 13392 ctxt->loadsubset = oldctxt->loadsubset; 13393 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13394 /* 13395 * ID/IDREF registration will be done in xmlValidateElement below 13396 */ 13397 ctxt->loadsubset |= XML_SKIP_IDS; 13398 } 13399 ctxt->dictNames = oldctxt->dictNames; 13400 ctxt->attsDefault = oldctxt->attsDefault; 13401 ctxt->attsSpecial = oldctxt->attsSpecial; 13402 13403 xmlParseContent(ctxt); 13404 if ((RAW == '<') && (NXT(1) == '/')) { 13405 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13406 } else if (RAW != 0) { 13407 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13408 } 13409 if 
(ctxt->node != ctxt->myDoc->children) { 13410 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13411 } 13412 13413 if (!ctxt->wellFormed) { 13414 if (ctxt->errNo == 0) 13415 ret = XML_ERR_INTERNAL_ERROR; 13416 else 13417 ret = (xmlParserErrors)ctxt->errNo; 13418 } else { 13419 ret = XML_ERR_OK; 13420 } 13421 13422 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13423 xmlNodePtr cur; 13424 13425 /* 13426 * Return the newly created nodeset after unlinking it from 13427 * they pseudo parent. 13428 */ 13429 cur = ctxt->myDoc->children->children; 13430 *lst = cur; 13431 while (cur != NULL) { 13432 #ifdef LIBXML_VALID_ENABLED 13433 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13434 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13435 (cur->type == XML_ELEMENT_NODE)) { 13436 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13437 oldctxt->myDoc, cur); 13438 } 13439 #endif /* LIBXML_VALID_ENABLED */ 13440 cur->parent = NULL; 13441 cur = cur->next; 13442 } 13443 ctxt->myDoc->children->children = NULL; 13444 } 13445 if (ctxt->myDoc != NULL) { 13446 xmlFreeNode(ctxt->myDoc->children); 13447 ctxt->myDoc->children = content; 13448 ctxt->myDoc->last = last; 13449 } 13450 13451 /* 13452 * Record in the parent context the number of entities replacement 13453 * done when parsing that reference. 
13454 */ 13455 if (oldctxt != NULL) 13456 oldctxt->nbentities += ctxt->nbentities; 13457 13458 /* 13459 * Also record the last error if any 13460 */ 13461 if (ctxt->lastError.code != XML_ERR_OK) 13462 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13463 13464 ctxt->sax = oldsax; 13465 ctxt->dict = NULL; 13466 ctxt->attsDefault = NULL; 13467 ctxt->attsSpecial = NULL; 13468 xmlFreeParserCtxt(ctxt); 13469 if (newDoc != NULL) { 13470 xmlFreeDoc(newDoc); 13471 } 13472 13473 return(ret); 13474 } 13475 13476 /** 13477 * xmlParseInNodeContext: 13478 * @node: the context node 13479 * @data: the input string 13480 * @datalen: the input string length in bytes 13481 * @options: a combination of xmlParserOption 13482 * @lst: the return value for the set of parsed nodes 13483 * 13484 * Parse a well-balanced chunk of an XML document 13485 * within the context (DTD, namespaces, etc ...) of the given node. 13486 * 13487 * The allowed sequence for the data is a Well Balanced Chunk defined by 13488 * the content production in the XML grammar: 13489 * 13490 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13491 * 13492 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13493 * error code otherwise 13494 */ 13495 xmlParserErrors 13496 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13497 int options, xmlNodePtr *lst) { 13498 #ifdef SAX2 13499 xmlParserCtxtPtr ctxt; 13500 xmlDocPtr doc = NULL; 13501 xmlNodePtr fake, cur; 13502 int nsnr = 0; 13503 13504 xmlParserErrors ret = XML_ERR_OK; 13505 13506 /* 13507 * check all input parameters, grab the document 13508 */ 13509 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13510 return(XML_ERR_INTERNAL_ERROR); 13511 switch (node->type) { 13512 case XML_ELEMENT_NODE: 13513 case XML_ATTRIBUTE_NODE: 13514 case XML_TEXT_NODE: 13515 case XML_CDATA_SECTION_NODE: 13516 case XML_ENTITY_REF_NODE: 13517 case XML_PI_NODE: 13518 case XML_COMMENT_NODE: 13519 case 
XML_DOCUMENT_NODE: 13520 case XML_HTML_DOCUMENT_NODE: 13521 break; 13522 default: 13523 return(XML_ERR_INTERNAL_ERROR); 13524 13525 } 13526 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13527 (node->type != XML_DOCUMENT_NODE) && 13528 (node->type != XML_HTML_DOCUMENT_NODE)) 13529 node = node->parent; 13530 if (node == NULL) 13531 return(XML_ERR_INTERNAL_ERROR); 13532 if (node->type == XML_ELEMENT_NODE) 13533 doc = node->doc; 13534 else 13535 doc = (xmlDocPtr) node; 13536 if (doc == NULL) 13537 return(XML_ERR_INTERNAL_ERROR); 13538 13539 /* 13540 * allocate a context and set-up everything not related to the 13541 * node position in the tree 13542 */ 13543 if (doc->type == XML_DOCUMENT_NODE) 13544 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13545 #ifdef LIBXML_HTML_ENABLED 13546 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13547 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13548 /* 13549 * When parsing in context, it makes no sense to add implied 13550 * elements like html/body/etc... 13551 */ 13552 options |= HTML_PARSE_NOIMPLIED; 13553 } 13554 #endif 13555 else 13556 return(XML_ERR_INTERNAL_ERROR); 13557 13558 if (ctxt == NULL) 13559 return(XML_ERR_NO_MEMORY); 13560 13561 /* 13562 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13563 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13564 * we must wait until the last moment to free the original one. 
13565 */ 13566 if (doc->dict != NULL) { 13567 if (ctxt->dict != NULL) 13568 xmlDictFree(ctxt->dict); 13569 ctxt->dict = doc->dict; 13570 } else 13571 options |= XML_PARSE_NODICT; 13572 13573 if (doc->encoding != NULL) { 13574 xmlCharEncodingHandlerPtr hdlr; 13575 13576 if (ctxt->encoding != NULL) 13577 xmlFree((xmlChar *) ctxt->encoding); 13578 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13579 13580 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13581 if (hdlr != NULL) { 13582 xmlSwitchToEncoding(ctxt, hdlr); 13583 } else { 13584 return(XML_ERR_UNSUPPORTED_ENCODING); 13585 } 13586 } 13587 13588 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13589 xmlDetectSAX2(ctxt); 13590 ctxt->myDoc = doc; 13591 /* parsing in context, i.e. as within existing content */ 13592 ctxt->input_id = 2; 13593 ctxt->instate = XML_PARSER_CONTENT; 13594 13595 fake = xmlNewDocComment(node->doc, NULL); 13596 if (fake == NULL) { 13597 xmlFreeParserCtxt(ctxt); 13598 return(XML_ERR_NO_MEMORY); 13599 } 13600 xmlAddChild(node, fake); 13601 13602 if (node->type == XML_ELEMENT_NODE) { 13603 nodePush(ctxt, node); 13604 /* 13605 * initialize the SAX2 namespaces stack 13606 */ 13607 cur = node; 13608 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13609 xmlNsPtr ns = cur->nsDef; 13610 const xmlChar *iprefix, *ihref; 13611 13612 while (ns != NULL) { 13613 if (ctxt->dict) { 13614 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13615 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13616 } else { 13617 iprefix = ns->prefix; 13618 ihref = ns->href; 13619 } 13620 13621 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13622 nsPush(ctxt, iprefix, ihref); 13623 nsnr++; 13624 } 13625 ns = ns->next; 13626 } 13627 cur = cur->parent; 13628 } 13629 } 13630 13631 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13632 /* 13633 * ID/IDREF registration will be done in xmlValidateElement below 13634 */ 13635 ctxt->loadsubset |= XML_SKIP_IDS; 13636 } 13637 13638 
#ifdef LIBXML_HTML_ENABLED 13639 if (doc->type == XML_HTML_DOCUMENT_NODE) 13640 __htmlParseContent(ctxt); 13641 else 13642 #endif 13643 xmlParseContent(ctxt); 13644 13645 nsPop(ctxt, nsnr); 13646 if ((RAW == '<') && (NXT(1) == '/')) { 13647 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13648 } else if (RAW != 0) { 13649 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13650 } 13651 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13653 ctxt->wellFormed = 0; 13654 } 13655 13656 if (!ctxt->wellFormed) { 13657 if (ctxt->errNo == 0) 13658 ret = XML_ERR_INTERNAL_ERROR; 13659 else 13660 ret = (xmlParserErrors)ctxt->errNo; 13661 } else { 13662 ret = XML_ERR_OK; 13663 } 13664 13665 /* 13666 * Return the newly created nodeset after unlinking it from 13667 * the pseudo sibling. 13668 */ 13669 13670 cur = fake->next; 13671 fake->next = NULL; 13672 node->last = fake; 13673 13674 if (cur != NULL) { 13675 cur->prev = NULL; 13676 } 13677 13678 *lst = cur; 13679 13680 while (cur != NULL) { 13681 cur->parent = NULL; 13682 cur = cur->next; 13683 } 13684 13685 xmlUnlinkNode(fake); 13686 xmlFreeNode(fake); 13687 13688 13689 if (ret != XML_ERR_OK) { 13690 xmlFreeNodeList(*lst); 13691 *lst = NULL; 13692 } 13693 13694 if (doc->dict != NULL) 13695 ctxt->dict = NULL; 13696 xmlFreeParserCtxt(ctxt); 13697 13698 return(ret); 13699 #else /* !SAX2 */ 13700 return(XML_ERR_INTERNAL_ERROR); 13701 #endif 13702 } 13703 13704 #ifdef LIBXML_SAX1_ENABLED 13705 /** 13706 * xmlParseBalancedChunkMemoryRecover: 13707 * @doc: the document the chunk pertains to (must not be NULL) 13708 * @sax: the SAX handler block (possibly NULL) 13709 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13710 * @depth: Used for loop detection, use 0 13711 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13712 * @lst: the return value for the set of parsed nodes 13713 * @recover: return nodes even if the data is broken (use 
0)
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
 *    the parser error code otherwise
 *
 * In case recover is set to 1, the nodelist will not be empty even if
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
 * some extent.
 */
int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
     int recover) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content, newRoot;
    int size;
    int ret = 0;

    if (depth > 40) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(-1);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(-1);
    ctxt->userData = ctxt;
    if (sax != NULL) {
        /* remember the context's own handler so it can be put back */
        oldsax = ctxt->sax;
        ctxt->sax = sax;
        if (user_data != NULL)
            ctxt->userData = user_data;
    }
    newDoc = xmlNewDoc(BAD_CAST "1.0");
    if (newDoc == NULL) {
        /*
         * Put the context's own handler back before freeing it, as the
         * other exit paths do, so the caller's SAX block is not handed
         * to xmlFreeParserCtxt().
         */
        if (sax != NULL)
            ctxt->sax = oldsax;
        xmlFreeParserCtxt(ctxt);
        return(-1);
    }
    newDoc->properties = XML_DOC_INTERNAL;
    if ((doc != NULL) && (doc->dict != NULL)) {
        xmlDictFree(ctxt->dict);
        ctxt->dict = doc->dict;
        xmlDictReference(ctxt->dict);
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
        ctxt->dictNames = 1;
    } else {
        xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
    }
    /* doc == NULL is only supported for historic reasons */
    if (doc != NULL) {
        newDoc->intSubset = doc->intSubset;
        newDoc->extSubset = doc->extSubset;
    }
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        if (sax != NULL)
            ctxt->sax = oldsax;
        xmlFreeParserCtxt(ctxt);
        /* the subsets are only borrowed from doc */
        newDoc->intSubset = NULL;
        newDoc->extSubset = NULL;
        xmlFreeDoc(newDoc);
        return(-1);
    }
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
    nodePush(ctxt, newRoot);
    ctxt->myDoc = newDoc;
    /* doc == NULL is only supported for historic reasons */
    if (doc != NULL) {
        newDoc->children->doc = doc;
        /* Ensure that doc has XML spec namespace */
        xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
        newDoc->oldNs = doc->oldNs;
    }
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->input_id = 2;
    ctxt->depth = depth;

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    xmlDetectSAX2(ctxt);

    if ( doc != NULL ){
        /* hide the children of doc while the chunk is parsed */
        content = doc->children;
        doc->children = NULL;
        xmlParseContent(ctxt);
        doc->children = content;
    }
    else {
        xmlParseContent(ctxt);
    }
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    if (ctxt->node != newDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = 1;
        else
            ret = ctxt->errNo;
    } else {
      ret = 0;
    }

    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = newDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
            xmlSetTreeDoc(cur, doc);
            cur->parent = NULL;
            cur = cur->next;
        }
        newDoc->children->children = NULL;
    }

    if (sax != NULL)
        ctxt->sax = oldsax;
    xmlFreeParserCtxt(ctxt);
    newDoc->intSubset = NULL;
    newDoc->extSubset = NULL;
    /* This leaks the namespace list if doc == NULL */
    newDoc->oldNs = NULL;
    xmlFreeDoc(newDoc);

    return(ret);
}

/**
 * xmlSAXParseEntity:
 * @sax:  the SAX handler block
 * @filename:  the filename
 *
 * parse an XML external entity out of context and build a tree.
 * It use the given SAX function block to handle the parsing callback.
 * If sax is NULL, fallback to the default DOM tree building routines.
 *
 * [78] extParsedEnt ::= TextDecl?
content 13883 * 13884 * This correspond to a "Well Balanced" chunk 13885 * 13886 * Returns the resulting document tree 13887 */ 13888 13889 xmlDocPtr 13890 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13891 xmlDocPtr ret; 13892 xmlParserCtxtPtr ctxt; 13893 13894 ctxt = xmlCreateFileParserCtxt(filename); 13895 if (ctxt == NULL) { 13896 return(NULL); 13897 } 13898 if (sax != NULL) { 13899 if (ctxt->sax != NULL) 13900 xmlFree(ctxt->sax); 13901 ctxt->sax = sax; 13902 ctxt->userData = NULL; 13903 } 13904 13905 xmlParseExtParsedEnt(ctxt); 13906 13907 if (ctxt->wellFormed) 13908 ret = ctxt->myDoc; 13909 else { 13910 ret = NULL; 13911 xmlFreeDoc(ctxt->myDoc); 13912 ctxt->myDoc = NULL; 13913 } 13914 if (sax != NULL) 13915 ctxt->sax = NULL; 13916 xmlFreeParserCtxt(ctxt); 13917 13918 return(ret); 13919 } 13920 13921 /** 13922 * xmlParseEntity: 13923 * @filename: the filename 13924 * 13925 * parse an XML external entity out of context and build a tree. 13926 * 13927 * [78] extParsedEnt ::= TextDecl? content 13928 * 13929 * This correspond to a "Well Balanced" chunk 13930 * 13931 * Returns the resulting document tree 13932 */ 13933 13934 xmlDocPtr 13935 xmlParseEntity(const char *filename) { 13936 return(xmlSAXParseEntity(NULL, filename)); 13937 } 13938 #endif /* LIBXML_SAX1_ENABLED */ 13939 13940 /** 13941 * xmlCreateEntityParserCtxtInternal: 13942 * @URL: the entity URL 13943 * @ID: the entity PUBLIC ID 13944 * @base: a possible base for the target URI 13945 * @pctx: parser context used to set options on new context 13946 * 13947 * Create a parser context for an external entity 13948 * Automatic support for ZLIB/Compress compressed document is provided 13949 * by default if found at compile-time. 
13950 * 13951 * Returns the new parser context or NULL 13952 */ 13953 static xmlParserCtxtPtr 13954 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13955 const xmlChar *base, xmlParserCtxtPtr pctx) { 13956 xmlParserCtxtPtr ctxt; 13957 xmlParserInputPtr inputStream; 13958 char *directory = NULL; 13959 xmlChar *uri; 13960 13961 ctxt = xmlNewParserCtxt(); 13962 if (ctxt == NULL) { 13963 return(NULL); 13964 } 13965 13966 if (pctx != NULL) { 13967 ctxt->options = pctx->options; 13968 ctxt->_private = pctx->_private; 13969 /* 13970 * this is a subparser of pctx, so the input_id should be 13971 * incremented to distinguish from main entity 13972 */ 13973 ctxt->input_id = pctx->input_id + 1; 13974 } 13975 13976 /* Don't read from stdin. */ 13977 if (xmlStrcmp(URL, BAD_CAST "-") == 0) 13978 URL = BAD_CAST "./-"; 13979 13980 uri = xmlBuildURI(URL, base); 13981 13982 if (uri == NULL) { 13983 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13984 if (inputStream == NULL) { 13985 xmlFreeParserCtxt(ctxt); 13986 return(NULL); 13987 } 13988 13989 inputPush(ctxt, inputStream); 13990 13991 if ((ctxt->directory == NULL) && (directory == NULL)) 13992 directory = xmlParserGetDirectory((char *)URL); 13993 if ((ctxt->directory == NULL) && (directory != NULL)) 13994 ctxt->directory = directory; 13995 } else { 13996 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13997 if (inputStream == NULL) { 13998 xmlFree(uri); 13999 xmlFreeParserCtxt(ctxt); 14000 return(NULL); 14001 } 14002 14003 inputPush(ctxt, inputStream); 14004 14005 if ((ctxt->directory == NULL) && (directory == NULL)) 14006 directory = xmlParserGetDirectory((char *)uri); 14007 if ((ctxt->directory == NULL) && (directory != NULL)) 14008 ctxt->directory = directory; 14009 xmlFree(uri); 14010 } 14011 return(ctxt); 14012 } 14013 14014 /** 14015 * xmlCreateEntityParserCtxt: 14016 * @URL: the entity URL 14017 * @ID: the entity PUBLIC ID 14018 * @base: a possible base for 
the target URI 14019 * 14020 * Create a parser context for an external entity 14021 * Automatic support for ZLIB/Compress compressed document is provided 14022 * by default if found at compile-time. 14023 * 14024 * Returns the new parser context or NULL 14025 */ 14026 xmlParserCtxtPtr 14027 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14028 const xmlChar *base) { 14029 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14030 14031 } 14032 14033 /************************************************************************ 14034 * * 14035 * Front ends when parsing from a file * 14036 * * 14037 ************************************************************************/ 14038 14039 /** 14040 * xmlCreateURLParserCtxt: 14041 * @filename: the filename or URL 14042 * @options: a combination of xmlParserOption 14043 * 14044 * Create a parser context for a file or URL content. 14045 * Automatic support for ZLIB/Compress compressed document is provided 14046 * by default if found at compile-time and for file accesses 14047 * 14048 * Returns the new parser context or NULL 14049 */ 14050 xmlParserCtxtPtr 14051 xmlCreateURLParserCtxt(const char *filename, int options) 14052 { 14053 xmlParserCtxtPtr ctxt; 14054 xmlParserInputPtr inputStream; 14055 char *directory = NULL; 14056 14057 ctxt = xmlNewParserCtxt(); 14058 if (ctxt == NULL) { 14059 xmlErrMemory(NULL, "cannot allocate parser context"); 14060 return(NULL); 14061 } 14062 14063 if (options) 14064 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14065 ctxt->linenumbers = 1; 14066 14067 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14068 if (inputStream == NULL) { 14069 xmlFreeParserCtxt(ctxt); 14070 return(NULL); 14071 } 14072 14073 inputPush(ctxt, inputStream); 14074 if ((ctxt->directory == NULL) && (directory == NULL)) 14075 directory = xmlParserGetDirectory(filename); 14076 if ((ctxt->directory == NULL) && (directory != NULL)) 14077 ctxt->directory = directory; 14078 14079 
return(ctxt); 14080 } 14081 14082 /** 14083 * xmlCreateFileParserCtxt: 14084 * @filename: the filename 14085 * 14086 * Create a parser context for a file content. 14087 * Automatic support for ZLIB/Compress compressed document is provided 14088 * by default if found at compile-time. 14089 * 14090 * Returns the new parser context or NULL 14091 */ 14092 xmlParserCtxtPtr 14093 xmlCreateFileParserCtxt(const char *filename) 14094 { 14095 return(xmlCreateURLParserCtxt(filename, 0)); 14096 } 14097 14098 #ifdef LIBXML_SAX1_ENABLED 14099 /** 14100 * xmlSAXParseFileWithData: 14101 * @sax: the SAX handler block 14102 * @filename: the filename 14103 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14104 * documents 14105 * @data: the userdata 14106 * 14107 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14108 * compressed document is provided by default if found at compile-time. 14109 * It use the given SAX function block to handle the parsing callback. 14110 * If sax is NULL, fallback to the default DOM tree building routines. 
14111 * 14112 * User data (void *) is stored within the parser context in the 14113 * context's _private member, so it is available nearly everywhere in libxml 14114 * 14115 * Returns the resulting document tree 14116 */ 14117 14118 xmlDocPtr 14119 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14120 int recovery, void *data) { 14121 xmlDocPtr ret; 14122 xmlParserCtxtPtr ctxt; 14123 14124 xmlInitParser(); 14125 14126 ctxt = xmlCreateFileParserCtxt(filename); 14127 if (ctxt == NULL) { 14128 return(NULL); 14129 } 14130 if (sax != NULL) { 14131 if (ctxt->sax != NULL) 14132 xmlFree(ctxt->sax); 14133 ctxt->sax = sax; 14134 } 14135 xmlDetectSAX2(ctxt); 14136 if (data!=NULL) { 14137 ctxt->_private = data; 14138 } 14139 14140 if (ctxt->directory == NULL) 14141 ctxt->directory = xmlParserGetDirectory(filename); 14142 14143 ctxt->recovery = recovery; 14144 14145 xmlParseDocument(ctxt); 14146 14147 if ((ctxt->wellFormed) || recovery) { 14148 ret = ctxt->myDoc; 14149 if ((ret != NULL) && (ctxt->input->buf != NULL)) { 14150 if (ctxt->input->buf->compressed > 0) 14151 ret->compression = 9; 14152 else 14153 ret->compression = ctxt->input->buf->compressed; 14154 } 14155 } 14156 else { 14157 ret = NULL; 14158 xmlFreeDoc(ctxt->myDoc); 14159 ctxt->myDoc = NULL; 14160 } 14161 if (sax != NULL) 14162 ctxt->sax = NULL; 14163 xmlFreeParserCtxt(ctxt); 14164 14165 return(ret); 14166 } 14167 14168 /** 14169 * xmlSAXParseFile: 14170 * @sax: the SAX handler block 14171 * @filename: the filename 14172 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14173 * documents 14174 * 14175 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14176 * compressed document is provided by default if found at compile-time. 14177 * It use the given SAX function block to handle the parsing callback. 14178 * If sax is NULL, fallback to the default DOM tree building routines. 
14179 * 14180 * Returns the resulting document tree 14181 */ 14182 14183 xmlDocPtr 14184 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14185 int recovery) { 14186 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14187 } 14188 14189 /** 14190 * xmlRecoverDoc: 14191 * @cur: a pointer to an array of xmlChar 14192 * 14193 * parse an XML in-memory document and build a tree. 14194 * In the case the document is not Well Formed, a attempt to build a 14195 * tree is tried anyway 14196 * 14197 * Returns the resulting document tree or NULL in case of failure 14198 */ 14199 14200 xmlDocPtr 14201 xmlRecoverDoc(const xmlChar *cur) { 14202 return(xmlSAXParseDoc(NULL, cur, 1)); 14203 } 14204 14205 /** 14206 * xmlParseFile: 14207 * @filename: the filename 14208 * 14209 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14210 * compressed document is provided by default if found at compile-time. 14211 * 14212 * Returns the resulting document tree if the file was wellformed, 14213 * NULL otherwise. 14214 */ 14215 14216 xmlDocPtr 14217 xmlParseFile(const char *filename) { 14218 return(xmlSAXParseFile(NULL, filename, 0)); 14219 } 14220 14221 /** 14222 * xmlRecoverFile: 14223 * @filename: the filename 14224 * 14225 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14226 * compressed document is provided by default if found at compile-time. 
14227 * In the case the document is not Well Formed, it attempts to build 14228 * a tree anyway 14229 * 14230 * Returns the resulting document tree or NULL in case of failure 14231 */ 14232 14233 xmlDocPtr 14234 xmlRecoverFile(const char *filename) { 14235 return(xmlSAXParseFile(NULL, filename, 1)); 14236 } 14237 14238 14239 /** 14240 * xmlSetupParserForBuffer: 14241 * @ctxt: an XML parser context 14242 * @buffer: a xmlChar * buffer 14243 * @filename: a file name 14244 * 14245 * Setup the parser context to parse a new buffer; Clears any prior 14246 * contents from the parser context. The buffer parameter must not be 14247 * NULL, but the filename parameter can be 14248 */ 14249 void 14250 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14251 const char* filename) 14252 { 14253 xmlParserInputPtr input; 14254 14255 if ((ctxt == NULL) || (buffer == NULL)) 14256 return; 14257 14258 input = xmlNewInputStream(ctxt); 14259 if (input == NULL) { 14260 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14261 xmlClearParserCtxt(ctxt); 14262 return; 14263 } 14264 14265 xmlClearParserCtxt(ctxt); 14266 if (filename != NULL) 14267 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14268 input->base = buffer; 14269 input->cur = buffer; 14270 input->end = &buffer[xmlStrlen(buffer)]; 14271 inputPush(ctxt, input); 14272 } 14273 14274 /** 14275 * xmlSAXUserParseFile: 14276 * @sax: a SAX handler 14277 * @user_data: The user data returned on SAX callbacks 14278 * @filename: a file name 14279 * 14280 * parse an XML file and call the given SAX handler routines. 
14281 * Automatic support for ZLIB/Compress compressed document is provided 14282 * 14283 * Returns 0 in case of success or a error number otherwise 14284 */ 14285 int 14286 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14287 const char *filename) { 14288 int ret = 0; 14289 xmlParserCtxtPtr ctxt; 14290 14291 ctxt = xmlCreateFileParserCtxt(filename); 14292 if (ctxt == NULL) return -1; 14293 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14294 xmlFree(ctxt->sax); 14295 ctxt->sax = sax; 14296 xmlDetectSAX2(ctxt); 14297 14298 if (user_data != NULL) 14299 ctxt->userData = user_data; 14300 14301 xmlParseDocument(ctxt); 14302 14303 if (ctxt->wellFormed) 14304 ret = 0; 14305 else { 14306 if (ctxt->errNo != 0) 14307 ret = ctxt->errNo; 14308 else 14309 ret = -1; 14310 } 14311 if (sax != NULL) 14312 ctxt->sax = NULL; 14313 if (ctxt->myDoc != NULL) { 14314 xmlFreeDoc(ctxt->myDoc); 14315 ctxt->myDoc = NULL; 14316 } 14317 xmlFreeParserCtxt(ctxt); 14318 14319 return ret; 14320 } 14321 #endif /* LIBXML_SAX1_ENABLED */ 14322 14323 /************************************************************************ 14324 * * 14325 * Front ends when parsing from memory * 14326 * * 14327 ************************************************************************/ 14328 14329 /** 14330 * xmlCreateMemoryParserCtxt: 14331 * @buffer: a pointer to a char array 14332 * @size: the size of the array 14333 * 14334 * Create a parser context for an XML in-memory document. 
14335 * 14336 * Returns the new parser context or NULL 14337 */ 14338 xmlParserCtxtPtr 14339 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14340 xmlParserCtxtPtr ctxt; 14341 xmlParserInputPtr input; 14342 xmlParserInputBufferPtr buf; 14343 14344 if (buffer == NULL) 14345 return(NULL); 14346 if (size <= 0) 14347 return(NULL); 14348 14349 ctxt = xmlNewParserCtxt(); 14350 if (ctxt == NULL) 14351 return(NULL); 14352 14353 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14354 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14355 if (buf == NULL) { 14356 xmlFreeParserCtxt(ctxt); 14357 return(NULL); 14358 } 14359 14360 input = xmlNewInputStream(ctxt); 14361 if (input == NULL) { 14362 xmlFreeParserInputBuffer(buf); 14363 xmlFreeParserCtxt(ctxt); 14364 return(NULL); 14365 } 14366 14367 input->filename = NULL; 14368 input->buf = buf; 14369 xmlBufResetInput(input->buf->buffer, input); 14370 14371 inputPush(ctxt, input); 14372 return(ctxt); 14373 } 14374 14375 #ifdef LIBXML_SAX1_ENABLED 14376 /** 14377 * xmlSAXParseMemoryWithData: 14378 * @sax: the SAX handler block 14379 * @buffer: an pointer to a char array 14380 * @size: the size of the array 14381 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14382 * documents 14383 * @data: the userdata 14384 * 14385 * parse an XML in-memory block and use the given SAX function block 14386 * to handle the parsing callback. If sax is NULL, fallback to the default 14387 * DOM tree building routines. 
14388 * 14389 * User data (void *) is stored within the parser context in the 14390 * context's _private member, so it is available nearly everywhere in libxml 14391 * 14392 * Returns the resulting document tree 14393 */ 14394 14395 xmlDocPtr 14396 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14397 int size, int recovery, void *data) { 14398 xmlDocPtr ret; 14399 xmlParserCtxtPtr ctxt; 14400 14401 xmlInitParser(); 14402 14403 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14404 if (ctxt == NULL) return(NULL); 14405 if (sax != NULL) { 14406 if (ctxt->sax != NULL) 14407 xmlFree(ctxt->sax); 14408 ctxt->sax = sax; 14409 } 14410 xmlDetectSAX2(ctxt); 14411 if (data!=NULL) { 14412 ctxt->_private=data; 14413 } 14414 14415 ctxt->recovery = recovery; 14416 14417 xmlParseDocument(ctxt); 14418 14419 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14420 else { 14421 ret = NULL; 14422 xmlFreeDoc(ctxt->myDoc); 14423 ctxt->myDoc = NULL; 14424 } 14425 if (sax != NULL) 14426 ctxt->sax = NULL; 14427 xmlFreeParserCtxt(ctxt); 14428 14429 return(ret); 14430 } 14431 14432 /** 14433 * xmlSAXParseMemory: 14434 * @sax: the SAX handler block 14435 * @buffer: an pointer to a char array 14436 * @size: the size of the array 14437 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14438 * documents 14439 * 14440 * parse an XML in-memory block and use the given SAX function block 14441 * to handle the parsing callback. If sax is NULL, fallback to the default 14442 * DOM tree building routines. 14443 * 14444 * Returns the resulting document tree 14445 */ 14446 xmlDocPtr 14447 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14448 int size, int recovery) { 14449 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14450 } 14451 14452 /** 14453 * xmlParseMemory: 14454 * @buffer: an pointer to a char array 14455 * @size: the size of the array 14456 * 14457 * parse an XML in-memory block and build a tree. 
14458 * 14459 * Returns the resulting document tree 14460 */ 14461 14462 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14463 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14464 } 14465 14466 /** 14467 * xmlRecoverMemory: 14468 * @buffer: an pointer to a char array 14469 * @size: the size of the array 14470 * 14471 * parse an XML in-memory block and build a tree. 14472 * In the case the document is not Well Formed, an attempt to 14473 * build a tree is tried anyway 14474 * 14475 * Returns the resulting document tree or NULL in case of error 14476 */ 14477 14478 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14479 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14480 } 14481 14482 /** 14483 * xmlSAXUserParseMemory: 14484 * @sax: a SAX handler 14485 * @user_data: The user data returned on SAX callbacks 14486 * @buffer: an in-memory XML document input 14487 * @size: the length of the XML document in bytes 14488 * 14489 * A better SAX parsing routine. 14490 * parse an XML in-memory buffer and call the given SAX handler routines. 
14491 * 14492 * Returns 0 in case of success or a error number otherwise 14493 */ 14494 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14495 const char *buffer, int size) { 14496 int ret = 0; 14497 xmlParserCtxtPtr ctxt; 14498 14499 xmlInitParser(); 14500 14501 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14502 if (ctxt == NULL) return -1; 14503 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14504 xmlFree(ctxt->sax); 14505 ctxt->sax = sax; 14506 xmlDetectSAX2(ctxt); 14507 14508 if (user_data != NULL) 14509 ctxt->userData = user_data; 14510 14511 xmlParseDocument(ctxt); 14512 14513 if (ctxt->wellFormed) 14514 ret = 0; 14515 else { 14516 if (ctxt->errNo != 0) 14517 ret = ctxt->errNo; 14518 else 14519 ret = -1; 14520 } 14521 if (sax != NULL) 14522 ctxt->sax = NULL; 14523 if (ctxt->myDoc != NULL) { 14524 xmlFreeDoc(ctxt->myDoc); 14525 ctxt->myDoc = NULL; 14526 } 14527 xmlFreeParserCtxt(ctxt); 14528 14529 return ret; 14530 } 14531 #endif /* LIBXML_SAX1_ENABLED */ 14532 14533 /** 14534 * xmlCreateDocParserCtxt: 14535 * @cur: a pointer to an array of xmlChar 14536 * 14537 * Creates a parser context for an XML in-memory document. 14538 * 14539 * Returns the new parser context or NULL 14540 */ 14541 xmlParserCtxtPtr 14542 xmlCreateDocParserCtxt(const xmlChar *cur) { 14543 int len; 14544 14545 if (cur == NULL) 14546 return(NULL); 14547 len = xmlStrlen(cur); 14548 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14549 } 14550 14551 #ifdef LIBXML_SAX1_ENABLED 14552 /** 14553 * xmlSAXParseDoc: 14554 * @sax: the SAX handler block 14555 * @cur: a pointer to an array of xmlChar 14556 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14557 * documents 14558 * 14559 * parse an XML in-memory document and build a tree. 14560 * It use the given SAX function block to handle the parsing callback. 14561 * If sax is NULL, fallback to the default DOM tree building routines. 
14562 * 14563 * Returns the resulting document tree 14564 */ 14565 14566 xmlDocPtr 14567 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14568 xmlDocPtr ret; 14569 xmlParserCtxtPtr ctxt; 14570 xmlSAXHandlerPtr oldsax = NULL; 14571 14572 if (cur == NULL) return(NULL); 14573 14574 14575 ctxt = xmlCreateDocParserCtxt(cur); 14576 if (ctxt == NULL) return(NULL); 14577 if (sax != NULL) { 14578 oldsax = ctxt->sax; 14579 ctxt->sax = sax; 14580 ctxt->userData = NULL; 14581 } 14582 xmlDetectSAX2(ctxt); 14583 14584 xmlParseDocument(ctxt); 14585 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14586 else { 14587 ret = NULL; 14588 xmlFreeDoc(ctxt->myDoc); 14589 ctxt->myDoc = NULL; 14590 } 14591 if (sax != NULL) 14592 ctxt->sax = oldsax; 14593 xmlFreeParserCtxt(ctxt); 14594 14595 return(ret); 14596 } 14597 14598 /** 14599 * xmlParseDoc: 14600 * @cur: a pointer to an array of xmlChar 14601 * 14602 * parse an XML in-memory document and build a tree. 14603 * 14604 * Returns the resulting document tree 14605 */ 14606 14607 xmlDocPtr 14608 xmlParseDoc(const xmlChar *cur) { 14609 return(xmlSAXParseDoc(NULL, cur, 0)); 14610 } 14611 #endif /* LIBXML_SAX1_ENABLED */ 14612 14613 #ifdef LIBXML_LEGACY_ENABLED 14614 /************************************************************************ 14615 * * 14616 * Specific function to keep track of entities references * 14617 * and used by the XSLT debugger * 14618 * * 14619 ************************************************************************/ 14620 14621 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14622 14623 /** 14624 * xmlAddEntityReference: 14625 * @ent : A valid entity 14626 * @firstNode : A valid first node for children of entity 14627 * @lastNode : A valid last node of children entity 14628 * 14629 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14630 */ 14631 static void 14632 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14633 xmlNodePtr lastNode) 
14634 { 14635 if (xmlEntityRefFunc != NULL) { 14636 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14637 } 14638 } 14639 14640 14641 /** 14642 * xmlSetEntityReferenceFunc: 14643 * @func: A valid function 14644 * 14645 * Set the function to call call back when a xml reference has been made 14646 */ 14647 void 14648 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14649 { 14650 xmlEntityRefFunc = func; 14651 } 14652 #endif /* LIBXML_LEGACY_ENABLED */ 14653 14654 /************************************************************************ 14655 * * 14656 * Miscellaneous * 14657 * * 14658 ************************************************************************/ 14659 14660 #ifdef LIBXML_XPATH_ENABLED 14661 #include <libxml/xpath.h> 14662 #endif 14663 14664 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14665 static int xmlParserInitialized = 0; 14666 14667 /** 14668 * xmlInitParser: 14669 * 14670 * Initialization function for the XML parser. 14671 * This is not reentrant. Call once before processing in case of 14672 * use in multithreaded programs. 
14673 */ 14674 14675 void 14676 xmlInitParser(void) { 14677 if (xmlParserInitialized != 0) 14678 return; 14679 14680 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL)) 14681 if (xmlFree == free) 14682 atexit(xmlCleanupParser); 14683 #endif 14684 14685 #ifdef LIBXML_THREAD_ENABLED 14686 __xmlGlobalInitMutexLock(); 14687 if (xmlParserInitialized == 0) { 14688 #endif 14689 xmlInitThreads(); 14690 xmlInitGlobals(); 14691 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14692 (xmlGenericError == NULL)) 14693 initGenericErrorDefaultFunc(NULL); 14694 xmlInitMemory(); 14695 xmlInitializeDict(); 14696 xmlInitCharEncodingHandlers(); 14697 xmlDefaultSAXHandlerInit(); 14698 xmlRegisterDefaultInputCallbacks(); 14699 #ifdef LIBXML_OUTPUT_ENABLED 14700 xmlRegisterDefaultOutputCallbacks(); 14701 #endif /* LIBXML_OUTPUT_ENABLED */ 14702 #ifdef LIBXML_HTML_ENABLED 14703 htmlInitAutoClose(); 14704 htmlDefaultSAXHandlerInit(); 14705 #endif 14706 #ifdef LIBXML_XPATH_ENABLED 14707 xmlXPathInit(); 14708 #endif 14709 xmlParserInitialized = 1; 14710 #ifdef LIBXML_THREAD_ENABLED 14711 } 14712 __xmlGlobalInitMutexUnlock(); 14713 #endif 14714 } 14715 14716 /** 14717 * xmlCleanupParser: 14718 * 14719 * This function name is somewhat misleading. It does not clean up 14720 * parser state, it cleans up memory allocated by the library itself. 14721 * It is a cleanup function for the XML library. It tries to reclaim all 14722 * related global memory allocated for the library processing. 14723 * It doesn't deallocate any document related memory. One should 14724 * call xmlCleanupParser() only when the process has finished using 14725 * the library and all XML/HTML documents built with it. 14726 * See also xmlInitParser() which has the opposite function of preparing 14727 * the library for operations. 
14728 * 14729 * WARNING: if your application is multithreaded or has plugin support 14730 * calling this may crash the application if another thread or 14731 * a plugin is still using libxml2. It's sometimes very hard to 14732 * guess if libxml2 is in use in the application, some libraries 14733 * or plugins may use it without notice. In case of doubt abstain 14734 * from calling this function or do it just before calling exit() 14735 * to avoid leak reports from valgrind ! 14736 */ 14737 14738 void 14739 xmlCleanupParser(void) { 14740 if (!xmlParserInitialized) 14741 return; 14742 14743 xmlCleanupCharEncodingHandlers(); 14744 #ifdef LIBXML_CATALOG_ENABLED 14745 xmlCatalogCleanup(); 14746 #endif 14747 xmlDictCleanup(); 14748 xmlCleanupInputCallbacks(); 14749 #ifdef LIBXML_OUTPUT_ENABLED 14750 xmlCleanupOutputCallbacks(); 14751 #endif 14752 #ifdef LIBXML_SCHEMAS_ENABLED 14753 xmlSchemaCleanupTypes(); 14754 xmlRelaxNGCleanupTypes(); 14755 #endif 14756 xmlCleanupGlobals(); 14757 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14758 xmlCleanupMemory(); 14759 xmlParserInitialized = 0; 14760 } 14761 14762 #if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \ 14763 !defined(_WIN32) 14764 static void 14765 ATTRIBUTE_DESTRUCTOR 14766 xmlDestructor(void) { 14767 /* 14768 * Calling custom deallocation functions in a destructor can cause 14769 * problems, for example with Nokogiri. 
14770 */ 14771 if (xmlFree == free) 14772 xmlCleanupParser(); 14773 } 14774 #endif 14775 14776 /************************************************************************ 14777 * * 14778 * New set (2.6.0) of simpler and more flexible APIs * 14779 * * 14780 ************************************************************************/ 14781 14782 /** 14783 * DICT_FREE: 14784 * @str: a string 14785 * 14786 * Free a string if it is not owned by the "dict" dictionary in the 14787 * current scope 14788 */ 14789 #define DICT_FREE(str) \ 14790 if ((str) && ((!dict) || \ 14791 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14792 xmlFree((char *)(str)); 14793 14794 /** 14795 * xmlCtxtReset: 14796 * @ctxt: an XML parser context 14797 * 14798 * Reset a parser context 14799 */ 14800 void 14801 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14802 { 14803 xmlParserInputPtr input; 14804 xmlDictPtr dict; 14805 14806 if (ctxt == NULL) 14807 return; 14808 14809 dict = ctxt->dict; 14810 14811 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14812 xmlFreeInputStream(input); 14813 } 14814 ctxt->inputNr = 0; 14815 ctxt->input = NULL; 14816 14817 ctxt->spaceNr = 0; 14818 if (ctxt->spaceTab != NULL) { 14819 ctxt->spaceTab[0] = -1; 14820 ctxt->space = &ctxt->spaceTab[0]; 14821 } else { 14822 ctxt->space = NULL; 14823 } 14824 14825 14826 ctxt->nodeNr = 0; 14827 ctxt->node = NULL; 14828 14829 ctxt->nameNr = 0; 14830 ctxt->name = NULL; 14831 14832 ctxt->nsNr = 0; 14833 14834 DICT_FREE(ctxt->version); 14835 ctxt->version = NULL; 14836 DICT_FREE(ctxt->encoding); 14837 ctxt->encoding = NULL; 14838 DICT_FREE(ctxt->directory); 14839 ctxt->directory = NULL; 14840 DICT_FREE(ctxt->extSubURI); 14841 ctxt->extSubURI = NULL; 14842 DICT_FREE(ctxt->extSubSystem); 14843 ctxt->extSubSystem = NULL; 14844 if (ctxt->myDoc != NULL) 14845 xmlFreeDoc(ctxt->myDoc); 14846 ctxt->myDoc = NULL; 14847 14848 ctxt->standalone = -1; 14849 ctxt->hasExternalSubset = 0; 14850 ctxt->hasPErefs = 0; 14851 ctxt->html = 0; 
14852 ctxt->external = 0; 14853 ctxt->instate = XML_PARSER_START; 14854 ctxt->token = 0; 14855 14856 ctxt->wellFormed = 1; 14857 ctxt->nsWellFormed = 1; 14858 ctxt->disableSAX = 0; 14859 ctxt->valid = 1; 14860 #if 0 14861 ctxt->vctxt.userData = ctxt; 14862 ctxt->vctxt.error = xmlParserValidityError; 14863 ctxt->vctxt.warning = xmlParserValidityWarning; 14864 #endif 14865 ctxt->record_info = 0; 14866 ctxt->checkIndex = 0; 14867 ctxt->inSubset = 0; 14868 ctxt->errNo = XML_ERR_OK; 14869 ctxt->depth = 0; 14870 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14871 ctxt->catalogs = NULL; 14872 ctxt->nbentities = 0; 14873 ctxt->sizeentities = 0; 14874 ctxt->sizeentcopy = 0; 14875 xmlInitNodeInfoSeq(&ctxt->node_seq); 14876 14877 if (ctxt->attsDefault != NULL) { 14878 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 14879 ctxt->attsDefault = NULL; 14880 } 14881 if (ctxt->attsSpecial != NULL) { 14882 xmlHashFree(ctxt->attsSpecial, NULL); 14883 ctxt->attsSpecial = NULL; 14884 } 14885 14886 #ifdef LIBXML_CATALOG_ENABLED 14887 if (ctxt->catalogs != NULL) 14888 xmlCatalogFreeLocal(ctxt->catalogs); 14889 #endif 14890 if (ctxt->lastError.code != XML_ERR_OK) 14891 xmlResetError(&ctxt->lastError); 14892 } 14893 14894 /** 14895 * xmlCtxtResetPush: 14896 * @ctxt: an XML parser context 14897 * @chunk: a pointer to an array of chars 14898 * @size: number of chars in the array 14899 * @filename: an optional file name or URI 14900 * @encoding: the document encoding, or NULL 14901 * 14902 * Reset a push parser context 14903 * 14904 * Returns 0 in case of success and 1 in case of error 14905 */ 14906 int 14907 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14908 int size, const char *filename, const char *encoding) 14909 { 14910 xmlParserInputPtr inputStream; 14911 xmlParserInputBufferPtr buf; 14912 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14913 14914 if (ctxt == NULL) 14915 return(1); 14916 14917 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14918 enc = 
xmlDetectCharEncoding((const xmlChar *) chunk, size); 14919 14920 buf = xmlAllocParserInputBuffer(enc); 14921 if (buf == NULL) 14922 return(1); 14923 14924 if (ctxt == NULL) { 14925 xmlFreeParserInputBuffer(buf); 14926 return(1); 14927 } 14928 14929 xmlCtxtReset(ctxt); 14930 14931 if (filename == NULL) { 14932 ctxt->directory = NULL; 14933 } else { 14934 ctxt->directory = xmlParserGetDirectory(filename); 14935 } 14936 14937 inputStream = xmlNewInputStream(ctxt); 14938 if (inputStream == NULL) { 14939 xmlFreeParserInputBuffer(buf); 14940 return(1); 14941 } 14942 14943 if (filename == NULL) 14944 inputStream->filename = NULL; 14945 else 14946 inputStream->filename = (char *) 14947 xmlCanonicPath((const xmlChar *) filename); 14948 inputStream->buf = buf; 14949 xmlBufResetInput(buf->buffer, inputStream); 14950 14951 inputPush(ctxt, inputStream); 14952 14953 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14954 (ctxt->input->buf != NULL)) { 14955 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14956 size_t cur = ctxt->input->cur - ctxt->input->base; 14957 14958 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14959 14960 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14961 #ifdef DEBUG_PUSH 14962 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14963 #endif 14964 } 14965 14966 if (encoding != NULL) { 14967 xmlCharEncodingHandlerPtr hdlr; 14968 14969 if (ctxt->encoding != NULL) 14970 xmlFree((xmlChar *) ctxt->encoding); 14971 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14972 14973 hdlr = xmlFindCharEncodingHandler(encoding); 14974 if (hdlr != NULL) { 14975 xmlSwitchToEncoding(ctxt, hdlr); 14976 } else { 14977 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14978 "Unsupported encoding %s\n", BAD_CAST encoding); 14979 } 14980 } else if (enc != XML_CHAR_ENCODING_NONE) { 14981 xmlSwitchEncoding(ctxt, enc); 14982 } 14983 14984 return(0); 14985 } 14986 14987 14988 /** 
14989 * xmlCtxtUseOptionsInternal: 14990 * @ctxt: an XML parser context 14991 * @options: a combination of xmlParserOption 14992 * @encoding: the user provided encoding to use 14993 * 14994 * Applies the options to the parser context 14995 * 14996 * Returns 0 in case of success, the set of unknown or unimplemented options 14997 * in case of error. 14998 */ 14999 static int 15000 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15001 { 15002 if (ctxt == NULL) 15003 return(-1); 15004 if (encoding != NULL) { 15005 if (ctxt->encoding != NULL) 15006 xmlFree((xmlChar *) ctxt->encoding); 15007 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15008 } 15009 if (options & XML_PARSE_RECOVER) { 15010 ctxt->recovery = 1; 15011 options -= XML_PARSE_RECOVER; 15012 ctxt->options |= XML_PARSE_RECOVER; 15013 } else 15014 ctxt->recovery = 0; 15015 if (options & XML_PARSE_DTDLOAD) { 15016 ctxt->loadsubset = XML_DETECT_IDS; 15017 options -= XML_PARSE_DTDLOAD; 15018 ctxt->options |= XML_PARSE_DTDLOAD; 15019 } else 15020 ctxt->loadsubset = 0; 15021 if (options & XML_PARSE_DTDATTR) { 15022 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15023 options -= XML_PARSE_DTDATTR; 15024 ctxt->options |= XML_PARSE_DTDATTR; 15025 } 15026 if (options & XML_PARSE_NOENT) { 15027 ctxt->replaceEntities = 1; 15028 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15029 options -= XML_PARSE_NOENT; 15030 ctxt->options |= XML_PARSE_NOENT; 15031 } else 15032 ctxt->replaceEntities = 0; 15033 if (options & XML_PARSE_PEDANTIC) { 15034 ctxt->pedantic = 1; 15035 options -= XML_PARSE_PEDANTIC; 15036 ctxt->options |= XML_PARSE_PEDANTIC; 15037 } else 15038 ctxt->pedantic = 0; 15039 if (options & XML_PARSE_NOBLANKS) { 15040 ctxt->keepBlanks = 0; 15041 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15042 options -= XML_PARSE_NOBLANKS; 15043 ctxt->options |= XML_PARSE_NOBLANKS; 15044 } else 15045 ctxt->keepBlanks = 1; 15046 if (options & XML_PARSE_DTDVALID) { 15047 ctxt->validate 
= 1; 15048 if (options & XML_PARSE_NOWARNING) 15049 ctxt->vctxt.warning = NULL; 15050 if (options & XML_PARSE_NOERROR) 15051 ctxt->vctxt.error = NULL; 15052 options -= XML_PARSE_DTDVALID; 15053 ctxt->options |= XML_PARSE_DTDVALID; 15054 } else 15055 ctxt->validate = 0; 15056 if (options & XML_PARSE_NOWARNING) { 15057 ctxt->sax->warning = NULL; 15058 options -= XML_PARSE_NOWARNING; 15059 } 15060 if (options & XML_PARSE_NOERROR) { 15061 ctxt->sax->error = NULL; 15062 ctxt->sax->fatalError = NULL; 15063 options -= XML_PARSE_NOERROR; 15064 } 15065 #ifdef LIBXML_SAX1_ENABLED 15066 if (options & XML_PARSE_SAX1) { 15067 ctxt->sax->startElement = xmlSAX2StartElement; 15068 ctxt->sax->endElement = xmlSAX2EndElement; 15069 ctxt->sax->startElementNs = NULL; 15070 ctxt->sax->endElementNs = NULL; 15071 ctxt->sax->initialized = 1; 15072 options -= XML_PARSE_SAX1; 15073 ctxt->options |= XML_PARSE_SAX1; 15074 } 15075 #endif /* LIBXML_SAX1_ENABLED */ 15076 if (options & XML_PARSE_NODICT) { 15077 ctxt->dictNames = 0; 15078 options -= XML_PARSE_NODICT; 15079 ctxt->options |= XML_PARSE_NODICT; 15080 } else { 15081 ctxt->dictNames = 1; 15082 } 15083 if (options & XML_PARSE_NOCDATA) { 15084 ctxt->sax->cdataBlock = NULL; 15085 options -= XML_PARSE_NOCDATA; 15086 ctxt->options |= XML_PARSE_NOCDATA; 15087 } 15088 if (options & XML_PARSE_NSCLEAN) { 15089 ctxt->options |= XML_PARSE_NSCLEAN; 15090 options -= XML_PARSE_NSCLEAN; 15091 } 15092 if (options & XML_PARSE_NONET) { 15093 ctxt->options |= XML_PARSE_NONET; 15094 options -= XML_PARSE_NONET; 15095 } 15096 if (options & XML_PARSE_COMPACT) { 15097 ctxt->options |= XML_PARSE_COMPACT; 15098 options -= XML_PARSE_COMPACT; 15099 } 15100 if (options & XML_PARSE_OLD10) { 15101 ctxt->options |= XML_PARSE_OLD10; 15102 options -= XML_PARSE_OLD10; 15103 } 15104 if (options & XML_PARSE_NOBASEFIX) { 15105 ctxt->options |= XML_PARSE_NOBASEFIX; 15106 options -= XML_PARSE_NOBASEFIX; 15107 } 15108 if (options & XML_PARSE_HUGE) { 15109 ctxt->options |= 
XML_PARSE_HUGE; 15110 options -= XML_PARSE_HUGE; 15111 if (ctxt->dict != NULL) 15112 xmlDictSetLimit(ctxt->dict, 0); 15113 } 15114 if (options & XML_PARSE_OLDSAX) { 15115 ctxt->options |= XML_PARSE_OLDSAX; 15116 options -= XML_PARSE_OLDSAX; 15117 } 15118 if (options & XML_PARSE_IGNORE_ENC) { 15119 ctxt->options |= XML_PARSE_IGNORE_ENC; 15120 options -= XML_PARSE_IGNORE_ENC; 15121 } 15122 if (options & XML_PARSE_BIG_LINES) { 15123 ctxt->options |= XML_PARSE_BIG_LINES; 15124 options -= XML_PARSE_BIG_LINES; 15125 } 15126 ctxt->linenumbers = 1; 15127 return (options); 15128 } 15129 15130 /** 15131 * xmlCtxtUseOptions: 15132 * @ctxt: an XML parser context 15133 * @options: a combination of xmlParserOption 15134 * 15135 * Applies the options to the parser context 15136 * 15137 * Returns 0 in case of success, the set of unknown or unimplemented options 15138 * in case of error. 15139 */ 15140 int 15141 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15142 { 15143 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15144 } 15145 15146 /** 15147 * xmlDoRead: 15148 * @ctxt: an XML parser context 15149 * @URL: the base URL to use for the document 15150 * @encoding: the document encoding, or NULL 15151 * @options: a combination of xmlParserOption 15152 * @reuse: keep the context for reuse 15153 * 15154 * Common front-end for the xmlRead functions 15155 * 15156 * Returns the resulting document tree or NULL 15157 */ 15158 static xmlDocPtr 15159 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15160 int options, int reuse) 15161 { 15162 xmlDocPtr ret; 15163 15164 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15165 if (encoding != NULL) { 15166 xmlCharEncodingHandlerPtr hdlr; 15167 15168 hdlr = xmlFindCharEncodingHandler(encoding); 15169 if (hdlr != NULL) 15170 xmlSwitchToEncoding(ctxt, hdlr); 15171 } 15172 if ((URL != NULL) && (ctxt->input != NULL) && 15173 (ctxt->input->filename == NULL)) 15174 ctxt->input->filename = (char *) 
xmlStrdup((const xmlChar *) URL); 15175 xmlParseDocument(ctxt); 15176 if ((ctxt->wellFormed) || ctxt->recovery) 15177 ret = ctxt->myDoc; 15178 else { 15179 ret = NULL; 15180 if (ctxt->myDoc != NULL) { 15181 xmlFreeDoc(ctxt->myDoc); 15182 } 15183 } 15184 ctxt->myDoc = NULL; 15185 if (!reuse) { 15186 xmlFreeParserCtxt(ctxt); 15187 } 15188 15189 return (ret); 15190 } 15191 15192 /** 15193 * xmlReadDoc: 15194 * @cur: a pointer to a zero terminated string 15195 * @URL: the base URL to use for the document 15196 * @encoding: the document encoding, or NULL 15197 * @options: a combination of xmlParserOption 15198 * 15199 * parse an XML in-memory document and build a tree. 15200 * 15201 * Returns the resulting document tree 15202 */ 15203 xmlDocPtr 15204 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15205 { 15206 xmlParserCtxtPtr ctxt; 15207 15208 if (cur == NULL) 15209 return (NULL); 15210 xmlInitParser(); 15211 15212 ctxt = xmlCreateDocParserCtxt(cur); 15213 if (ctxt == NULL) 15214 return (NULL); 15215 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15216 } 15217 15218 /** 15219 * xmlReadFile: 15220 * @filename: a file or URL 15221 * @encoding: the document encoding, or NULL 15222 * @options: a combination of xmlParserOption 15223 * 15224 * parse an XML file from the filesystem or the network. 
15225 * 15226 * Returns the resulting document tree 15227 */ 15228 xmlDocPtr 15229 xmlReadFile(const char *filename, const char *encoding, int options) 15230 { 15231 xmlParserCtxtPtr ctxt; 15232 15233 xmlInitParser(); 15234 ctxt = xmlCreateURLParserCtxt(filename, options); 15235 if (ctxt == NULL) 15236 return (NULL); 15237 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15238 } 15239 15240 /** 15241 * xmlReadMemory: 15242 * @buffer: a pointer to a char array 15243 * @size: the size of the array 15244 * @URL: the base URL to use for the document 15245 * @encoding: the document encoding, or NULL 15246 * @options: a combination of xmlParserOption 15247 * 15248 * parse an XML in-memory document and build a tree. 15249 * 15250 * Returns the resulting document tree 15251 */ 15252 xmlDocPtr 15253 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15254 { 15255 xmlParserCtxtPtr ctxt; 15256 15257 xmlInitParser(); 15258 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15259 if (ctxt == NULL) 15260 return (NULL); 15261 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15262 } 15263 15264 /** 15265 * xmlReadFd: 15266 * @fd: an open file descriptor 15267 * @URL: the base URL to use for the document 15268 * @encoding: the document encoding, or NULL 15269 * @options: a combination of xmlParserOption 15270 * 15271 * parse an XML from a file descriptor and build a tree. 15272 * NOTE that the file descriptor will not be closed when the 15273 * reader is closed or reset. 
15274 * 15275 * Returns the resulting document tree 15276 */ 15277 xmlDocPtr 15278 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15279 { 15280 xmlParserCtxtPtr ctxt; 15281 xmlParserInputBufferPtr input; 15282 xmlParserInputPtr stream; 15283 15284 if (fd < 0) 15285 return (NULL); 15286 xmlInitParser(); 15287 15288 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15289 if (input == NULL) 15290 return (NULL); 15291 input->closecallback = NULL; 15292 ctxt = xmlNewParserCtxt(); 15293 if (ctxt == NULL) { 15294 xmlFreeParserInputBuffer(input); 15295 return (NULL); 15296 } 15297 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15298 if (stream == NULL) { 15299 xmlFreeParserInputBuffer(input); 15300 xmlFreeParserCtxt(ctxt); 15301 return (NULL); 15302 } 15303 inputPush(ctxt, stream); 15304 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15305 } 15306 15307 /** 15308 * xmlReadIO: 15309 * @ioread: an I/O read function 15310 * @ioclose: an I/O close function 15311 * @ioctx: an I/O handler 15312 * @URL: the base URL to use for the document 15313 * @encoding: the document encoding, or NULL 15314 * @options: a combination of xmlParserOption 15315 * 15316 * parse an XML document from I/O functions and source and build a tree. 
15317 * 15318 * Returns the resulting document tree 15319 */ 15320 xmlDocPtr 15321 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15322 void *ioctx, const char *URL, const char *encoding, int options) 15323 { 15324 xmlParserCtxtPtr ctxt; 15325 xmlParserInputBufferPtr input; 15326 xmlParserInputPtr stream; 15327 15328 if (ioread == NULL) 15329 return (NULL); 15330 xmlInitParser(); 15331 15332 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15333 XML_CHAR_ENCODING_NONE); 15334 if (input == NULL) { 15335 if (ioclose != NULL) 15336 ioclose(ioctx); 15337 return (NULL); 15338 } 15339 ctxt = xmlNewParserCtxt(); 15340 if (ctxt == NULL) { 15341 xmlFreeParserInputBuffer(input); 15342 return (NULL); 15343 } 15344 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15345 if (stream == NULL) { 15346 xmlFreeParserInputBuffer(input); 15347 xmlFreeParserCtxt(ctxt); 15348 return (NULL); 15349 } 15350 inputPush(ctxt, stream); 15351 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15352 } 15353 15354 /** 15355 * xmlCtxtReadDoc: 15356 * @ctxt: an XML parser context 15357 * @cur: a pointer to a zero terminated string 15358 * @URL: the base URL to use for the document 15359 * @encoding: the document encoding, or NULL 15360 * @options: a combination of xmlParserOption 15361 * 15362 * parse an XML in-memory document and build a tree. 
15363 * This reuses the existing @ctxt parser context 15364 * 15365 * Returns the resulting document tree 15366 */ 15367 xmlDocPtr 15368 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15369 const char *URL, const char *encoding, int options) 15370 { 15371 if (cur == NULL) 15372 return (NULL); 15373 return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL, 15374 encoding, options)); 15375 } 15376 15377 /** 15378 * xmlCtxtReadFile: 15379 * @ctxt: an XML parser context 15380 * @filename: a file or URL 15381 * @encoding: the document encoding, or NULL 15382 * @options: a combination of xmlParserOption 15383 * 15384 * parse an XML file from the filesystem or the network. 15385 * This reuses the existing @ctxt parser context 15386 * 15387 * Returns the resulting document tree 15388 */ 15389 xmlDocPtr 15390 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15391 const char *encoding, int options) 15392 { 15393 xmlParserInputPtr stream; 15394 15395 if (filename == NULL) 15396 return (NULL); 15397 if (ctxt == NULL) 15398 return (NULL); 15399 xmlInitParser(); 15400 15401 xmlCtxtReset(ctxt); 15402 15403 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15404 if (stream == NULL) { 15405 return (NULL); 15406 } 15407 inputPush(ctxt, stream); 15408 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15409 } 15410 15411 /** 15412 * xmlCtxtReadMemory: 15413 * @ctxt: an XML parser context 15414 * @buffer: a pointer to a char array 15415 * @size: the size of the array 15416 * @URL: the base URL to use for the document 15417 * @encoding: the document encoding, or NULL 15418 * @options: a combination of xmlParserOption 15419 * 15420 * parse an XML in-memory document and build a tree. 
15421 * This reuses the existing @ctxt parser context 15422 * 15423 * Returns the resulting document tree 15424 */ 15425 xmlDocPtr 15426 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15427 const char *URL, const char *encoding, int options) 15428 { 15429 xmlParserInputBufferPtr input; 15430 xmlParserInputPtr stream; 15431 15432 if (ctxt == NULL) 15433 return (NULL); 15434 if (buffer == NULL) 15435 return (NULL); 15436 xmlInitParser(); 15437 15438 xmlCtxtReset(ctxt); 15439 15440 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15441 if (input == NULL) { 15442 return(NULL); 15443 } 15444 15445 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15446 if (stream == NULL) { 15447 xmlFreeParserInputBuffer(input); 15448 return(NULL); 15449 } 15450 15451 inputPush(ctxt, stream); 15452 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15453 } 15454 15455 /** 15456 * xmlCtxtReadFd: 15457 * @ctxt: an XML parser context 15458 * @fd: an open file descriptor 15459 * @URL: the base URL to use for the document 15460 * @encoding: the document encoding, or NULL 15461 * @options: a combination of xmlParserOption 15462 * 15463 * parse an XML from a file descriptor and build a tree. 15464 * This reuses the existing @ctxt parser context 15465 * NOTE that the file descriptor will not be closed when the 15466 * reader is closed or reset. 
15467 * 15468 * Returns the resulting document tree 15469 */ 15470 xmlDocPtr 15471 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15472 const char *URL, const char *encoding, int options) 15473 { 15474 xmlParserInputBufferPtr input; 15475 xmlParserInputPtr stream; 15476 15477 if (fd < 0) 15478 return (NULL); 15479 if (ctxt == NULL) 15480 return (NULL); 15481 xmlInitParser(); 15482 15483 xmlCtxtReset(ctxt); 15484 15485 15486 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15487 if (input == NULL) 15488 return (NULL); 15489 input->closecallback = NULL; 15490 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15491 if (stream == NULL) { 15492 xmlFreeParserInputBuffer(input); 15493 return (NULL); 15494 } 15495 inputPush(ctxt, stream); 15496 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15497 } 15498 15499 /** 15500 * xmlCtxtReadIO: 15501 * @ctxt: an XML parser context 15502 * @ioread: an I/O read function 15503 * @ioclose: an I/O close function 15504 * @ioctx: an I/O handler 15505 * @URL: the base URL to use for the document 15506 * @encoding: the document encoding, or NULL 15507 * @options: a combination of xmlParserOption 15508 * 15509 * parse an XML document from I/O functions and source and build a tree. 
15510 * This reuses the existing @ctxt parser context 15511 * 15512 * Returns the resulting document tree 15513 */ 15514 xmlDocPtr 15515 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15516 xmlInputCloseCallback ioclose, void *ioctx, 15517 const char *URL, 15518 const char *encoding, int options) 15519 { 15520 xmlParserInputBufferPtr input; 15521 xmlParserInputPtr stream; 15522 15523 if (ioread == NULL) 15524 return (NULL); 15525 if (ctxt == NULL) 15526 return (NULL); 15527 xmlInitParser(); 15528 15529 xmlCtxtReset(ctxt); 15530 15531 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15532 XML_CHAR_ENCODING_NONE); 15533 if (input == NULL) { 15534 if (ioclose != NULL) 15535 ioclose(ioctx); 15536 return (NULL); 15537 } 15538 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15539 if (stream == NULL) { 15540 xmlFreeParserInputBuffer(input); 15541 return (NULL); 15542 } 15543 inputPush(ctxt, stream); 15544 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15545 } 15546 15547