/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel@veillard.com 31 */ 32 33 /* To avoid EBCDIC trouble when parsing on zOS */ 34 #if defined(__MVS__) 35 #pragma convert("ISO8859-1") 36 #endif 37 38 #define IN_LIBXML 39 #include "libxml.h" 40 41 #if defined(_WIN32) 42 #define XML_DIR_SEP '\\' 43 #else 44 #define XML_DIR_SEP '/' 45 #endif 46 47 #include <stdlib.h> 48 #include <limits.h> 49 #include <string.h> 50 #include <stdarg.h> 51 #include <stddef.h> 52 #include <ctype.h> 53 #include <stdlib.h> 54 #include <libxml/xmlmemory.h> 55 #include <libxml/threads.h> 56 #include <libxml/globals.h> 57 #include <libxml/tree.h> 58 #include <libxml/parser.h> 59 #include <libxml/parserInternals.h> 60 #include <libxml/valid.h> 61 #include <libxml/entities.h> 62 #include <libxml/xmlerror.h> 63 #include <libxml/encoding.h> 64 #include <libxml/xmlIO.h> 65 #include <libxml/uri.h> 66 #ifdef LIBXML_CATALOG_ENABLED 67 #include <libxml/catalog.h> 68 #endif 69 #ifdef LIBXML_SCHEMAS_ENABLED 70 #include <libxml/xmlschemastypes.h> 71 #include <libxml/relaxng.h> 72 #endif 73 74 #include "buf.h" 75 #include "enc.h" 76 77 struct _xmlStartTag { 78 const xmlChar *prefix; 79 const xmlChar *URI; 80 int line; 81 int nsNr; 82 }; 83 84 static void 85 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 86 87 static xmlParserCtxtPtr 88 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 89 const xmlChar *base, xmlParserCtxtPtr pctx); 90 91 static void xmlHaltParser(xmlParserCtxtPtr ctxt); 92 93 static int 94 xmlParseElementStart(xmlParserCtxtPtr ctxt); 95 96 static void 97 xmlParseElementEnd(xmlParserCtxtPtr ctxt); 98 99 /************************************************************************ 100 * * 101 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 102 * * 103 ************************************************************************/ 104 105 #define XML_PARSER_BIG_ENTITY 1000 106 #define XML_PARSER_LOT_ENTITY 5000 107 108 /* 109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 110 * replacement over the size in byte of the input indicates that you have 111 * and exponential behaviour. A value of 10 correspond to at least 3 entity 112 * replacement per byte of input. 113 */ 114 #define XML_PARSER_NON_LINEAR 10 115 116 /* 117 * xmlParserEntityCheck 118 * 119 * Function to check non-linear entity expansion behaviour 120 * This is here to detect and stop exponential linear entity expansion 121 * This is not a limitation of the parser but a safety 122 * boundary feature. It can be disabled with the XML_PARSE_HUGE 123 * parser option. 124 */ 125 static int 126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 127 xmlEntityPtr ent, size_t replacement) 128 { 129 size_t consumed = 0; 130 int i; 131 132 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 133 return (0); 134 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 135 return (1); 136 137 /* 138 * This may look absurd but is needed to detect 139 * entities problems 140 */ 141 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 142 (ent->content != NULL) && (ent->checked == 0) && 143 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { 144 unsigned long oldnbent = ctxt->nbentities, diff; 145 xmlChar *rep; 146 147 ent->checked = 1; 148 149 ++ctxt->depth; 150 rep = xmlStringDecodeEntities(ctxt, ent->content, 151 XML_SUBSTITUTE_REF, 0, 0, 0); 152 --ctxt->depth; 153 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) { 154 ent->content[0] = 0; 155 } 156 157 diff = ctxt->nbentities - oldnbent + 1; 158 if (diff > INT_MAX / 2) 159 diff = INT_MAX / 2; 160 ent->checked = diff * 2; 161 if (rep != NULL) { 162 if (xmlStrchr(rep, '<')) 163 ent->checked |= 1; 164 xmlFree(rep); 165 rep = NULL; 166 } 167 } 168 
169 /* 170 * Prevent entity exponential check, not just replacement while 171 * parsing the DTD 172 * The check is potentially costly so do that only once in a thousand 173 */ 174 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) && 175 (ctxt->nbentities % 1024 == 0)) { 176 for (i = 0;i < ctxt->inputNr;i++) { 177 consumed += ctxt->inputTab[i]->consumed + 178 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base); 179 } 180 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) { 181 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 182 ctxt->instate = XML_PARSER_EOF; 183 return (1); 184 } 185 consumed = 0; 186 } 187 188 189 190 if (replacement != 0) { 191 if (replacement < XML_MAX_TEXT_LENGTH) 192 return(0); 193 194 /* 195 * If the volume of entity copy reaches 10 times the 196 * amount of parsed data and over the large text threshold 197 * then that's very likely to be an abuse. 198 */ 199 if (ctxt->input != NULL) { 200 consumed = ctxt->input->consumed + 201 (ctxt->input->cur - ctxt->input->base); 202 } 203 consumed += ctxt->sizeentities; 204 205 if (replacement < XML_PARSER_NON_LINEAR * consumed) 206 return(0); 207 } else if (size != 0) { 208 /* 209 * Do the check based on the replacement size of the entity 210 */ 211 if (size < XML_PARSER_BIG_ENTITY) 212 return(0); 213 214 /* 215 * A limit on the amount of text data reasonably used 216 */ 217 if (ctxt->input != NULL) { 218 consumed = ctxt->input->consumed + 219 (ctxt->input->cur - ctxt->input->base); 220 } 221 consumed += ctxt->sizeentities; 222 223 if ((size < XML_PARSER_NON_LINEAR * consumed) && 224 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 225 return (0); 226 } else if (ent != NULL) { 227 /* 228 * use the number of parsed entities in the replacement 229 */ 230 size = ent->checked / 2; 231 232 /* 233 * The amount of data parsed counting entities size only once 234 */ 235 if (ctxt->input != NULL) { 236 consumed = ctxt->input->consumed + 237 (ctxt->input->cur - ctxt->input->base); 
238 } 239 consumed += ctxt->sizeentities; 240 241 /* 242 * Check the density of entities for the amount of data 243 * knowing an entity reference will take at least 3 bytes 244 */ 245 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 246 return (0); 247 } else { 248 /* 249 * strange we got no data for checking 250 */ 251 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && 252 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || 253 (ctxt->nbentities <= 10000)) 254 return (0); 255 } 256 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 257 return (1); 258 } 259 260 /** 261 * xmlParserMaxDepth: 262 * 263 * arbitrary depth limit for the XML documents that we allow to 264 * process. This is not a limitation of the parser but a safety 265 * boundary feature. It can be disabled with the XML_PARSE_HUGE 266 * parser option. 267 */ 268 unsigned int xmlParserMaxDepth = 256; 269 270 271 272 #define SAX2 1 273 #define XML_PARSER_BIG_BUFFER_SIZE 300 274 #define XML_PARSER_BUFFER_SIZE 100 275 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 276 277 /** 278 * XML_PARSER_CHUNK_SIZE 279 * 280 * When calling GROW that's the minimal amount of data 281 * the parser expected to have received. 
It is not a hard 282 * limit but an optimization when reading strings like Names 283 * It is not strictly needed as long as inputs available characters 284 * are followed by 0, which should be provided by the I/O level 285 */ 286 #define XML_PARSER_CHUNK_SIZE 100 287 288 /* 289 * List of XML prefixed PI allowed by W3C specs 290 */ 291 292 static const char* const xmlW3CPIs[] = { 293 "xml-stylesheet", 294 "xml-model", 295 NULL 296 }; 297 298 299 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 300 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 301 const xmlChar **str); 302 303 static xmlParserErrors 304 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 305 xmlSAXHandlerPtr sax, 306 void *user_data, int depth, const xmlChar *URL, 307 const xmlChar *ID, xmlNodePtr *list); 308 309 static int 310 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 311 const char *encoding); 312 #ifdef LIBXML_LEGACY_ENABLED 313 static void 314 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 315 xmlNodePtr lastNode); 316 #endif /* LIBXML_LEGACY_ENABLED */ 317 318 static xmlParserErrors 319 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 320 const xmlChar *string, void *user_data, xmlNodePtr *lst); 321 322 static int 323 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 324 325 /************************************************************************ 326 * * 327 * Some factorized error routines * 328 * * 329 ************************************************************************/ 330 331 /** 332 * xmlErrAttributeDup: 333 * @ctxt: an XML parser context 334 * @prefix: the attribute prefix 335 * @localname: the attribute localname 336 * 337 * Handle a redefinition of attribute error 338 */ 339 static void 340 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 341 const xmlChar * localname) 342 { 343 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 344 (ctxt->instate 
== XML_PARSER_EOF)) 345 return; 346 if (ctxt != NULL) 347 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 348 349 if (prefix == NULL) 350 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 351 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 352 (const char *) localname, NULL, NULL, 0, 0, 353 "Attribute %s redefined\n", localname); 354 else 355 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 356 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 357 (const char *) prefix, (const char *) localname, 358 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 359 localname); 360 if (ctxt != NULL) { 361 ctxt->wellFormed = 0; 362 if (ctxt->recovery == 0) 363 ctxt->disableSAX = 1; 364 } 365 } 366 367 /** 368 * xmlFatalErr: 369 * @ctxt: an XML parser context 370 * @error: the error number 371 * @extra: extra information string 372 * 373 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 374 */ 375 static void 376 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 377 { 378 const char *errmsg; 379 380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 381 (ctxt->instate == XML_PARSER_EOF)) 382 return; 383 switch (error) { 384 case XML_ERR_INVALID_HEX_CHARREF: 385 errmsg = "CharRef: invalid hexadecimal value"; 386 break; 387 case XML_ERR_INVALID_DEC_CHARREF: 388 errmsg = "CharRef: invalid decimal value"; 389 break; 390 case XML_ERR_INVALID_CHARREF: 391 errmsg = "CharRef: invalid value"; 392 break; 393 case XML_ERR_INTERNAL_ERROR: 394 errmsg = "internal error"; 395 break; 396 case XML_ERR_PEREF_AT_EOF: 397 errmsg = "PEReference at end of document"; 398 break; 399 case XML_ERR_PEREF_IN_PROLOG: 400 errmsg = "PEReference in prolog"; 401 break; 402 case XML_ERR_PEREF_IN_EPILOG: 403 errmsg = "PEReference in epilog"; 404 break; 405 case XML_ERR_PEREF_NO_NAME: 406 errmsg = "PEReference: no name"; 407 break; 408 case XML_ERR_PEREF_SEMICOL_MISSING: 409 errmsg = "PEReference: expecting ';'"; 410 break; 411 case 
XML_ERR_ENTITY_LOOP: 412 errmsg = "Detected an entity reference loop"; 413 break; 414 case XML_ERR_ENTITY_NOT_STARTED: 415 errmsg = "EntityValue: \" or ' expected"; 416 break; 417 case XML_ERR_ENTITY_PE_INTERNAL: 418 errmsg = "PEReferences forbidden in internal subset"; 419 break; 420 case XML_ERR_ENTITY_NOT_FINISHED: 421 errmsg = "EntityValue: \" or ' expected"; 422 break; 423 case XML_ERR_ATTRIBUTE_NOT_STARTED: 424 errmsg = "AttValue: \" or ' expected"; 425 break; 426 case XML_ERR_LT_IN_ATTRIBUTE: 427 errmsg = "Unescaped '<' not allowed in attributes values"; 428 break; 429 case XML_ERR_LITERAL_NOT_STARTED: 430 errmsg = "SystemLiteral \" or ' expected"; 431 break; 432 case XML_ERR_LITERAL_NOT_FINISHED: 433 errmsg = "Unfinished System or Public ID \" or ' expected"; 434 break; 435 case XML_ERR_MISPLACED_CDATA_END: 436 errmsg = "Sequence ']]>' not allowed in content"; 437 break; 438 case XML_ERR_URI_REQUIRED: 439 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 440 break; 441 case XML_ERR_PUBID_REQUIRED: 442 errmsg = "PUBLIC, the Public Identifier is missing"; 443 break; 444 case XML_ERR_HYPHEN_IN_COMMENT: 445 errmsg = "Comment must not contain '--' (double-hyphen)"; 446 break; 447 case XML_ERR_PI_NOT_STARTED: 448 errmsg = "xmlParsePI : no target name"; 449 break; 450 case XML_ERR_RESERVED_XML_NAME: 451 errmsg = "Invalid PI name"; 452 break; 453 case XML_ERR_NOTATION_NOT_STARTED: 454 errmsg = "NOTATION: Name expected here"; 455 break; 456 case XML_ERR_NOTATION_NOT_FINISHED: 457 errmsg = "'>' required to close NOTATION declaration"; 458 break; 459 case XML_ERR_VALUE_REQUIRED: 460 errmsg = "Entity value required"; 461 break; 462 case XML_ERR_URI_FRAGMENT: 463 errmsg = "Fragment not allowed"; 464 break; 465 case XML_ERR_ATTLIST_NOT_STARTED: 466 errmsg = "'(' required to start ATTLIST enumeration"; 467 break; 468 case XML_ERR_NMTOKEN_REQUIRED: 469 errmsg = "NmToken expected in ATTLIST enumeration"; 470 break; 471 case XML_ERR_ATTLIST_NOT_FINISHED: 472 errmsg = "')' 
required to finish ATTLIST enumeration"; 473 break; 474 case XML_ERR_MIXED_NOT_STARTED: 475 errmsg = "MixedContentDecl : '|' or ')*' expected"; 476 break; 477 case XML_ERR_PCDATA_REQUIRED: 478 errmsg = "MixedContentDecl : '#PCDATA' expected"; 479 break; 480 case XML_ERR_ELEMCONTENT_NOT_STARTED: 481 errmsg = "ContentDecl : Name or '(' expected"; 482 break; 483 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 484 errmsg = "ContentDecl : ',' '|' or ')' expected"; 485 break; 486 case XML_ERR_PEREF_IN_INT_SUBSET: 487 errmsg = 488 "PEReference: forbidden within markup decl in internal subset"; 489 break; 490 case XML_ERR_GT_REQUIRED: 491 errmsg = "expected '>'"; 492 break; 493 case XML_ERR_CONDSEC_INVALID: 494 errmsg = "XML conditional section '[' expected"; 495 break; 496 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 497 errmsg = "Content error in the external subset"; 498 break; 499 case XML_ERR_CONDSEC_INVALID_KEYWORD: 500 errmsg = 501 "conditional section INCLUDE or IGNORE keyword expected"; 502 break; 503 case XML_ERR_CONDSEC_NOT_FINISHED: 504 errmsg = "XML conditional section not closed"; 505 break; 506 case XML_ERR_XMLDECL_NOT_STARTED: 507 errmsg = "Text declaration '<?xml' required"; 508 break; 509 case XML_ERR_XMLDECL_NOT_FINISHED: 510 errmsg = "parsing XML declaration: '?>' expected"; 511 break; 512 case XML_ERR_EXT_ENTITY_STANDALONE: 513 errmsg = "external parsed entities cannot be standalone"; 514 break; 515 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 516 errmsg = "EntityRef: expecting ';'"; 517 break; 518 case XML_ERR_DOCTYPE_NOT_FINISHED: 519 errmsg = "DOCTYPE improperly terminated"; 520 break; 521 case XML_ERR_LTSLASH_REQUIRED: 522 errmsg = "EndTag: '</' not found"; 523 break; 524 case XML_ERR_EQUAL_REQUIRED: 525 errmsg = "expected '='"; 526 break; 527 case XML_ERR_STRING_NOT_CLOSED: 528 errmsg = "String not closed expecting \" or '"; 529 break; 530 case XML_ERR_STRING_NOT_STARTED: 531 errmsg = "String not started expecting ' or \""; 532 break; 533 case XML_ERR_ENCODING_NAME: 
534 errmsg = "Invalid XML encoding name"; 535 break; 536 case XML_ERR_STANDALONE_VALUE: 537 errmsg = "standalone accepts only 'yes' or 'no'"; 538 break; 539 case XML_ERR_DOCUMENT_EMPTY: 540 errmsg = "Document is empty"; 541 break; 542 case XML_ERR_DOCUMENT_END: 543 errmsg = "Extra content at the end of the document"; 544 break; 545 case XML_ERR_NOT_WELL_BALANCED: 546 errmsg = "chunk is not well balanced"; 547 break; 548 case XML_ERR_EXTRA_CONTENT: 549 errmsg = "extra content at the end of well balanced chunk"; 550 break; 551 case XML_ERR_VERSION_MISSING: 552 errmsg = "Malformed declaration expecting version"; 553 break; 554 case XML_ERR_NAME_TOO_LONG: 555 errmsg = "Name too long use XML_PARSE_HUGE option"; 556 break; 557 #if 0 558 case: 559 errmsg = ""; 560 break; 561 #endif 562 default: 563 errmsg = "Unregistered error message"; 564 } 565 if (ctxt != NULL) 566 ctxt->errNo = error; 567 if (info == NULL) { 568 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 569 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 570 errmsg); 571 } else { 572 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 573 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 574 errmsg, info); 575 } 576 if (ctxt != NULL) { 577 ctxt->wellFormed = 0; 578 if (ctxt->recovery == 0) 579 ctxt->disableSAX = 1; 580 } 581 } 582 583 /** 584 * xmlFatalErrMsg: 585 * @ctxt: an XML parser context 586 * @error: the error number 587 * @msg: the error message 588 * 589 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 590 */ 591 static void LIBXML_ATTR_FORMAT(3,0) 592 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 593 const char *msg) 594 { 595 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 596 (ctxt->instate == XML_PARSER_EOF)) 597 return; 598 if (ctxt != NULL) 599 ctxt->errNo = error; 600 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 601 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 602 if (ctxt != NULL) { 603 ctxt->wellFormed = 0; 604 if (ctxt->recovery == 0) 605 ctxt->disableSAX = 1; 606 } 607 } 608 609 /** 610 * xmlWarningMsg: 611 * @ctxt: an XML parser context 612 * @error: the error number 613 * @msg: the error message 614 * @str1: extra data 615 * @str2: extra data 616 * 617 * Handle a warning. 618 */ 619 static void LIBXML_ATTR_FORMAT(3,0) 620 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 621 const char *msg, const xmlChar *str1, const xmlChar *str2) 622 { 623 xmlStructuredErrorFunc schannel = NULL; 624 625 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 626 (ctxt->instate == XML_PARSER_EOF)) 627 return; 628 if ((ctxt != NULL) && (ctxt->sax != NULL) && 629 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 630 schannel = ctxt->sax->serror; 631 if (ctxt != NULL) { 632 __xmlRaiseError(schannel, 633 (ctxt->sax) ? ctxt->sax->warning : NULL, 634 ctxt->userData, 635 ctxt, NULL, XML_FROM_PARSER, error, 636 XML_ERR_WARNING, NULL, 0, 637 (const char *) str1, (const char *) str2, NULL, 0, 0, 638 msg, (const char *) str1, (const char *) str2); 639 } else { 640 __xmlRaiseError(schannel, NULL, NULL, 641 ctxt, NULL, XML_FROM_PARSER, error, 642 XML_ERR_WARNING, NULL, 0, 643 (const char *) str1, (const char *) str2, NULL, 0, 0, 644 msg, (const char *) str1, (const char *) str2); 645 } 646 } 647 648 /** 649 * xmlValidityError: 650 * @ctxt: an XML parser context 651 * @error: the error number 652 * @msg: the error message 653 * @str1: extra data 654 * 655 * Handle a validity error. 
656 */ 657 static void LIBXML_ATTR_FORMAT(3,0) 658 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 659 const char *msg, const xmlChar *str1, const xmlChar *str2) 660 { 661 xmlStructuredErrorFunc schannel = NULL; 662 663 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 664 (ctxt->instate == XML_PARSER_EOF)) 665 return; 666 if (ctxt != NULL) { 667 ctxt->errNo = error; 668 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 669 schannel = ctxt->sax->serror; 670 } 671 if (ctxt != NULL) { 672 __xmlRaiseError(schannel, 673 ctxt->vctxt.error, ctxt->vctxt.userData, 674 ctxt, NULL, XML_FROM_DTD, error, 675 XML_ERR_ERROR, NULL, 0, (const char *) str1, 676 (const char *) str2, NULL, 0, 0, 677 msg, (const char *) str1, (const char *) str2); 678 ctxt->valid = 0; 679 } else { 680 __xmlRaiseError(schannel, NULL, NULL, 681 ctxt, NULL, XML_FROM_DTD, error, 682 XML_ERR_ERROR, NULL, 0, (const char *) str1, 683 (const char *) str2, NULL, 0, 0, 684 msg, (const char *) str1, (const char *) str2); 685 } 686 } 687 688 /** 689 * xmlFatalErrMsgInt: 690 * @ctxt: an XML parser context 691 * @error: the error number 692 * @msg: the error message 693 * @val: an integer value 694 * 695 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 696 */ 697 static void LIBXML_ATTR_FORMAT(3,0) 698 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 699 const char *msg, int val) 700 { 701 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 702 (ctxt->instate == XML_PARSER_EOF)) 703 return; 704 if (ctxt != NULL) 705 ctxt->errNo = error; 706 __xmlRaiseError(NULL, NULL, NULL, 707 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 708 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 709 if (ctxt != NULL) { 710 ctxt->wellFormed = 0; 711 if (ctxt->recovery == 0) 712 ctxt->disableSAX = 1; 713 } 714 } 715 716 /** 717 * xmlFatalErrMsgStrIntStr: 718 * @ctxt: an XML parser context 719 * @error: the error number 720 * @msg: the error message 721 * @str1: an string info 722 * @val: an integer value 723 * @str2: an string info 724 * 725 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 726 */ 727 static void LIBXML_ATTR_FORMAT(3,0) 728 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 729 const char *msg, const xmlChar *str1, int val, 730 const xmlChar *str2) 731 { 732 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 733 (ctxt->instate == XML_PARSER_EOF)) 734 return; 735 if (ctxt != NULL) 736 ctxt->errNo = error; 737 __xmlRaiseError(NULL, NULL, NULL, 738 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 739 NULL, 0, (const char *) str1, (const char *) str2, 740 NULL, val, 0, msg, str1, val, str2); 741 if (ctxt != NULL) { 742 ctxt->wellFormed = 0; 743 if (ctxt->recovery == 0) 744 ctxt->disableSAX = 1; 745 } 746 } 747 748 /** 749 * xmlFatalErrMsgStr: 750 * @ctxt: an XML parser context 751 * @error: the error number 752 * @msg: the error message 753 * @val: a string value 754 * 755 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 756 */ 757 static void LIBXML_ATTR_FORMAT(3,0) 758 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 759 const char *msg, const xmlChar * val) 760 { 761 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 762 (ctxt->instate == XML_PARSER_EOF)) 763 return; 764 if (ctxt != NULL) 765 ctxt->errNo = error; 766 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 767 XML_FROM_PARSER, error, XML_ERR_FATAL, 768 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 769 val); 770 if (ctxt != NULL) { 771 ctxt->wellFormed = 0; 772 if (ctxt->recovery == 0) 773 ctxt->disableSAX = 1; 774 } 775 } 776 777 /** 778 * xmlErrMsgStr: 779 * @ctxt: an XML parser context 780 * @error: the error number 781 * @msg: the error message 782 * @val: a string value 783 * 784 * Handle a non fatal parser error 785 */ 786 static void LIBXML_ATTR_FORMAT(3,0) 787 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 788 const char *msg, const xmlChar * val) 789 { 790 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 791 (ctxt->instate == XML_PARSER_EOF)) 792 return; 793 if (ctxt != NULL) 794 ctxt->errNo = error; 795 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 796 XML_FROM_PARSER, error, XML_ERR_ERROR, 797 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 798 val); 799 } 800 801 /** 802 * xmlNsErr: 803 * @ctxt: an XML parser context 804 * @error: the error number 805 * @msg: the message 806 * @info1: extra information string 807 * @info2: extra information string 808 * 809 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 810 */ 811 static void LIBXML_ATTR_FORMAT(3,0) 812 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 813 const char *msg, 814 const xmlChar * info1, const xmlChar * info2, 815 const xmlChar * info3) 816 { 817 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 818 (ctxt->instate == XML_PARSER_EOF)) 819 return; 820 if (ctxt != NULL) 821 ctxt->errNo = error; 822 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 823 XML_ERR_ERROR, NULL, 0, (const char *) info1, 824 (const char *) info2, (const char *) info3, 0, 0, msg, 825 info1, info2, info3); 826 if (ctxt != NULL) 827 ctxt->nsWellFormed = 0; 828 } 829 830 /** 831 * xmlNsWarn 832 * @ctxt: an XML parser context 833 * @error: the error number 834 * @msg: the message 835 * @info1: extra information string 836 * @info2: extra information string 837 * 838 * Handle a namespace warning error 839 */ 840 static void LIBXML_ATTR_FORMAT(3,0) 841 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 842 const char *msg, 843 const xmlChar * info1, const xmlChar * info2, 844 const xmlChar * info3) 845 { 846 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 847 (ctxt->instate == XML_PARSER_EOF)) 848 return; 849 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 850 XML_ERR_WARNING, NULL, 0, (const char *) info1, 851 (const char *) info2, (const char *) info3, 0, 0, msg, 852 info1, info2, info3); 853 } 854 855 /************************************************************************ 856 * * 857 * Library wide options * 858 * * 859 ************************************************************************/ 860 861 /** 862 * xmlHasFeature: 863 * @feature: the feature to be examined 864 * 865 * Examines if the library has been compiled with a given feature. 866 * 867 * Returns a non-zero value if the feature exist, otherwise zero. 868 * Returns zero (0) if the feature does not exist or an unknown 869 * unknown feature is requested, non-zero otherwise. 
870 */ 871 int 872 xmlHasFeature(xmlFeature feature) 873 { 874 switch (feature) { 875 case XML_WITH_THREAD: 876 #ifdef LIBXML_THREAD_ENABLED 877 return(1); 878 #else 879 return(0); 880 #endif 881 case XML_WITH_TREE: 882 #ifdef LIBXML_TREE_ENABLED 883 return(1); 884 #else 885 return(0); 886 #endif 887 case XML_WITH_OUTPUT: 888 #ifdef LIBXML_OUTPUT_ENABLED 889 return(1); 890 #else 891 return(0); 892 #endif 893 case XML_WITH_PUSH: 894 #ifdef LIBXML_PUSH_ENABLED 895 return(1); 896 #else 897 return(0); 898 #endif 899 case XML_WITH_READER: 900 #ifdef LIBXML_READER_ENABLED 901 return(1); 902 #else 903 return(0); 904 #endif 905 case XML_WITH_PATTERN: 906 #ifdef LIBXML_PATTERN_ENABLED 907 return(1); 908 #else 909 return(0); 910 #endif 911 case XML_WITH_WRITER: 912 #ifdef LIBXML_WRITER_ENABLED 913 return(1); 914 #else 915 return(0); 916 #endif 917 case XML_WITH_SAX1: 918 #ifdef LIBXML_SAX1_ENABLED 919 return(1); 920 #else 921 return(0); 922 #endif 923 case XML_WITH_FTP: 924 #ifdef LIBXML_FTP_ENABLED 925 return(1); 926 #else 927 return(0); 928 #endif 929 case XML_WITH_HTTP: 930 #ifdef LIBXML_HTTP_ENABLED 931 return(1); 932 #else 933 return(0); 934 #endif 935 case XML_WITH_VALID: 936 #ifdef LIBXML_VALID_ENABLED 937 return(1); 938 #else 939 return(0); 940 #endif 941 case XML_WITH_HTML: 942 #ifdef LIBXML_HTML_ENABLED 943 return(1); 944 #else 945 return(0); 946 #endif 947 case XML_WITH_LEGACY: 948 #ifdef LIBXML_LEGACY_ENABLED 949 return(1); 950 #else 951 return(0); 952 #endif 953 case XML_WITH_C14N: 954 #ifdef LIBXML_C14N_ENABLED 955 return(1); 956 #else 957 return(0); 958 #endif 959 case XML_WITH_CATALOG: 960 #ifdef LIBXML_CATALOG_ENABLED 961 return(1); 962 #else 963 return(0); 964 #endif 965 case XML_WITH_XPATH: 966 #ifdef LIBXML_XPATH_ENABLED 967 return(1); 968 #else 969 return(0); 970 #endif 971 case XML_WITH_XPTR: 972 #ifdef LIBXML_XPTR_ENABLED 973 return(1); 974 #else 975 return(0); 976 #endif 977 case XML_WITH_XINCLUDE: 978 #ifdef LIBXML_XINCLUDE_ENABLED 979 return(1); 980 
#else 981 return(0); 982 #endif 983 case XML_WITH_ICONV: 984 #ifdef LIBXML_ICONV_ENABLED 985 return(1); 986 #else 987 return(0); 988 #endif 989 case XML_WITH_ISO8859X: 990 #ifdef LIBXML_ISO8859X_ENABLED 991 return(1); 992 #else 993 return(0); 994 #endif 995 case XML_WITH_UNICODE: 996 #ifdef LIBXML_UNICODE_ENABLED 997 return(1); 998 #else 999 return(0); 1000 #endif 1001 case XML_WITH_REGEXP: 1002 #ifdef LIBXML_REGEXP_ENABLED 1003 return(1); 1004 #else 1005 return(0); 1006 #endif 1007 case XML_WITH_AUTOMATA: 1008 #ifdef LIBXML_AUTOMATA_ENABLED 1009 return(1); 1010 #else 1011 return(0); 1012 #endif 1013 case XML_WITH_EXPR: 1014 #ifdef LIBXML_EXPR_ENABLED 1015 return(1); 1016 #else 1017 return(0); 1018 #endif 1019 case XML_WITH_SCHEMAS: 1020 #ifdef LIBXML_SCHEMAS_ENABLED 1021 return(1); 1022 #else 1023 return(0); 1024 #endif 1025 case XML_WITH_SCHEMATRON: 1026 #ifdef LIBXML_SCHEMATRON_ENABLED 1027 return(1); 1028 #else 1029 return(0); 1030 #endif 1031 case XML_WITH_MODULES: 1032 #ifdef LIBXML_MODULES_ENABLED 1033 return(1); 1034 #else 1035 return(0); 1036 #endif 1037 case XML_WITH_DEBUG: 1038 #ifdef LIBXML_DEBUG_ENABLED 1039 return(1); 1040 #else 1041 return(0); 1042 #endif 1043 case XML_WITH_DEBUG_MEM: 1044 #ifdef DEBUG_MEMORY_LOCATION 1045 return(1); 1046 #else 1047 return(0); 1048 #endif 1049 case XML_WITH_DEBUG_RUN: 1050 #ifdef LIBXML_DEBUG_RUNTIME 1051 return(1); 1052 #else 1053 return(0); 1054 #endif 1055 case XML_WITH_ZLIB: 1056 #ifdef LIBXML_ZLIB_ENABLED 1057 return(1); 1058 #else 1059 return(0); 1060 #endif 1061 case XML_WITH_LZMA: 1062 #ifdef LIBXML_LZMA_ENABLED 1063 return(1); 1064 #else 1065 return(0); 1066 #endif 1067 case XML_WITH_ICU: 1068 #ifdef LIBXML_ICU_ENABLED 1069 return(1); 1070 #else 1071 return(0); 1072 #endif 1073 default: 1074 break; 1075 } 1076 return(0); 1077 } 1078 1079 /************************************************************************ 1080 * * 1081 * SAX2 defaulted attributes handling * 1082 * * 1083 
************************************************************************/ 1084 1085 /** 1086 * xmlDetectSAX2: 1087 * @ctxt: an XML parser context 1088 * 1089 * Do the SAX2 detection and specific initialization 1090 */ 1091 static void 1092 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1093 xmlSAXHandlerPtr sax; 1094 1095 /* Avoid unused variable warning if features are disabled. */ 1096 (void) sax; 1097 1098 if (ctxt == NULL) return; 1099 sax = ctxt->sax; 1100 #ifdef LIBXML_SAX1_ENABLED 1101 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) && 1102 ((sax->startElementNs != NULL) || 1103 (sax->endElementNs != NULL) || 1104 ((sax->startElement == NULL) && (sax->endElement == NULL)))) 1105 ctxt->sax2 = 1; 1106 #else 1107 ctxt->sax2 = 1; 1108 #endif /* LIBXML_SAX1_ENABLED */ 1109 1110 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1111 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1112 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1113 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1114 (ctxt->str_xml_ns == NULL)) { 1115 xmlErrMemory(ctxt, NULL); 1116 } 1117 } 1118 1119 typedef struct _xmlDefAttrs xmlDefAttrs; 1120 typedef xmlDefAttrs *xmlDefAttrsPtr; 1121 struct _xmlDefAttrs { 1122 int nbAttrs; /* number of defaulted attributes on that element */ 1123 int maxAttrs; /* the size of the array */ 1124 #if __STDC_VERSION__ >= 199901L 1125 /* Using a C99 flexible array member avoids UBSan errors. 
*/ 1126 const xmlChar *values[]; /* array of localname/prefix/values/external */ 1127 #else 1128 const xmlChar *values[5]; 1129 #endif 1130 }; 1131 1132 /** 1133 * xmlAttrNormalizeSpace: 1134 * @src: the source string 1135 * @dst: the target string 1136 * 1137 * Normalize the space in non CDATA attribute values: 1138 * If the attribute type is not CDATA, then the XML processor MUST further 1139 * process the normalized attribute value by discarding any leading and 1140 * trailing space (#x20) characters, and by replacing sequences of space 1141 * (#x20) characters by a single space (#x20) character. 1142 * Note that the size of dst need to be at least src, and if one doesn't need 1143 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1144 * passing src as dst is just fine. 1145 * 1146 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1147 * is needed. 1148 */ 1149 static xmlChar * 1150 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1151 { 1152 if ((src == NULL) || (dst == NULL)) 1153 return(NULL); 1154 1155 while (*src == 0x20) src++; 1156 while (*src != 0) { 1157 if (*src == 0x20) { 1158 while (*src == 0x20) src++; 1159 if (*src != 0) 1160 *dst++ = 0x20; 1161 } else { 1162 *dst++ = *src++; 1163 } 1164 } 1165 *dst = 0; 1166 if (dst == src) 1167 return(NULL); 1168 return(dst); 1169 } 1170 1171 /** 1172 * xmlAttrNormalizeSpace2: 1173 * @src: the source string 1174 * 1175 * Normalize the space in non CDATA attribute values, a slightly more complex 1176 * front end to avoid allocation problems when running on attribute values 1177 * coming from the input. 1178 * 1179 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1180 * is needed. 
1181 */ 1182 static const xmlChar * 1183 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1184 { 1185 int i; 1186 int remove_head = 0; 1187 int need_realloc = 0; 1188 const xmlChar *cur; 1189 1190 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1191 return(NULL); 1192 i = *len; 1193 if (i <= 0) 1194 return(NULL); 1195 1196 cur = src; 1197 while (*cur == 0x20) { 1198 cur++; 1199 remove_head++; 1200 } 1201 while (*cur != 0) { 1202 if (*cur == 0x20) { 1203 cur++; 1204 if ((*cur == 0x20) || (*cur == 0)) { 1205 need_realloc = 1; 1206 break; 1207 } 1208 } else 1209 cur++; 1210 } 1211 if (need_realloc) { 1212 xmlChar *ret; 1213 1214 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1215 if (ret == NULL) { 1216 xmlErrMemory(ctxt, NULL); 1217 return(NULL); 1218 } 1219 xmlAttrNormalizeSpace(ret, ret); 1220 *len = (int) strlen((const char *)ret); 1221 return(ret); 1222 } else if (remove_head) { 1223 *len -= remove_head; 1224 memmove(src, src + remove_head, 1 + *len); 1225 return(src); 1226 } 1227 return(NULL); 1228 } 1229 1230 /** 1231 * xmlAddDefAttrs: 1232 * @ctxt: an XML parser context 1233 * @fullname: the element fullname 1234 * @fullattr: the attribute fullname 1235 * @value: the attribute value 1236 * 1237 * Add a defaulted attribute for an element 1238 */ 1239 static void 1240 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1241 const xmlChar *fullname, 1242 const xmlChar *fullattr, 1243 const xmlChar *value) { 1244 xmlDefAttrsPtr defaults; 1245 int len; 1246 const xmlChar *name; 1247 const xmlChar *prefix; 1248 1249 /* 1250 * Allows to detect attribute redefinitions 1251 */ 1252 if (ctxt->attsSpecial != NULL) { 1253 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1254 return; 1255 } 1256 1257 if (ctxt->attsDefault == NULL) { 1258 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1259 if (ctxt->attsDefault == NULL) 1260 goto mem_error; 1261 } 1262 1263 /* 1264 * split the element name into prefix:localname , the string 
found 1265 * are within the DTD and then not associated to namespace names. 1266 */ 1267 name = xmlSplitQName3(fullname, &len); 1268 if (name == NULL) { 1269 name = xmlDictLookup(ctxt->dict, fullname, -1); 1270 prefix = NULL; 1271 } else { 1272 name = xmlDictLookup(ctxt->dict, name, -1); 1273 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1274 } 1275 1276 /* 1277 * make sure there is some storage 1278 */ 1279 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1280 if (defaults == NULL) { 1281 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1282 (4 * 5) * sizeof(const xmlChar *)); 1283 if (defaults == NULL) 1284 goto mem_error; 1285 defaults->nbAttrs = 0; 1286 defaults->maxAttrs = 4; 1287 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1288 defaults, NULL) < 0) { 1289 xmlFree(defaults); 1290 goto mem_error; 1291 } 1292 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1293 xmlDefAttrsPtr temp; 1294 1295 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1296 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1297 if (temp == NULL) 1298 goto mem_error; 1299 defaults = temp; 1300 defaults->maxAttrs *= 2; 1301 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1302 defaults, NULL) < 0) { 1303 xmlFree(defaults); 1304 goto mem_error; 1305 } 1306 } 1307 1308 /* 1309 * Split the element name into prefix:localname , the string found 1310 * are within the DTD and hen not associated to namespace names. 
1311 */ 1312 name = xmlSplitQName3(fullattr, &len); 1313 if (name == NULL) { 1314 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1315 prefix = NULL; 1316 } else { 1317 name = xmlDictLookup(ctxt->dict, name, -1); 1318 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1319 } 1320 1321 defaults->values[5 * defaults->nbAttrs] = name; 1322 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1323 /* intern the string and precompute the end */ 1324 len = xmlStrlen(value); 1325 value = xmlDictLookup(ctxt->dict, value, len); 1326 defaults->values[5 * defaults->nbAttrs + 2] = value; 1327 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1328 if (ctxt->external) 1329 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1330 else 1331 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1332 defaults->nbAttrs++; 1333 1334 return; 1335 1336 mem_error: 1337 xmlErrMemory(ctxt, NULL); 1338 return; 1339 } 1340 1341 /** 1342 * xmlAddSpecialAttr: 1343 * @ctxt: an XML parser context 1344 * @fullname: the element fullname 1345 * @fullattr: the attribute fullname 1346 * @type: the attribute type 1347 * 1348 * Register this attribute type 1349 */ 1350 static void 1351 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1352 const xmlChar *fullname, 1353 const xmlChar *fullattr, 1354 int type) 1355 { 1356 if (ctxt->attsSpecial == NULL) { 1357 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1358 if (ctxt->attsSpecial == NULL) 1359 goto mem_error; 1360 } 1361 1362 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1363 return; 1364 1365 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1366 (void *) (ptrdiff_t) type); 1367 return; 1368 1369 mem_error: 1370 xmlErrMemory(ctxt, NULL); 1371 return; 1372 } 1373 1374 /** 1375 * xmlCleanSpecialAttrCallback: 1376 * 1377 * Removes CDATA attributes from the special attribute table 1378 */ 1379 static void 1380 xmlCleanSpecialAttrCallback(void *payload, void *data, 1381 const xmlChar *fullname, const xmlChar 
*fullattr, 1382 const xmlChar *unused ATTRIBUTE_UNUSED) { 1383 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1384 1385 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { 1386 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1387 } 1388 } 1389 1390 /** 1391 * xmlCleanSpecialAttr: 1392 * @ctxt: an XML parser context 1393 * 1394 * Trim the list of attributes defined to remove all those of type 1395 * CDATA as they are not special. This call should be done when finishing 1396 * to parse the DTD and before starting to parse the document root. 1397 */ 1398 static void 1399 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1400 { 1401 if (ctxt->attsSpecial == NULL) 1402 return; 1403 1404 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1405 1406 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1407 xmlHashFree(ctxt->attsSpecial, NULL); 1408 ctxt->attsSpecial = NULL; 1409 } 1410 return; 1411 } 1412 1413 /** 1414 * xmlCheckLanguageID: 1415 * @lang: pointer to the string value 1416 * 1417 * Checks that the value conforms to the LanguageID production: 1418 * 1419 * NOTE: this is somewhat deprecated, those productions were removed from 1420 * the XML Second edition. 
1421 * 1422 * [33] LanguageID ::= Langcode ('-' Subcode)* 1423 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1424 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1425 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1426 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1427 * [38] Subcode ::= ([a-z] | [A-Z])+ 1428 * 1429 * The current REC reference the successors of RFC 1766, currently 5646 1430 * 1431 * http://www.rfc-editor.org/rfc/rfc5646.txt 1432 * langtag = language 1433 * ["-" script] 1434 * ["-" region] 1435 * *("-" variant) 1436 * *("-" extension) 1437 * ["-" privateuse] 1438 * language = 2*3ALPHA ; shortest ISO 639 code 1439 * ["-" extlang] ; sometimes followed by 1440 * ; extended language subtags 1441 * / 4ALPHA ; or reserved for future use 1442 * / 5*8ALPHA ; or registered language subtag 1443 * 1444 * extlang = 3ALPHA ; selected ISO 639 codes 1445 * *2("-" 3ALPHA) ; permanently reserved 1446 * 1447 * script = 4ALPHA ; ISO 15924 code 1448 * 1449 * region = 2ALPHA ; ISO 3166-1 code 1450 * / 3DIGIT ; UN M.49 code 1451 * 1452 * variant = 5*8alphanum ; registered variants 1453 * / (DIGIT 3alphanum) 1454 * 1455 * extension = singleton 1*("-" (2*8alphanum)) 1456 * 1457 * ; Single alphanumerics 1458 * ; "x" reserved for private use 1459 * singleton = DIGIT ; 0 - 9 1460 * / %x41-57 ; A - W 1461 * / %x59-5A ; Y - Z 1462 * / %x61-77 ; a - w 1463 * / %x79-7A ; y - z 1464 * 1465 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1466 * The parser below doesn't try to cope with extension or privateuse 1467 * that could be added but that's not interoperable anyway 1468 * 1469 * Returns 1 if correct 0 otherwise 1470 **/ 1471 int 1472 xmlCheckLanguageID(const xmlChar * lang) 1473 { 1474 const xmlChar *cur = lang, *nxt; 1475 1476 if (cur == NULL) 1477 return (0); 1478 if (((cur[0] == 'i') && (cur[1] == '-')) || 1479 ((cur[0] == 'I') && (cur[1] == '-')) || 1480 ((cur[0] == 'x') && (cur[1] == '-')) || 1481 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1482 /* 1483 * Still allow IANA code and user code which were coming 1484 * from the previous version of the XML-1.0 specification 1485 * it's deprecated but we should not fail 1486 */ 1487 cur += 2; 1488 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1489 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1490 cur++; 1491 return(cur[0] == 0); 1492 } 1493 nxt = cur; 1494 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1495 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1496 nxt++; 1497 if (nxt - cur >= 4) { 1498 /* 1499 * Reserved 1500 */ 1501 if ((nxt - cur > 8) || (nxt[0] != 0)) 1502 return(0); 1503 return(1); 1504 } 1505 if (nxt - cur < 2) 1506 return(0); 1507 /* we got an ISO 639 code */ 1508 if (nxt[0] == 0) 1509 return(1); 1510 if (nxt[0] != '-') 1511 return(0); 1512 1513 nxt++; 1514 cur = nxt; 1515 /* now we can have extlang or script or region or variant */ 1516 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1517 goto region_m49; 1518 1519 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1520 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1521 nxt++; 1522 if (nxt - cur == 4) 1523 goto script; 1524 if (nxt - cur == 2) 1525 goto region; 1526 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1527 goto variant; 1528 if (nxt - cur != 3) 1529 return(0); 1530 /* we parsed an extlang */ 1531 if (nxt[0] == 0) 1532 return(1); 1533 if (nxt[0] != '-') 1534 return(0); 1535 1536 nxt++; 1537 cur = nxt; 1538 /* now we can have script or region or variant */ 1539 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1540 goto region_m49; 1541 1542 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1543 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1544 nxt++; 1545 if (nxt - cur == 2) 1546 goto region; 1547 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1548 goto variant; 1549 if (nxt - cur != 4) 1550 return(0); 1551 /* we parsed a script */ 1552 script: 1553 if (nxt[0] == 0) 1554 return(1); 1555 if (nxt[0] != '-') 1556 return(0); 1557 1558 nxt++; 1559 cur = nxt; 1560 /* now we can have region or variant */ 1561 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1562 goto region_m49; 1563 1564 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1565 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1566 nxt++; 1567 1568 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1569 goto variant; 1570 if (nxt - cur != 2) 1571 return(0); 1572 /* we parsed a region */ 1573 region: 1574 if (nxt[0] == 0) 1575 return(1); 1576 if (nxt[0] != '-') 1577 return(0); 1578 1579 nxt++; 1580 cur = nxt; 1581 /* now we can just have a variant */ 1582 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1583 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1584 nxt++; 1585 1586 if ((nxt - cur < 5) || (nxt - cur > 8)) 1587 return(0); 1588 1589 /* we parsed a variant */ 1590 variant: 1591 if (nxt[0] == 0) 1592 return(1); 1593 if (nxt[0] != '-') 1594 return(0); 1595 /* extensions and private use subtags not checked */ 1596 return (1); 1597 1598 region_m49: 1599 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1600 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1601 nxt += 3; 1602 goto region; 1603 } 1604 return(0); 1605 } 1606 1607 /************************************************************************ 1608 * * 1609 * Parser stacks related functions and macros * 1610 * * 1611 ************************************************************************/ 1612 1613 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1614 const xmlChar ** str); 1615 1616 #ifdef SAX2 1617 /** 1618 * nsPush: 1619 * @ctxt: an XML parser context 1620 * @prefix: the namespace prefix or NULL 1621 * @URL: the namespace name 1622 * 1623 * Pushes a new parser namespace on top of the ns stack 1624 * 1625 * Returns -1 in case of error, -2 if the namespace should be discarded 1626 * and the index in the stack otherwise. 
1627 */ 1628 static int 1629 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1630 { 1631 if (ctxt->options & XML_PARSE_NSCLEAN) { 1632 int i; 1633 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1634 if (ctxt->nsTab[i] == prefix) { 1635 /* in scope */ 1636 if (ctxt->nsTab[i + 1] == URL) 1637 return(-2); 1638 /* out of scope keep it */ 1639 break; 1640 } 1641 } 1642 } 1643 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1644 ctxt->nsMax = 10; 1645 ctxt->nsNr = 0; 1646 ctxt->nsTab = (const xmlChar **) 1647 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1648 if (ctxt->nsTab == NULL) { 1649 xmlErrMemory(ctxt, NULL); 1650 ctxt->nsMax = 0; 1651 return (-1); 1652 } 1653 } else if (ctxt->nsNr >= ctxt->nsMax) { 1654 const xmlChar ** tmp; 1655 ctxt->nsMax *= 2; 1656 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1657 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1658 if (tmp == NULL) { 1659 xmlErrMemory(ctxt, NULL); 1660 ctxt->nsMax /= 2; 1661 return (-1); 1662 } 1663 ctxt->nsTab = tmp; 1664 } 1665 ctxt->nsTab[ctxt->nsNr++] = prefix; 1666 ctxt->nsTab[ctxt->nsNr++] = URL; 1667 return (ctxt->nsNr); 1668 } 1669 /** 1670 * nsPop: 1671 * @ctxt: an XML parser context 1672 * @nr: the number to pop 1673 * 1674 * Pops the top @nr parser prefix/namespace from the ns stack 1675 * 1676 * Returns the number of namespaces removed 1677 */ 1678 static int 1679 nsPop(xmlParserCtxtPtr ctxt, int nr) 1680 { 1681 int i; 1682 1683 if (ctxt->nsTab == NULL) return(0); 1684 if (ctxt->nsNr < nr) { 1685 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1686 nr = ctxt->nsNr; 1687 } 1688 if (ctxt->nsNr <= 0) 1689 return (0); 1690 1691 for (i = 0;i < nr;i++) { 1692 ctxt->nsNr--; 1693 ctxt->nsTab[ctxt->nsNr] = NULL; 1694 } 1695 return(nr); 1696 } 1697 #endif 1698 1699 static int 1700 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1701 const xmlChar **atts; 1702 int *attallocs; 1703 int maxatts; 1704 1705 if (ctxt->atts == NULL) { 1706 maxatts = 55; /* allow for 
10 attrs by default */ 1707 atts = (const xmlChar **) 1708 xmlMalloc(maxatts * sizeof(xmlChar *)); 1709 if (atts == NULL) goto mem_error; 1710 ctxt->atts = atts; 1711 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1712 if (attallocs == NULL) goto mem_error; 1713 ctxt->attallocs = attallocs; 1714 ctxt->maxatts = maxatts; 1715 } else if (nr + 5 > ctxt->maxatts) { 1716 maxatts = (nr + 5) * 2; 1717 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1718 maxatts * sizeof(const xmlChar *)); 1719 if (atts == NULL) goto mem_error; 1720 ctxt->atts = atts; 1721 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1722 (maxatts / 5) * sizeof(int)); 1723 if (attallocs == NULL) goto mem_error; 1724 ctxt->attallocs = attallocs; 1725 ctxt->maxatts = maxatts; 1726 } 1727 return(ctxt->maxatts); 1728 mem_error: 1729 xmlErrMemory(ctxt, NULL); 1730 return(-1); 1731 } 1732 1733 /** 1734 * inputPush: 1735 * @ctxt: an XML parser context 1736 * @value: the parser input 1737 * 1738 * Pushes a new parser input on top of the input stack 1739 * 1740 * Returns -1 in case of error, the index in the stack otherwise 1741 */ 1742 int 1743 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1744 { 1745 if ((ctxt == NULL) || (value == NULL)) 1746 return(-1); 1747 if (ctxt->inputNr >= ctxt->inputMax) { 1748 ctxt->inputMax *= 2; 1749 ctxt->inputTab = 1750 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1751 ctxt->inputMax * 1752 sizeof(ctxt->inputTab[0])); 1753 if (ctxt->inputTab == NULL) { 1754 xmlErrMemory(ctxt, NULL); 1755 ctxt->inputMax /= 2; 1756 return (-1); 1757 } 1758 } 1759 ctxt->inputTab[ctxt->inputNr] = value; 1760 ctxt->input = value; 1761 return (ctxt->inputNr++); 1762 } 1763 /** 1764 * inputPop: 1765 * @ctxt: an XML parser context 1766 * 1767 * Pops the top parser input from the input stack 1768 * 1769 * Returns the input just removed 1770 */ 1771 xmlParserInputPtr 1772 inputPop(xmlParserCtxtPtr ctxt) 1773 { 1774 xmlParserInputPtr ret; 1775 1776 if (ctxt == 
NULL) 1777 return(NULL); 1778 if (ctxt->inputNr <= 0) 1779 return (NULL); 1780 ctxt->inputNr--; 1781 if (ctxt->inputNr > 0) 1782 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1783 else 1784 ctxt->input = NULL; 1785 ret = ctxt->inputTab[ctxt->inputNr]; 1786 ctxt->inputTab[ctxt->inputNr] = NULL; 1787 return (ret); 1788 } 1789 /** 1790 * nodePush: 1791 * @ctxt: an XML parser context 1792 * @value: the element node 1793 * 1794 * Pushes a new element node on top of the node stack 1795 * 1796 * Returns -1 in case of error, the index in the stack otherwise 1797 */ 1798 int 1799 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1800 { 1801 if (ctxt == NULL) return(0); 1802 if (ctxt->nodeNr >= ctxt->nodeMax) { 1803 xmlNodePtr *tmp; 1804 1805 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1806 ctxt->nodeMax * 2 * 1807 sizeof(ctxt->nodeTab[0])); 1808 if (tmp == NULL) { 1809 xmlErrMemory(ctxt, NULL); 1810 return (-1); 1811 } 1812 ctxt->nodeTab = tmp; 1813 ctxt->nodeMax *= 2; 1814 } 1815 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1816 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1817 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1818 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1819 xmlParserMaxDepth); 1820 xmlHaltParser(ctxt); 1821 return(-1); 1822 } 1823 ctxt->nodeTab[ctxt->nodeNr] = value; 1824 ctxt->node = value; 1825 return (ctxt->nodeNr++); 1826 } 1827 1828 /** 1829 * nodePop: 1830 * @ctxt: an XML parser context 1831 * 1832 * Pops the top element node from the node stack 1833 * 1834 * Returns the node just removed 1835 */ 1836 xmlNodePtr 1837 nodePop(xmlParserCtxtPtr ctxt) 1838 { 1839 xmlNodePtr ret; 1840 1841 if (ctxt == NULL) return(NULL); 1842 if (ctxt->nodeNr <= 0) 1843 return (NULL); 1844 ctxt->nodeNr--; 1845 if (ctxt->nodeNr > 0) 1846 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1847 else 1848 ctxt->node = NULL; 1849 ret = ctxt->nodeTab[ctxt->nodeNr]; 1850 ctxt->nodeTab[ctxt->nodeNr] = NULL; 1851 return (ret); 1852 } 1853 1854 
/** 1855 * nameNsPush: 1856 * @ctxt: an XML parser context 1857 * @value: the element name 1858 * @prefix: the element prefix 1859 * @URI: the element namespace name 1860 * @line: the current line number for error messages 1861 * @nsNr: the number of namespaces pushed on the namespace table 1862 * 1863 * Pushes a new element name/prefix/URL on top of the name stack 1864 * 1865 * Returns -1 in case of error, the index in the stack otherwise 1866 */ 1867 static int 1868 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1869 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr) 1870 { 1871 xmlStartTag *tag; 1872 1873 if (ctxt->nameNr >= ctxt->nameMax) { 1874 const xmlChar * *tmp; 1875 xmlStartTag *tmp2; 1876 ctxt->nameMax *= 2; 1877 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1878 ctxt->nameMax * 1879 sizeof(ctxt->nameTab[0])); 1880 if (tmp == NULL) { 1881 ctxt->nameMax /= 2; 1882 goto mem_error; 1883 } 1884 ctxt->nameTab = tmp; 1885 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab, 1886 ctxt->nameMax * 1887 sizeof(ctxt->pushTab[0])); 1888 if (tmp2 == NULL) { 1889 ctxt->nameMax /= 2; 1890 goto mem_error; 1891 } 1892 ctxt->pushTab = tmp2; 1893 } else if (ctxt->pushTab == NULL) { 1894 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax * 1895 sizeof(ctxt->pushTab[0])); 1896 if (ctxt->pushTab == NULL) 1897 goto mem_error; 1898 } 1899 ctxt->nameTab[ctxt->nameNr] = value; 1900 ctxt->name = value; 1901 tag = &ctxt->pushTab[ctxt->nameNr]; 1902 tag->prefix = prefix; 1903 tag->URI = URI; 1904 tag->line = line; 1905 tag->nsNr = nsNr; 1906 return (ctxt->nameNr++); 1907 mem_error: 1908 xmlErrMemory(ctxt, NULL); 1909 return (-1); 1910 } 1911 #ifdef LIBXML_PUSH_ENABLED 1912 /** 1913 * nameNsPop: 1914 * @ctxt: an XML parser context 1915 * 1916 * Pops the top element/prefix/URI name from the name stack 1917 * 1918 * Returns the name just removed 1919 */ 1920 static const xmlChar * 1921 nameNsPop(xmlParserCtxtPtr ctxt) 1922 { 1923 
const xmlChar *ret; 1924 1925 if (ctxt->nameNr <= 0) 1926 return (NULL); 1927 ctxt->nameNr--; 1928 if (ctxt->nameNr > 0) 1929 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1930 else 1931 ctxt->name = NULL; 1932 ret = ctxt->nameTab[ctxt->nameNr]; 1933 ctxt->nameTab[ctxt->nameNr] = NULL; 1934 return (ret); 1935 } 1936 #endif /* LIBXML_PUSH_ENABLED */ 1937 1938 /** 1939 * namePush: 1940 * @ctxt: an XML parser context 1941 * @value: the element name 1942 * 1943 * Pushes a new element name on top of the name stack 1944 * 1945 * Returns -1 in case of error, the index in the stack otherwise 1946 */ 1947 int 1948 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1949 { 1950 if (ctxt == NULL) return (-1); 1951 1952 if (ctxt->nameNr >= ctxt->nameMax) { 1953 const xmlChar * *tmp; 1954 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1955 ctxt->nameMax * 2 * 1956 sizeof(ctxt->nameTab[0])); 1957 if (tmp == NULL) { 1958 goto mem_error; 1959 } 1960 ctxt->nameTab = tmp; 1961 ctxt->nameMax *= 2; 1962 } 1963 ctxt->nameTab[ctxt->nameNr] = value; 1964 ctxt->name = value; 1965 return (ctxt->nameNr++); 1966 mem_error: 1967 xmlErrMemory(ctxt, NULL); 1968 return (-1); 1969 } 1970 /** 1971 * namePop: 1972 * @ctxt: an XML parser context 1973 * 1974 * Pops the top element name from the name stack 1975 * 1976 * Returns the name just removed 1977 */ 1978 const xmlChar * 1979 namePop(xmlParserCtxtPtr ctxt) 1980 { 1981 const xmlChar *ret; 1982 1983 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1984 return (NULL); 1985 ctxt->nameNr--; 1986 if (ctxt->nameNr > 0) 1987 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1988 else 1989 ctxt->name = NULL; 1990 ret = ctxt->nameTab[ctxt->nameNr]; 1991 ctxt->nameTab[ctxt->nameNr] = NULL; 1992 return (ret); 1993 } 1994 1995 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1996 if (ctxt->spaceNr >= ctxt->spaceMax) { 1997 int *tmp; 1998 1999 ctxt->spaceMax *= 2; 2000 tmp = (int *) xmlRealloc(ctxt->spaceTab, 2001 ctxt->spaceMax * 
sizeof(ctxt->spaceTab[0])); 2002 if (tmp == NULL) { 2003 xmlErrMemory(ctxt, NULL); 2004 ctxt->spaceMax /=2; 2005 return(-1); 2006 } 2007 ctxt->spaceTab = tmp; 2008 } 2009 ctxt->spaceTab[ctxt->spaceNr] = val; 2010 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 2011 return(ctxt->spaceNr++); 2012 } 2013 2014 static int spacePop(xmlParserCtxtPtr ctxt) { 2015 int ret; 2016 if (ctxt->spaceNr <= 0) return(0); 2017 ctxt->spaceNr--; 2018 if (ctxt->spaceNr > 0) 2019 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 2020 else 2021 ctxt->space = &ctxt->spaceTab[0]; 2022 ret = ctxt->spaceTab[ctxt->spaceNr]; 2023 ctxt->spaceTab[ctxt->spaceNr] = -1; 2024 return(ret); 2025 } 2026 2027 /* 2028 * Macros for accessing the content. Those should be used only by the parser, 2029 * and not exported. 2030 * 2031 * Dirty macros, i.e. one often need to make assumption on the context to 2032 * use them 2033 * 2034 * CUR_PTR return the current pointer to the xmlChar to be parsed. 2035 * To be used with extreme caution since operations consuming 2036 * characters may move the input buffer to a different location ! 2037 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 2038 * This should be used internally by the parser 2039 * only to compare to ASCII values otherwise it would break when 2040 * running with UTF-8 encoding. 2041 * RAW same as CUR but in the input buffer, bypass any token 2042 * extraction that may have been done 2043 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 2044 * to compare on ASCII based substring. 2045 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 2046 * strings without newlines within the parser. 2047 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2048 * defined char within the parser. 
2049 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2050 * 2051 * NEXT Skip to the next character, this does the proper decoding 2052 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2053 * NEXTL(l) Skip the current unicode character of l xmlChars long. 2054 * CUR_CHAR(l) returns the current unicode character (int), set l 2055 * to the number of xmlChars used for the encoding [0-5]. 2056 * CUR_SCHAR same but operate on a string instead of the context 2057 * COPY_BUF copy the current unicode char to the target buffer, increment 2058 * the index 2059 * GROW, SHRINK handling of input buffers 2060 */ 2061 2062 #define RAW (*ctxt->input->cur) 2063 #define CUR (*ctxt->input->cur) 2064 #define NXT(val) ctxt->input->cur[(val)] 2065 #define CUR_PTR ctxt->input->cur 2066 #define BASE_PTR ctxt->input->base 2067 2068 #define CMP4( s, c1, c2, c3, c4 ) \ 2069 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2070 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2071 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 2072 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2073 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2074 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2075 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2076 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2077 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2078 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2079 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2080 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2081 ((unsigned char *) s)[ 8 ] == c9 ) 2082 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2083 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2084 ((unsigned char *) s)[ 9 ] == c10 ) 2085 2086 #define SKIP(val) do { \ 2087 ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2088 if 
(*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)

/*
 * SKIPL: advance the input cursor by `val` bytes, one byte at a time,
 * keeping the line/column counters up to date (a '\n' starts a new line
 * and resets the column to 1). Once done, ask for more input if the
 * cursor sits on a 0 byte.
 */
#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<val; skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)

/*
 * SHRINK: when not in progressive mode, and only once more than
 * 2 * INPUT_CHUNK bytes lie behind the cursor while fewer than
 * 2 * INPUT_CHUNK bytes remain ahead of it, call xmlSHRINK().
 * NOTE: expands to a bare `if` statement that already ends in `;`.
 */
#define SHRINK if ((ctxt->progressive == 0) && \
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlSHRINK (ctxt);

/*
 * xmlSHRINK:
 * @ctxt:  the XML parser context
 *
 * Out-of-line part of SHRINK: shrink the current input and, if the
 * cursor then sits on a 0 byte, try to read another INPUT_CHUNK.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}

/*
 * GROW: when not in progressive mode and fewer than INPUT_CHUNK bytes
 * remain ahead of the cursor, call xmlGROW().
 * NOTE: expands to a bare `if` statement that already ends in `;`.
 */
#define GROW if ((ctxt->progressive == 0) && \
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlGROW (ctxt);

/*
 * xmlGROW:
 * @ctxt:  the XML parser context
 *
 * Out-of-line part of GROW. Unless XML_PARSE_HUGE is set, an input that
 * has a buffer whose read callback is not xmlInputReadCallbackNop is
 * refused (fatal error, parser halted) once either the remaining data or
 * the already consumed part of the buffer exceeds XML_MAX_LOOKUP_LIMIT.
 * Otherwise the input is grown by INPUT_CHUNK and the cursor is checked
 * against the buffer bounds afterwards.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) &&
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /*
         * NOTE(review): here the parser is halted before the error is
         * raised, the reverse of the branch above. Presumably this keeps
         * error reporting away from the out-of-range cursor -- confirm
         * against xmlHaltParser/xmlFatalErr before reordering.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}

/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte and one column without any newline
 * handling, then ask for more input if the cursor sits on a 0 byte.
 * NOTE: expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL: advance the cursor by `l` bytes, counting it as one column, or
 * as a new line when the byte under the cursor is '\n'. Does not grow
 * the input.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += l; \
  } while (0)

/* CUR_CHAR / CUR_SCHAR: wrappers passing &l as the length out-parameter. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF: append value `v` to buffer `b` at index `i`, advancing `i`.
 * A length `l` of 1 stores a single byte, anything else goes through
 * xmlCopyCharMultiByte().
 * NOTE: expands to an unbraced if/else and evaluates its arguments
 * textually.
 */
#define COPY_BUF(l,b,i,v) \
    if (l == 1) b[i++] = (xmlChar) v; \
    else i += xmlCopyCharMultiByte(&b[i],v)

/* CUR_CONSUMED: input->consumed plus the cursor offset from input->base. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Two paths are used. The fast path scans the current input directly and
 * is taken when either there is a single input and the state is not
 * XML_PARSER_DTD, or the state is XML_PARSER_START. The other path also
 * follows '%' parameter-entity references (only when ctxt->external is
 * set or more than one input is stacked) and pops exhausted inputs.
 *
 * Returns the number of space chars skipped, saturating at INT_MAX. On
 * the second path each parameter-entity reference entered and each input
 * popped is counted as well.
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
        (ctxt->instate == XML_PARSER_START)) {
        const xmlChar *cur;
        /*
         * if we are in the document content, go really fast
         */
        cur = ctxt->input->cur;
        while (IS_BLANK_CH(*cur)) {
            if (*cur == '\n') {
                ctxt->input->line++; ctxt->input->col = 1;
            } else {
                ctxt->input->col++;
            }
            cur++;
            /* saturate rather than overflow the counter */
            if (res < INT_MAX)
                res++;
            if (*cur == 0) {
                /* publish the position before growing, then reload it */
                ctxt->input->cur = cur;
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
        }
        ctxt->input->cur = cur;
    } else {
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));

        while (1) {
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
                NEXT;
            } else if (CUR == '%') {
                /*
                 * Need to handle support of entities branching here
                 */
                if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
                    break;
                xmlParsePEReference(ctxt);
            } else if (CUR == 0) {
                /* end of this input: pop it unless it is the last one */
                if (ctxt->inputNr <= 1)
                    break;
                xmlPopInput(ctxt);
            } else {
                break;
            }

            /*
             * Also increase the counter when entering or exiting a PERef.
             * The spec says: "When a parameter-entity reference is recognized
             * in the DTD and included, its replacement text MUST be enlarged
             * by the attachment of one leading and one following space (#x20)
             * character."
             */
            if (res < INT_MAX)
                res++;
        }
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
2259 * 2260 * Returns the current xmlChar in the parser context 2261 */ 2262 xmlChar 2263 xmlPopInput(xmlParserCtxtPtr ctxt) { 2264 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2265 if (xmlParserDebugEntities) 2266 xmlGenericError(xmlGenericErrorContext, 2267 "Popping input %d\n", ctxt->inputNr); 2268 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) && 2269 (ctxt->instate != XML_PARSER_EOF)) 2270 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2271 "Unfinished entity outside the DTD"); 2272 xmlFreeInputStream(inputPop(ctxt)); 2273 if (*ctxt->input->cur == 0) 2274 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2275 return(CUR); 2276 } 2277 2278 /** 2279 * xmlPushInput: 2280 * @ctxt: an XML parser context 2281 * @input: an XML parser input fragment (entity, XML fragment ...). 2282 * 2283 * xmlPushInput: switch to a new input stream which is stacked on top 2284 * of the previous one(s). 2285 * Returns -1 in case of error or the index in the input stack 2286 */ 2287 int 2288 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2289 int ret; 2290 if (input == NULL) return(-1); 2291 2292 if (xmlParserDebugEntities) { 2293 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2294 xmlGenericError(xmlGenericErrorContext, 2295 "%s(%d): ", ctxt->input->filename, 2296 ctxt->input->line); 2297 xmlGenericError(xmlGenericErrorContext, 2298 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2299 } 2300 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2301 (ctxt->inputNr > 1024)) { 2302 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2303 while (ctxt->inputNr > 1) 2304 xmlFreeInputStream(inputPop(ctxt)); 2305 return(-1); 2306 } 2307 ret = inputPush(ctxt, input); 2308 if (ctxt->instate == XML_PARSER_EOF) 2309 return(-1); 2310 GROW; 2311 return(ret); 2312 } 2313 2314 /** 2315 * xmlParseCharRef: 2316 * @ctxt: an XML parser context 2317 * 2318 * parse Reference declarations 2319 * 2320 * [66] CharRef ::= '&#' [0-9]+ ';' | 2321 * '&#x' 
[0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * Any number of digits is accepted, leading zeros included: the
 * accumulated value saturates at 0x110000, which is then reported as
 * out of bounds, so it cannot overflow.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    int val = 0;
    int count = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
        SKIP(3);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            /* count only paces the GROW calls, it does not limit digits */
            if (count++ > 20) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
            }
            /*
             * Hex letters are valid at any position. They used to be
             * accepted only while count < 20, which rejected well-formed
             * references such as "&#x0000000000A;".
             */
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 16 + (CUR - '0');
            else if ((RAW >= 'a') && (RAW <= 'f'))
                val = val * 16 + (CUR - 'a') + 10;
            else if ((RAW >= 'A') && (RAW <= 'F'))
                val = val * 16 + (CUR - 'A') + 10;
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
                val = 0;
                break;
            }
            /* saturate so that further digits cannot overflow val */
            if (val > 0x110000)
                val = 0x110000;

            NEXT;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->input->cur++;
        }
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
        SKIP(2);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            if (count++ > 20) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
            }
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 10 + (CUR - '0');
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
                val = 0;
                break;
            }
            /* saturate so that further digits cannot overflow val */
            if (val > 0x110000)
                val = 0x110000;

            NEXT;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->input->cur++;
        }
    } else {
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if (val >= 0x110000) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                "xmlParseCharRef: character reference out of bounds\n",
                val);
    } else if (IS_CHAR(val)) {
        return(val);
    } else {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
                          val);
    }
    return(0);
}

/**
 * xmlParseStringCharRef:
 * @ctxt:  an XML parser context
 * @str:  a pointer to an index in the string
 *
 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
2435 * 2436 * Returns the value parsed (as an int), 0 in case of error, str will be 2437 * updated to the current value of the index 2438 */ 2439 static int 2440 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2441 const xmlChar *ptr; 2442 xmlChar cur; 2443 int val = 0; 2444 2445 if ((str == NULL) || (*str == NULL)) return(0); 2446 ptr = *str; 2447 cur = *ptr; 2448 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2449 ptr += 3; 2450 cur = *ptr; 2451 while (cur != ';') { /* Non input consuming loop */ 2452 if ((cur >= '0') && (cur <= '9')) 2453 val = val * 16 + (cur - '0'); 2454 else if ((cur >= 'a') && (cur <= 'f')) 2455 val = val * 16 + (cur - 'a') + 10; 2456 else if ((cur >= 'A') && (cur <= 'F')) 2457 val = val * 16 + (cur - 'A') + 10; 2458 else { 2459 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2460 val = 0; 2461 break; 2462 } 2463 if (val > 0x110000) 2464 val = 0x110000; 2465 2466 ptr++; 2467 cur = *ptr; 2468 } 2469 if (cur == ';') 2470 ptr++; 2471 } else if ((cur == '&') && (ptr[1] == '#')){ 2472 ptr += 2; 2473 cur = *ptr; 2474 while (cur != ';') { /* Non input consuming loops */ 2475 if ((cur >= '0') && (cur <= '9')) 2476 val = val * 10 + (cur - '0'); 2477 else { 2478 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2479 val = 0; 2480 break; 2481 } 2482 if (val > 0x110000) 2483 val = 0x110000; 2484 2485 ptr++; 2486 cur = *ptr; 2487 } 2488 if (cur == ';') 2489 ptr++; 2490 } else { 2491 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2492 return(0); 2493 } 2494 *str = ptr; 2495 2496 /* 2497 * [ WFC: Legal Character ] 2498 * Characters referred to using character references must match the 2499 * production for Char. 
2500 */ 2501 if (val >= 0x110000) { 2502 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2503 "xmlParseStringCharRef: character reference out of bounds\n", 2504 val); 2505 } else if (IS_CHAR(val)) { 2506 return(val); 2507 } else { 2508 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2509 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2510 val); 2511 } 2512 return(0); 2513 } 2514 2515 /** 2516 * xmlParserHandlePEReference: 2517 * @ctxt: the parser context 2518 * 2519 * [69] PEReference ::= '%' Name ';' 2520 * 2521 * [ WFC: No Recursion ] 2522 * A parsed entity must not contain a recursive 2523 * reference to itself, either directly or indirectly. 2524 * 2525 * [ WFC: Entity Declared ] 2526 * In a document without any DTD, a document with only an internal DTD 2527 * subset which contains no parameter entity references, or a document 2528 * with "standalone='yes'", ... ... The declaration of a parameter 2529 * entity must precede any reference to it... 2530 * 2531 * [ VC: Entity Declared ] 2532 * In a document with an external subset or external parameter entities 2533 * with "standalone='no'", ... ... The declaration of a parameter entity 2534 * must precede any reference to it... 2535 * 2536 * [ WFC: In DTD ] 2537 * Parameter-entity references may only appear in the DTD. 2538 * NOTE: misleading but this is handled. 2539 * 2540 * A PEReference may have been detected in the current input stream 2541 * the handling is done accordingly to 2542 * http://www.w3.org/TR/REC-xml#entproc 2543 * i.e. 
2544 * - Included in literal in entity values 2545 * - Included as Parameter Entity reference within DTDs 2546 */ 2547 void 2548 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2549 switch(ctxt->instate) { 2550 case XML_PARSER_CDATA_SECTION: 2551 return; 2552 case XML_PARSER_COMMENT: 2553 return; 2554 case XML_PARSER_START_TAG: 2555 return; 2556 case XML_PARSER_END_TAG: 2557 return; 2558 case XML_PARSER_EOF: 2559 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2560 return; 2561 case XML_PARSER_PROLOG: 2562 case XML_PARSER_START: 2563 case XML_PARSER_MISC: 2564 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2565 return; 2566 case XML_PARSER_ENTITY_DECL: 2567 case XML_PARSER_CONTENT: 2568 case XML_PARSER_ATTRIBUTE_VALUE: 2569 case XML_PARSER_PI: 2570 case XML_PARSER_SYSTEM_LITERAL: 2571 case XML_PARSER_PUBLIC_LITERAL: 2572 /* we just ignore it there */ 2573 return; 2574 case XML_PARSER_EPILOG: 2575 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2576 return; 2577 case XML_PARSER_ENTITY_VALUE: 2578 /* 2579 * NOTE: in the case of entity values, we don't do the 2580 * substitution here since we need the literal 2581 * entity value to be able to save the internal 2582 * subset of the document. 2583 * This will be handled by xmlStringDecodeEntities 2584 */ 2585 return; 2586 case XML_PARSER_DTD: 2587 /* 2588 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2589 * In the internal DTD subset, parameter-entity references 2590 * can occur only where markup declarations can occur, not 2591 * within markup declarations. 2592 * In that case this is handled in xmlParseMarkupDecl 2593 */ 2594 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2595 return; 2596 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2597 return; 2598 break; 2599 case XML_PARSER_IGNORE: 2600 return; 2601 } 2602 2603 xmlParsePEReference(ctxt); 2604 } 2605 2606 /* 2607 * Macro used to grow the current buffer. 
* buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the old one plus n. A wrap-around of that
 * computation is treated like an allocation failure. On failure the
 * original buffer is left untouched, so mem_error can still free it.
 */
#define growBuffer(buffer, n) { \
    xmlChar *tmp; \
    size_t new_size = buffer##_size * 2 + n; \
    if (new_size < buffer##_size) goto mem_error; \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
    if (tmp == NULL) goto mem_error; \
    buffer = tmp; \
    buffer##_size = new_size; \
}

/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Character references are always decoded. General entity references are
 * expanded only with XML_SUBSTITUTE_REF and parameter entity references
 * only with XML_SUBSTITUTE_PEREF; entity content is expanded recursively
 * through xmlStringDecodeEntities(). The recursion is refused with
 * XML_ERR_ENTITY_LOOP once ctxt->depth exceeds 40 (1024 with
 * XML_PARSE_HUGE).
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments
 *      (NULL @ctxt or @str, negative @len) and on any error.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

        /* NOTE(review): redundant, the loop condition already excludes 0 */
        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: str is advanced by the helper */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            /* a length of 0 forces the multi-byte encoding path */
            COPY_BUF(0,buffer,nbchars,val);
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            /* NOTE(review): the result of this check is ignored here */
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: copy the first unit of its content */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* entity with content: expand it recursively, then append */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL) {
                    /*
                     * The entity content is truncated to an empty string
                     * when its expansion fails (presumably so that it is
                     * not expanded again -- confirm).
                     */
                    ent->content[0] = 0;
                    goto int_error;
                }

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* entity without content: emit the reference unchanged */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                            "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            /* NOTE(review): the result of this check is ignored here */
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEReferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                  "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL) {
                    /* content may still be NULL if it was not loaded */
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* ordinary character: copy it and step over its l bytes */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    xmlErrMemory(ctxt, NULL);
    /* falls through to the common cleanup */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 * Thin wrapper around xmlStringLenDecodeEntities() which uses
 * xmlStrlen(@str) as the length.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL if @ctxt or @str is NULL or on error.
 */
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
                        xmlChar end, xmlChar  end2, xmlChar end3) {
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
           end, end2, end3));
}

/************************************************************************
 *									*
 *		Commodity functions, cleanup needed ?
* 2847 * * 2848 ************************************************************************/ 2849 2850 /** 2851 * areBlanks: 2852 * @ctxt: an XML parser context 2853 * @str: a xmlChar * 2854 * @len: the size of @str 2855 * @blank_chars: we know the chars are blanks 2856 * 2857 * Is this a sequence of blank chars that one can ignore ? 2858 * 2859 * Returns 1 if ignorable 0 otherwise. 2860 */ 2861 2862 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2863 int blank_chars) { 2864 int i, ret; 2865 xmlNodePtr lastChild; 2866 2867 /* 2868 * Don't spend time trying to differentiate them, the same callback is 2869 * used ! 2870 */ 2871 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2872 return(0); 2873 2874 /* 2875 * Check for xml:space value. 2876 */ 2877 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2878 (*(ctxt->space) == -2)) 2879 return(0); 2880 2881 /* 2882 * Check that the string is made of blanks 2883 */ 2884 if (blank_chars == 0) { 2885 for (i = 0;i < len;i++) 2886 if (!(IS_BLANK_CH(str[i]))) return(0); 2887 } 2888 2889 /* 2890 * Look if the element is mixed content in the DTD if available 2891 */ 2892 if (ctxt->node == NULL) return(0); 2893 if (ctxt->myDoc != NULL) { 2894 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2895 if (ret == 0) return(1); 2896 if (ret == 1) return(0); 2897 } 2898 2899 /* 2900 * Otherwise, heuristic :-\ 2901 */ 2902 if ((RAW != '<') && (RAW != 0xD)) return(0); 2903 if ((ctxt->node->children == NULL) && 2904 (RAW == '<') && (NXT(1) == '/')) return(0); 2905 2906 lastChild = xmlGetLastChild(ctxt->node); 2907 if (lastChild == NULL) { 2908 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2909 (ctxt->node->content != NULL)) return(0); 2910 } else if (xmlNodeIsText(lastChild)) 2911 return(0); 2912 else if ((ctxt->node->children != NULL) && 2913 (xmlNodeIsText(ctxt->node->children))) 2914 return(0); 2915 return(1); 2916 } 2917 2918 
/************************************************************************ 2919 * * 2920 * Extra stuff for namespace support * 2921 * Relates to http://www.w3.org/TR/WD-xml-names * 2922 * * 2923 ************************************************************************/ 2924 2925 /** 2926 * xmlSplitQName: 2927 * @ctxt: an XML parser context 2928 * @name: an XML parser context 2929 * @prefix: a xmlChar ** 2930 * 2931 * parse an UTF8 encoded XML qualified name string 2932 * 2933 * [NS 5] QName ::= (Prefix ':')? LocalPart 2934 * 2935 * [NS 6] Prefix ::= NCName 2936 * 2937 * [NS 7] LocalPart ::= NCName 2938 * 2939 * Returns the local part, and prefix is updated 2940 * to get the Prefix if any. 2941 */ 2942 2943 xmlChar * 2944 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2945 xmlChar buf[XML_MAX_NAMELEN + 5]; 2946 xmlChar *buffer = NULL; 2947 int len = 0; 2948 int max = XML_MAX_NAMELEN; 2949 xmlChar *ret = NULL; 2950 const xmlChar *cur = name; 2951 int c; 2952 2953 if (prefix == NULL) return(NULL); 2954 *prefix = NULL; 2955 2956 if (cur == NULL) return(NULL); 2957 2958 #ifndef XML_XML_NAMESPACE 2959 /* xml: prefix is not really a namespace */ 2960 if ((cur[0] == 'x') && (cur[1] == 'm') && 2961 (cur[2] == 'l') && (cur[3] == ':')) 2962 return(xmlStrdup(name)); 2963 #endif 2964 2965 /* nasty but well=formed */ 2966 if (cur[0] == ':') 2967 return(xmlStrdup(name)); 2968 2969 c = *cur++; 2970 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2971 buf[len++] = c; 2972 c = *cur++; 2973 } 2974 if (len >= max) { 2975 /* 2976 * Okay someone managed to make a huge name, so he's ready to pay 2977 * for the processing speed. 
2978 */ 2979 max = len * 2; 2980 2981 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2982 if (buffer == NULL) { 2983 xmlErrMemory(ctxt, NULL); 2984 return(NULL); 2985 } 2986 memcpy(buffer, buf, len); 2987 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2988 if (len + 10 > max) { 2989 xmlChar *tmp; 2990 2991 max *= 2; 2992 tmp = (xmlChar *) xmlRealloc(buffer, 2993 max * sizeof(xmlChar)); 2994 if (tmp == NULL) { 2995 xmlFree(buffer); 2996 xmlErrMemory(ctxt, NULL); 2997 return(NULL); 2998 } 2999 buffer = tmp; 3000 } 3001 buffer[len++] = c; 3002 c = *cur++; 3003 } 3004 buffer[len] = 0; 3005 } 3006 3007 if ((c == ':') && (*cur == 0)) { 3008 if (buffer != NULL) 3009 xmlFree(buffer); 3010 *prefix = NULL; 3011 return(xmlStrdup(name)); 3012 } 3013 3014 if (buffer == NULL) 3015 ret = xmlStrndup(buf, len); 3016 else { 3017 ret = buffer; 3018 buffer = NULL; 3019 max = XML_MAX_NAMELEN; 3020 } 3021 3022 3023 if (c == ':') { 3024 c = *cur; 3025 *prefix = ret; 3026 if (c == 0) { 3027 return(xmlStrndup(BAD_CAST "", 0)); 3028 } 3029 len = 0; 3030 3031 /* 3032 * Check that the first character is proper to start 3033 * a new name 3034 */ 3035 if (!(((c >= 0x61) && (c <= 0x7A)) || 3036 ((c >= 0x41) && (c <= 0x5A)) || 3037 (c == '_') || (c == ':'))) { 3038 int l; 3039 int first = CUR_SCHAR(cur, l); 3040 3041 if (!IS_LETTER(first) && (first != '_')) { 3042 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3043 "Name %s is not XML Namespace compliant\n", 3044 name); 3045 } 3046 } 3047 cur++; 3048 3049 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3050 buf[len++] = c; 3051 c = *cur++; 3052 } 3053 if (len >= max) { 3054 /* 3055 * Okay someone managed to make a huge name, so he's ready to pay 3056 * for the processing speed. 
3057 */ 3058 max = len * 2; 3059 3060 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3061 if (buffer == NULL) { 3062 xmlErrMemory(ctxt, NULL); 3063 return(NULL); 3064 } 3065 memcpy(buffer, buf, len); 3066 while (c != 0) { /* tested bigname2.xml */ 3067 if (len + 10 > max) { 3068 xmlChar *tmp; 3069 3070 max *= 2; 3071 tmp = (xmlChar *) xmlRealloc(buffer, 3072 max * sizeof(xmlChar)); 3073 if (tmp == NULL) { 3074 xmlErrMemory(ctxt, NULL); 3075 xmlFree(buffer); 3076 return(NULL); 3077 } 3078 buffer = tmp; 3079 } 3080 buffer[len++] = c; 3081 c = *cur++; 3082 } 3083 buffer[len] = 0; 3084 } 3085 3086 if (buffer == NULL) 3087 ret = xmlStrndup(buf, len); 3088 else { 3089 ret = buffer; 3090 } 3091 } 3092 3093 return(ret); 3094 } 3095 3096 /************************************************************************ 3097 * * 3098 * The parser itself * 3099 * Relates to http://www.w3.org/TR/REC-xml * 3100 * * 3101 ************************************************************************/ 3102 3103 /************************************************************************ 3104 * * 3105 * Routines to parse Name, NCName and NmToken * 3106 * * 3107 ************************************************************************/ 3108 #ifdef DEBUG 3109 static unsigned long nbParseName = 0; 3110 static unsigned long nbParseNmToken = 0; 3111 static unsigned long nbParseNCName = 0; 3112 static unsigned long nbParseNCNameComplex = 0; 3113 static unsigned long nbParseNameComplex = 0; 3114 static unsigned long nbParseStringName = 0; 3115 #endif 3116 3117 /* 3118 * The two following functions are related to the change of accepted 3119 * characters for Name and NmToken in the Revision 5 of XML-1.0 3120 * They correspond to the modified production [4] and the new production [4a] 3121 * changes in that revision. Also note that the macros used for the 3122 * productions Letter, Digit, CombiningChar and Extender are not needed 3123 * anymore. 
3124 * We still keep compatibility to pre-revision5 parsing semantic if the 3125 * new XML_PARSE_OLD10 option is given to the parser. 3126 */ 3127 static int 3128 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3129 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3130 /* 3131 * Use the new checks of production [4] [4a] amd [5] of the 3132 * Update 5 of XML-1.0 3133 */ 3134 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3135 (((c >= 'a') && (c <= 'z')) || 3136 ((c >= 'A') && (c <= 'Z')) || 3137 (c == '_') || (c == ':') || 3138 ((c >= 0xC0) && (c <= 0xD6)) || 3139 ((c >= 0xD8) && (c <= 0xF6)) || 3140 ((c >= 0xF8) && (c <= 0x2FF)) || 3141 ((c >= 0x370) && (c <= 0x37D)) || 3142 ((c >= 0x37F) && (c <= 0x1FFF)) || 3143 ((c >= 0x200C) && (c <= 0x200D)) || 3144 ((c >= 0x2070) && (c <= 0x218F)) || 3145 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3146 ((c >= 0x3001) && (c <= 0xD7FF)) || 3147 ((c >= 0xF900) && (c <= 0xFDCF)) || 3148 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3149 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3150 return(1); 3151 } else { 3152 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3153 return(1); 3154 } 3155 return(0); 3156 } 3157 3158 static int 3159 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3160 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3161 /* 3162 * Use the new checks of production [4] [4a] amd [5] of the 3163 * Update 5 of XML-1.0 3164 */ 3165 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3166 (((c >= 'a') && (c <= 'z')) || 3167 ((c >= 'A') && (c <= 'Z')) || 3168 ((c >= '0') && (c <= '9')) || /* !start */ 3169 (c == '_') || (c == ':') || 3170 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3171 ((c >= 0xC0) && (c <= 0xD6)) || 3172 ((c >= 0xD8) && (c <= 0xF6)) || 3173 ((c >= 0xF8) && (c <= 0x2FF)) || 3174 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3175 ((c >= 0x370) && (c <= 0x37D)) || 3176 ((c >= 0x37F) && (c <= 0x1FFF)) || 3177 ((c >= 0x200C) && (c <= 0x200D)) || 3178 ((c >= 0x203F) && (c <= 0x2040)) || /* !start 
*/ 3179 ((c >= 0x2070) && (c <= 0x218F)) || 3180 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3181 ((c >= 0x3001) && (c <= 0xD7FF)) || 3182 ((c >= 0xF900) && (c <= 0xFDCF)) || 3183 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3184 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3185 return(1); 3186 } else { 3187 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3188 (c == '.') || (c == '-') || 3189 (c == '_') || (c == ':') || 3190 (IS_COMBINING(c)) || 3191 (IS_EXTENDER(c))) 3192 return(1); 3193 } 3194 return(0); 3195 } 3196 3197 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3198 int *len, int *alloc, int normalize); 3199 3200 static const xmlChar * 3201 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3202 int len = 0, l; 3203 int c; 3204 int count = 0; 3205 3206 #ifdef DEBUG 3207 nbParseNameComplex++; 3208 #endif 3209 3210 /* 3211 * Handler for more complex cases 3212 */ 3213 GROW; 3214 if (ctxt->instate == XML_PARSER_EOF) 3215 return(NULL); 3216 c = CUR_CHAR(l); 3217 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3218 /* 3219 * Use the new checks of production [4] [4a] amd [5] of the 3220 * Update 5 of XML-1.0 3221 */ 3222 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3223 (!(((c >= 'a') && (c <= 'z')) || 3224 ((c >= 'A') && (c <= 'Z')) || 3225 (c == '_') || (c == ':') || 3226 ((c >= 0xC0) && (c <= 0xD6)) || 3227 ((c >= 0xD8) && (c <= 0xF6)) || 3228 ((c >= 0xF8) && (c <= 0x2FF)) || 3229 ((c >= 0x370) && (c <= 0x37D)) || 3230 ((c >= 0x37F) && (c <= 0x1FFF)) || 3231 ((c >= 0x200C) && (c <= 0x200D)) || 3232 ((c >= 0x2070) && (c <= 0x218F)) || 3233 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3234 ((c >= 0x3001) && (c <= 0xD7FF)) || 3235 ((c >= 0xF900) && (c <= 0xFDCF)) || 3236 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3237 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3238 return(NULL); 3239 } 3240 len += l; 3241 NEXTL(l); 3242 c = CUR_CHAR(l); 3243 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3244 (((c >= 'a') && (c <= 'z')) || 3245 ((c >= 'A') && (c <= 'Z')) || 3246 
((c >= '0') && (c <= '9')) || /* !start */ 3247 (c == '_') || (c == ':') || 3248 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3249 ((c >= 0xC0) && (c <= 0xD6)) || 3250 ((c >= 0xD8) && (c <= 0xF6)) || 3251 ((c >= 0xF8) && (c <= 0x2FF)) || 3252 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3253 ((c >= 0x370) && (c <= 0x37D)) || 3254 ((c >= 0x37F) && (c <= 0x1FFF)) || 3255 ((c >= 0x200C) && (c <= 0x200D)) || 3256 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3257 ((c >= 0x2070) && (c <= 0x218F)) || 3258 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3259 ((c >= 0x3001) && (c <= 0xD7FF)) || 3260 ((c >= 0xF900) && (c <= 0xFDCF)) || 3261 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3262 ((c >= 0x10000) && (c <= 0xEFFFF)) 3263 )) { 3264 if (count++ > XML_PARSER_CHUNK_SIZE) { 3265 count = 0; 3266 GROW; 3267 if (ctxt->instate == XML_PARSER_EOF) 3268 return(NULL); 3269 } 3270 len += l; 3271 NEXTL(l); 3272 c = CUR_CHAR(l); 3273 } 3274 } else { 3275 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3276 (!IS_LETTER(c) && (c != '_') && 3277 (c != ':'))) { 3278 return(NULL); 3279 } 3280 len += l; 3281 NEXTL(l); 3282 c = CUR_CHAR(l); 3283 3284 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3285 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3286 (c == '.') || (c == '-') || 3287 (c == '_') || (c == ':') || 3288 (IS_COMBINING(c)) || 3289 (IS_EXTENDER(c)))) { 3290 if (count++ > XML_PARSER_CHUNK_SIZE) { 3291 count = 0; 3292 GROW; 3293 if (ctxt->instate == XML_PARSER_EOF) 3294 return(NULL); 3295 } 3296 len += l; 3297 NEXTL(l); 3298 c = CUR_CHAR(l); 3299 } 3300 } 3301 if ((len > XML_MAX_NAME_LENGTH) && 3302 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3303 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3304 return(NULL); 3305 } 3306 if (ctxt->input->cur - ctxt->input->base < len) { 3307 /* 3308 * There were a couple of bugs where PERefs lead to to a change 3309 * of the buffer. Check the buffer size to avoid passing an invalid 3310 * pointer to xmlDictLookup. 
3311 */ 3312 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 3313 "unexpected change of input buffer"); 3314 return (NULL); 3315 } 3316 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3317 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3318 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3319 } 3320 3321 /** 3322 * xmlParseName: 3323 * @ctxt: an XML parser context 3324 * 3325 * parse an XML name. 3326 * 3327 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3328 * CombiningChar | Extender 3329 * 3330 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3331 * 3332 * [6] Names ::= Name (#x20 Name)* 3333 * 3334 * Returns the Name parsed or NULL 3335 */ 3336 3337 const xmlChar * 3338 xmlParseName(xmlParserCtxtPtr ctxt) { 3339 const xmlChar *in; 3340 const xmlChar *ret; 3341 int count = 0; 3342 3343 GROW; 3344 3345 #ifdef DEBUG 3346 nbParseName++; 3347 #endif 3348 3349 /* 3350 * Accelerator for simple ASCII names 3351 */ 3352 in = ctxt->input->cur; 3353 if (((*in >= 0x61) && (*in <= 0x7A)) || 3354 ((*in >= 0x41) && (*in <= 0x5A)) || 3355 (*in == '_') || (*in == ':')) { 3356 in++; 3357 while (((*in >= 0x61) && (*in <= 0x7A)) || 3358 ((*in >= 0x41) && (*in <= 0x5A)) || 3359 ((*in >= 0x30) && (*in <= 0x39)) || 3360 (*in == '_') || (*in == '-') || 3361 (*in == ':') || (*in == '.')) 3362 in++; 3363 if ((*in > 0) && (*in < 0x80)) { 3364 count = in - ctxt->input->cur; 3365 if ((count > XML_MAX_NAME_LENGTH) && 3366 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3367 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3368 return(NULL); 3369 } 3370 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3371 ctxt->input->cur = in; 3372 ctxt->input->col += count; 3373 if (ret == NULL) 3374 xmlErrMemory(ctxt, NULL); 3375 return(ret); 3376 } 3377 } 3378 /* accelerator for special cases */ 3379 return(xmlParseNameComplex(ctxt)); 3380 } 3381 3382 static const xmlChar * 3383 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3384 int 
len = 0, l; 3385 int c; 3386 int count = 0; 3387 size_t startPosition = 0; 3388 3389 #ifdef DEBUG 3390 nbParseNCNameComplex++; 3391 #endif 3392 3393 /* 3394 * Handler for more complex cases 3395 */ 3396 GROW; 3397 startPosition = CUR_PTR - BASE_PTR; 3398 c = CUR_CHAR(l); 3399 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3400 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3401 return(NULL); 3402 } 3403 3404 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3405 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3406 if (count++ > XML_PARSER_CHUNK_SIZE) { 3407 if ((len > XML_MAX_NAME_LENGTH) && 3408 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3409 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3410 return(NULL); 3411 } 3412 count = 0; 3413 GROW; 3414 if (ctxt->instate == XML_PARSER_EOF) 3415 return(NULL); 3416 } 3417 len += l; 3418 NEXTL(l); 3419 c = CUR_CHAR(l); 3420 if (c == 0) { 3421 count = 0; 3422 /* 3423 * when shrinking to extend the buffer we really need to preserve 3424 * the part of the name we already parsed. Hence rolling back 3425 * by current length. 3426 */ 3427 ctxt->input->cur -= l; 3428 GROW; 3429 if (ctxt->instate == XML_PARSER_EOF) 3430 return(NULL); 3431 ctxt->input->cur += l; 3432 c = CUR_CHAR(l); 3433 } 3434 } 3435 if ((len > XML_MAX_NAME_LENGTH) && 3436 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3437 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3438 return(NULL); 3439 } 3440 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); 3441 } 3442 3443 /** 3444 * xmlParseNCName: 3445 * @ctxt: an XML parser context 3446 * @len: length of the string parsed 3447 * 3448 * parse an XML name. 3449 * 3450 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3451 * CombiningChar | Extender 3452 * 3453 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3454 * 3455 * Returns the Name parsed or NULL 3456 */ 3457 3458 static const xmlChar * 3459 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3460 const xmlChar *in, *e; 3461 const xmlChar *ret; 3462 int count = 0; 3463 3464 #ifdef DEBUG 3465 nbParseNCName++; 3466 #endif 3467 3468 /* 3469 * Accelerator for simple ASCII names 3470 */ 3471 in = ctxt->input->cur; 3472 e = ctxt->input->end; 3473 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3474 ((*in >= 0x41) && (*in <= 0x5A)) || 3475 (*in == '_')) && (in < e)) { 3476 in++; 3477 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3478 ((*in >= 0x41) && (*in <= 0x5A)) || 3479 ((*in >= 0x30) && (*in <= 0x39)) || 3480 (*in == '_') || (*in == '-') || 3481 (*in == '.')) && (in < e)) 3482 in++; 3483 if (in >= e) 3484 goto complex; 3485 if ((*in > 0) && (*in < 0x80)) { 3486 count = in - ctxt->input->cur; 3487 if ((count > XML_MAX_NAME_LENGTH) && 3488 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3489 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3490 return(NULL); 3491 } 3492 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3493 ctxt->input->cur = in; 3494 ctxt->input->col += count; 3495 if (ret == NULL) { 3496 xmlErrMemory(ctxt, NULL); 3497 } 3498 return(ret); 3499 } 3500 } 3501 complex: 3502 return(xmlParseNCNameComplex(ctxt)); 3503 } 3504 3505 /** 3506 * xmlParseNameAndCompare: 3507 * @ctxt: an XML parser context 3508 * 3509 * parse an XML name and compares for match 3510 * (specialized for endtag parsing) 3511 * 3512 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3513 * and the name for mismatch 3514 */ 3515 3516 static const xmlChar * 3517 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3518 register const xmlChar *cmp = other; 3519 register const xmlChar *in; 3520 const xmlChar *ret; 3521 3522 GROW; 3523 if (ctxt->instate == XML_PARSER_EOF) 3524 return(NULL); 3525 3526 in = 
ctxt->input->cur; 3527 while (*in != 0 && *in == *cmp) { 3528 ++in; 3529 ++cmp; 3530 } 3531 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3532 /* success */ 3533 ctxt->input->col += in - ctxt->input->cur; 3534 ctxt->input->cur = in; 3535 return (const xmlChar*) 1; 3536 } 3537 /* failure (or end of input buffer), check with full function */ 3538 ret = xmlParseName (ctxt); 3539 /* strings coming from the dictionary direct compare possible */ 3540 if (ret == other) { 3541 return (const xmlChar*) 1; 3542 } 3543 return ret; 3544 } 3545 3546 /** 3547 * xmlParseStringName: 3548 * @ctxt: an XML parser context 3549 * @str: a pointer to the string pointer (IN/OUT) 3550 * 3551 * parse an XML name. 3552 * 3553 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3554 * CombiningChar | Extender 3555 * 3556 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3557 * 3558 * [6] Names ::= Name (#x20 Name)* 3559 * 3560 * Returns the Name parsed or NULL. The @str pointer 3561 * is updated to the current location in the string. 3562 */ 3563 3564 static xmlChar * 3565 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3566 xmlChar buf[XML_MAX_NAMELEN + 5]; 3567 const xmlChar *cur = *str; 3568 int len = 0, l; 3569 int c; 3570 3571 #ifdef DEBUG 3572 nbParseStringName++; 3573 #endif 3574 3575 c = CUR_SCHAR(cur, l); 3576 if (!xmlIsNameStartChar(ctxt, c)) { 3577 return(NULL); 3578 } 3579 3580 COPY_BUF(l,buf,len,c); 3581 cur += l; 3582 c = CUR_SCHAR(cur, l); 3583 while (xmlIsNameChar(ctxt, c)) { 3584 COPY_BUF(l,buf,len,c); 3585 cur += l; 3586 c = CUR_SCHAR(cur, l); 3587 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3588 /* 3589 * Okay someone managed to make a huge name, so he's ready to pay 3590 * for the processing speed. 
3591 */ 3592 xmlChar *buffer; 3593 int max = len * 2; 3594 3595 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3596 if (buffer == NULL) { 3597 xmlErrMemory(ctxt, NULL); 3598 return(NULL); 3599 } 3600 memcpy(buffer, buf, len); 3601 while (xmlIsNameChar(ctxt, c)) { 3602 if (len + 10 > max) { 3603 xmlChar *tmp; 3604 3605 if ((len > XML_MAX_NAME_LENGTH) && 3606 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3607 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3608 xmlFree(buffer); 3609 return(NULL); 3610 } 3611 max *= 2; 3612 tmp = (xmlChar *) xmlRealloc(buffer, 3613 max * sizeof(xmlChar)); 3614 if (tmp == NULL) { 3615 xmlErrMemory(ctxt, NULL); 3616 xmlFree(buffer); 3617 return(NULL); 3618 } 3619 buffer = tmp; 3620 } 3621 COPY_BUF(l,buffer,len,c); 3622 cur += l; 3623 c = CUR_SCHAR(cur, l); 3624 } 3625 buffer[len] = 0; 3626 *str = cur; 3627 return(buffer); 3628 } 3629 } 3630 if ((len > XML_MAX_NAME_LENGTH) && 3631 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3632 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3633 return(NULL); 3634 } 3635 *str = cur; 3636 return(xmlStrndup(buf, len)); 3637 } 3638 3639 /** 3640 * xmlParseNmtoken: 3641 * @ctxt: an XML parser context 3642 * 3643 * parse an XML Nmtoken. 
3644 * 3645 * [7] Nmtoken ::= (NameChar)+ 3646 * 3647 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3648 * 3649 * Returns the Nmtoken parsed or NULL 3650 */ 3651 3652 xmlChar * 3653 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3654 xmlChar buf[XML_MAX_NAMELEN + 5]; 3655 int len = 0, l; 3656 int c; 3657 int count = 0; 3658 3659 #ifdef DEBUG 3660 nbParseNmToken++; 3661 #endif 3662 3663 GROW; 3664 if (ctxt->instate == XML_PARSER_EOF) 3665 return(NULL); 3666 c = CUR_CHAR(l); 3667 3668 while (xmlIsNameChar(ctxt, c)) { 3669 if (count++ > XML_PARSER_CHUNK_SIZE) { 3670 count = 0; 3671 GROW; 3672 } 3673 COPY_BUF(l,buf,len,c); 3674 NEXTL(l); 3675 c = CUR_CHAR(l); 3676 if (c == 0) { 3677 count = 0; 3678 GROW; 3679 if (ctxt->instate == XML_PARSER_EOF) 3680 return(NULL); 3681 c = CUR_CHAR(l); 3682 } 3683 if (len >= XML_MAX_NAMELEN) { 3684 /* 3685 * Okay someone managed to make a huge token, so he's ready to pay 3686 * for the processing speed. 3687 */ 3688 xmlChar *buffer; 3689 int max = len * 2; 3690 3691 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3692 if (buffer == NULL) { 3693 xmlErrMemory(ctxt, NULL); 3694 return(NULL); 3695 } 3696 memcpy(buffer, buf, len); 3697 while (xmlIsNameChar(ctxt, c)) { 3698 if (count++ > XML_PARSER_CHUNK_SIZE) { 3699 count = 0; 3700 GROW; 3701 if (ctxt->instate == XML_PARSER_EOF) { 3702 xmlFree(buffer); 3703 return(NULL); 3704 } 3705 } 3706 if (len + 10 > max) { 3707 xmlChar *tmp; 3708 3709 if ((max > XML_MAX_NAME_LENGTH) && 3710 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3711 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3712 xmlFree(buffer); 3713 return(NULL); 3714 } 3715 max *= 2; 3716 tmp = (xmlChar *) xmlRealloc(buffer, 3717 max * sizeof(xmlChar)); 3718 if (tmp == NULL) { 3719 xmlErrMemory(ctxt, NULL); 3720 xmlFree(buffer); 3721 return(NULL); 3722 } 3723 buffer = tmp; 3724 } 3725 COPY_BUF(l,buffer,len,c); 3726 NEXTL(l); 3727 c = CUR_CHAR(l); 3728 } 3729 buffer[len] = 0; 3730 return(buffer); 3731 } 3732 } 3733 if (len == 0) 
3734 return(NULL); 3735 if ((len > XML_MAX_NAME_LENGTH) && 3736 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3737 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3738 return(NULL); 3739 } 3740 return(xmlStrndup(buf, len)); 3741 } 3742 3743 /** 3744 * xmlParseEntityValue: 3745 * @ctxt: an XML parser context 3746 * @orig: if non-NULL store a copy of the original entity value 3747 * 3748 * parse a value for ENTITY declarations 3749 * 3750 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3751 * "'" ([^%&'] | PEReference | Reference)* "'" 3752 * 3753 * Returns the EntityValue parsed with reference substituted or NULL 3754 */ 3755 3756 xmlChar * 3757 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3758 xmlChar *buf = NULL; 3759 int len = 0; 3760 int size = XML_PARSER_BUFFER_SIZE; 3761 int c, l; 3762 xmlChar stop; 3763 xmlChar *ret = NULL; 3764 const xmlChar *cur = NULL; 3765 xmlParserInputPtr input; 3766 3767 if (RAW == '"') stop = '"'; 3768 else if (RAW == '\'') stop = '\''; 3769 else { 3770 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3771 return(NULL); 3772 } 3773 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3774 if (buf == NULL) { 3775 xmlErrMemory(ctxt, NULL); 3776 return(NULL); 3777 } 3778 3779 /* 3780 * The content of the entity definition is copied in a buffer. 3781 */ 3782 3783 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3784 input = ctxt->input; 3785 GROW; 3786 if (ctxt->instate == XML_PARSER_EOF) 3787 goto error; 3788 NEXT; 3789 c = CUR_CHAR(l); 3790 /* 3791 * NOTE: 4.4.5 Included in Literal 3792 * When a parameter entity reference appears in a literal entity 3793 * value, ... a single or double quote character in the replacement 3794 * text is always treated as a normal data character and will not 3795 * terminate the literal. 
3796 * In practice it means we stop the loop only when back at parsing 3797 * the initial entity and the quote is found 3798 */ 3799 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3800 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3801 if (len + 5 >= size) { 3802 xmlChar *tmp; 3803 3804 size *= 2; 3805 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3806 if (tmp == NULL) { 3807 xmlErrMemory(ctxt, NULL); 3808 goto error; 3809 } 3810 buf = tmp; 3811 } 3812 COPY_BUF(l,buf,len,c); 3813 NEXTL(l); 3814 3815 GROW; 3816 c = CUR_CHAR(l); 3817 if (c == 0) { 3818 GROW; 3819 c = CUR_CHAR(l); 3820 } 3821 } 3822 buf[len] = 0; 3823 if (ctxt->instate == XML_PARSER_EOF) 3824 goto error; 3825 if (c != stop) { 3826 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3827 goto error; 3828 } 3829 NEXT; 3830 3831 /* 3832 * Raise problem w.r.t. '&' and '%' being used in non-entities 3833 * reference constructs. Note Charref will be handled in 3834 * xmlStringDecodeEntities() 3835 */ 3836 cur = buf; 3837 while (*cur != 0) { /* non input consuming */ 3838 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3839 xmlChar *name; 3840 xmlChar tmp = *cur; 3841 int nameOk = 0; 3842 3843 cur++; 3844 name = xmlParseStringName(ctxt, &cur); 3845 if (name != NULL) { 3846 nameOk = 1; 3847 xmlFree(name); 3848 } 3849 if ((nameOk == 0) || (*cur != ';')) { 3850 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3851 "EntityValue: '%c' forbidden except for entities references\n", 3852 tmp); 3853 goto error; 3854 } 3855 if ((tmp == '%') && (ctxt->inSubset == 1) && 3856 (ctxt->inputNr == 1)) { 3857 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3858 goto error; 3859 } 3860 if (*cur == 0) 3861 break; 3862 } 3863 cur++; 3864 } 3865 3866 /* 3867 * Then PEReference entities are substituted. 3868 * 3869 * NOTE: 4.4.7 Bypassed 3870 * When a general entity reference appears in the EntityValue in 3871 * an entity declaration, it is bypassed and left as is. 
3872 * so XML_SUBSTITUTE_REF is not set here. 3873 */ 3874 ++ctxt->depth; 3875 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3876 0, 0, 0); 3877 --ctxt->depth; 3878 if (orig != NULL) { 3879 *orig = buf; 3880 buf = NULL; 3881 } 3882 3883 error: 3884 if (buf != NULL) 3885 xmlFree(buf); 3886 return(ret); 3887 } 3888 3889 /** 3890 * xmlParseAttValueComplex: 3891 * @ctxt: an XML parser context 3892 * @len: the resulting attribute len 3893 * @normalize: whether to apply the inner normalization 3894 * 3895 * parse a value for an attribute, this is the fallback function 3896 * of xmlParseAttValue() when the attribute parsing requires handling 3897 * of non-ASCII characters, or normalization compaction. 3898 * 3899 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3900 */ 3901 static xmlChar * 3902 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3903 xmlChar limit = 0; 3904 xmlChar *buf = NULL; 3905 xmlChar *rep = NULL; 3906 size_t len = 0; 3907 size_t buf_size = 0; 3908 int c, l, in_space = 0; 3909 xmlChar *current = NULL; 3910 xmlEntityPtr ent; 3911 3912 if (NXT(0) == '"') { 3913 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3914 limit = '"'; 3915 NEXT; 3916 } else if (NXT(0) == '\'') { 3917 limit = '\''; 3918 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3919 NEXT; 3920 } else { 3921 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3922 return(NULL); 3923 } 3924 3925 /* 3926 * allocate a translation buffer. 3927 */ 3928 buf_size = XML_PARSER_BUFFER_SIZE; 3929 buf = (xmlChar *) xmlMallocAtomic(buf_size); 3930 if (buf == NULL) goto mem_error; 3931 3932 /* 3933 * OK loop until we reach one of the ending char or a size limit. 
3934 */ 3935 c = CUR_CHAR(l); 3936 while (((NXT(0) != limit) && /* checked */ 3937 (IS_CHAR(c)) && (c != '<')) && 3938 (ctxt->instate != XML_PARSER_EOF)) { 3939 /* 3940 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 3941 * special option is given 3942 */ 3943 if ((len > XML_MAX_TEXT_LENGTH) && 3944 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3945 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3946 "AttValue length too long\n"); 3947 goto mem_error; 3948 } 3949 if (c == '&') { 3950 in_space = 0; 3951 if (NXT(1) == '#') { 3952 int val = xmlParseCharRef(ctxt); 3953 3954 if (val == '&') { 3955 if (ctxt->replaceEntities) { 3956 if (len + 10 > buf_size) { 3957 growBuffer(buf, 10); 3958 } 3959 buf[len++] = '&'; 3960 } else { 3961 /* 3962 * The reparsing will be done in xmlStringGetNodeList() 3963 * called by the attribute() function in SAX.c 3964 */ 3965 if (len + 10 > buf_size) { 3966 growBuffer(buf, 10); 3967 } 3968 buf[len++] = '&'; 3969 buf[len++] = '#'; 3970 buf[len++] = '3'; 3971 buf[len++] = '8'; 3972 buf[len++] = ';'; 3973 } 3974 } else if (val != 0) { 3975 if (len + 10 > buf_size) { 3976 growBuffer(buf, 10); 3977 } 3978 len += xmlCopyChar(0, &buf[len], val); 3979 } 3980 } else { 3981 ent = xmlParseEntityRef(ctxt); 3982 ctxt->nbentities++; 3983 if (ent != NULL) 3984 ctxt->nbentities += ent->owner; 3985 if ((ent != NULL) && 3986 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3987 if (len + 10 > buf_size) { 3988 growBuffer(buf, 10); 3989 } 3990 if ((ctxt->replaceEntities == 0) && 3991 (ent->content[0] == '&')) { 3992 buf[len++] = '&'; 3993 buf[len++] = '#'; 3994 buf[len++] = '3'; 3995 buf[len++] = '8'; 3996 buf[len++] = ';'; 3997 } else { 3998 buf[len++] = ent->content[0]; 3999 } 4000 } else if ((ent != NULL) && 4001 (ctxt->replaceEntities != 0)) { 4002 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4003 ++ctxt->depth; 4004 rep = xmlStringDecodeEntities(ctxt, ent->content, 4005 XML_SUBSTITUTE_REF, 4006 0, 0, 0); 4007 --ctxt->depth; 
4008 if (rep != NULL) { 4009 current = rep; 4010 while (*current != 0) { /* non input consuming */ 4011 if ((*current == 0xD) || (*current == 0xA) || 4012 (*current == 0x9)) { 4013 buf[len++] = 0x20; 4014 current++; 4015 } else 4016 buf[len++] = *current++; 4017 if (len + 10 > buf_size) { 4018 growBuffer(buf, 10); 4019 } 4020 } 4021 xmlFree(rep); 4022 rep = NULL; 4023 } 4024 } else { 4025 if (len + 10 > buf_size) { 4026 growBuffer(buf, 10); 4027 } 4028 if (ent->content != NULL) 4029 buf[len++] = ent->content[0]; 4030 } 4031 } else if (ent != NULL) { 4032 int i = xmlStrlen(ent->name); 4033 const xmlChar *cur = ent->name; 4034 4035 /* 4036 * This may look absurd but is needed to detect 4037 * entities problems 4038 */ 4039 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4040 (ent->content != NULL) && (ent->checked == 0)) { 4041 unsigned long oldnbent = ctxt->nbentities, diff; 4042 4043 ++ctxt->depth; 4044 rep = xmlStringDecodeEntities(ctxt, ent->content, 4045 XML_SUBSTITUTE_REF, 0, 0, 0); 4046 --ctxt->depth; 4047 4048 diff = ctxt->nbentities - oldnbent + 1; 4049 if (diff > INT_MAX / 2) 4050 diff = INT_MAX / 2; 4051 ent->checked = diff * 2; 4052 if (rep != NULL) { 4053 if (xmlStrchr(rep, '<')) 4054 ent->checked |= 1; 4055 xmlFree(rep); 4056 rep = NULL; 4057 } else { 4058 ent->content[0] = 0; 4059 } 4060 } 4061 4062 /* 4063 * Just output the reference 4064 */ 4065 buf[len++] = '&'; 4066 while (len + i + 10 > buf_size) { 4067 growBuffer(buf, i + 10); 4068 } 4069 for (;i > 0;i--) 4070 buf[len++] = *cur++; 4071 buf[len++] = ';'; 4072 } 4073 } 4074 } else { 4075 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4076 if ((len != 0) || (!normalize)) { 4077 if ((!normalize) || (!in_space)) { 4078 COPY_BUF(l,buf,len,0x20); 4079 while (len + 10 > buf_size) { 4080 growBuffer(buf, 10); 4081 } 4082 } 4083 in_space = 1; 4084 } 4085 } else { 4086 in_space = 0; 4087 COPY_BUF(l,buf,len,c); 4088 if (len + 10 > buf_size) { 4089 growBuffer(buf, 10); 4090 } 4091 } 4092 
NEXTL(l); 4093 } 4094 GROW; 4095 c = CUR_CHAR(l); 4096 } 4097 if (ctxt->instate == XML_PARSER_EOF) 4098 goto error; 4099 4100 if ((in_space) && (normalize)) { 4101 while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4102 } 4103 buf[len] = 0; 4104 if (RAW == '<') { 4105 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4106 } else if (RAW != limit) { 4107 if ((c != 0) && (!IS_CHAR(c))) { 4108 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4109 "invalid character in attribute value\n"); 4110 } else { 4111 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4112 "AttValue: ' expected\n"); 4113 } 4114 } else 4115 NEXT; 4116 4117 /* 4118 * There we potentially risk an overflow, don't allow attribute value of 4119 * length more than INT_MAX it is a very reasonable assumption ! 4120 */ 4121 if (len >= INT_MAX) { 4122 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4123 "AttValue length too long\n"); 4124 goto mem_error; 4125 } 4126 4127 if (attlen != NULL) *attlen = (int) len; 4128 return(buf); 4129 4130 mem_error: 4131 xmlErrMemory(ctxt, NULL); 4132 error: 4133 if (buf != NULL) 4134 xmlFree(buf); 4135 if (rep != NULL) 4136 xmlFree(rep); 4137 return(NULL); 4138 } 4139 4140 /** 4141 * xmlParseAttValue: 4142 * @ctxt: an XML parser context 4143 * 4144 * parse a value for an attribute 4145 * Note: the parser won't do substitution of entities here, this 4146 * will be handled later in xmlStringGetNodeList 4147 * 4148 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4149 * "'" ([^<&'] | Reference)* "'" 4150 * 4151 * 3.3.3 Attribute-Value Normalization: 4152 * Before the value of an attribute is passed to the application or 4153 * checked for validity, the XML processor must normalize it as follows: 4154 * - a character reference is processed by appending the referenced 4155 * character to the attribute value 4156 * - an entity reference is processed by recursively processing the 4157 * replacement text of the entity 4158 * - a whitespace character (#x20, #xD, #xA, #x9) is 
processed by 4159 * appending #x20 to the normalized value, except that only a single 4160 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4161 * parsed entity or the literal entity value of an internal parsed entity 4162 * - other characters are processed by appending them to the normalized value 4163 * If the declared value is not CDATA, then the XML processor must further 4164 * process the normalized attribute value by discarding any leading and 4165 * trailing space (#x20) characters, and by replacing sequences of space 4166 * (#x20) characters by a single space (#x20) character. 4167 * All attributes for which no declaration has been read should be treated 4168 * by a non-validating parser as if declared CDATA. 4169 * 4170 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4171 */ 4172 4173 4174 xmlChar * 4175 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4176 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4177 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4178 } 4179 4180 /** 4181 * xmlParseSystemLiteral: 4182 * @ctxt: an XML parser context 4183 * 4184 * parse an XML Literal 4185 * 4186 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4187 * 4188 * Returns the SystemLiteral parsed or NULL 4189 */ 4190 4191 xmlChar * 4192 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4193 xmlChar *buf = NULL; 4194 int len = 0; 4195 int size = XML_PARSER_BUFFER_SIZE; 4196 int cur, l; 4197 xmlChar stop; 4198 int state = ctxt->instate; 4199 int count = 0; 4200 4201 SHRINK; 4202 if (RAW == '"') { 4203 NEXT; 4204 stop = '"'; 4205 } else if (RAW == '\'') { 4206 NEXT; 4207 stop = '\''; 4208 } else { 4209 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4210 return(NULL); 4211 } 4212 4213 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4214 if (buf == NULL) { 4215 xmlErrMemory(ctxt, NULL); 4216 return(NULL); 4217 } 4218 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4219 cur = CUR_CHAR(l); 
4220 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4221 if (len + 5 >= size) { 4222 xmlChar *tmp; 4223 4224 if ((size > XML_MAX_NAME_LENGTH) && 4225 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4226 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4227 xmlFree(buf); 4228 ctxt->instate = (xmlParserInputState) state; 4229 return(NULL); 4230 } 4231 size *= 2; 4232 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4233 if (tmp == NULL) { 4234 xmlFree(buf); 4235 xmlErrMemory(ctxt, NULL); 4236 ctxt->instate = (xmlParserInputState) state; 4237 return(NULL); 4238 } 4239 buf = tmp; 4240 } 4241 count++; 4242 if (count > 50) { 4243 SHRINK; 4244 GROW; 4245 count = 0; 4246 if (ctxt->instate == XML_PARSER_EOF) { 4247 xmlFree(buf); 4248 return(NULL); 4249 } 4250 } 4251 COPY_BUF(l,buf,len,cur); 4252 NEXTL(l); 4253 cur = CUR_CHAR(l); 4254 if (cur == 0) { 4255 GROW; 4256 SHRINK; 4257 cur = CUR_CHAR(l); 4258 } 4259 } 4260 buf[len] = 0; 4261 ctxt->instate = (xmlParserInputState) state; 4262 if (!IS_CHAR(cur)) { 4263 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4264 } else { 4265 NEXT; 4266 } 4267 return(buf); 4268 } 4269 4270 /** 4271 * xmlParsePubidLiteral: 4272 * @ctxt: an XML parser context 4273 * 4274 * parse an XML public literal 4275 * 4276 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4277 * 4278 * Returns the PubidLiteral parsed or NULL. 
4279 */ 4280 4281 xmlChar * 4282 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4283 xmlChar *buf = NULL; 4284 int len = 0; 4285 int size = XML_PARSER_BUFFER_SIZE; 4286 xmlChar cur; 4287 xmlChar stop; 4288 int count = 0; 4289 xmlParserInputState oldstate = ctxt->instate; 4290 4291 SHRINK; 4292 if (RAW == '"') { 4293 NEXT; 4294 stop = '"'; 4295 } else if (RAW == '\'') { 4296 NEXT; 4297 stop = '\''; 4298 } else { 4299 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4300 return(NULL); 4301 } 4302 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4303 if (buf == NULL) { 4304 xmlErrMemory(ctxt, NULL); 4305 return(NULL); 4306 } 4307 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4308 cur = CUR; 4309 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4310 if (len + 1 >= size) { 4311 xmlChar *tmp; 4312 4313 if ((size > XML_MAX_NAME_LENGTH) && 4314 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4316 xmlFree(buf); 4317 return(NULL); 4318 } 4319 size *= 2; 4320 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4321 if (tmp == NULL) { 4322 xmlErrMemory(ctxt, NULL); 4323 xmlFree(buf); 4324 return(NULL); 4325 } 4326 buf = tmp; 4327 } 4328 buf[len++] = cur; 4329 count++; 4330 if (count > 50) { 4331 SHRINK; 4332 GROW; 4333 count = 0; 4334 if (ctxt->instate == XML_PARSER_EOF) { 4335 xmlFree(buf); 4336 return(NULL); 4337 } 4338 } 4339 NEXT; 4340 cur = CUR; 4341 if (cur == 0) { 4342 GROW; 4343 SHRINK; 4344 cur = CUR; 4345 } 4346 } 4347 buf[len] = 0; 4348 if (cur != stop) { 4349 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4350 } else { 4351 NEXT; 4352 } 4353 ctxt->instate = oldstate; 4354 return(buf); 4355 } 4356 4357 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4358 4359 /* 4360 * used for the test in the inner loop of the char data testing 4361 */ 4362 static const unsigned char test_char_data[256] = { 4363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4364 
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4365 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4367 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4368 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4369 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4370 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4371 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4372 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4373 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4374 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4375 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4376 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4377 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4378 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4379 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4380 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4381 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4382 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4383 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4384 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4385 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4386 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4388 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4389 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4391 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4392 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4393 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4394 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4395 }; 4396 4397 /** 4398 * xmlParseCharData: 4399 * @ctxt: an XML parser context 4400 * @cdata: int indicating whether we are within a CDATA section 4401 * 4402 * parse a CharData section. 4403 * if we are within a CDATA section ']]>' marks an end of section. 
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Snapshot of the input position. The fast path below bumps
     * ctxt->input->line/col while "in" runs ahead of ctxt->input->cur;
     * the snapshot is refreshed after each SAX delivery and written back
     * to the input right before falling back to xmlParseCharDataComplex().
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    /* Input buffer maintenance macros (defined elsewhere in this file). */
    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     * Only taken when not inside a CDATA section.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a run of spaces and line feeds, tracking line/col. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Everything scanned so far was spaces/line feeds and the
                 * next byte opens markup: deliver the run (if any) and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* Commit the position before invoking any callback. */
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * The two callbacks differ, so ask areBlanks() which
                         * one applies.
                         * NOTE(review): the final argument (1 here, 0 in the
                         * other call sites of this function) presumably
                         * signals that the run is already known to be blank;
                         * confirm against areBlanks().
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * Sentinel switch from -1 to -2; these values are
                             * interpreted outside this function.
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Consume bytes accepted by the test_char_data[] lookup table
             * (defined elsewhere), counting columns in a local.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in plain character data. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    /* Resume one byte past the first ']' and give up. */
                    ctxt->input->cur = in + 1;
                    return;
                }
                /* A lone ']' is ordinary data. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver whatever was scanned since the committed position. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The segment starts with a blank and the handler
                     * distinguishes ignorable whitespace: let areBlanks()
                     * decide which callback receives it.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* Refresh the position snapshot after the delivery. */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    /* Note: cur is still the segment start during this call. */
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR LF pair: leave cur on the LF, step "in" past it and
                 * count the new line. A lone CR is left for the complex
                 * parser (the loop condition below rejects 0xD).
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    /* jumps to the do/while condition test below */
                    continue; /* while */
                }
                in--;
            }
            /* Markup or a reference ends the character data. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* The buffer may have moved: reload the scan pointer. */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
        nbchar = 0;
    }
    /*
     * Slow path (CDATA content, or a byte the fast loop does not accept):
     * rewind line/col to the snapshot and let the complex parser continue
     * from ctxt->input->cur.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
xmlParseCharDataComplex: 4557 * @ctxt: an XML parser context 4558 * @cdata: int indicating whether we are within a CDATA section 4559 * 4560 * parse a CharData section.this is the fallback function 4561 * of xmlParseCharData() when the parsing requires handling 4562 * of non-ASCII characters. 4563 */ 4564 static void 4565 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4566 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4567 int nbchar = 0; 4568 int cur, l; 4569 int count = 0; 4570 4571 SHRINK; 4572 GROW; 4573 cur = CUR_CHAR(l); 4574 while ((cur != '<') && /* checked */ 4575 (cur != '&') && 4576 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4577 if ((cur == ']') && (NXT(1) == ']') && 4578 (NXT(2) == '>')) { 4579 if (cdata) break; 4580 else { 4581 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4582 } 4583 } 4584 COPY_BUF(l,buf,nbchar,cur); 4585 /* move current position before possible calling of ctxt->sax->characters */ 4586 NEXTL(l); 4587 cur = CUR_CHAR(l); 4588 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4589 buf[nbchar] = 0; 4590 4591 /* 4592 * OK the segment is to be consumed as chars. 
4593 */ 4594 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4595 if (areBlanks(ctxt, buf, nbchar, 0)) { 4596 if (ctxt->sax->ignorableWhitespace != NULL) 4597 ctxt->sax->ignorableWhitespace(ctxt->userData, 4598 buf, nbchar); 4599 } else { 4600 if (ctxt->sax->characters != NULL) 4601 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4602 if ((ctxt->sax->characters != 4603 ctxt->sax->ignorableWhitespace) && 4604 (*ctxt->space == -1)) 4605 *ctxt->space = -2; 4606 } 4607 } 4608 nbchar = 0; 4609 /* something really bad happened in the SAX callback */ 4610 if (ctxt->instate != XML_PARSER_CONTENT) 4611 return; 4612 } 4613 count++; 4614 if (count > 50) { 4615 SHRINK; 4616 GROW; 4617 count = 0; 4618 if (ctxt->instate == XML_PARSER_EOF) 4619 return; 4620 } 4621 } 4622 if (nbchar != 0) { 4623 buf[nbchar] = 0; 4624 /* 4625 * OK the segment is to be consumed as chars. 4626 */ 4627 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4628 if (areBlanks(ctxt, buf, nbchar, 0)) { 4629 if (ctxt->sax->ignorableWhitespace != NULL) 4630 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4631 } else { 4632 if (ctxt->sax->characters != NULL) 4633 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4634 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4635 (*ctxt->space == -1)) 4636 *ctxt->space = -2; 4637 } 4638 } 4639 } 4640 if ((cur != 0) && (!IS_CHAR(cur))) { 4641 /* Generate the error and skip the offending character */ 4642 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4643 "PCDATA invalid Char value %d\n", 4644 cur); 4645 NEXTL(l); 4646 } 4647 } 4648 4649 /** 4650 * xmlParseExternalID: 4651 * @ctxt: an XML parser context 4652 * @publicID: a xmlChar** receiving PubidLiteral 4653 * @strict: indicate whether we should restrict parsing to only 4654 * production [75], see NOTE below 4655 * 4656 * Parse an External ID or a Public ID 4657 * 4658 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4659 * 'PUBLIC' S PubidLiteral S 
SystemLiteral 4660 * 4661 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4662 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4663 * 4664 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4665 * 4666 * Returns the function returns SystemLiteral and in the second 4667 * case publicID receives PubidLiteral, is strict is off 4668 * it is possible to return NULL and have publicID set. 4669 */ 4670 4671 xmlChar * 4672 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4673 xmlChar *URI = NULL; 4674 4675 SHRINK; 4676 4677 *publicID = NULL; 4678 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4679 SKIP(6); 4680 if (SKIP_BLANKS == 0) { 4681 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4682 "Space required after 'SYSTEM'\n"); 4683 } 4684 URI = xmlParseSystemLiteral(ctxt); 4685 if (URI == NULL) { 4686 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4687 } 4688 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4689 SKIP(6); 4690 if (SKIP_BLANKS == 0) { 4691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4692 "Space required after 'PUBLIC'\n"); 4693 } 4694 *publicID = xmlParsePubidLiteral(ctxt); 4695 if (*publicID == NULL) { 4696 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4697 } 4698 if (strict) { 4699 /* 4700 * We don't handle [83] so "S SystemLiteral" is required. 4701 */ 4702 if (SKIP_BLANKS == 0) { 4703 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4704 "Space required after the Public Identifier\n"); 4705 } 4706 } else { 4707 /* 4708 * We handle [83] so we return immediately, if 4709 * "S SystemLiteral" is not detected. We skip blanks if no 4710 * system literal was found, but this is harmless since we must 4711 * be at the end of a NotationDecl. 
4712 */ 4713 if (SKIP_BLANKS == 0) return(NULL); 4714 if ((CUR != '\'') && (CUR != '"')) return(NULL); 4715 } 4716 URI = xmlParseSystemLiteral(ctxt); 4717 if (URI == NULL) { 4718 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4719 } 4720 } 4721 return(URI); 4722 } 4723 4724 /** 4725 * xmlParseCommentComplex: 4726 * @ctxt: an XML parser context 4727 * @buf: the already parsed part of the buffer 4728 * @len: number of bytes in the buffer 4729 * @size: allocated size of the buffer 4730 * 4731 * Skip an XML (SGML) comment <!-- .... --> 4732 * The spec says that "For compatibility, the string "--" (double-hyphen) 4733 * must not occur within comments. " 4734 * This is the slow routine in case the accelerator for ascii didn't work 4735 * 4736 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4737 */ 4738 static void 4739 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4740 size_t len, size_t size) { 4741 int q, ql; 4742 int r, rl; 4743 int cur, l; 4744 size_t count = 0; 4745 int inputid; 4746 4747 inputid = ctxt->input->id; 4748 4749 if (buf == NULL) { 4750 len = 0; 4751 size = XML_PARSER_BUFFER_SIZE; 4752 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4753 if (buf == NULL) { 4754 xmlErrMemory(ctxt, NULL); 4755 return; 4756 } 4757 } 4758 GROW; /* Assure there's enough input data */ 4759 q = CUR_CHAR(ql); 4760 if (q == 0) 4761 goto not_terminated; 4762 if (!IS_CHAR(q)) { 4763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4764 "xmlParseComment: invalid xmlChar value %d\n", 4765 q); 4766 xmlFree (buf); 4767 return; 4768 } 4769 NEXTL(ql); 4770 r = CUR_CHAR(rl); 4771 if (r == 0) 4772 goto not_terminated; 4773 if (!IS_CHAR(r)) { 4774 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4775 "xmlParseComment: invalid xmlChar value %d\n", 4776 q); 4777 xmlFree (buf); 4778 return; 4779 } 4780 NEXTL(rl); 4781 cur = CUR_CHAR(l); 4782 if (cur == 0) 4783 goto not_terminated; 4784 while (IS_CHAR(cur) && /* checked */ 4785 ((cur != '>') || 4786 (r != 
'-') || (q != '-'))) { 4787 if ((r == '-') && (q == '-')) { 4788 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4789 } 4790 if ((len > XML_MAX_TEXT_LENGTH) && 4791 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4792 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4793 "Comment too big found", NULL); 4794 xmlFree (buf); 4795 return; 4796 } 4797 if (len + 5 >= size) { 4798 xmlChar *new_buf; 4799 size_t new_size; 4800 4801 new_size = size * 2; 4802 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4803 if (new_buf == NULL) { 4804 xmlFree (buf); 4805 xmlErrMemory(ctxt, NULL); 4806 return; 4807 } 4808 buf = new_buf; 4809 size = new_size; 4810 } 4811 COPY_BUF(ql,buf,len,q); 4812 q = r; 4813 ql = rl; 4814 r = cur; 4815 rl = l; 4816 4817 count++; 4818 if (count > 50) { 4819 SHRINK; 4820 GROW; 4821 count = 0; 4822 if (ctxt->instate == XML_PARSER_EOF) { 4823 xmlFree(buf); 4824 return; 4825 } 4826 } 4827 NEXTL(l); 4828 cur = CUR_CHAR(l); 4829 if (cur == 0) { 4830 SHRINK; 4831 GROW; 4832 cur = CUR_CHAR(l); 4833 } 4834 } 4835 buf[len] = 0; 4836 if (cur == 0) { 4837 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4838 "Comment not terminated \n<!--%.50s\n", buf); 4839 } else if (!IS_CHAR(cur)) { 4840 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4841 "xmlParseComment: invalid xmlChar value %d\n", 4842 cur); 4843 } else { 4844 if (inputid != ctxt->input->id) { 4845 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4846 "Comment doesn't start and stop in the same" 4847 " entity\n"); 4848 } 4849 NEXT; 4850 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4851 (!ctxt->disableSAX)) 4852 ctxt->sax->comment(ctxt->userData, buf); 4853 } 4854 xmlFree(buf); 4855 return; 4856 not_terminated: 4857 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4858 "Comment not terminated\n", NULL); 4859 xmlFree(buf); 4860 return; 4861 } 4862 4863 /** 4864 * xmlParseComment: 4865 * @ctxt: an XML parser context 4866 * 4867 * Skip an XML (SGML) comment <!-- .... 
--> 4868 * The spec says that "For compatibility, the string "--" (double-hyphen) 4869 * must not occur within comments. " 4870 * 4871 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4872 */ 4873 void 4874 xmlParseComment(xmlParserCtxtPtr ctxt) { 4875 xmlChar *buf = NULL; 4876 size_t size = XML_PARSER_BUFFER_SIZE; 4877 size_t len = 0; 4878 xmlParserInputState state; 4879 const xmlChar *in; 4880 size_t nbchar = 0; 4881 int ccol; 4882 int inputid; 4883 4884 /* 4885 * Check that there is a comment right here. 4886 */ 4887 if ((RAW != '<') || (NXT(1) != '!') || 4888 (NXT(2) != '-') || (NXT(3) != '-')) return; 4889 state = ctxt->instate; 4890 ctxt->instate = XML_PARSER_COMMENT; 4891 inputid = ctxt->input->id; 4892 SKIP(4); 4893 SHRINK; 4894 GROW; 4895 4896 /* 4897 * Accelerated common case where input don't need to be 4898 * modified before passing it to the handler. 4899 */ 4900 in = ctxt->input->cur; 4901 do { 4902 if (*in == 0xA) { 4903 do { 4904 ctxt->input->line++; ctxt->input->col = 1; 4905 in++; 4906 } while (*in == 0xA); 4907 } 4908 get_more: 4909 ccol = ctxt->input->col; 4910 while (((*in > '-') && (*in <= 0x7F)) || 4911 ((*in >= 0x20) && (*in < '-')) || 4912 (*in == 0x09)) { 4913 in++; 4914 ccol++; 4915 } 4916 ctxt->input->col = ccol; 4917 if (*in == 0xA) { 4918 do { 4919 ctxt->input->line++; ctxt->input->col = 1; 4920 in++; 4921 } while (*in == 0xA); 4922 goto get_more; 4923 } 4924 nbchar = in - ctxt->input->cur; 4925 /* 4926 * save current set of data 4927 */ 4928 if (nbchar > 0) { 4929 if ((ctxt->sax != NULL) && 4930 (ctxt->sax->comment != NULL)) { 4931 if (buf == NULL) { 4932 if ((*in == '-') && (in[1] == '-')) 4933 size = nbchar + 1; 4934 else 4935 size = XML_PARSER_BUFFER_SIZE + nbchar; 4936 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4937 if (buf == NULL) { 4938 xmlErrMemory(ctxt, NULL); 4939 ctxt->instate = state; 4940 return; 4941 } 4942 len = 0; 4943 } else if (len + nbchar + 1 >= size) { 4944 xmlChar *new_buf; 
4945 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4946 new_buf = (xmlChar *) xmlRealloc(buf, 4947 size * sizeof(xmlChar)); 4948 if (new_buf == NULL) { 4949 xmlFree (buf); 4950 xmlErrMemory(ctxt, NULL); 4951 ctxt->instate = state; 4952 return; 4953 } 4954 buf = new_buf; 4955 } 4956 memcpy(&buf[len], ctxt->input->cur, nbchar); 4957 len += nbchar; 4958 buf[len] = 0; 4959 } 4960 } 4961 if ((len > XML_MAX_TEXT_LENGTH) && 4962 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4963 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4964 "Comment too big found", NULL); 4965 xmlFree (buf); 4966 return; 4967 } 4968 ctxt->input->cur = in; 4969 if (*in == 0xA) { 4970 in++; 4971 ctxt->input->line++; ctxt->input->col = 1; 4972 } 4973 if (*in == 0xD) { 4974 in++; 4975 if (*in == 0xA) { 4976 ctxt->input->cur = in; 4977 in++; 4978 ctxt->input->line++; ctxt->input->col = 1; 4979 goto get_more; 4980 } 4981 in--; 4982 } 4983 SHRINK; 4984 GROW; 4985 if (ctxt->instate == XML_PARSER_EOF) { 4986 xmlFree(buf); 4987 return; 4988 } 4989 in = ctxt->input->cur; 4990 if (*in == '-') { 4991 if (in[1] == '-') { 4992 if (in[2] == '>') { 4993 if (ctxt->input->id != inputid) { 4994 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4995 "comment doesn't start and stop in the" 4996 " same entity\n"); 4997 } 4998 SKIP(3); 4999 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5000 (!ctxt->disableSAX)) { 5001 if (buf != NULL) 5002 ctxt->sax->comment(ctxt->userData, buf); 5003 else 5004 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5005 } 5006 if (buf != NULL) 5007 xmlFree(buf); 5008 if (ctxt->instate != XML_PARSER_EOF) 5009 ctxt->instate = state; 5010 return; 5011 } 5012 if (buf != NULL) { 5013 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5014 "Double hyphen within comment: " 5015 "<!--%.50s\n", 5016 buf); 5017 } else 5018 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5019 "Double hyphen within comment\n", NULL); 5020 if (ctxt->instate == XML_PARSER_EOF) { 5021 xmlFree(buf); 5022 
return; 5023 } 5024 in++; 5025 ctxt->input->col++; 5026 } 5027 in++; 5028 ctxt->input->col++; 5029 goto get_more; 5030 } 5031 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); 5032 xmlParseCommentComplex(ctxt, buf, len, size); 5033 ctxt->instate = state; 5034 return; 5035 } 5036 5037 5038 /** 5039 * xmlParsePITarget: 5040 * @ctxt: an XML parser context 5041 * 5042 * parse the name of a PI 5043 * 5044 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5045 * 5046 * Returns the PITarget name or NULL 5047 */ 5048 5049 const xmlChar * 5050 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5051 const xmlChar *name; 5052 5053 name = xmlParseName(ctxt); 5054 if ((name != NULL) && 5055 ((name[0] == 'x') || (name[0] == 'X')) && 5056 ((name[1] == 'm') || (name[1] == 'M')) && 5057 ((name[2] == 'l') || (name[2] == 'L'))) { 5058 int i; 5059 if ((name[0] == 'x') && (name[1] == 'm') && 5060 (name[2] == 'l') && (name[3] == 0)) { 5061 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5062 "XML declaration allowed only at the start of the document\n"); 5063 return(name); 5064 } else if (name[3] == 0) { 5065 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5066 return(name); 5067 } 5068 for (i = 0;;i++) { 5069 if (xmlW3CPIs[i] == NULL) break; 5070 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5071 return(name); 5072 } 5073 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5074 "xmlParsePITarget: invalid name prefix 'xml'\n", 5075 NULL, NULL); 5076 } 5077 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5078 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5079 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5080 } 5081 return(name); 5082 } 5083 5084 #ifdef LIBXML_CATALOG_ENABLED 5085 /** 5086 * xmlParseCatalogPI: 5087 * @ctxt: an XML parser context 5088 * @catalog: the PI value string 5089 * 5090 * parse an XML Catalog Processing Instruction. 
5091 * 5092 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5093 * 5094 * Occurs only if allowed by the user and if happening in the Misc 5095 * part of the document before any doctype information 5096 * This will add the given catalog to the parsing context in order 5097 * to be used if there is a resolution need further down in the document 5098 */ 5099 5100 static void 5101 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5102 xmlChar *URL = NULL; 5103 const xmlChar *tmp, *base; 5104 xmlChar marker; 5105 5106 tmp = catalog; 5107 while (IS_BLANK_CH(*tmp)) tmp++; 5108 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5109 goto error; 5110 tmp += 7; 5111 while (IS_BLANK_CH(*tmp)) tmp++; 5112 if (*tmp != '=') { 5113 return; 5114 } 5115 tmp++; 5116 while (IS_BLANK_CH(*tmp)) tmp++; 5117 marker = *tmp; 5118 if ((marker != '\'') && (marker != '"')) 5119 goto error; 5120 tmp++; 5121 base = tmp; 5122 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5123 if (*tmp == 0) 5124 goto error; 5125 URL = xmlStrndup(base, tmp - base); 5126 tmp++; 5127 while (IS_BLANK_CH(*tmp)) tmp++; 5128 if (*tmp != 0) 5129 goto error; 5130 5131 if (URL != NULL) { 5132 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5133 xmlFree(URL); 5134 } 5135 return; 5136 5137 error: 5138 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5139 "Catalog PI syntax error: %s\n", 5140 catalog, NULL); 5141 if (URL != NULL) 5142 xmlFree(URL); 5143 } 5144 #endif 5145 5146 /** 5147 * xmlParsePI: 5148 * @ctxt: an XML parser context 5149 * 5150 * parse an XML Processing Instruction. 5151 * 5152 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5153 * 5154 * The processing is transferred to SAX once parsed. 
5155 */ 5156 5157 void 5158 xmlParsePI(xmlParserCtxtPtr ctxt) { 5159 xmlChar *buf = NULL; 5160 size_t len = 0; 5161 size_t size = XML_PARSER_BUFFER_SIZE; 5162 int cur, l; 5163 const xmlChar *target; 5164 xmlParserInputState state; 5165 int count = 0; 5166 5167 if ((RAW == '<') && (NXT(1) == '?')) { 5168 int inputid = ctxt->input->id; 5169 state = ctxt->instate; 5170 ctxt->instate = XML_PARSER_PI; 5171 /* 5172 * this is a Processing Instruction. 5173 */ 5174 SKIP(2); 5175 SHRINK; 5176 5177 /* 5178 * Parse the target name and check for special support like 5179 * namespace. 5180 */ 5181 target = xmlParsePITarget(ctxt); 5182 if (target != NULL) { 5183 if ((RAW == '?') && (NXT(1) == '>')) { 5184 if (inputid != ctxt->input->id) { 5185 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5186 "PI declaration doesn't start and stop in" 5187 " the same entity\n"); 5188 } 5189 SKIP(2); 5190 5191 /* 5192 * SAX: PI detected. 5193 */ 5194 if ((ctxt->sax) && (!ctxt->disableSAX) && 5195 (ctxt->sax->processingInstruction != NULL)) 5196 ctxt->sax->processingInstruction(ctxt->userData, 5197 target, NULL); 5198 if (ctxt->instate != XML_PARSER_EOF) 5199 ctxt->instate = state; 5200 return; 5201 } 5202 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5203 if (buf == NULL) { 5204 xmlErrMemory(ctxt, NULL); 5205 ctxt->instate = state; 5206 return; 5207 } 5208 if (SKIP_BLANKS == 0) { 5209 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5210 "ParsePI: PI %s space expected\n", target); 5211 } 5212 cur = CUR_CHAR(l); 5213 while (IS_CHAR(cur) && /* checked */ 5214 ((cur != '?') || (NXT(1) != '>'))) { 5215 if (len + 5 >= size) { 5216 xmlChar *tmp; 5217 size_t new_size = size * 2; 5218 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5219 if (tmp == NULL) { 5220 xmlErrMemory(ctxt, NULL); 5221 xmlFree(buf); 5222 ctxt->instate = state; 5223 return; 5224 } 5225 buf = tmp; 5226 size = new_size; 5227 } 5228 count++; 5229 if (count > 50) { 5230 SHRINK; 5231 GROW; 5232 if (ctxt->instate == 
XML_PARSER_EOF) { 5233 xmlFree(buf); 5234 return; 5235 } 5236 count = 0; 5237 if ((len > XML_MAX_TEXT_LENGTH) && 5238 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5239 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5240 "PI %s too big found", target); 5241 xmlFree(buf); 5242 ctxt->instate = state; 5243 return; 5244 } 5245 } 5246 COPY_BUF(l,buf,len,cur); 5247 NEXTL(l); 5248 cur = CUR_CHAR(l); 5249 if (cur == 0) { 5250 SHRINK; 5251 GROW; 5252 cur = CUR_CHAR(l); 5253 } 5254 } 5255 if ((len > XML_MAX_TEXT_LENGTH) && 5256 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5257 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5258 "PI %s too big found", target); 5259 xmlFree(buf); 5260 ctxt->instate = state; 5261 return; 5262 } 5263 buf[len] = 0; 5264 if (cur != '?') { 5265 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5266 "ParsePI: PI %s never end ...\n", target); 5267 } else { 5268 if (inputid != ctxt->input->id) { 5269 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5270 "PI declaration doesn't start and stop in" 5271 " the same entity\n"); 5272 } 5273 SKIP(2); 5274 5275 #ifdef LIBXML_CATALOG_ENABLED 5276 if (((state == XML_PARSER_MISC) || 5277 (state == XML_PARSER_START)) && 5278 (xmlStrEqual(target, XML_CATALOG_PI))) { 5279 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5280 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5281 (allow == XML_CATA_ALLOW_ALL)) 5282 xmlParseCatalogPI(ctxt, buf); 5283 } 5284 #endif 5285 5286 5287 /* 5288 * SAX: PI detected. 
5289 */ 5290 if ((ctxt->sax) && (!ctxt->disableSAX) && 5291 (ctxt->sax->processingInstruction != NULL)) 5292 ctxt->sax->processingInstruction(ctxt->userData, 5293 target, buf); 5294 } 5295 xmlFree(buf); 5296 } else { 5297 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5298 } 5299 if (ctxt->instate != XML_PARSER_EOF) 5300 ctxt->instate = state; 5301 } 5302 } 5303 5304 /** 5305 * xmlParseNotationDecl: 5306 * @ctxt: an XML parser context 5307 * 5308 * parse a notation declaration 5309 * 5310 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5311 * 5312 * Hence there is actually 3 choices: 5313 * 'PUBLIC' S PubidLiteral 5314 * 'PUBLIC' S PubidLiteral S SystemLiteral 5315 * and 'SYSTEM' S SystemLiteral 5316 * 5317 * See the NOTE on xmlParseExternalID(). 5318 */ 5319 5320 void 5321 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5322 const xmlChar *name; 5323 xmlChar *Pubid; 5324 xmlChar *Systemid; 5325 5326 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5327 int inputid = ctxt->input->id; 5328 SHRINK; 5329 SKIP(10); 5330 if (SKIP_BLANKS == 0) { 5331 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5332 "Space required after '<!NOTATION'\n"); 5333 return; 5334 } 5335 5336 name = xmlParseName(ctxt); 5337 if (name == NULL) { 5338 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5339 return; 5340 } 5341 if (xmlStrchr(name, ':') != NULL) { 5342 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5343 "colons are forbidden from notation names '%s'\n", 5344 name, NULL, NULL); 5345 } 5346 if (SKIP_BLANKS == 0) { 5347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5348 "Space required after the NOTATION name'\n"); 5349 return; 5350 } 5351 5352 /* 5353 * Parse the IDs. 
5354 */ 5355 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5356 SKIP_BLANKS; 5357 5358 if (RAW == '>') { 5359 if (inputid != ctxt->input->id) { 5360 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5361 "Notation declaration doesn't start and stop" 5362 " in the same entity\n"); 5363 } 5364 NEXT; 5365 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5366 (ctxt->sax->notationDecl != NULL)) 5367 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5368 } else { 5369 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5370 } 5371 if (Systemid != NULL) xmlFree(Systemid); 5372 if (Pubid != NULL) xmlFree(Pubid); 5373 } 5374 } 5375 5376 /** 5377 * xmlParseEntityDecl: 5378 * @ctxt: an XML parser context 5379 * 5380 * parse <!ENTITY declarations 5381 * 5382 * [70] EntityDecl ::= GEDecl | PEDecl 5383 * 5384 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5385 * 5386 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5387 * 5388 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5389 * 5390 * [74] PEDef ::= EntityValue | ExternalID 5391 * 5392 * [76] NDataDecl ::= S 'NDATA' S Name 5393 * 5394 * [ VC: Notation Declared ] 5395 * The Name must match the declared name of a notation. 
5396 */ 5397 5398 void 5399 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5400 const xmlChar *name = NULL; 5401 xmlChar *value = NULL; 5402 xmlChar *URI = NULL, *literal = NULL; 5403 const xmlChar *ndata = NULL; 5404 int isParameter = 0; 5405 xmlChar *orig = NULL; 5406 5407 /* GROW; done in the caller */ 5408 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5409 int inputid = ctxt->input->id; 5410 SHRINK; 5411 SKIP(8); 5412 if (SKIP_BLANKS == 0) { 5413 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5414 "Space required after '<!ENTITY'\n"); 5415 } 5416 5417 if (RAW == '%') { 5418 NEXT; 5419 if (SKIP_BLANKS == 0) { 5420 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5421 "Space required after '%%'\n"); 5422 } 5423 isParameter = 1; 5424 } 5425 5426 name = xmlParseName(ctxt); 5427 if (name == NULL) { 5428 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5429 "xmlParseEntityDecl: no name\n"); 5430 return; 5431 } 5432 if (xmlStrchr(name, ':') != NULL) { 5433 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5434 "colons are forbidden from entities names '%s'\n", 5435 name, NULL, NULL); 5436 } 5437 if (SKIP_BLANKS == 0) { 5438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5439 "Space required after the entity name\n"); 5440 } 5441 5442 ctxt->instate = XML_PARSER_ENTITY_DECL; 5443 /* 5444 * handle the various case of definitions... 
5445 */ 5446 if (isParameter) { 5447 if ((RAW == '"') || (RAW == '\'')) { 5448 value = xmlParseEntityValue(ctxt, &orig); 5449 if (value) { 5450 if ((ctxt->sax != NULL) && 5451 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5452 ctxt->sax->entityDecl(ctxt->userData, name, 5453 XML_INTERNAL_PARAMETER_ENTITY, 5454 NULL, NULL, value); 5455 } 5456 } else { 5457 URI = xmlParseExternalID(ctxt, &literal, 1); 5458 if ((URI == NULL) && (literal == NULL)) { 5459 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5460 } 5461 if (URI) { 5462 xmlURIPtr uri; 5463 5464 uri = xmlParseURI((const char *) URI); 5465 if (uri == NULL) { 5466 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5467 "Invalid URI: %s\n", URI); 5468 /* 5469 * This really ought to be a well formedness error 5470 * but the XML Core WG decided otherwise c.f. issue 5471 * E26 of the XML erratas. 5472 */ 5473 } else { 5474 if (uri->fragment != NULL) { 5475 /* 5476 * Okay this is foolish to block those but not 5477 * invalid URIs. 5478 */ 5479 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5480 } else { 5481 if ((ctxt->sax != NULL) && 5482 (!ctxt->disableSAX) && 5483 (ctxt->sax->entityDecl != NULL)) 5484 ctxt->sax->entityDecl(ctxt->userData, name, 5485 XML_EXTERNAL_PARAMETER_ENTITY, 5486 literal, URI, NULL); 5487 } 5488 xmlFreeURI(uri); 5489 } 5490 } 5491 } 5492 } else { 5493 if ((RAW == '"') || (RAW == '\'')) { 5494 value = xmlParseEntityValue(ctxt, &orig); 5495 if ((ctxt->sax != NULL) && 5496 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5497 ctxt->sax->entityDecl(ctxt->userData, name, 5498 XML_INTERNAL_GENERAL_ENTITY, 5499 NULL, NULL, value); 5500 /* 5501 * For expat compatibility in SAX mode. 
5502 */ 5503 if ((ctxt->myDoc == NULL) || 5504 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5505 if (ctxt->myDoc == NULL) { 5506 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5507 if (ctxt->myDoc == NULL) { 5508 xmlErrMemory(ctxt, "New Doc failed"); 5509 return; 5510 } 5511 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5512 } 5513 if (ctxt->myDoc->intSubset == NULL) 5514 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5515 BAD_CAST "fake", NULL, NULL); 5516 5517 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5518 NULL, NULL, value); 5519 } 5520 } else { 5521 URI = xmlParseExternalID(ctxt, &literal, 1); 5522 if ((URI == NULL) && (literal == NULL)) { 5523 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5524 } 5525 if (URI) { 5526 xmlURIPtr uri; 5527 5528 uri = xmlParseURI((const char *)URI); 5529 if (uri == NULL) { 5530 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5531 "Invalid URI: %s\n", URI); 5532 /* 5533 * This really ought to be a well formedness error 5534 * but the XML Core WG decided otherwise c.f. issue 5535 * E26 of the XML erratas. 5536 */ 5537 } else { 5538 if (uri->fragment != NULL) { 5539 /* 5540 * Okay this is foolish to block those but not 5541 * invalid URIs. 
5542 */ 5543 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5544 } 5545 xmlFreeURI(uri); 5546 } 5547 } 5548 if ((RAW != '>') && (SKIP_BLANKS == 0)) { 5549 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5550 "Space required before 'NDATA'\n"); 5551 } 5552 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5553 SKIP(5); 5554 if (SKIP_BLANKS == 0) { 5555 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5556 "Space required after 'NDATA'\n"); 5557 } 5558 ndata = xmlParseName(ctxt); 5559 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5560 (ctxt->sax->unparsedEntityDecl != NULL)) 5561 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5562 literal, URI, ndata); 5563 } else { 5564 if ((ctxt->sax != NULL) && 5565 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5566 ctxt->sax->entityDecl(ctxt->userData, name, 5567 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5568 literal, URI, NULL); 5569 /* 5570 * For expat compatibility in SAX mode. 5571 * assuming the entity replacement was asked for 5572 */ 5573 if ((ctxt->replaceEntities != 0) && 5574 ((ctxt->myDoc == NULL) || 5575 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5576 if (ctxt->myDoc == NULL) { 5577 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5578 if (ctxt->myDoc == NULL) { 5579 xmlErrMemory(ctxt, "New Doc failed"); 5580 return; 5581 } 5582 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5583 } 5584 5585 if (ctxt->myDoc->intSubset == NULL) 5586 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5587 BAD_CAST "fake", NULL, NULL); 5588 xmlSAX2EntityDecl(ctxt, name, 5589 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5590 literal, URI, NULL); 5591 } 5592 } 5593 } 5594 } 5595 if (ctxt->instate == XML_PARSER_EOF) 5596 goto done; 5597 SKIP_BLANKS; 5598 if (RAW != '>') { 5599 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5600 "xmlParseEntityDecl: entity %s not terminated\n", name); 5601 xmlHaltParser(ctxt); 5602 } else { 5603 if (inputid != ctxt->input->id) { 5604 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5605 "Entity declaration 
doesn't start and stop in" 5606 " the same entity\n"); 5607 } 5608 NEXT; 5609 } 5610 if (orig != NULL) { 5611 /* 5612 * Ugly mechanism to save the raw entity value. 5613 */ 5614 xmlEntityPtr cur = NULL; 5615 5616 if (isParameter) { 5617 if ((ctxt->sax != NULL) && 5618 (ctxt->sax->getParameterEntity != NULL)) 5619 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5620 } else { 5621 if ((ctxt->sax != NULL) && 5622 (ctxt->sax->getEntity != NULL)) 5623 cur = ctxt->sax->getEntity(ctxt->userData, name); 5624 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5625 cur = xmlSAX2GetEntity(ctxt, name); 5626 } 5627 } 5628 if ((cur != NULL) && (cur->orig == NULL)) { 5629 cur->orig = orig; 5630 orig = NULL; 5631 } 5632 } 5633 5634 done: 5635 if (value != NULL) xmlFree(value); 5636 if (URI != NULL) xmlFree(URI); 5637 if (literal != NULL) xmlFree(literal); 5638 if (orig != NULL) xmlFree(orig); 5639 } 5640 } 5641 5642 /** 5643 * xmlParseDefaultDecl: 5644 * @ctxt: an XML parser context 5645 * @value: Receive a possible fixed default value for the attribute 5646 * 5647 * Parse an attribute default declaration 5648 * 5649 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5650 * 5651 * [ VC: Required Attribute ] 5652 * if the default declaration is the keyword #REQUIRED, then the 5653 * attribute must be specified for all elements of the type in the 5654 * attribute-list declaration. 5655 * 5656 * [ VC: Attribute Default Legal ] 5657 * The declared default value must meet the lexical constraints of 5658 * the declared attribute type c.f. xmlValidateAttributeDecl() 5659 * 5660 * [ VC: Fixed Attribute Default ] 5661 * if an attribute has a default value declared with the #FIXED 5662 * keyword, instances of that attribute must match the default value. 5663 * 5664 * [ WFC: No < in Attribute Values ] 5665 * handled in xmlParseAttValue() 5666 * 5667 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5668 * or XML_ATTRIBUTE_FIXED. 
5669 */ 5670 5671 int 5672 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5673 int val; 5674 xmlChar *ret; 5675 5676 *value = NULL; 5677 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5678 SKIP(9); 5679 return(XML_ATTRIBUTE_REQUIRED); 5680 } 5681 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5682 SKIP(8); 5683 return(XML_ATTRIBUTE_IMPLIED); 5684 } 5685 val = XML_ATTRIBUTE_NONE; 5686 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5687 SKIP(6); 5688 val = XML_ATTRIBUTE_FIXED; 5689 if (SKIP_BLANKS == 0) { 5690 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5691 "Space required after '#FIXED'\n"); 5692 } 5693 } 5694 ret = xmlParseAttValue(ctxt); 5695 ctxt->instate = XML_PARSER_DTD; 5696 if (ret == NULL) { 5697 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5698 "Attribute default value declaration error\n"); 5699 } else 5700 *value = ret; 5701 return(val); 5702 } 5703 5704 /** 5705 * xmlParseNotationType: 5706 * @ctxt: an XML parser context 5707 * 5708 * parse an Notation attribute type. 5709 * 5710 * Note: the leading 'NOTATION' S part has already being parsed... 5711 * 5712 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5713 * 5714 * [ VC: Notation Attributes ] 5715 * Values of this type must match one of the notation names included 5716 * in the declaration; all notation names in the declaration must be declared. 
5717 * 5718 * Returns: the notation attribute tree built while parsing 5719 */ 5720 5721 xmlEnumerationPtr 5722 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5723 const xmlChar *name; 5724 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5725 5726 if (RAW != '(') { 5727 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5728 return(NULL); 5729 } 5730 SHRINK; 5731 do { 5732 NEXT; 5733 SKIP_BLANKS; 5734 name = xmlParseName(ctxt); 5735 if (name == NULL) { 5736 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5737 "Name expected in NOTATION declaration\n"); 5738 xmlFreeEnumeration(ret); 5739 return(NULL); 5740 } 5741 tmp = ret; 5742 while (tmp != NULL) { 5743 if (xmlStrEqual(name, tmp->name)) { 5744 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5745 "standalone: attribute notation value token %s duplicated\n", 5746 name, NULL); 5747 if (!xmlDictOwns(ctxt->dict, name)) 5748 xmlFree((xmlChar *) name); 5749 break; 5750 } 5751 tmp = tmp->next; 5752 } 5753 if (tmp == NULL) { 5754 cur = xmlCreateEnumeration(name); 5755 if (cur == NULL) { 5756 xmlFreeEnumeration(ret); 5757 return(NULL); 5758 } 5759 if (last == NULL) ret = last = cur; 5760 else { 5761 last->next = cur; 5762 last = cur; 5763 } 5764 } 5765 SKIP_BLANKS; 5766 } while (RAW == '|'); 5767 if (RAW != ')') { 5768 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5769 xmlFreeEnumeration(ret); 5770 return(NULL); 5771 } 5772 NEXT; 5773 return(ret); 5774 } 5775 5776 /** 5777 * xmlParseEnumerationType: 5778 * @ctxt: an XML parser context 5779 * 5780 * parse an Enumeration attribute type. 5781 * 5782 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5783 * 5784 * [ VC: Enumeration ] 5785 * Values of this type must match one of the Nmtoken tokens in 5786 * the declaration 5787 * 5788 * Returns: the enumeration attribute tree built while parsing 5789 */ 5790 5791 xmlEnumerationPtr 5792 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5793 xmlChar *name; 5794 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5795 5796 if (RAW != '(') { 5797 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5798 return(NULL); 5799 } 5800 SHRINK; 5801 do { 5802 NEXT; 5803 SKIP_BLANKS; 5804 name = xmlParseNmtoken(ctxt); 5805 if (name == NULL) { 5806 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5807 return(ret); 5808 } 5809 tmp = ret; 5810 while (tmp != NULL) { 5811 if (xmlStrEqual(name, tmp->name)) { 5812 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5813 "standalone: attribute enumeration value token %s duplicated\n", 5814 name, NULL); 5815 if (!xmlDictOwns(ctxt->dict, name)) 5816 xmlFree(name); 5817 break; 5818 } 5819 tmp = tmp->next; 5820 } 5821 if (tmp == NULL) { 5822 cur = xmlCreateEnumeration(name); 5823 if (!xmlDictOwns(ctxt->dict, name)) 5824 xmlFree(name); 5825 if (cur == NULL) { 5826 xmlFreeEnumeration(ret); 5827 return(NULL); 5828 } 5829 if (last == NULL) ret = last = cur; 5830 else { 5831 last->next = cur; 5832 last = cur; 5833 } 5834 } 5835 SKIP_BLANKS; 5836 } while (RAW == '|'); 5837 if (RAW != ')') { 5838 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5839 return(ret); 5840 } 5841 NEXT; 5842 return(ret); 5843 } 5844 5845 /** 5846 * xmlParseEnumeratedType: 5847 * @ctxt: an XML parser context 5848 * @tree: the enumeration tree built while parsing 5849 * 5850 * parse an Enumerated attribute type. 5851 * 5852 * [57] EnumeratedType ::= NotationType | Enumeration 5853 * 5854 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5855 * 5856 * 5857 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5858 */ 5859 5860 int 5861 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5862 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5863 SKIP(8); 5864 if (SKIP_BLANKS == 0) { 5865 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5866 "Space required after 'NOTATION'\n"); 5867 return(0); 5868 } 5869 *tree = xmlParseNotationType(ctxt); 5870 if (*tree == NULL) return(0); 5871 return(XML_ATTRIBUTE_NOTATION); 5872 } 5873 *tree = xmlParseEnumerationType(ctxt); 5874 if (*tree == NULL) return(0); 5875 return(XML_ATTRIBUTE_ENUMERATION); 5876 } 5877 5878 /** 5879 * xmlParseAttributeType: 5880 * @ctxt: an XML parser context 5881 * @tree: the enumeration tree built while parsing 5882 * 5883 * parse the Attribute list def for an element 5884 * 5885 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5886 * 5887 * [55] StringType ::= 'CDATA' 5888 * 5889 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5890 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5891 * 5892 * Validity constraints for attribute values syntax are checked in 5893 * xmlValidateAttributeValue() 5894 * 5895 * [ VC: ID ] 5896 * Values of type ID must match the Name production. A name must not 5897 * appear more than once in an XML document as a value of this type; 5898 * i.e., ID values must uniquely identify the elements which bear them. 5899 * 5900 * [ VC: One ID per Element Type ] 5901 * No element type may have more than one ID attribute specified. 5902 * 5903 * [ VC: ID Attribute Default ] 5904 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5905 * 5906 * [ VC: IDREF ] 5907 * Values of type IDREF must match the Name production, and values 5908 * of type IDREFS must match Names; each IDREF Name must match the value 5909 * of an ID attribute on some element in the XML document; i.e. IDREF 5910 * values must match the value of some ID attribute. 
5911 * 5912 * [ VC: Entity Name ] 5913 * Values of type ENTITY must match the Name production, values 5914 * of type ENTITIES must match Names; each Entity Name must match the 5915 * name of an unparsed entity declared in the DTD. 5916 * 5917 * [ VC: Name Token ] 5918 * Values of type NMTOKEN must match the Nmtoken production; values 5919 * of type NMTOKENS must match Nmtokens. 5920 * 5921 * Returns the attribute type 5922 */ 5923 int 5924 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5925 SHRINK; 5926 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5927 SKIP(5); 5928 return(XML_ATTRIBUTE_CDATA); 5929 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5930 SKIP(6); 5931 return(XML_ATTRIBUTE_IDREFS); 5932 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5933 SKIP(5); 5934 return(XML_ATTRIBUTE_IDREF); 5935 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5936 SKIP(2); 5937 return(XML_ATTRIBUTE_ID); 5938 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5939 SKIP(6); 5940 return(XML_ATTRIBUTE_ENTITY); 5941 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5942 SKIP(8); 5943 return(XML_ATTRIBUTE_ENTITIES); 5944 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5945 SKIP(8); 5946 return(XML_ATTRIBUTE_NMTOKENS); 5947 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5948 SKIP(7); 5949 return(XML_ATTRIBUTE_NMTOKEN); 5950 } 5951 return(xmlParseEnumeratedType(ctxt, tree)); 5952 } 5953 5954 /** 5955 * xmlParseAttributeListDecl: 5956 * @ctxt: an XML parser context 5957 * 5958 * : parse the Attribute list def for an element 5959 * 5960 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5961 * 5962 * [53] AttDef ::= S Name S AttType S DefaultDecl 5963 * 5964 */ 5965 void 5966 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5967 const xmlChar *elemName; 5968 const xmlChar *attrName; 5969 xmlEnumerationPtr tree; 5970 5971 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5972 int inputid = ctxt->input->id; 5973 5974 SKIP(9); 5975 if (SKIP_BLANKS == 0) { 5976 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5977 "Space required after '<!ATTLIST'\n"); 5978 } 5979 elemName = xmlParseName(ctxt); 5980 if (elemName == NULL) { 5981 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5982 "ATTLIST: no name for Element\n"); 5983 return; 5984 } 5985 SKIP_BLANKS; 5986 GROW; 5987 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 5988 int type; 5989 int def; 5990 xmlChar *defaultValue = NULL; 5991 5992 GROW; 5993 tree = NULL; 5994 attrName = xmlParseName(ctxt); 5995 if (attrName == NULL) { 5996 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5997 "ATTLIST: no name for Attribute\n"); 5998 break; 5999 } 6000 GROW; 6001 if (SKIP_BLANKS == 0) { 6002 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6003 "Space required after the attribute name\n"); 6004 break; 6005 } 6006 6007 type = xmlParseAttributeType(ctxt, &tree); 6008 if (type <= 0) { 6009 break; 6010 } 6011 6012 GROW; 6013 if (SKIP_BLANKS == 0) { 6014 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6015 "Space required after the attribute type\n"); 6016 if (tree != NULL) 6017 xmlFreeEnumeration(tree); 6018 break; 6019 } 6020 6021 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6022 if (def <= 0) { 6023 if (defaultValue != NULL) 6024 xmlFree(defaultValue); 6025 if (tree != NULL) 6026 xmlFreeEnumeration(tree); 6027 break; 6028 } 6029 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6030 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6031 6032 GROW; 6033 if (RAW != '>') { 6034 if (SKIP_BLANKS == 0) { 6035 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6036 "Space required after the 
attribute default value\n"); 6037 if (defaultValue != NULL) 6038 xmlFree(defaultValue); 6039 if (tree != NULL) 6040 xmlFreeEnumeration(tree); 6041 break; 6042 } 6043 } 6044 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6045 (ctxt->sax->attributeDecl != NULL)) 6046 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6047 type, def, defaultValue, tree); 6048 else if (tree != NULL) 6049 xmlFreeEnumeration(tree); 6050 6051 if ((ctxt->sax2) && (defaultValue != NULL) && 6052 (def != XML_ATTRIBUTE_IMPLIED) && 6053 (def != XML_ATTRIBUTE_REQUIRED)) { 6054 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6055 } 6056 if (ctxt->sax2) { 6057 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6058 } 6059 if (defaultValue != NULL) 6060 xmlFree(defaultValue); 6061 GROW; 6062 } 6063 if (RAW == '>') { 6064 if (inputid != ctxt->input->id) { 6065 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6066 "Attribute list declaration doesn't start and" 6067 " stop in the same entity\n"); 6068 } 6069 NEXT; 6070 } 6071 } 6072 } 6073 6074 /** 6075 * xmlParseElementMixedContentDecl: 6076 * @ctxt: an XML parser context 6077 * @inputchk: the input used for the current entity, needed for boundary checks 6078 * 6079 * parse the declaration for a Mixed Element content 6080 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6081 * 6082 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6083 * '(' S? '#PCDATA' S? ')' 6084 * 6085 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6086 * 6087 * [ VC: No Duplicate Types ] 6088 * The same name must not appear more than once in a single 6089 * mixed-content declaration. 
6090 * 6091 * returns: the list of the xmlElementContentPtr describing the element choices 6092 */ 6093 xmlElementContentPtr 6094 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6095 xmlElementContentPtr ret = NULL, cur = NULL, n; 6096 const xmlChar *elem = NULL; 6097 6098 GROW; 6099 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6100 SKIP(7); 6101 SKIP_BLANKS; 6102 SHRINK; 6103 if (RAW == ')') { 6104 if (ctxt->input->id != inputchk) { 6105 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6106 "Element content declaration doesn't start and" 6107 " stop in the same entity\n"); 6108 } 6109 NEXT; 6110 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6111 if (ret == NULL) 6112 return(NULL); 6113 if (RAW == '*') { 6114 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6115 NEXT; 6116 } 6117 return(ret); 6118 } 6119 if ((RAW == '(') || (RAW == '|')) { 6120 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6121 if (ret == NULL) return(NULL); 6122 } 6123 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6124 NEXT; 6125 if (elem == NULL) { 6126 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6127 if (ret == NULL) { 6128 xmlFreeDocElementContent(ctxt->myDoc, cur); 6129 return(NULL); 6130 } 6131 ret->c1 = cur; 6132 if (cur != NULL) 6133 cur->parent = ret; 6134 cur = ret; 6135 } else { 6136 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6137 if (n == NULL) { 6138 xmlFreeDocElementContent(ctxt->myDoc, ret); 6139 return(NULL); 6140 } 6141 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6142 if (n->c1 != NULL) 6143 n->c1->parent = n; 6144 cur->c2 = n; 6145 if (n != NULL) 6146 n->parent = cur; 6147 cur = n; 6148 } 6149 SKIP_BLANKS; 6150 elem = xmlParseName(ctxt); 6151 if (elem == NULL) { 6152 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6153 "xmlParseElementMixedContentDecl : Name expected\n"); 6154 
xmlFreeDocElementContent(ctxt->myDoc, ret); 6155 return(NULL); 6156 } 6157 SKIP_BLANKS; 6158 GROW; 6159 } 6160 if ((RAW == ')') && (NXT(1) == '*')) { 6161 if (elem != NULL) { 6162 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6163 XML_ELEMENT_CONTENT_ELEMENT); 6164 if (cur->c2 != NULL) 6165 cur->c2->parent = cur; 6166 } 6167 if (ret != NULL) 6168 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6169 if (ctxt->input->id != inputchk) { 6170 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6171 "Element content declaration doesn't start and" 6172 " stop in the same entity\n"); 6173 } 6174 SKIP(2); 6175 } else { 6176 xmlFreeDocElementContent(ctxt->myDoc, ret); 6177 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6178 return(NULL); 6179 } 6180 6181 } else { 6182 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6183 } 6184 return(ret); 6185 } 6186 6187 /** 6188 * xmlParseElementChildrenContentDeclPriv: 6189 * @ctxt: an XML parser context 6190 * @inputchk: the input used for the current entity, needed for boundary checks 6191 * @depth: the level of recursion 6192 * 6193 * parse the declaration for a Mixed Element content 6194 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6195 * 6196 * 6197 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6198 * 6199 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6200 * 6201 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6202 * 6203 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6204 * 6205 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6206 * TODO Parameter-entity replacement text must be properly nested 6207 * with parenthesized groups. That is to say, if either of the 6208 * opening or closing parentheses in a choice, seq, or Mixed 6209 * construct is contained in the replacement text for a parameter 6210 * entity, both must be contained in the same replacement text. 
 * For interoperability, if a parameter-entity reference appears in a
 * choice, seq, or Mixed construct, its replacement text should not
 * be empty, and neither the first nor last non-blank character of
 * the replacement text should be a connector (| or ,).
 *
 * Nested groups are parsed recursively with @depth + 1. The recursion is
 * refused beyond a depth of 128, or 2048 when XML_PARSE_HUGE is set.
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *         hierarchy, or NULL in case of error.
 */
static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
                                       int depth) {
    /*
     * ret:  root of the tree returned to the caller
     * cur:  operator node (or first item) the next item is attached to
     * last: most recently parsed item, not yet attached to the tree
     * op:   operator node created for the current separator
     * type: separator used by this group (',' or '|'), 0 until one is seen
     */
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
    const xmlChar *elem;
    xmlChar type = 0;

    /* Bound the recursion depth. */
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (depth > 2048)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
                          depth);
        return(NULL);
    }
    SKIP_BLANKS;
    GROW;

    /* First item of the group: a nested group or a Name. */
    if (RAW == '(') {
        int inputid = ctxt->input->id;

        /* Recurse on first child */
        NEXT;
        SKIP_BLANKS;
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                           depth + 1);
        if (cur == NULL)
            return(NULL);
        SKIP_BLANKS;
        GROW;
    } else {
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
            return(NULL);
        }
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
        if (cur == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        GROW;
        /* Optional occurrence indicator following the Name. */
        if (RAW == '?') {
            cur->ocur = XML_ELEMENT_CONTENT_OPT;
            NEXT;
        } else if (RAW == '*') {
            cur->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        } else if (RAW == '+') {
            cur->ocur = XML_ELEMENT_CONTENT_PLUS;
            NEXT;
        } else {
            cur->ocur = XML_ELEMENT_CONTENT_ONCE;
        }
        GROW;
    }
    SKIP_BLANKS;
    SHRINK;
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Each loop we parse one separator and one element.
         */
        if (RAW == ',') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name | Name , Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                /* last is only freed separately when it is not ret itself */
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (last == NULL) {
                /* First separator: the new operator becomes the root. */
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                /*
                 * Later separators: the new operator is hung as c2 of the
                 * current one and takes the pending item as its c1.
                 */
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else if (RAW == '|') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name , Name | Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            /* Same linking scheme as for the ',' separator above. */
            if (last == NULL) {
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else {
            /* Neither a separator nor the closing ')'. */
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
            if ((last != NULL) && (last != ret))
                xmlFreeDocElementContent(ctxt->myDoc, last);
            if (ret != NULL)
                xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        GROW;
        SKIP_BLANKS;
        GROW;

        /* Item following the separator: a nested group or a Name. */
        if (RAW == '(') {
            int inputid = ctxt->input->id;
            /* Recurse on second child */
            NEXT;
            SKIP_BLANKS;
            last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                          depth + 1);
            if (last == NULL) {
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            SKIP_BLANKS;
        } else {
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
            if (last == NULL) {
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (RAW == '?') {
                last->ocur = XML_ELEMENT_CONTENT_OPT;
                NEXT;
            } else if (RAW == '*') {
                last->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            } else if (RAW == '+') {
                last->ocur = XML_ELEMENT_CONTENT_PLUS;
                NEXT;
            } else {
                last->ocur = XML_ELEMENT_CONTENT_ONCE;
            }
        }
        SKIP_BLANKS;
        GROW;
    }

    /* Attach the item still pending as c2 of the last operator. */
    if ((cur != NULL) && (last != NULL)) {
        cur->c2 = last;
        if (last != NULL)
            last->parent = cur;
    }
    if (ctxt->input->id != inputchk) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                       "Element content declaration doesn't start and stop in"
                       " the same entity\n");
    }
    /* Step past the current character (the closing ')' on the normal path). */
    NEXT;

    /* Occurrence indicator applying to the whole group. */
    if (RAW == '?') {
        if (ret != NULL) {
            if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_OPT;
        }
        NEXT;
    } else if (RAW == '*') {
        if (ret != NULL) {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            cur = ret;
            /*
             * Some normalization:
             * (a | b* | c?)* == (a | b | c)*
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                cur = cur->c2;
            }
        }
        NEXT;
    } else if (RAW == '+') {
        if (ret != NULL) {
            int found = 0;

            if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_PLUS;
            /*
             * Some normalization:
             * (a | b*)+ == (a | b)*
             * (a | b?)+ == (a | b)*
             *
             * NOTE(review): unlike the '*' branch above, cur is not reset
             * to ret here, so the walk starts from wherever the parsing
             * loop left cur (the last operator node created) rather than
             * from the root -- confirm whether this is intended.
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                cur = cur->c2;
            }
            /* An optional alternative makes the whole group optional. */
            if (found)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
        }
        NEXT;
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDecl:
 * @ctxt: an XML parser context
 * @inputchk: the input used for
 *            the current entity, needed for boundary checks
 *
 * parse the declaration for an element children content model
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 * with parenthesized groups. That is to say, if either of the
 * opening or closing parentheses in a choice, seq, or Mixed
 * construct is contained in the replacement text for a parameter
 * entity, both must be contained in the same replacement text. For
 * interoperability, if a parameter-entity reference appears in a
 * choice, seq, or Mixed construct, its replacement text should not
 * be empty, and neither the first nor last non-blank character of
 * the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *         hierarchy.
 */
xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    /* stub left for API/ABI compat */
    /* Delegates to the depth-tracking implementation, starting at depth 1. */
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
}

/**
 * xmlParseElementContentDecl:
 * @ctxt: an XML parser context
 * @name: the name of the element being defined.
6532 * @result: the Element Content pointer will be stored here if any 6533 * 6534 * parse the declaration for an Element content either Mixed or Children, 6535 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6536 * 6537 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6538 * 6539 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6540 */ 6541 6542 int 6543 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6544 xmlElementContentPtr *result) { 6545 6546 xmlElementContentPtr tree = NULL; 6547 int inputid = ctxt->input->id; 6548 int res; 6549 6550 *result = NULL; 6551 6552 if (RAW != '(') { 6553 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6554 "xmlParseElementContentDecl : %s '(' expected\n", name); 6555 return(-1); 6556 } 6557 NEXT; 6558 GROW; 6559 if (ctxt->instate == XML_PARSER_EOF) 6560 return(-1); 6561 SKIP_BLANKS; 6562 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6563 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6564 res = XML_ELEMENT_TYPE_MIXED; 6565 } else { 6566 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6567 res = XML_ELEMENT_TYPE_ELEMENT; 6568 } 6569 SKIP_BLANKS; 6570 *result = tree; 6571 return(res); 6572 } 6573 6574 /** 6575 * xmlParseElementDecl: 6576 * @ctxt: an XML parser context 6577 * 6578 * parse an Element declaration. 6579 * 6580 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6581 * 6582 * [ VC: Unique Element Type Declaration ] 6583 * No element type may be declared more than once 6584 * 6585 * Returns the type of the element, or -1 in case of error 6586 */ 6587 int 6588 xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6589 const xmlChar *name; 6590 int ret = -1; 6591 xmlElementContentPtr content = NULL; 6592 6593 /* GROW; done in the caller */ 6594 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6595 int inputid = ctxt->input->id; 6596 6597 SKIP(9); 6598 if (SKIP_BLANKS == 0) { 6599 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6600 "Space required after 'ELEMENT'\n"); 6601 return(-1); 6602 } 6603 name = xmlParseName(ctxt); 6604 if (name == NULL) { 6605 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6606 "xmlParseElementDecl: no name for Element\n"); 6607 return(-1); 6608 } 6609 if (SKIP_BLANKS == 0) { 6610 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6611 "Space required after the element name\n"); 6612 } 6613 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6614 SKIP(5); 6615 /* 6616 * Element must always be empty. 6617 */ 6618 ret = XML_ELEMENT_TYPE_EMPTY; 6619 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6620 (NXT(2) == 'Y')) { 6621 SKIP(3); 6622 /* 6623 * Element is a generic container. 6624 */ 6625 ret = XML_ELEMENT_TYPE_ANY; 6626 } else if (RAW == '(') { 6627 ret = xmlParseElementContentDecl(ctxt, name, &content); 6628 } else { 6629 /* 6630 * [ WFC: PEs in Internal Subset ] error handling. 
6631 */ 6632 if ((RAW == '%') && (ctxt->external == 0) && 6633 (ctxt->inputNr == 1)) { 6634 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6635 "PEReference: forbidden within markup decl in internal subset\n"); 6636 } else { 6637 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6638 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6639 } 6640 return(-1); 6641 } 6642 6643 SKIP_BLANKS; 6644 6645 if (RAW != '>') { 6646 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6647 if (content != NULL) { 6648 xmlFreeDocElementContent(ctxt->myDoc, content); 6649 } 6650 } else { 6651 if (inputid != ctxt->input->id) { 6652 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6653 "Element declaration doesn't start and stop in" 6654 " the same entity\n"); 6655 } 6656 6657 NEXT; 6658 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6659 (ctxt->sax->elementDecl != NULL)) { 6660 if (content != NULL) 6661 content->parent = NULL; 6662 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6663 content); 6664 if ((content != NULL) && (content->parent == NULL)) { 6665 /* 6666 * this is a trick: if xmlAddElementDecl is called, 6667 * instead of copying the full tree it is plugged directly 6668 * if called from the parser. Avoid duplicating the 6669 * interfaces or change the API/ABI 6670 */ 6671 xmlFreeDocElementContent(ctxt->myDoc, content); 6672 } 6673 } else if (content != NULL) { 6674 xmlFreeDocElementContent(ctxt->myDoc, content); 6675 } 6676 } 6677 } 6678 return(ret); 6679 } 6680 6681 /** 6682 * xmlParseConditionalSections 6683 * @ctxt: an XML parser context 6684 * 6685 * [61] conditionalSect ::= includeSect | ignoreSect 6686 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6687 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
 *                    '[' ignoreSectContents* ']]>'
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
 *
 * Nested INCLUDE sections are handled iteratively: the input id seen at
 * each opening '<![' is pushed on a growable array and compared when the
 * matching ']]>' is reached. IGNORE sections are skipped in place using
 * a nesting counter. The function returns once the outermost section is
 * closed, or on the first error.
 */

static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
    int *inputIds = NULL;       /* input id of each open INCLUDE section */
    size_t inputIdsSize = 0;    /* allocated entries in inputIds */
    size_t depth = 0;           /* number of INCLUDE sections still open */

    while (ctxt->instate != XML_PARSER_EOF) {
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
            int id = ctxt->input->id;

            SKIP(3);
            SKIP_BLANKS;

            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
                SKIP(7);
                SKIP_BLANKS;
                if (RAW != '[') {
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
                    xmlHaltParser(ctxt);
                    goto error;
                }
                if (ctxt->input->id != id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "All markup of the conditional section is"
                                   " not in the same entity\n");
                }
                NEXT;

                /* Grow the stack (4 entries, then doubling) when full. */
                if (inputIdsSize <= depth) {
                    int *tmp;

                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
                    tmp = (int *) xmlRealloc(inputIds,
                            inputIdsSize * sizeof(int));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        goto error;
                    }
                    inputIds = tmp;
                }
                inputIds[depth] = id;
                depth++;
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
                int state;
                xmlParserInputState instate;
                size_t ignoreDepth = 0;

                SKIP(6);
                SKIP_BLANKS;
                if (RAW != '[') {
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
                    xmlHaltParser(ctxt);
                    goto error;
                }
                if (ctxt->input->id != id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "All markup of the conditional section is"
                                   " not in the same entity\n");
                }
                NEXT;

                /*
                 * Parse up to the end of the conditional section but disable
                 * SAX event generating DTD building in the meantime
                 */
                state = ctxt->disableSAX;
                instate = ctxt->instate;
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
                ctxt->instate = XML_PARSER_IGNORE;

                /* Skip the content, tracking nested '<![' ... ']]>' pairs. */
                while (RAW != 0) {
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
                        SKIP(3);
                        ignoreDepth++;
                        /* Check for integer overflow */
                        if (ignoreDepth == 0) {
                            xmlErrMemory(ctxt, NULL);
                            goto error;
                        }
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
                               (NXT(2) == '>')) {
                        /* The ']]>' closing the IGNORE section itself. */
                        if (ignoreDepth == 0)
                            break;
                        SKIP(3);
                        ignoreDepth--;
                    } else {
                        NEXT;
                    }
                }

                /* Restore the SAX and parser state saved above. */
                ctxt->disableSAX = state;
                ctxt->instate = instate;

                if (RAW == 0) {
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
                    goto error;
                }
                if (ctxt->input->id != id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "All markup of the conditional section is"
                                   " not in the same entity\n");
                }
                SKIP(3);
            } else {
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
                xmlHaltParser(ctxt);
                goto error;
            }
        } else if ((depth > 0) &&
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
            /* End of the innermost open INCLUDE section. */
            depth--;
            if (ctxt->input->id != inputIds[depth]) {
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                               "All markup of the conditional section is not"
                               " in the same entity\n");
            }
            SKIP(3);
        } else {
            /* Content of an INCLUDE section: a regular markup declaration. */
            int id = ctxt->input->id;
            unsigned long cons = CUR_CONSUMED;

            xmlParseMarkupDecl(ctxt);

            /* No progress at all: bail out instead of looping forever. */
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
                xmlHaltParser(ctxt);
                goto error;
            }
        }

        /* Every open section has been closed. */
        if (depth == 0)
            break;

        SKIP_BLANKS;
        GROW;
    }

    /* Reached on errors and on normal completion alike. */
error:
    xmlFree(inputIds);
}

/**
 * xmlParseMarkupDecl:
 * @ctxt: an XML parser context
 *
 * parse Markup declarations
 *
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
 *                     NotationDecl | PI | Comment
 *
 * [ VC: Proper Declaration/PE Nesting ]
 * Parameter-entity replacement text must be properly nested with
 * markup declarations. That is to say, if either the first character
 * or the last character of a markup declaration (markupdecl above) is
 * contained in the replacement text for a parameter-entity reference,
 * both must be contained in the same replacement text.
 *
 * [ WFC: PEs in Internal Subset ]
 * In the internal DTD subset, parameter-entity references can occur
 * only where markup declarations can occur, not within markup declarations.
 * (This does not apply to references that occur in external parameter
 * entities or to the external subset.)
6854 */ 6855 void 6856 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6857 GROW; 6858 if (CUR == '<') { 6859 if (NXT(1) == '!') { 6860 switch (NXT(2)) { 6861 case 'E': 6862 if (NXT(3) == 'L') 6863 xmlParseElementDecl(ctxt); 6864 else if (NXT(3) == 'N') 6865 xmlParseEntityDecl(ctxt); 6866 break; 6867 case 'A': 6868 xmlParseAttributeListDecl(ctxt); 6869 break; 6870 case 'N': 6871 xmlParseNotationDecl(ctxt); 6872 break; 6873 case '-': 6874 xmlParseComment(ctxt); 6875 break; 6876 default: 6877 /* there is an error but it will be detected later */ 6878 break; 6879 } 6880 } else if (NXT(1) == '?') { 6881 xmlParsePI(ctxt); 6882 } 6883 } 6884 6885 /* 6886 * detect requirement to exit there and act accordingly 6887 * and avoid having instate overridden later on 6888 */ 6889 if (ctxt->instate == XML_PARSER_EOF) 6890 return; 6891 6892 ctxt->instate = XML_PARSER_DTD; 6893 } 6894 6895 /** 6896 * xmlParseTextDecl: 6897 * @ctxt: an XML parser context 6898 * 6899 * parse an XML declaration header for external entities 6900 * 6901 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6902 */ 6903 6904 void 6905 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6906 xmlChar *version; 6907 const xmlChar *encoding; 6908 int oldstate; 6909 6910 /* 6911 * We know that '<?xml' is here. 6912 */ 6913 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6914 SKIP(5); 6915 } else { 6916 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6917 return; 6918 } 6919 6920 /* Avoid expansion of parameter entities when skipping blanks. */ 6921 oldstate = ctxt->instate; 6922 ctxt->instate = XML_PARSER_START; 6923 6924 if (SKIP_BLANKS == 0) { 6925 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6926 "Space needed after '<?xml'\n"); 6927 } 6928 6929 /* 6930 * We may have the VersionInfo here. 
6931 */ 6932 version = xmlParseVersionInfo(ctxt); 6933 if (version == NULL) 6934 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6935 else { 6936 if (SKIP_BLANKS == 0) { 6937 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6938 "Space needed here\n"); 6939 } 6940 } 6941 ctxt->input->version = version; 6942 6943 /* 6944 * We must have the encoding declaration 6945 */ 6946 encoding = xmlParseEncodingDecl(ctxt); 6947 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6948 /* 6949 * The XML REC instructs us to stop parsing right here 6950 */ 6951 ctxt->instate = oldstate; 6952 return; 6953 } 6954 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6955 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6956 "Missing encoding in text declaration\n"); 6957 } 6958 6959 SKIP_BLANKS; 6960 if ((RAW == '?') && (NXT(1) == '>')) { 6961 SKIP(2); 6962 } else if (RAW == '>') { 6963 /* Deprecated old WD ... */ 6964 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6965 NEXT; 6966 } else { 6967 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6968 MOVETO_ENDTAG(CUR_PTR); 6969 NEXT; 6970 } 6971 6972 ctxt->instate = oldstate; 6973 } 6974 6975 /** 6976 * xmlParseExternalSubset: 6977 * @ctxt: an XML parser context 6978 * @ExternalID: the external identifier 6979 * @SystemID: the system identifier (or URL) 6980 * 6981 * parse Markup declarations from an external subset 6982 * 6983 * [30] extSubset ::= textDecl? 
extSubsetDecl 6984 * 6985 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6986 */ 6987 void 6988 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6989 const xmlChar *SystemID) { 6990 xmlDetectSAX2(ctxt); 6991 GROW; 6992 6993 if ((ctxt->encoding == NULL) && 6994 (ctxt->input->end - ctxt->input->cur >= 4)) { 6995 xmlChar start[4]; 6996 xmlCharEncoding enc; 6997 6998 start[0] = RAW; 6999 start[1] = NXT(1); 7000 start[2] = NXT(2); 7001 start[3] = NXT(3); 7002 enc = xmlDetectCharEncoding(start, 4); 7003 if (enc != XML_CHAR_ENCODING_NONE) 7004 xmlSwitchEncoding(ctxt, enc); 7005 } 7006 7007 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7008 xmlParseTextDecl(ctxt); 7009 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7010 /* 7011 * The XML REC instructs us to stop parsing right here 7012 */ 7013 xmlHaltParser(ctxt); 7014 return; 7015 } 7016 } 7017 if (ctxt->myDoc == NULL) { 7018 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7019 if (ctxt->myDoc == NULL) { 7020 xmlErrMemory(ctxt, "New Doc failed"); 7021 return; 7022 } 7023 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7024 } 7025 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7026 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7027 7028 ctxt->instate = XML_PARSER_DTD; 7029 ctxt->external = 1; 7030 SKIP_BLANKS; 7031 while (((RAW == '<') && (NXT(1) == '?')) || 7032 ((RAW == '<') && (NXT(1) == '!')) || 7033 (RAW == '%')) { 7034 int id = ctxt->input->id; 7035 unsigned long cons = CUR_CONSUMED; 7036 7037 GROW; 7038 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7039 xmlParseConditionalSections(ctxt); 7040 } else 7041 xmlParseMarkupDecl(ctxt); 7042 SKIP_BLANKS; 7043 7044 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) { 7045 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7046 break; 7047 } 7048 } 7049 7050 if (RAW != 0) { 7051 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7052 } 7053 7054 } 7055 7056 /** 7057 * 
xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * Parse and handle a reference found in content. Depending on the SAX
 * interface this may end up in a call to characters() (for a CharRef,
 * for a predefined entity, or when the entity content is replayed) or
 * in a call to reference().
 *
 * The function returns without doing anything if the current character
 * is not '&'. For general entities it works in two phases: the first
 * reference to an entity parses its content (filling ent->children when
 * the SAX callbacks build a tree), then the content is delivered to the
 * application, either as a reference() callback or by linking or copying
 * the entity's nodes under ctxt->node.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    /* ent->checked as seen on entry; reset to 0 once parsed below */
    int was_checked;
    /* node list produced by the first parse of the entity content */
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[16];
        /* read before xmlParseCharRef() runs; selects hex/decimal below */
        int hex = NXT(2);
        int value = xmlParseCharRef(ctxt);

        if (value == 0)
            return;
        if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
            /*
             * So we are using non-UTF-8 buffers
             * Check that the char fit on 8bits, if not
             * generate a CharRef.
             */
            if (value <= 0xFF) {
                out[0] = value;
                out[1] = 0;
                if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->characters(ctxt->userData, out, 1);
            } else {
                /* re-emit the reference text in the notation it used */
                if ((hex == 'x') || (hex == 'X'))
                    snprintf((char *)out, sizeof(out), "#x%X", value);
                else
                    snprintf((char *)out, sizeof(out), "#%d", value);
                if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->reference(ctxt->userData, out);
            }
        } else {
            /*
             * Just encode the value in UTF-8
             */
            COPY_BUF(0 ,out, i, value);
            out[i] = 0;
            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->characters(ctxt->userData, out, i);
        }
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    if (!ctxt->wellFormed)
        return;
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity triggers a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if (((ent->checked == 0) ||
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
        unsigned long oldnbent = ctxt->nbentities, diff;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                           user_data, ctxt->depth, ent->URI,
                                           ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Store the number of entities needing parsing for this entity
         * content and do checkings.
         * ent->checked is set to twice that count (capped so that it
         * stays within INT_MAX), with the low bit set when the content
         * contains a '<'.
         */
        diff = ctxt->nbentities - oldnbent + 1;
        if (diff > INT_MAX / 2)
            diff = INT_MAX / 2;
        ent->checked = diff * 2;
        if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
            ent->checked |= 1;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlHaltParser(ctxt);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
             (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
                (ent->children == NULL)) {
                ent->children = list;
                /*
                 * Prune it directly in the generated document
                 * except for single text nodes.
                 */
                if ((ctxt->replaceEntities == 0) ||
                    (ctxt->parseMode == XML_PARSE_READER) ||
                    ((list->type == XML_TEXT_NODE) &&
                     (list->next == NULL))) {
                    /* the entity keeps the nodes: reparent them to it */
                    ent->owner = 1;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ent;
                        if (list->doc != ent->doc)
                            xmlSetTreeDoc(list, ent->doc);
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                    list = NULL;
                } else {
                    /*
                     * The nodes are prepared for insertion under
                     * ctxt->node; list is left non-NULL so the code
                     * further down can tell this is the first
                     * occurrence.
                     */
                    ent->owner = 0;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ctxt->node;
                        list->doc = ctxt->myDoc;
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                    list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                        xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
                }
            } else {
                xmlFreeNodeList(list);
                list = NULL;
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                     "Entity '%s' failed to parse\n", ent->name);
            /* truncate the content of the entity that failed to parse */
            if (ent->content != NULL)
                ent->content[0] = 0;
            xmlParserEntityCheck(ctxt, 0, ent, 0);
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }
        if (ent->checked == 0)
            ent->checked = 2;

        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
        was_checked = 0;
    } else if (ent->checked != 1) {
        /* already checked: account for the entities it was found to use */
        ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * through parsing for first checking go through the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * From here on ent->children is not NULL (the NULL case returned
     * above). When entities are not being substituted, only report the
     * reference to the application.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL)) {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if ((ctxt->node != NULL) && (ent->children != NULL)) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                if (ctxt->parseMode == XML_PARSE_READER)
                    ent->owner = 1;
                 */

                /* append a copy of each entity child to ctxt->node */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if ((list == NULL) || (ctxt->inputNr > 0)) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* detach cur; the copy goes to ent, cur to the tree */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        xmlAddChild((xmlNodePtr) ent, nw);
                        xmlAddChild(ctxt->node, cur);
                    }
                    if (cur == last)
                        break;
                    cur = next;
                }
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}

/**
 * xmlParseEntityRef:
 * @ctxt:  an XML parser context
 *
 * parse ENTITY references declarations
 *
 * [68] EntityRef ::= '&' Name ';'
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", the Name given in the entity reference
 * must match that in an entity declaration, except that
well-formed 7505 * documents need not declare any of the following entities: amp, lt, 7506 * gt, apos, quot. The declaration of a parameter entity must precede 7507 * any reference to it. Similarly, the declaration of a general entity 7508 * must precede any reference to it which appears in a default value in an 7509 * attribute-list declaration. Note that if entities are declared in the 7510 * external subset or in external parameter entities, a non-validating 7511 * processor is not obligated to read and process their declarations; 7512 * for such documents, the rule that an entity must be declared is a 7513 * well-formedness constraint only if standalone='yes'. 7514 * 7515 * [ WFC: Parsed Entity ] 7516 * An entity reference must not contain the name of an unparsed entity 7517 * 7518 * Returns the xmlEntityPtr if found, or NULL otherwise. 7519 */ 7520 xmlEntityPtr 7521 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7522 const xmlChar *name; 7523 xmlEntityPtr ent = NULL; 7524 7525 GROW; 7526 if (ctxt->instate == XML_PARSER_EOF) 7527 return(NULL); 7528 7529 if (RAW != '&') 7530 return(NULL); 7531 NEXT; 7532 name = xmlParseName(ctxt); 7533 if (name == NULL) { 7534 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7535 "xmlParseEntityRef: no name\n"); 7536 return(NULL); 7537 } 7538 if (RAW != ';') { 7539 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7540 return(NULL); 7541 } 7542 NEXT; 7543 7544 /* 7545 * Predefined entities override any extra definition 7546 */ 7547 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7548 ent = xmlGetPredefinedEntity(name); 7549 if (ent != NULL) 7550 return(ent); 7551 } 7552 7553 /* 7554 * Increase the number of entity references parsed 7555 */ 7556 ctxt->nbentities++; 7557 7558 /* 7559 * Ask first SAX for entity resolution, otherwise try the 7560 * entities which may have stored in the parser context. 
7561 */ 7562 if (ctxt->sax != NULL) { 7563 if (ctxt->sax->getEntity != NULL) 7564 ent = ctxt->sax->getEntity(ctxt->userData, name); 7565 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7566 (ctxt->options & XML_PARSE_OLDSAX)) 7567 ent = xmlGetPredefinedEntity(name); 7568 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7569 (ctxt->userData==ctxt)) { 7570 ent = xmlSAX2GetEntity(ctxt, name); 7571 } 7572 } 7573 if (ctxt->instate == XML_PARSER_EOF) 7574 return(NULL); 7575 /* 7576 * [ WFC: Entity Declared ] 7577 * In a document without any DTD, a document with only an 7578 * internal DTD subset which contains no parameter entity 7579 * references, or a document with "standalone='yes'", the 7580 * Name given in the entity reference must match that in an 7581 * entity declaration, except that well-formed documents 7582 * need not declare any of the following entities: amp, lt, 7583 * gt, apos, quot. 7584 * The declaration of a parameter entity must precede any 7585 * reference to it. 7586 * Similarly, the declaration of a general entity must 7587 * precede any reference to it which appears in a default 7588 * value in an attribute-list declaration. Note that if 7589 * entities are declared in the external subset or in 7590 * external parameter entities, a non-validating processor 7591 * is not obligated to read and process their declarations; 7592 * for such documents, the rule that an entity must be 7593 * declared is a well-formedness constraint only if 7594 * standalone='yes'. 
7595 */ 7596 if (ent == NULL) { 7597 if ((ctxt->standalone == 1) || 7598 ((ctxt->hasExternalSubset == 0) && 7599 (ctxt->hasPErefs == 0))) { 7600 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7601 "Entity '%s' not defined\n", name); 7602 } else { 7603 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7604 "Entity '%s' not defined\n", name); 7605 if ((ctxt->inSubset == 0) && 7606 (ctxt->sax != NULL) && 7607 (ctxt->sax->reference != NULL)) { 7608 ctxt->sax->reference(ctxt->userData, name); 7609 } 7610 } 7611 xmlParserEntityCheck(ctxt, 0, ent, 0); 7612 ctxt->valid = 0; 7613 } 7614 7615 /* 7616 * [ WFC: Parsed Entity ] 7617 * An entity reference must not contain the name of an 7618 * unparsed entity 7619 */ 7620 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7621 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7622 "Entity reference to unparsed entity %s\n", name); 7623 } 7624 7625 /* 7626 * [ WFC: No External Entity References ] 7627 * Attribute values cannot contain direct or indirect 7628 * entity references to external entities. 7629 */ 7630 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7631 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7632 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7633 "Attribute references external entity '%s'\n", name); 7634 } 7635 /* 7636 * [ WFC: No < in Attribute Values ] 7637 * The replacement text of any entity referred to directly or 7638 * indirectly in an attribute value (other than "<") must 7639 * not contain a <. 7640 */ 7641 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7642 (ent != NULL) && 7643 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7644 if (((ent->checked & 1) || (ent->checked == 0)) && 7645 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7646 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7647 "'<' in entity '%s' is not allowed in attributes values\n", name); 7648 } 7649 } 7650 7651 /* 7652 * Internal check, no parameter entities here ... 
7653 */ 7654 else { 7655 switch (ent->etype) { 7656 case XML_INTERNAL_PARAMETER_ENTITY: 7657 case XML_EXTERNAL_PARAMETER_ENTITY: 7658 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7659 "Attempt to reference the parameter entity '%s'\n", 7660 name); 7661 break; 7662 default: 7663 break; 7664 } 7665 } 7666 7667 /* 7668 * [ WFC: No Recursion ] 7669 * A parsed entity must not contain a recursive reference 7670 * to itself, either directly or indirectly. 7671 * Done somewhere else 7672 */ 7673 return(ent); 7674 } 7675 7676 /** 7677 * xmlParseStringEntityRef: 7678 * @ctxt: an XML parser context 7679 * @str: a pointer to an index in the string 7680 * 7681 * parse ENTITY references declarations, but this version parses it from 7682 * a string value. 7683 * 7684 * [68] EntityRef ::= '&' Name ';' 7685 * 7686 * [ WFC: Entity Declared ] 7687 * In a document without any DTD, a document with only an internal DTD 7688 * subset which contains no parameter entity references, or a document 7689 * with "standalone='yes'", the Name given in the entity reference 7690 * must match that in an entity declaration, except that well-formed 7691 * documents need not declare any of the following entities: amp, lt, 7692 * gt, apos, quot. The declaration of a parameter entity must precede 7693 * any reference to it. Similarly, the declaration of a general entity 7694 * must precede any reference to it which appears in a default value in an 7695 * attribute-list declaration. Note that if entities are declared in the 7696 * external subset or in external parameter entities, a non-validating 7697 * processor is not obligated to read and process their declarations; 7698 * for such documents, the rule that an entity must be declared is a 7699 * well-formedness constraint only if standalone='yes'. 7700 * 7701 * [ WFC: Parsed Entity ] 7702 * An entity reference must not contain the name of an unparsed entity 7703 * 7704 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7705 * is updated to the current location in the string. 7706 */ 7707 static xmlEntityPtr 7708 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7709 xmlChar *name; 7710 const xmlChar *ptr; 7711 xmlChar cur; 7712 xmlEntityPtr ent = NULL; 7713 7714 if ((str == NULL) || (*str == NULL)) 7715 return(NULL); 7716 ptr = *str; 7717 cur = *ptr; 7718 if (cur != '&') 7719 return(NULL); 7720 7721 ptr++; 7722 name = xmlParseStringName(ctxt, &ptr); 7723 if (name == NULL) { 7724 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7725 "xmlParseStringEntityRef: no name\n"); 7726 *str = ptr; 7727 return(NULL); 7728 } 7729 if (*ptr != ';') { 7730 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7731 xmlFree(name); 7732 *str = ptr; 7733 return(NULL); 7734 } 7735 ptr++; 7736 7737 7738 /* 7739 * Predefined entities override any extra definition 7740 */ 7741 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7742 ent = xmlGetPredefinedEntity(name); 7743 if (ent != NULL) { 7744 xmlFree(name); 7745 *str = ptr; 7746 return(ent); 7747 } 7748 } 7749 7750 /* 7751 * Increase the number of entity references parsed 7752 */ 7753 ctxt->nbentities++; 7754 7755 /* 7756 * Ask first SAX for entity resolution, otherwise try the 7757 * entities which may have stored in the parser context. 
7758 */ 7759 if (ctxt->sax != NULL) { 7760 if (ctxt->sax->getEntity != NULL) 7761 ent = ctxt->sax->getEntity(ctxt->userData, name); 7762 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7763 ent = xmlGetPredefinedEntity(name); 7764 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7765 ent = xmlSAX2GetEntity(ctxt, name); 7766 } 7767 } 7768 if (ctxt->instate == XML_PARSER_EOF) { 7769 xmlFree(name); 7770 return(NULL); 7771 } 7772 7773 /* 7774 * [ WFC: Entity Declared ] 7775 * In a document without any DTD, a document with only an 7776 * internal DTD subset which contains no parameter entity 7777 * references, or a document with "standalone='yes'", the 7778 * Name given in the entity reference must match that in an 7779 * entity declaration, except that well-formed documents 7780 * need not declare any of the following entities: amp, lt, 7781 * gt, apos, quot. 7782 * The declaration of a parameter entity must precede any 7783 * reference to it. 7784 * Similarly, the declaration of a general entity must 7785 * precede any reference to it which appears in a default 7786 * value in an attribute-list declaration. Note that if 7787 * entities are declared in the external subset or in 7788 * external parameter entities, a non-validating processor 7789 * is not obligated to read and process their declarations; 7790 * for such documents, the rule that an entity must be 7791 * declared is a well-formedness constraint only if 7792 * standalone='yes'. 7793 */ 7794 if (ent == NULL) { 7795 if ((ctxt->standalone == 1) || 7796 ((ctxt->hasExternalSubset == 0) && 7797 (ctxt->hasPErefs == 0))) { 7798 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7799 "Entity '%s' not defined\n", name); 7800 } else { 7801 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7802 "Entity '%s' not defined\n", 7803 name); 7804 } 7805 xmlParserEntityCheck(ctxt, 0, ent, 0); 7806 /* TODO ? 
check regressions ctxt->valid = 0; */ 7807 } 7808 7809 /* 7810 * [ WFC: Parsed Entity ] 7811 * An entity reference must not contain the name of an 7812 * unparsed entity 7813 */ 7814 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7815 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7816 "Entity reference to unparsed entity %s\n", name); 7817 } 7818 7819 /* 7820 * [ WFC: No External Entity References ] 7821 * Attribute values cannot contain direct or indirect 7822 * entity references to external entities. 7823 */ 7824 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7825 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7826 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7827 "Attribute references external entity '%s'\n", name); 7828 } 7829 /* 7830 * [ WFC: No < in Attribute Values ] 7831 * The replacement text of any entity referred to directly or 7832 * indirectly in an attribute value (other than "<") must 7833 * not contain a <. 7834 */ 7835 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7836 (ent != NULL) && (ent->content != NULL) && 7837 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7838 (xmlStrchr(ent->content, '<'))) { 7839 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7840 "'<' in entity '%s' is not allowed in attributes values\n", 7841 name); 7842 } 7843 7844 /* 7845 * Internal check, no parameter entities here ... 7846 */ 7847 else { 7848 switch (ent->etype) { 7849 case XML_INTERNAL_PARAMETER_ENTITY: 7850 case XML_EXTERNAL_PARAMETER_ENTITY: 7851 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7852 "Attempt to reference the parameter entity '%s'\n", 7853 name); 7854 break; 7855 default: 7856 break; 7857 } 7858 } 7859 7860 /* 7861 * [ WFC: No Recursion ] 7862 * A parsed entity must not contain a recursive reference 7863 * to itself, either directly or indirectly. 
7864 * Done somewhere else 7865 */ 7866 7867 xmlFree(name); 7868 *str = ptr; 7869 return(ent); 7870 } 7871 7872 /** 7873 * xmlParsePEReference: 7874 * @ctxt: an XML parser context 7875 * 7876 * parse PEReference declarations 7877 * The entity content is handled directly by pushing it's content as 7878 * a new input stream. 7879 * 7880 * [69] PEReference ::= '%' Name ';' 7881 * 7882 * [ WFC: No Recursion ] 7883 * A parsed entity must not contain a recursive 7884 * reference to itself, either directly or indirectly. 7885 * 7886 * [ WFC: Entity Declared ] 7887 * In a document without any DTD, a document with only an internal DTD 7888 * subset which contains no parameter entity references, or a document 7889 * with "standalone='yes'", ... ... The declaration of a parameter 7890 * entity must precede any reference to it... 7891 * 7892 * [ VC: Entity Declared ] 7893 * In a document with an external subset or external parameter entities 7894 * with "standalone='no'", ... ... The declaration of a parameter entity 7895 * must precede any reference to it... 7896 * 7897 * [ WFC: In DTD ] 7898 * Parameter-entity references may only appear in the DTD. 7899 * NOTE: misleading but this is handled. 
7900 */ 7901 void 7902 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7903 { 7904 const xmlChar *name; 7905 xmlEntityPtr entity = NULL; 7906 xmlParserInputPtr input; 7907 7908 if (RAW != '%') 7909 return; 7910 NEXT; 7911 name = xmlParseName(ctxt); 7912 if (name == NULL) { 7913 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n"); 7914 return; 7915 } 7916 if (xmlParserDebugEntities) 7917 xmlGenericError(xmlGenericErrorContext, 7918 "PEReference: %s\n", name); 7919 if (RAW != ';') { 7920 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 7921 return; 7922 } 7923 7924 NEXT; 7925 7926 /* 7927 * Increase the number of entity references parsed 7928 */ 7929 ctxt->nbentities++; 7930 7931 /* 7932 * Request the entity from SAX 7933 */ 7934 if ((ctxt->sax != NULL) && 7935 (ctxt->sax->getParameterEntity != NULL)) 7936 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7937 if (ctxt->instate == XML_PARSER_EOF) 7938 return; 7939 if (entity == NULL) { 7940 /* 7941 * [ WFC: Entity Declared ] 7942 * In a document without any DTD, a document with only an 7943 * internal DTD subset which contains no parameter entity 7944 * references, or a document with "standalone='yes'", ... 7945 * ... The declaration of a parameter entity must precede 7946 * any reference to it... 7947 */ 7948 if ((ctxt->standalone == 1) || 7949 ((ctxt->hasExternalSubset == 0) && 7950 (ctxt->hasPErefs == 0))) { 7951 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7952 "PEReference: %%%s; not found\n", 7953 name); 7954 } else { 7955 /* 7956 * [ VC: Entity Declared ] 7957 * In a document with an external subset or external 7958 * parameter entities with "standalone='no'", ... 7959 * ... The declaration of a parameter entity must 7960 * precede any reference to it... 
7961 */ 7962 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 7963 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 7964 "PEReference: %%%s; not found\n", 7965 name, NULL); 7966 } else 7967 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7968 "PEReference: %%%s; not found\n", 7969 name, NULL); 7970 ctxt->valid = 0; 7971 } 7972 xmlParserEntityCheck(ctxt, 0, NULL, 0); 7973 } else { 7974 /* 7975 * Internal checking in case the entity quest barfed 7976 */ 7977 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7978 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7979 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7980 "Internal: %%%s; is not a parameter entity\n", 7981 name, NULL); 7982 } else { 7983 xmlChar start[4]; 7984 xmlCharEncoding enc; 7985 7986 if (xmlParserEntityCheck(ctxt, 0, entity, 0)) 7987 return; 7988 7989 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7990 ((ctxt->options & XML_PARSE_NOENT) == 0) && 7991 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 7992 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 7993 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 7994 (ctxt->replaceEntities == 0) && 7995 (ctxt->validate == 0)) 7996 return; 7997 7998 input = xmlNewEntityInputStream(ctxt, entity); 7999 if (xmlPushInput(ctxt, input) < 0) { 8000 xmlFreeInputStream(input); 8001 return; 8002 } 8003 8004 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { 8005 /* 8006 * Get the 4 first bytes and decode the charset 8007 * if enc != XML_CHAR_ENCODING_NONE 8008 * plug some encoding conversion routines. 8009 * Note that, since we may have some non-UTF8 8010 * encoding (like UTF16, bug 135229), the 'length' 8011 * is not known, but we can calculate based upon 8012 * the amount of data in the buffer. 
8013 */ 8014 GROW 8015 if (ctxt->instate == XML_PARSER_EOF) 8016 return; 8017 if ((ctxt->input->end - ctxt->input->cur)>=4) { 8018 start[0] = RAW; 8019 start[1] = NXT(1); 8020 start[2] = NXT(2); 8021 start[3] = NXT(3); 8022 enc = xmlDetectCharEncoding(start, 4); 8023 if (enc != XML_CHAR_ENCODING_NONE) { 8024 xmlSwitchEncoding(ctxt, enc); 8025 } 8026 } 8027 8028 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8029 (IS_BLANK_CH(NXT(5)))) { 8030 xmlParseTextDecl(ctxt); 8031 } 8032 } 8033 } 8034 } 8035 ctxt->hasPErefs = 1; 8036 } 8037 8038 /** 8039 * xmlLoadEntityContent: 8040 * @ctxt: an XML parser context 8041 * @entity: an unloaded system entity 8042 * 8043 * Load the original content of the given system entity from the 8044 * ExternalID/SystemID given. This is to be used for Included in Literal 8045 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8046 * 8047 * Returns 0 in case of success and -1 in case of failure 8048 */ 8049 static int 8050 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8051 xmlParserInputPtr input; 8052 xmlBufferPtr buf; 8053 int l, c; 8054 int count = 0; 8055 8056 if ((ctxt == NULL) || (entity == NULL) || 8057 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8058 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8059 (entity->content != NULL)) { 8060 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8061 "xmlLoadEntityContent parameter error"); 8062 return(-1); 8063 } 8064 8065 if (xmlParserDebugEntities) 8066 xmlGenericError(xmlGenericErrorContext, 8067 "Reading %s entity content input\n", entity->name); 8068 8069 buf = xmlBufferCreate(); 8070 if (buf == NULL) { 8071 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8072 "xmlLoadEntityContent parameter error"); 8073 return(-1); 8074 } 8075 xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); 8076 8077 input = xmlNewEntityInputStream(ctxt, entity); 8078 if (input == NULL) { 8079 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8080 
"xmlLoadEntityContent input error"); 8081 xmlBufferFree(buf); 8082 return(-1); 8083 } 8084 8085 /* 8086 * Push the entity as the current input, read char by char 8087 * saving to the buffer until the end of the entity or an error 8088 */ 8089 if (xmlPushInput(ctxt, input) < 0) { 8090 xmlBufferFree(buf); 8091 xmlFreeInputStream(input); 8092 return(-1); 8093 } 8094 8095 GROW; 8096 c = CUR_CHAR(l); 8097 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8098 (IS_CHAR(c))) { 8099 xmlBufferAdd(buf, ctxt->input->cur, l); 8100 if (count++ > XML_PARSER_CHUNK_SIZE) { 8101 count = 0; 8102 GROW; 8103 if (ctxt->instate == XML_PARSER_EOF) { 8104 xmlBufferFree(buf); 8105 return(-1); 8106 } 8107 } 8108 NEXTL(l); 8109 c = CUR_CHAR(l); 8110 if (c == 0) { 8111 count = 0; 8112 GROW; 8113 if (ctxt->instate == XML_PARSER_EOF) { 8114 xmlBufferFree(buf); 8115 return(-1); 8116 } 8117 c = CUR_CHAR(l); 8118 } 8119 } 8120 8121 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8122 xmlPopInput(ctxt); 8123 } else if (!IS_CHAR(c)) { 8124 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8125 "xmlLoadEntityContent: invalid char value %d\n", 8126 c); 8127 xmlBufferFree(buf); 8128 return(-1); 8129 } 8130 entity->content = buf->content; 8131 buf->content = NULL; 8132 xmlBufferFree(buf); 8133 8134 return(0); 8135 } 8136 8137 /** 8138 * xmlParseStringPEReference: 8139 * @ctxt: an XML parser context 8140 * @str: a pointer to an index in the string 8141 * 8142 * parse PEReference declarations 8143 * 8144 * [69] PEReference ::= '%' Name ';' 8145 * 8146 * [ WFC: No Recursion ] 8147 * A parsed entity must not contain a recursive 8148 * reference to itself, either directly or indirectly. 8149 * 8150 * [ WFC: Entity Declared ] 8151 * In a document without any DTD, a document with only an internal DTD 8152 * subset which contains no parameter entity references, or a document 8153 * with "standalone='yes'", ... ... 
The declaration of a parameter 8154 * entity must precede any reference to it... 8155 * 8156 * [ VC: Entity Declared ] 8157 * In a document with an external subset or external parameter entities 8158 * with "standalone='no'", ... ... The declaration of a parameter entity 8159 * must precede any reference to it... 8160 * 8161 * [ WFC: In DTD ] 8162 * Parameter-entity references may only appear in the DTD. 8163 * NOTE: misleading but this is handled. 8164 * 8165 * Returns the string of the entity content. 8166 * str is updated to the current value of the index 8167 */ 8168 static xmlEntityPtr 8169 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8170 const xmlChar *ptr; 8171 xmlChar cur; 8172 xmlChar *name; 8173 xmlEntityPtr entity = NULL; 8174 8175 if ((str == NULL) || (*str == NULL)) return(NULL); 8176 ptr = *str; 8177 cur = *ptr; 8178 if (cur != '%') 8179 return(NULL); 8180 ptr++; 8181 name = xmlParseStringName(ctxt, &ptr); 8182 if (name == NULL) { 8183 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8184 "xmlParseStringPEReference: no name\n"); 8185 *str = ptr; 8186 return(NULL); 8187 } 8188 cur = *ptr; 8189 if (cur != ';') { 8190 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8191 xmlFree(name); 8192 *str = ptr; 8193 return(NULL); 8194 } 8195 ptr++; 8196 8197 /* 8198 * Increase the number of entity references parsed 8199 */ 8200 ctxt->nbentities++; 8201 8202 /* 8203 * Request the entity from SAX 8204 */ 8205 if ((ctxt->sax != NULL) && 8206 (ctxt->sax->getParameterEntity != NULL)) 8207 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8208 if (ctxt->instate == XML_PARSER_EOF) { 8209 xmlFree(name); 8210 *str = ptr; 8211 return(NULL); 8212 } 8213 if (entity == NULL) { 8214 /* 8215 * [ WFC: Entity Declared ] 8216 * In a document without any DTD, a document with only an 8217 * internal DTD subset which contains no parameter entity 8218 * references, or a document with "standalone='yes'", ... 8219 * ... 
The declaration of a parameter entity must precede 8220 * any reference to it... 8221 */ 8222 if ((ctxt->standalone == 1) || 8223 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8224 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8225 "PEReference: %%%s; not found\n", name); 8226 } else { 8227 /* 8228 * [ VC: Entity Declared ] 8229 * In a document with an external subset or external 8230 * parameter entities with "standalone='no'", ... 8231 * ... The declaration of a parameter entity must 8232 * precede any reference to it... 8233 */ 8234 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8235 "PEReference: %%%s; not found\n", 8236 name, NULL); 8237 ctxt->valid = 0; 8238 } 8239 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8240 } else { 8241 /* 8242 * Internal checking in case the entity quest barfed 8243 */ 8244 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8245 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8246 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8247 "%%%s; is not a parameter entity\n", 8248 name, NULL); 8249 } 8250 } 8251 ctxt->hasPErefs = 1; 8252 xmlFree(name); 8253 *str = ptr; 8254 return(entity); 8255 } 8256 8257 /** 8258 * xmlParseDocTypeDecl: 8259 * @ctxt: an XML parser context 8260 * 8261 * parse a DOCTYPE declaration 8262 * 8263 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8264 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8265 * 8266 * [ VC: Root Element Type ] 8267 * The Name in the document type declaration must match the element 8268 * type of the root element. 8269 */ 8270 8271 void 8272 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8273 const xmlChar *name = NULL; 8274 xmlChar *ExternalID = NULL; 8275 xmlChar *URI = NULL; 8276 8277 /* 8278 * We know that '<!DOCTYPE' has been detected. 8279 */ 8280 SKIP(9); 8281 8282 SKIP_BLANKS; 8283 8284 /* 8285 * Parse the DOCTYPE name. 
8286 */ 8287 name = xmlParseName(ctxt); 8288 if (name == NULL) { 8289 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8290 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8291 } 8292 ctxt->intSubName = name; 8293 8294 SKIP_BLANKS; 8295 8296 /* 8297 * Check for SystemID and ExternalID 8298 */ 8299 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8300 8301 if ((URI != NULL) || (ExternalID != NULL)) { 8302 ctxt->hasExternalSubset = 1; 8303 } 8304 ctxt->extSubURI = URI; 8305 ctxt->extSubSystem = ExternalID; 8306 8307 SKIP_BLANKS; 8308 8309 /* 8310 * Create and update the internal subset. 8311 */ 8312 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8313 (!ctxt->disableSAX)) 8314 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8315 if (ctxt->instate == XML_PARSER_EOF) 8316 return; 8317 8318 /* 8319 * Is there any internal subset declarations ? 8320 * they are handled separately in xmlParseInternalSubset() 8321 */ 8322 if (RAW == '[') 8323 return; 8324 8325 /* 8326 * We should be at the end of the DOCTYPE declaration. 8327 */ 8328 if (RAW != '>') { 8329 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8330 } 8331 NEXT; 8332 } 8333 8334 /** 8335 * xmlParseInternalSubset: 8336 * @ctxt: an XML parser context 8337 * 8338 * parse the internal subset declaration 8339 * 8340 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8341 */ 8342 8343 static void 8344 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8345 /* 8346 * Is there any DTD definition ? 8347 */ 8348 if (RAW == '[') { 8349 int baseInputNr = ctxt->inputNr; 8350 ctxt->instate = XML_PARSER_DTD; 8351 NEXT; 8352 /* 8353 * Parse the succession of Markup declarations and 8354 * PEReferences. 
8355 * Subsequence (markupdecl | PEReference | S)* 8356 */ 8357 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && 8358 (ctxt->instate != XML_PARSER_EOF)) { 8359 int id = ctxt->input->id; 8360 unsigned long cons = CUR_CONSUMED; 8361 8362 SKIP_BLANKS; 8363 xmlParseMarkupDecl(ctxt); 8364 xmlParsePEReference(ctxt); 8365 8366 /* 8367 * Conditional sections are allowed from external entities included 8368 * by PE References in the internal subset. 8369 */ 8370 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) && 8371 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 8372 xmlParseConditionalSections(ctxt); 8373 } 8374 8375 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) { 8376 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8377 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8378 if (ctxt->inputNr > baseInputNr) 8379 xmlPopInput(ctxt); 8380 else 8381 break; 8382 } 8383 } 8384 if (RAW == ']') { 8385 NEXT; 8386 SKIP_BLANKS; 8387 } 8388 } 8389 8390 /* 8391 * We should be at the end of the DOCTYPE declaration. 8392 */ 8393 if (RAW != '>') { 8394 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8395 return; 8396 } 8397 NEXT; 8398 } 8399 8400 #ifdef LIBXML_SAX1_ENABLED 8401 /** 8402 * xmlParseAttribute: 8403 * @ctxt: an XML parser context 8404 * @value: a xmlChar ** used to store the value of the attribute 8405 * 8406 * parse an attribute 8407 * 8408 * [41] Attribute ::= Name Eq AttValue 8409 * 8410 * [ WFC: No External Entity References ] 8411 * Attribute values cannot contain direct or indirect entity references 8412 * to external entities. 8413 * 8414 * [ WFC: No < in Attribute Values ] 8415 * The replacement text of any entity referred to directly or indirectly in 8416 * an attribute value (other than "<") must not contain a <. 8417 * 8418 * [ VC: Attribute Value Type ] 8419 * The attribute must have been declared; the value must be of the type 8420 * declared for it. 8421 * 8422 * [25] Eq ::= S? '=' S? 
8423 * 8424 * With namespace: 8425 * 8426 * [NS 11] Attribute ::= QName Eq AttValue 8427 * 8428 * Also the case QName == xmlns:??? is handled independently as a namespace 8429 * definition. 8430 * 8431 * Returns the attribute name, and the value in *value. 8432 */ 8433 8434 const xmlChar * 8435 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8436 const xmlChar *name; 8437 xmlChar *val; 8438 8439 *value = NULL; 8440 GROW; 8441 name = xmlParseName(ctxt); 8442 if (name == NULL) { 8443 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8444 "error parsing attribute name\n"); 8445 return(NULL); 8446 } 8447 8448 /* 8449 * read the value 8450 */ 8451 SKIP_BLANKS; 8452 if (RAW == '=') { 8453 NEXT; 8454 SKIP_BLANKS; 8455 val = xmlParseAttValue(ctxt); 8456 ctxt->instate = XML_PARSER_CONTENT; 8457 } else { 8458 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8459 "Specification mandates value for attribute %s\n", name); 8460 return(NULL); 8461 } 8462 8463 /* 8464 * Check that xml:lang conforms to the specification 8465 * No more registered as an error, just generate a warning now 8466 * since this was deprecated in XML second edition 8467 */ 8468 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8469 if (!xmlCheckLanguageID(val)) { 8470 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8471 "Malformed value for xml:lang : %s\n", 8472 val, NULL); 8473 } 8474 } 8475 8476 /* 8477 * Check that xml:space conforms to the specification 8478 */ 8479 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8480 if (xmlStrEqual(val, BAD_CAST "default")) 8481 *(ctxt->space) = 0; 8482 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8483 *(ctxt->space) = 1; 8484 else { 8485 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8486 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8487 val, NULL); 8488 } 8489 } 8490 8491 *value = val; 8492 return(name); 8493 } 8494 8495 /** 8496 * xmlParseStartTag: 8497 * @ctxt: an XML parser context 8498 * 8499 * parse a start 
of tag either for rule element or 8500 * EmptyElement. In both case we don't parse the tag closing chars. 8501 * 8502 * [40] STag ::= '<' Name (S Attribute)* S? '>' 8503 * 8504 * [ WFC: Unique Att Spec ] 8505 * No attribute name may appear more than once in the same start-tag or 8506 * empty-element tag. 8507 * 8508 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8509 * 8510 * [ WFC: Unique Att Spec ] 8511 * No attribute name may appear more than once in the same start-tag or 8512 * empty-element tag. 8513 * 8514 * With namespace: 8515 * 8516 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8517 * 8518 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8519 * 8520 * Returns the element name parsed 8521 */ 8522 8523 const xmlChar * 8524 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8525 const xmlChar *name; 8526 const xmlChar *attname; 8527 xmlChar *attvalue; 8528 const xmlChar **atts = ctxt->atts; 8529 int nbatts = 0; 8530 int maxatts = ctxt->maxatts; 8531 int i; 8532 8533 if (RAW != '<') return(NULL); 8534 NEXT1; 8535 8536 name = xmlParseName(ctxt); 8537 if (name == NULL) { 8538 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8539 "xmlParseStartTag: invalid element name\n"); 8540 return(NULL); 8541 } 8542 8543 /* 8544 * Now parse the attributes, it ends up with the ending 8545 * 8546 * (S Attribute)* S? 8547 */ 8548 SKIP_BLANKS; 8549 GROW; 8550 8551 while (((RAW != '>') && 8552 ((RAW != '/') || (NXT(1) != '>')) && 8553 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8554 int id = ctxt->input->id; 8555 unsigned long cons = CUR_CONSUMED; 8556 8557 attname = xmlParseAttribute(ctxt, &attvalue); 8558 if ((attname != NULL) && (attvalue != NULL)) { 8559 /* 8560 * [ WFC: Unique Att Spec ] 8561 * No attribute name may appear more than once in the same 8562 * start-tag or empty-element tag. 
8563 */ 8564 for (i = 0; i < nbatts;i += 2) { 8565 if (xmlStrEqual(atts[i], attname)) { 8566 xmlErrAttributeDup(ctxt, NULL, attname); 8567 xmlFree(attvalue); 8568 goto failed; 8569 } 8570 } 8571 /* 8572 * Add the pair to atts 8573 */ 8574 if (atts == NULL) { 8575 maxatts = 22; /* allow for 10 attrs by default */ 8576 atts = (const xmlChar **) 8577 xmlMalloc(maxatts * sizeof(xmlChar *)); 8578 if (atts == NULL) { 8579 xmlErrMemory(ctxt, NULL); 8580 if (attvalue != NULL) 8581 xmlFree(attvalue); 8582 goto failed; 8583 } 8584 ctxt->atts = atts; 8585 ctxt->maxatts = maxatts; 8586 } else if (nbatts + 4 > maxatts) { 8587 const xmlChar **n; 8588 8589 maxatts *= 2; 8590 n = (const xmlChar **) xmlRealloc((void *) atts, 8591 maxatts * sizeof(const xmlChar *)); 8592 if (n == NULL) { 8593 xmlErrMemory(ctxt, NULL); 8594 if (attvalue != NULL) 8595 xmlFree(attvalue); 8596 goto failed; 8597 } 8598 atts = n; 8599 ctxt->atts = atts; 8600 ctxt->maxatts = maxatts; 8601 } 8602 atts[nbatts++] = attname; 8603 atts[nbatts++] = attvalue; 8604 atts[nbatts] = NULL; 8605 atts[nbatts + 1] = NULL; 8606 } else { 8607 if (attvalue != NULL) 8608 xmlFree(attvalue); 8609 } 8610 8611 failed: 8612 8613 GROW 8614 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8615 break; 8616 if (SKIP_BLANKS == 0) { 8617 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8618 "attributes construct error\n"); 8619 } 8620 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) && 8621 (attname == NULL) && (attvalue == NULL)) { 8622 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8623 "xmlParseStartTag: problem parsing attributes\n"); 8624 break; 8625 } 8626 SHRINK; 8627 GROW; 8628 } 8629 8630 /* 8631 * SAX: Start of Element ! 
8632 */ 8633 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8634 (!ctxt->disableSAX)) { 8635 if (nbatts > 0) 8636 ctxt->sax->startElement(ctxt->userData, name, atts); 8637 else 8638 ctxt->sax->startElement(ctxt->userData, name, NULL); 8639 } 8640 8641 if (atts != NULL) { 8642 /* Free only the content strings */ 8643 for (i = 1;i < nbatts;i+=2) 8644 if (atts[i] != NULL) 8645 xmlFree((xmlChar *) atts[i]); 8646 } 8647 return(name); 8648 } 8649 8650 /** 8651 * xmlParseEndTag1: 8652 * @ctxt: an XML parser context 8653 * @line: line of the start tag 8654 * @nsNr: number of namespaces on the start tag 8655 * 8656 * parse an end of tag 8657 * 8658 * [42] ETag ::= '</' Name S? '>' 8659 * 8660 * With namespace 8661 * 8662 * [NS 9] ETag ::= '</' QName S? '>' 8663 */ 8664 8665 static void 8666 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8667 const xmlChar *name; 8668 8669 GROW; 8670 if ((RAW != '<') || (NXT(1) != '/')) { 8671 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8672 "xmlParseEndTag: '</' not found\n"); 8673 return; 8674 } 8675 SKIP(2); 8676 8677 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8678 8679 /* 8680 * We should definitely be at the ending "S? '>'" part 8681 */ 8682 GROW; 8683 SKIP_BLANKS; 8684 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8685 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8686 } else 8687 NEXT1; 8688 8689 /* 8690 * [ WFC: Element Type Match ] 8691 * The Name in an element's end-tag must match the element type in the 8692 * start-tag. 
8693 * 8694 */ 8695 if (name != (xmlChar*)1) { 8696 if (name == NULL) name = BAD_CAST "unparsable"; 8697 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8698 "Opening and ending tag mismatch: %s line %d and %s\n", 8699 ctxt->name, line, name); 8700 } 8701 8702 /* 8703 * SAX: End of Tag 8704 */ 8705 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8706 (!ctxt->disableSAX)) 8707 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8708 8709 namePop(ctxt); 8710 spacePop(ctxt); 8711 return; 8712 } 8713 8714 /** 8715 * xmlParseEndTag: 8716 * @ctxt: an XML parser context 8717 * 8718 * parse an end of tag 8719 * 8720 * [42] ETag ::= '</' Name S? '>' 8721 * 8722 * With namespace 8723 * 8724 * [NS 9] ETag ::= '</' QName S? '>' 8725 */ 8726 8727 void 8728 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8729 xmlParseEndTag1(ctxt, 0); 8730 } 8731 #endif /* LIBXML_SAX1_ENABLED */ 8732 8733 /************************************************************************ 8734 * * 8735 * SAX 2 specific operations * 8736 * * 8737 ************************************************************************/ 8738 8739 /* 8740 * xmlGetNamespace: 8741 * @ctxt: an XML parser context 8742 * @prefix: the prefix to lookup 8743 * 8744 * Lookup the namespace name for the @prefix (which ca be NULL) 8745 * The prefix must come from the @ctxt->dict dictionary 8746 * 8747 * Returns the namespace name or NULL if not bound 8748 */ 8749 static const xmlChar * 8750 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8751 int i; 8752 8753 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8754 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8755 if (ctxt->nsTab[i] == prefix) { 8756 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8757 return(NULL); 8758 return(ctxt->nsTab[i + 1]); 8759 } 8760 return(NULL); 8761 } 8762 8763 /** 8764 * xmlParseQName: 8765 * @ctxt: an XML parser context 8766 * @prefix: pointer to store the prefix part 8767 * 8768 * parse an XML Namespace QName 8769 * 8770 * [6] 
QName ::= (Prefix ':')? LocalPart
 * [7]  Prefix ::= NCName
 * [8]  LocalPart ::= NCName
 *
 * Malformed names (leading ':', "prefix:" with no usable local part,
 * or a second ':') raise a namespace error and are returned as a single
 * name built from the pieces found.
 *
 * Returns the Name parsed or NULL
 */

static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
    const xmlChar *localPart, *pfx;
    xmlChar *joined;

    GROW;

    localPart = xmlParseNCName(ctxt);
    if (localPart == NULL) {
        /* Not an NCName: only a name starting with ':' is salvaged. */
        if (CUR != ':')
            return(NULL);
        localPart = xmlParseName(ctxt);
        if (localPart == NULL)
            return(NULL);
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s'\n", localPart, NULL, NULL);
        *prefix = NULL;
        return(localPart);
    }

    /* No colon: an unprefixed name. */
    if (CUR != ':') {
        *prefix = NULL;
        return(localPart);
    }

    NEXT;
    pfx = localPart;
    localPart = xmlParseNCName(ctxt);
    if (localPart == NULL) {
        /* "prefix:" followed by something that is not an NCName. */
        if (ctxt->instate == XML_PARSER_EOF)
            return(NULL);
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:'\n", pfx, NULL, NULL);
        localPart = xmlParseNmtoken(ctxt);
        if (localPart != NULL) {
            joined = xmlBuildQName(localPart, pfx, NULL, 0);
            xmlFree((char *)localPart);
        } else {
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            joined = xmlBuildQName(BAD_CAST "", pfx, NULL, 0);
        }
        pfx = xmlDictLookup(ctxt->dict, joined, -1);
        if (joined != NULL) xmlFree(joined);
        *prefix = NULL;
        return(pfx);
    }

    if (CUR == ':') {
        /* A second colon: fold the remainder into the local part. */
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:%s:'\n", pfx, localPart, NULL);
        NEXT;
        joined = (xmlChar *) xmlParseName(ctxt);
        if (joined != NULL) {
            joined = xmlBuildQName(joined, localPart, NULL, 0);
            localPart = xmlDictLookup(ctxt->dict, joined, -1);
            if (joined != NULL) xmlFree(joined);
            *prefix = pfx;
            return(localPart);
        }
        if (ctxt->instate == XML_PARSER_EOF)
            return(NULL);
        joined = xmlBuildQName(BAD_CAST "", localPart, NULL, 0);
        localPart = xmlDictLookup(ctxt->dict, joined, -1);
        if (joined != NULL) xmlFree(joined);
        *prefix = pfx;
        return(localPart);
    }

    *prefix = pfx;
    return(localPart);
}

/**
 * xmlParseQNameAndCompare:
 * @ctxt:  an
XML parser context 8852 * @name: the localname 8853 * @prefix: the prefix, if any. 8854 * 8855 * parse an XML name and compares for match 8856 * (specialized for endtag parsing) 8857 * 8858 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8859 * and the name for mismatch 8860 */ 8861 8862 static const xmlChar * 8863 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8864 xmlChar const *prefix) { 8865 const xmlChar *cmp; 8866 const xmlChar *in; 8867 const xmlChar *ret; 8868 const xmlChar *prefix2; 8869 8870 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8871 8872 GROW; 8873 in = ctxt->input->cur; 8874 8875 cmp = prefix; 8876 while (*in != 0 && *in == *cmp) { 8877 ++in; 8878 ++cmp; 8879 } 8880 if ((*cmp == 0) && (*in == ':')) { 8881 in++; 8882 cmp = name; 8883 while (*in != 0 && *in == *cmp) { 8884 ++in; 8885 ++cmp; 8886 } 8887 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8888 /* success */ 8889 ctxt->input->col += in - ctxt->input->cur; 8890 ctxt->input->cur = in; 8891 return((const xmlChar*) 1); 8892 } 8893 } 8894 /* 8895 * all strings coms from the dictionary, equality can be done directly 8896 */ 8897 ret = xmlParseQName (ctxt, &prefix2); 8898 if ((ret == name) && (prefix == prefix2)) 8899 return((const xmlChar*) 1); 8900 return ret; 8901 } 8902 8903 /** 8904 * xmlParseAttValueInternal: 8905 * @ctxt: an XML parser context 8906 * @len: attribute len result 8907 * @alloc: whether the attribute was reallocated as a new string 8908 * @normalize: if 1 then further non-CDATA normalization must be done 8909 * 8910 * parse a value for an attribute. 8911 * NOTE: if no normalization is needed, the routine will return pointers 8912 * directly from the data buffer. 
8913 * 8914 * 3.3.3 Attribute-Value Normalization: 8915 * Before the value of an attribute is passed to the application or 8916 * checked for validity, the XML processor must normalize it as follows: 8917 * - a character reference is processed by appending the referenced 8918 * character to the attribute value 8919 * - an entity reference is processed by recursively processing the 8920 * replacement text of the entity 8921 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8922 * appending #x20 to the normalized value, except that only a single 8923 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8924 * parsed entity or the literal entity value of an internal parsed entity 8925 * - other characters are processed by appending them to the normalized value 8926 * If the declared value is not CDATA, then the XML processor must further 8927 * process the normalized attribute value by discarding any leading and 8928 * trailing space (#x20) characters, and by replacing sequences of space 8929 * (#x20) characters by a single space (#x20) character. 8930 * All attributes for which no declaration has been read should be treated 8931 * by a non-validating parser as if declared CDATA. 8932 * 8933 * Returns the AttValue parsed or NULL. The value has to be freed by the 8934 * caller if it was copied, this can be detected by val[*len] == 0. 
8935 */ 8936 8937 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \ 8938 const xmlChar *oldbase = ctxt->input->base;\ 8939 GROW;\ 8940 if (ctxt->instate == XML_PARSER_EOF)\ 8941 return(NULL);\ 8942 if (oldbase != ctxt->input->base) {\ 8943 ptrdiff_t delta = ctxt->input->base - oldbase;\ 8944 start = start + delta;\ 8945 in = in + delta;\ 8946 }\ 8947 end = ctxt->input->end; 8948 8949 static xmlChar * 8950 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8951 int normalize) 8952 { 8953 xmlChar limit = 0; 8954 const xmlChar *in = NULL, *start, *end, *last; 8955 xmlChar *ret = NULL; 8956 int line, col; 8957 8958 GROW; 8959 in = (xmlChar *) CUR_PTR; 8960 line = ctxt->input->line; 8961 col = ctxt->input->col; 8962 if (*in != '"' && *in != '\'') { 8963 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8964 return (NULL); 8965 } 8966 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8967 8968 /* 8969 * try to handle in this routine the most common case where no 8970 * allocation of a new string is required and where content is 8971 * pure ASCII. 
8972 */ 8973 limit = *in++; 8974 col++; 8975 end = ctxt->input->end; 8976 start = in; 8977 if (in >= end) { 8978 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 8979 } 8980 if (normalize) { 8981 /* 8982 * Skip any leading spaces 8983 */ 8984 while ((in < end) && (*in != limit) && 8985 ((*in == 0x20) || (*in == 0x9) || 8986 (*in == 0xA) || (*in == 0xD))) { 8987 if (*in == 0xA) { 8988 line++; col = 1; 8989 } else { 8990 col++; 8991 } 8992 in++; 8993 start = in; 8994 if (in >= end) { 8995 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 8996 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8997 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8998 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8999 "AttValue length too long\n"); 9000 return(NULL); 9001 } 9002 } 9003 } 9004 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9005 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9006 col++; 9007 if ((*in++ == 0x20) && (*in == 0x20)) break; 9008 if (in >= end) { 9009 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9010 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9011 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9012 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9013 "AttValue length too long\n"); 9014 return(NULL); 9015 } 9016 } 9017 } 9018 last = in; 9019 /* 9020 * skip the trailing blanks 9021 */ 9022 while ((last[-1] == 0x20) && (last > start)) last--; 9023 while ((in < end) && (*in != limit) && 9024 ((*in == 0x20) || (*in == 0x9) || 9025 (*in == 0xA) || (*in == 0xD))) { 9026 if (*in == 0xA) { 9027 line++, col = 1; 9028 } else { 9029 col++; 9030 } 9031 in++; 9032 if (in >= end) { 9033 const xmlChar *oldbase = ctxt->input->base; 9034 GROW; 9035 if (ctxt->instate == XML_PARSER_EOF) 9036 return(NULL); 9037 if (oldbase != ctxt->input->base) { 9038 ptrdiff_t delta = ctxt->input->base - oldbase; 9039 start = start + delta; 9040 in = in + delta; 9041 last = last + delta; 9042 } 9043 end = ctxt->input->end; 9044 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9045 
((ctxt->options & XML_PARSE_HUGE) == 0)) { 9046 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9047 "AttValue length too long\n"); 9048 return(NULL); 9049 } 9050 } 9051 } 9052 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9053 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9054 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9055 "AttValue length too long\n"); 9056 return(NULL); 9057 } 9058 if (*in != limit) goto need_complex; 9059 } else { 9060 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9061 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9062 in++; 9063 col++; 9064 if (in >= end) { 9065 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9066 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9067 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9068 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9069 "AttValue length too long\n"); 9070 return(NULL); 9071 } 9072 } 9073 } 9074 last = in; 9075 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9076 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9077 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9078 "AttValue length too long\n"); 9079 return(NULL); 9080 } 9081 if (*in != limit) goto need_complex; 9082 } 9083 in++; 9084 col++; 9085 if (len != NULL) { 9086 *len = last - start; 9087 ret = (xmlChar *) start; 9088 } else { 9089 if (alloc) *alloc = 1; 9090 ret = xmlStrndup(start, last - start); 9091 } 9092 CUR_PTR = in; 9093 ctxt->input->line = line; 9094 ctxt->input->col = col; 9095 if (alloc) *alloc = 0; 9096 return ret; 9097 need_complex: 9098 if (alloc) *alloc = 1; 9099 return xmlParseAttValueComplex(ctxt, len, normalize); 9100 } 9101 9102 /** 9103 * xmlParseAttribute2: 9104 * @ctxt: an XML parser context 9105 * @pref: the element prefix 9106 * @elem: the element name 9107 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9108 * @value: a xmlChar ** used to store the value of the attribute 9109 * @len: an int * to save the length of the attribute 9110 * @alloc: an int * to indicate if 
the attribute was allocated 9111 * 9112 * parse an attribute in the new SAX2 framework. 9113 * 9114 * Returns the attribute name, and the value in *value, . 9115 */ 9116 9117 static const xmlChar * 9118 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9119 const xmlChar * pref, const xmlChar * elem, 9120 const xmlChar ** prefix, xmlChar ** value, 9121 int *len, int *alloc) 9122 { 9123 const xmlChar *name; 9124 xmlChar *val, *internal_val = NULL; 9125 int normalize = 0; 9126 9127 *value = NULL; 9128 GROW; 9129 name = xmlParseQName(ctxt, prefix); 9130 if (name == NULL) { 9131 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9132 "error parsing attribute name\n"); 9133 return (NULL); 9134 } 9135 9136 /* 9137 * get the type if needed 9138 */ 9139 if (ctxt->attsSpecial != NULL) { 9140 int type; 9141 9142 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial, 9143 pref, elem, *prefix, name); 9144 if (type != 0) 9145 normalize = 1; 9146 } 9147 9148 /* 9149 * read the value 9150 */ 9151 SKIP_BLANKS; 9152 if (RAW == '=') { 9153 NEXT; 9154 SKIP_BLANKS; 9155 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9156 if (normalize) { 9157 /* 9158 * Sometimes a second normalisation pass for spaces is needed 9159 * but that only happens if charrefs or entities references 9160 * have been used in the attribute value, i.e. the attribute 9161 * value have been extracted in an allocated string already. 
9162 */ 9163 if (*alloc) { 9164 const xmlChar *val2; 9165 9166 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9167 if ((val2 != NULL) && (val2 != val)) { 9168 xmlFree(val); 9169 val = (xmlChar *) val2; 9170 } 9171 } 9172 } 9173 ctxt->instate = XML_PARSER_CONTENT; 9174 } else { 9175 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9176 "Specification mandates value for attribute %s\n", 9177 name); 9178 return (NULL); 9179 } 9180 9181 if (*prefix == ctxt->str_xml) { 9182 /* 9183 * Check that xml:lang conforms to the specification 9184 * No more registered as an error, just generate a warning now 9185 * since this was deprecated in XML second edition 9186 */ 9187 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9188 internal_val = xmlStrndup(val, *len); 9189 if (!xmlCheckLanguageID(internal_val)) { 9190 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9191 "Malformed value for xml:lang : %s\n", 9192 internal_val, NULL); 9193 } 9194 } 9195 9196 /* 9197 * Check that xml:space conforms to the specification 9198 */ 9199 if (xmlStrEqual(name, BAD_CAST "space")) { 9200 internal_val = xmlStrndup(val, *len); 9201 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9202 *(ctxt->space) = 0; 9203 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9204 *(ctxt->space) = 1; 9205 else { 9206 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9207 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9208 internal_val, NULL); 9209 } 9210 } 9211 if (internal_val) { 9212 xmlFree(internal_val); 9213 } 9214 } 9215 9216 *value = val; 9217 return (name); 9218 } 9219 /** 9220 * xmlParseStartTag2: 9221 * @ctxt: an XML parser context 9222 * 9223 * parse a start of tag either for rule element or 9224 * EmptyElement. In both case we don't parse the tag closing chars. 9225 * This routine is called when running SAX2 parsing 9226 * 9227 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9228 * 9229 * [ WFC: Unique Att Spec ] 9230 * No attribute name may appear more than once in the same start-tag or 9231 * empty-element tag. 9232 * 9233 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9234 * 9235 * [ WFC: Unique Att Spec ] 9236 * No attribute name may appear more than once in the same start-tag or 9237 * empty-element tag. 9238 * 9239 * With namespace: 9240 * 9241 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9242 * 9243 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9244 * 9245 * Returns the element name parsed 9246 */ 9247 9248 static const xmlChar * 9249 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9250 const xmlChar **URI, int *tlen) { 9251 const xmlChar *localname; 9252 const xmlChar *prefix; 9253 const xmlChar *attname; 9254 const xmlChar *aprefix; 9255 const xmlChar *nsname; 9256 xmlChar *attvalue; 9257 const xmlChar **atts = ctxt->atts; 9258 int maxatts = ctxt->maxatts; 9259 int nratts, nbatts, nbdef, inputid; 9260 int i, j, nbNs, attval; 9261 unsigned long cur; 9262 int nsNr = ctxt->nsNr; 9263 9264 if (RAW != '<') return(NULL); 9265 NEXT1; 9266 9267 /* 9268 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9269 * point since the attribute values may be stored as pointers to 9270 * the buffer and calling SHRINK would destroy them ! 9271 * The Shrinking is only possible once the full set of attribute 9272 * callbacks have been done. 9273 */ 9274 SHRINK; 9275 cur = ctxt->input->cur - ctxt->input->base; 9276 inputid = ctxt->input->id; 9277 nbatts = 0; 9278 nratts = 0; 9279 nbdef = 0; 9280 nbNs = 0; 9281 attval = 0; 9282 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9283 ctxt->nsNr = nsNr; 9284 9285 localname = xmlParseQName(ctxt, &prefix); 9286 if (localname == NULL) { 9287 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9288 "StartTag: invalid element name\n"); 9289 return(NULL); 9290 } 9291 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9292 9293 /* 9294 * Now parse the attributes, it ends up with the ending 9295 * 9296 * (S Attribute)* S? 9297 */ 9298 SKIP_BLANKS; 9299 GROW; 9300 9301 while (((RAW != '>') && 9302 ((RAW != '/') || (NXT(1) != '>')) && 9303 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9304 int id = ctxt->input->id; 9305 unsigned long cons = CUR_CONSUMED; 9306 int len = -1, alloc = 0; 9307 9308 attname = xmlParseAttribute2(ctxt, prefix, localname, 9309 &aprefix, &attvalue, &len, &alloc); 9310 if ((attname == NULL) || (attvalue == NULL)) 9311 goto next_attr; 9312 if (len < 0) len = xmlStrlen(attvalue); 9313 9314 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9315 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9316 xmlURIPtr uri; 9317 9318 if (URL == NULL) { 9319 xmlErrMemory(ctxt, "dictionary allocation failure"); 9320 if ((attvalue != NULL) && (alloc != 0)) 9321 xmlFree(attvalue); 9322 localname = NULL; 9323 goto done; 9324 } 9325 if (*URL != 0) { 9326 uri = xmlParseURI((const char *) URL); 9327 if (uri == NULL) { 9328 xmlNsErr(ctxt, XML_WAR_NS_URI, 9329 "xmlns: '%s' is not a valid URI\n", 9330 URL, NULL, NULL); 9331 } else { 9332 if (uri->scheme == NULL) { 9333 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9334 "xmlns: URI %s is not absolute\n", 9335 URL, NULL, NULL); 9336 } 9337 xmlFreeURI(uri); 9338 } 9339 if (URL == ctxt->str_xml_ns) { 9340 if (attname != ctxt->str_xml) { 9341 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9342 "xml namespace URI cannot be the default namespace\n", 9343 NULL, NULL, NULL); 9344 } 9345 goto next_attr; 9346 } 9347 if ((len == 29) && 9348 (xmlStrEqual(URL, 9349 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9350 xmlNsErr(ctxt, 
XML_NS_ERR_XML_NAMESPACE, 9351 "reuse of the xmlns namespace name is forbidden\n", 9352 NULL, NULL, NULL); 9353 goto next_attr; 9354 } 9355 } 9356 /* 9357 * check that it's not a defined namespace 9358 */ 9359 for (j = 1;j <= nbNs;j++) 9360 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9361 break; 9362 if (j <= nbNs) 9363 xmlErrAttributeDup(ctxt, NULL, attname); 9364 else 9365 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9366 9367 } else if (aprefix == ctxt->str_xmlns) { 9368 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9369 xmlURIPtr uri; 9370 9371 if (attname == ctxt->str_xml) { 9372 if (URL != ctxt->str_xml_ns) { 9373 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9374 "xml namespace prefix mapped to wrong URI\n", 9375 NULL, NULL, NULL); 9376 } 9377 /* 9378 * Do not keep a namespace definition node 9379 */ 9380 goto next_attr; 9381 } 9382 if (URL == ctxt->str_xml_ns) { 9383 if (attname != ctxt->str_xml) { 9384 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9385 "xml namespace URI mapped to wrong prefix\n", 9386 NULL, NULL, NULL); 9387 } 9388 goto next_attr; 9389 } 9390 if (attname == ctxt->str_xmlns) { 9391 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9392 "redefinition of the xmlns prefix is forbidden\n", 9393 NULL, NULL, NULL); 9394 goto next_attr; 9395 } 9396 if ((len == 29) && 9397 (xmlStrEqual(URL, 9398 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9399 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9400 "reuse of the xmlns namespace name is forbidden\n", 9401 NULL, NULL, NULL); 9402 goto next_attr; 9403 } 9404 if ((URL == NULL) || (URL[0] == 0)) { 9405 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9406 "xmlns:%s: Empty XML namespace is not allowed\n", 9407 attname, NULL, NULL); 9408 goto next_attr; 9409 } else { 9410 uri = xmlParseURI((const char *) URL); 9411 if (uri == NULL) { 9412 xmlNsErr(ctxt, XML_WAR_NS_URI, 9413 "xmlns:%s: '%s' is not a valid URI\n", 9414 attname, URL, NULL); 9415 } else { 9416 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9417 xmlNsWarn(ctxt, 
XML_WAR_NS_URI_RELATIVE, 9418 "xmlns:%s: URI %s is not absolute\n", 9419 attname, URL, NULL); 9420 } 9421 xmlFreeURI(uri); 9422 } 9423 } 9424 9425 /* 9426 * check that it's not a defined namespace 9427 */ 9428 for (j = 1;j <= nbNs;j++) 9429 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9430 break; 9431 if (j <= nbNs) 9432 xmlErrAttributeDup(ctxt, aprefix, attname); 9433 else 9434 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9435 9436 } else { 9437 /* 9438 * Add the pair to atts 9439 */ 9440 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9441 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9442 goto next_attr; 9443 } 9444 maxatts = ctxt->maxatts; 9445 atts = ctxt->atts; 9446 } 9447 ctxt->attallocs[nratts++] = alloc; 9448 atts[nbatts++] = attname; 9449 atts[nbatts++] = aprefix; 9450 /* 9451 * The namespace URI field is used temporarily to point at the 9452 * base of the current input buffer for non-alloced attributes. 9453 * When the input buffer is reallocated, all the pointers become 9454 * invalid, but they can be reconstructed later. 
9455 */ 9456 if (alloc) 9457 atts[nbatts++] = NULL; 9458 else 9459 atts[nbatts++] = ctxt->input->base; 9460 atts[nbatts++] = attvalue; 9461 attvalue += len; 9462 atts[nbatts++] = attvalue; 9463 /* 9464 * tag if some deallocation is needed 9465 */ 9466 if (alloc != 0) attval = 1; 9467 attvalue = NULL; /* moved into atts */ 9468 } 9469 9470 next_attr: 9471 if ((attvalue != NULL) && (alloc != 0)) { 9472 xmlFree(attvalue); 9473 attvalue = NULL; 9474 } 9475 9476 GROW 9477 if (ctxt->instate == XML_PARSER_EOF) 9478 break; 9479 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9480 break; 9481 if (SKIP_BLANKS == 0) { 9482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9483 "attributes construct error\n"); 9484 break; 9485 } 9486 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) && 9487 (attname == NULL) && (attvalue == NULL)) { 9488 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9489 "xmlParseStartTag: problem parsing attributes\n"); 9490 break; 9491 } 9492 GROW; 9493 } 9494 9495 if (ctxt->input->id != inputid) { 9496 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9497 "Unexpected change of input\n"); 9498 localname = NULL; 9499 goto done; 9500 } 9501 9502 /* Reconstruct attribute value pointers. */ 9503 for (i = 0, j = 0; j < nratts; i += 5, j++) { 9504 if (atts[i+2] != NULL) { 9505 /* 9506 * Arithmetic on dangling pointers is technically undefined 9507 * behavior, but well... 
9508 */ 9509 ptrdiff_t offset = ctxt->input->base - atts[i+2]; 9510 atts[i+2] = NULL; /* Reset repurposed namespace URI */ 9511 atts[i+3] += offset; /* value */ 9512 atts[i+4] += offset; /* valuend */ 9513 } 9514 } 9515 9516 /* 9517 * The attributes defaulting 9518 */ 9519 if (ctxt->attsDefault != NULL) { 9520 xmlDefAttrsPtr defaults; 9521 9522 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9523 if (defaults != NULL) { 9524 for (i = 0;i < defaults->nbAttrs;i++) { 9525 attname = defaults->values[5 * i]; 9526 aprefix = defaults->values[5 * i + 1]; 9527 9528 /* 9529 * special work for namespaces defaulted defs 9530 */ 9531 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9532 /* 9533 * check that it's not a defined namespace 9534 */ 9535 for (j = 1;j <= nbNs;j++) 9536 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9537 break; 9538 if (j <= nbNs) continue; 9539 9540 nsname = xmlGetNamespace(ctxt, NULL); 9541 if (nsname != defaults->values[5 * i + 2]) { 9542 if (nsPush(ctxt, NULL, 9543 defaults->values[5 * i + 2]) > 0) 9544 nbNs++; 9545 } 9546 } else if (aprefix == ctxt->str_xmlns) { 9547 /* 9548 * check that it's not a defined namespace 9549 */ 9550 for (j = 1;j <= nbNs;j++) 9551 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9552 break; 9553 if (j <= nbNs) continue; 9554 9555 nsname = xmlGetNamespace(ctxt, attname); 9556 if (nsname != defaults->values[2]) { 9557 if (nsPush(ctxt, attname, 9558 defaults->values[5 * i + 2]) > 0) 9559 nbNs++; 9560 } 9561 } else { 9562 /* 9563 * check that it's not a defined attribute 9564 */ 9565 for (j = 0;j < nbatts;j+=5) { 9566 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9567 break; 9568 } 9569 if (j < nbatts) continue; 9570 9571 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9572 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9573 localname = NULL; 9574 goto done; 9575 } 9576 maxatts = ctxt->maxatts; 9577 atts = ctxt->atts; 9578 } 9579 atts[nbatts++] = attname; 9580 atts[nbatts++] = aprefix; 9581 if 
(aprefix == NULL) 9582 atts[nbatts++] = NULL; 9583 else 9584 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9585 atts[nbatts++] = defaults->values[5 * i + 2]; 9586 atts[nbatts++] = defaults->values[5 * i + 3]; 9587 if ((ctxt->standalone == 1) && 9588 (defaults->values[5 * i + 4] != NULL)) { 9589 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9590 "standalone: attribute %s on %s defaulted from external subset\n", 9591 attname, localname); 9592 } 9593 nbdef++; 9594 } 9595 } 9596 } 9597 } 9598 9599 /* 9600 * The attributes checkings 9601 */ 9602 for (i = 0; i < nbatts;i += 5) { 9603 /* 9604 * The default namespace does not apply to attribute names. 9605 */ 9606 if (atts[i + 1] != NULL) { 9607 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9608 if (nsname == NULL) { 9609 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9610 "Namespace prefix %s for %s on %s is not defined\n", 9611 atts[i + 1], atts[i], localname); 9612 } 9613 atts[i + 2] = nsname; 9614 } else 9615 nsname = NULL; 9616 /* 9617 * [ WFC: Unique Att Spec ] 9618 * No attribute name may appear more than once in the same 9619 * start-tag or empty-element tag. 9620 * As extended by the Namespace in XML REC. 9621 */ 9622 for (j = 0; j < i;j += 5) { 9623 if (atts[i] == atts[j]) { 9624 if (atts[i+1] == atts[j+1]) { 9625 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9626 break; 9627 } 9628 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9629 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9630 "Namespaced Attribute %s in '%s' redefined\n", 9631 atts[i], nsname, NULL); 9632 break; 9633 } 9634 } 9635 } 9636 } 9637 9638 nsname = xmlGetNamespace(ctxt, prefix); 9639 if ((prefix != NULL) && (nsname == NULL)) { 9640 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9641 "Namespace prefix %s on %s is not defined\n", 9642 prefix, localname, NULL); 9643 } 9644 *pref = prefix; 9645 *URI = nsname; 9646 9647 /* 9648 * SAX: Start of Element ! 
9649 */ 9650 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9651 (!ctxt->disableSAX)) { 9652 if (nbNs > 0) 9653 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9654 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9655 nbatts / 5, nbdef, atts); 9656 else 9657 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9658 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9659 } 9660 9661 done: 9662 /* 9663 * Free up attribute allocated strings if needed 9664 */ 9665 if (attval != 0) { 9666 for (i = 3,j = 0; j < nratts;i += 5,j++) 9667 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9668 xmlFree((xmlChar *) atts[i]); 9669 } 9670 9671 return(localname); 9672 } 9673 9674 /** 9675 * xmlParseEndTag2: 9676 * @ctxt: an XML parser context 9677 * @line: line of the start tag 9678 * @nsNr: number of namespaces on the start tag 9679 * 9680 * parse an end of tag 9681 * 9682 * [42] ETag ::= '</' Name S? '>' 9683 * 9684 * With namespace 9685 * 9686 * [NS 9] ETag ::= '</' QName S? '>' 9687 */ 9688 9689 static void 9690 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) { 9691 const xmlChar *name; 9692 9693 GROW; 9694 if ((RAW != '<') || (NXT(1) != '/')) { 9695 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9696 return; 9697 } 9698 SKIP(2); 9699 9700 if (tag->prefix == NULL) 9701 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9702 else 9703 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix); 9704 9705 /* 9706 * We should definitely be at the ending "S? '>'" part 9707 */ 9708 GROW; 9709 if (ctxt->instate == XML_PARSER_EOF) 9710 return; 9711 SKIP_BLANKS; 9712 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9713 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9714 } else 9715 NEXT1; 9716 9717 /* 9718 * [ WFC: Element Type Match ] 9719 * The Name in an element's end-tag must match the element type in the 9720 * start-tag. 
9721 * 9722 */ 9723 if (name != (xmlChar*)1) { 9724 if (name == NULL) name = BAD_CAST "unparsable"; 9725 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9726 "Opening and ending tag mismatch: %s line %d and %s\n", 9727 ctxt->name, tag->line, name); 9728 } 9729 9730 /* 9731 * SAX: End of Tag 9732 */ 9733 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9734 (!ctxt->disableSAX)) 9735 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix, 9736 tag->URI); 9737 9738 spacePop(ctxt); 9739 if (tag->nsNr != 0) 9740 nsPop(ctxt, tag->nsNr); 9741 } 9742 9743 /** 9744 * xmlParseCDSect: 9745 * @ctxt: an XML parser context 9746 * 9747 * Parse escaped pure raw content. 9748 * 9749 * [18] CDSect ::= CDStart CData CDEnd 9750 * 9751 * [19] CDStart ::= '<![CDATA[' 9752 * 9753 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9754 * 9755 * [21] CDEnd ::= ']]>' 9756 */ 9757 void 9758 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9759 xmlChar *buf = NULL; 9760 int len = 0; 9761 int size = XML_PARSER_BUFFER_SIZE; 9762 int r, rl; 9763 int s, sl; 9764 int cur, l; 9765 int count = 0; 9766 9767 /* Check 2.6.0 was NXT(0) not RAW */ 9768 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9769 SKIP(9); 9770 } else 9771 return; 9772 9773 ctxt->instate = XML_PARSER_CDATA_SECTION; 9774 r = CUR_CHAR(rl); 9775 if (!IS_CHAR(r)) { 9776 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9777 ctxt->instate = XML_PARSER_CONTENT; 9778 return; 9779 } 9780 NEXTL(rl); 9781 s = CUR_CHAR(sl); 9782 if (!IS_CHAR(s)) { 9783 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9784 ctxt->instate = XML_PARSER_CONTENT; 9785 return; 9786 } 9787 NEXTL(sl); 9788 cur = CUR_CHAR(l); 9789 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9790 if (buf == NULL) { 9791 xmlErrMemory(ctxt, NULL); 9792 return; 9793 } 9794 while (IS_CHAR(cur) && 9795 ((r != ']') || (s != ']') || (cur != '>'))) { 9796 if (len + 5 >= size) { 9797 xmlChar *tmp; 9798 9799 if ((size > 
XML_MAX_TEXT_LENGTH) && 9800 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9801 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9802 "CData section too big found", NULL); 9803 xmlFree (buf); 9804 return; 9805 } 9806 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9807 if (tmp == NULL) { 9808 xmlFree(buf); 9809 xmlErrMemory(ctxt, NULL); 9810 return; 9811 } 9812 buf = tmp; 9813 size *= 2; 9814 } 9815 COPY_BUF(rl,buf,len,r); 9816 r = s; 9817 rl = sl; 9818 s = cur; 9819 sl = l; 9820 count++; 9821 if (count > 50) { 9822 SHRINK; 9823 GROW; 9824 if (ctxt->instate == XML_PARSER_EOF) { 9825 xmlFree(buf); 9826 return; 9827 } 9828 count = 0; 9829 } 9830 NEXTL(l); 9831 cur = CUR_CHAR(l); 9832 } 9833 buf[len] = 0; 9834 ctxt->instate = XML_PARSER_CONTENT; 9835 if (cur != '>') { 9836 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9837 "CData section not finished\n%.50s\n", buf); 9838 xmlFree(buf); 9839 return; 9840 } 9841 NEXTL(l); 9842 9843 /* 9844 * OK the buffer is to be consumed as cdata. 9845 */ 9846 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9847 if (ctxt->sax->cdataBlock != NULL) 9848 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9849 else if (ctxt->sax->characters != NULL) 9850 ctxt->sax->characters(ctxt->userData, buf, len); 9851 } 9852 xmlFree(buf); 9853 } 9854 9855 /** 9856 * xmlParseContentInternal: 9857 * @ctxt: an XML parser context 9858 * 9859 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of 9860 * unexpected EOF to the caller. 9861 */ 9862 9863 static void 9864 xmlParseContentInternal(xmlParserCtxtPtr ctxt) { 9865 int nameNr = ctxt->nameNr; 9866 9867 GROW; 9868 while ((RAW != 0) && 9869 (ctxt->instate != XML_PARSER_EOF)) { 9870 int id = ctxt->input->id; 9871 unsigned long cons = CUR_CONSUMED; 9872 const xmlChar *cur = ctxt->input->cur; 9873 9874 /* 9875 * First case : a Processing Instruction. 
9876 */ 9877 if ((*cur == '<') && (cur[1] == '?')) { 9878 xmlParsePI(ctxt); 9879 } 9880 9881 /* 9882 * Second case : a CDSection 9883 */ 9884 /* 2.6.0 test was *cur not RAW */ 9885 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9886 xmlParseCDSect(ctxt); 9887 } 9888 9889 /* 9890 * Third case : a comment 9891 */ 9892 else if ((*cur == '<') && (NXT(1) == '!') && 9893 (NXT(2) == '-') && (NXT(3) == '-')) { 9894 xmlParseComment(ctxt); 9895 ctxt->instate = XML_PARSER_CONTENT; 9896 } 9897 9898 /* 9899 * Fourth case : a sub-element. 9900 */ 9901 else if (*cur == '<') { 9902 if (NXT(1) == '/') { 9903 if (ctxt->nameNr <= nameNr) 9904 break; 9905 xmlParseElementEnd(ctxt); 9906 } else { 9907 xmlParseElementStart(ctxt); 9908 } 9909 } 9910 9911 /* 9912 * Fifth case : a reference. If if has not been resolved, 9913 * parsing returns it's Name, create the node 9914 */ 9915 9916 else if (*cur == '&') { 9917 xmlParseReference(ctxt); 9918 } 9919 9920 /* 9921 * Last case, text. Note that References are handled directly. 9922 */ 9923 else { 9924 xmlParseCharData(ctxt, 0); 9925 } 9926 9927 GROW; 9928 SHRINK; 9929 9930 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) { 9931 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9932 "detected an error in element content\n"); 9933 xmlHaltParser(ctxt); 9934 break; 9935 } 9936 } 9937 } 9938 9939 /** 9940 * xmlParseContent: 9941 * @ctxt: an XML parser context 9942 * 9943 * Parse a content sequence. Stops at EOF or '</'. 
9944 * 9945 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9946 */ 9947 9948 void 9949 xmlParseContent(xmlParserCtxtPtr ctxt) { 9950 int nameNr = ctxt->nameNr; 9951 9952 xmlParseContentInternal(ctxt); 9953 9954 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) { 9955 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 9956 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 9957 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9958 "Premature end of data in tag %s line %d\n", 9959 name, line, NULL); 9960 } 9961 } 9962 9963 /** 9964 * xmlParseElement: 9965 * @ctxt: an XML parser context 9966 * 9967 * parse an XML element 9968 * 9969 * [39] element ::= EmptyElemTag | STag content ETag 9970 * 9971 * [ WFC: Element Type Match ] 9972 * The Name in an element's end-tag must match the element type in the 9973 * start-tag. 9974 * 9975 */ 9976 9977 void 9978 xmlParseElement(xmlParserCtxtPtr ctxt) { 9979 if (xmlParseElementStart(ctxt) != 0) 9980 return; 9981 9982 xmlParseContentInternal(ctxt); 9983 if (ctxt->instate == XML_PARSER_EOF) 9984 return; 9985 9986 if (CUR == 0) { 9987 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 9988 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 9989 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9990 "Premature end of data in tag %s line %d\n", 9991 name, line, NULL); 9992 return; 9993 } 9994 9995 xmlParseElementEnd(ctxt); 9996 } 9997 9998 /** 9999 * xmlParseElementStart: 10000 * @ctxt: an XML parser context 10001 * 10002 * Parse the start of an XML element. Returns -1 in case of error, 0 if an 10003 * opening tag was parsed, 1 if an empty element was parsed. 
10004 */ 10005 static int 10006 xmlParseElementStart(xmlParserCtxtPtr ctxt) { 10007 const xmlChar *name; 10008 const xmlChar *prefix = NULL; 10009 const xmlChar *URI = NULL; 10010 xmlParserNodeInfo node_info; 10011 int line, tlen = 0; 10012 xmlNodePtr ret; 10013 int nsNr = ctxt->nsNr; 10014 10015 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10016 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10017 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10018 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10019 xmlParserMaxDepth); 10020 xmlHaltParser(ctxt); 10021 return(-1); 10022 } 10023 10024 /* Capture start position */ 10025 if (ctxt->record_info) { 10026 node_info.begin_pos = ctxt->input->consumed + 10027 (CUR_PTR - ctxt->input->base); 10028 node_info.begin_line = ctxt->input->line; 10029 } 10030 10031 if (ctxt->spaceNr == 0) 10032 spacePush(ctxt, -1); 10033 else if (*ctxt->space == -2) 10034 spacePush(ctxt, -1); 10035 else 10036 spacePush(ctxt, *ctxt->space); 10037 10038 line = ctxt->input->line; 10039 #ifdef LIBXML_SAX1_ENABLED 10040 if (ctxt->sax2) 10041 #endif /* LIBXML_SAX1_ENABLED */ 10042 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10043 #ifdef LIBXML_SAX1_ENABLED 10044 else 10045 name = xmlParseStartTag(ctxt); 10046 #endif /* LIBXML_SAX1_ENABLED */ 10047 if (ctxt->instate == XML_PARSER_EOF) 10048 return(-1); 10049 if (name == NULL) { 10050 spacePop(ctxt); 10051 return(-1); 10052 } 10053 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); 10054 ret = ctxt->node; 10055 10056 #ifdef LIBXML_VALID_ENABLED 10057 /* 10058 * [ VC: Root Element Type ] 10059 * The Name in the document type declaration must match the element 10060 * type of the root element. 
10061 */ 10062 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10063 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10064 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10065 #endif /* LIBXML_VALID_ENABLED */ 10066 10067 /* 10068 * Check for an Empty Element. 10069 */ 10070 if ((RAW == '/') && (NXT(1) == '>')) { 10071 SKIP(2); 10072 if (ctxt->sax2) { 10073 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10074 (!ctxt->disableSAX)) 10075 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10076 #ifdef LIBXML_SAX1_ENABLED 10077 } else { 10078 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10079 (!ctxt->disableSAX)) 10080 ctxt->sax->endElement(ctxt->userData, name); 10081 #endif /* LIBXML_SAX1_ENABLED */ 10082 } 10083 namePop(ctxt); 10084 spacePop(ctxt); 10085 if (nsNr != ctxt->nsNr) 10086 nsPop(ctxt, ctxt->nsNr - nsNr); 10087 if ( ret != NULL && ctxt->record_info ) { 10088 node_info.end_pos = ctxt->input->consumed + 10089 (CUR_PTR - ctxt->input->base); 10090 node_info.end_line = ctxt->input->line; 10091 node_info.node = ret; 10092 xmlParserAddNodeInfo(ctxt, &node_info); 10093 } 10094 return(1); 10095 } 10096 if (RAW == '>') { 10097 NEXT1; 10098 } else { 10099 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10100 "Couldn't find end of Start Tag %s line %d\n", 10101 name, line, NULL); 10102 10103 /* 10104 * end of parsing of this node. 
10105 */ 10106 nodePop(ctxt); 10107 namePop(ctxt); 10108 spacePop(ctxt); 10109 if (nsNr != ctxt->nsNr) 10110 nsPop(ctxt, ctxt->nsNr - nsNr); 10111 10112 /* 10113 * Capture end position and add node 10114 */ 10115 if ( ret != NULL && ctxt->record_info ) { 10116 node_info.end_pos = ctxt->input->consumed + 10117 (CUR_PTR - ctxt->input->base); 10118 node_info.end_line = ctxt->input->line; 10119 node_info.node = ret; 10120 xmlParserAddNodeInfo(ctxt, &node_info); 10121 } 10122 return(-1); 10123 } 10124 10125 return(0); 10126 } 10127 10128 /** 10129 * xmlParseElementEnd: 10130 * @ctxt: an XML parser context 10131 * 10132 * Parse the end of an XML element. 10133 */ 10134 static void 10135 xmlParseElementEnd(xmlParserCtxtPtr ctxt) { 10136 xmlParserNodeInfo node_info; 10137 xmlNodePtr ret = ctxt->node; 10138 10139 if (ctxt->nameNr <= 0) 10140 return; 10141 10142 /* 10143 * parse the end of tag: '</' should be here. 10144 */ 10145 if (ctxt->sax2) { 10146 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); 10147 namePop(ctxt); 10148 } 10149 #ifdef LIBXML_SAX1_ENABLED 10150 else 10151 xmlParseEndTag1(ctxt, 0); 10152 #endif /* LIBXML_SAX1_ENABLED */ 10153 10154 /* 10155 * Capture end position and add node 10156 */ 10157 if ( ret != NULL && ctxt->record_info ) { 10158 node_info.end_pos = ctxt->input->consumed + 10159 (CUR_PTR - ctxt->input->base); 10160 node_info.end_line = ctxt->input->line; 10161 node_info.node = ret; 10162 xmlParserAddNodeInfo(ctxt, &node_info); 10163 } 10164 } 10165 10166 /** 10167 * xmlParseVersionNum: 10168 * @ctxt: an XML parser context 10169 * 10170 * parse the XML version value. 10171 * 10172 * [26] VersionNum ::= '1.' 
[0-9]+ 10173 * 10174 * In practice allow [0-9].[0-9]+ at that level 10175 * 10176 * Returns the string giving the XML version number, or NULL 10177 */ 10178 xmlChar * 10179 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10180 xmlChar *buf = NULL; 10181 int len = 0; 10182 int size = 10; 10183 xmlChar cur; 10184 10185 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10186 if (buf == NULL) { 10187 xmlErrMemory(ctxt, NULL); 10188 return(NULL); 10189 } 10190 cur = CUR; 10191 if (!((cur >= '0') && (cur <= '9'))) { 10192 xmlFree(buf); 10193 return(NULL); 10194 } 10195 buf[len++] = cur; 10196 NEXT; 10197 cur=CUR; 10198 if (cur != '.') { 10199 xmlFree(buf); 10200 return(NULL); 10201 } 10202 buf[len++] = cur; 10203 NEXT; 10204 cur=CUR; 10205 while ((cur >= '0') && (cur <= '9')) { 10206 if (len + 1 >= size) { 10207 xmlChar *tmp; 10208 10209 size *= 2; 10210 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10211 if (tmp == NULL) { 10212 xmlFree(buf); 10213 xmlErrMemory(ctxt, NULL); 10214 return(NULL); 10215 } 10216 buf = tmp; 10217 } 10218 buf[len++] = cur; 10219 NEXT; 10220 cur=CUR; 10221 } 10222 buf[len] = 0; 10223 return(buf); 10224 } 10225 10226 /** 10227 * xmlParseVersionInfo: 10228 * @ctxt: an XML parser context 10229 * 10230 * parse the XML version. 10231 * 10232 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10233 * 10234 * [25] Eq ::= S? '=' S? 10235 * 10236 * Returns the version string, e.g. 
"1.0" 10237 */ 10238 10239 xmlChar * 10240 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10241 xmlChar *version = NULL; 10242 10243 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10244 SKIP(7); 10245 SKIP_BLANKS; 10246 if (RAW != '=') { 10247 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10248 return(NULL); 10249 } 10250 NEXT; 10251 SKIP_BLANKS; 10252 if (RAW == '"') { 10253 NEXT; 10254 version = xmlParseVersionNum(ctxt); 10255 if (RAW != '"') { 10256 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10257 } else 10258 NEXT; 10259 } else if (RAW == '\''){ 10260 NEXT; 10261 version = xmlParseVersionNum(ctxt); 10262 if (RAW != '\'') { 10263 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10264 } else 10265 NEXT; 10266 } else { 10267 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10268 } 10269 } 10270 return(version); 10271 } 10272 10273 /** 10274 * xmlParseEncName: 10275 * @ctxt: an XML parser context 10276 * 10277 * parse the XML encoding name 10278 * 10279 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10280 * 10281 * Returns the encoding name value or NULL 10282 */ 10283 xmlChar * 10284 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10285 xmlChar *buf = NULL; 10286 int len = 0; 10287 int size = 10; 10288 xmlChar cur; 10289 10290 cur = CUR; 10291 if (((cur >= 'a') && (cur <= 'z')) || 10292 ((cur >= 'A') && (cur <= 'Z'))) { 10293 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10294 if (buf == NULL) { 10295 xmlErrMemory(ctxt, NULL); 10296 return(NULL); 10297 } 10298 10299 buf[len++] = cur; 10300 NEXT; 10301 cur = CUR; 10302 while (((cur >= 'a') && (cur <= 'z')) || 10303 ((cur >= 'A') && (cur <= 'Z')) || 10304 ((cur >= '0') && (cur <= '9')) || 10305 (cur == '.') || (cur == '_') || 10306 (cur == '-')) { 10307 if (len + 1 >= size) { 10308 xmlChar *tmp; 10309 10310 size *= 2; 10311 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10312 if (tmp == NULL) { 10313 xmlErrMemory(ctxt, NULL); 10314 xmlFree(buf); 10315 return(NULL); 
10316 } 10317 buf = tmp; 10318 } 10319 buf[len++] = cur; 10320 NEXT; 10321 cur = CUR; 10322 if (cur == 0) { 10323 SHRINK; 10324 GROW; 10325 cur = CUR; 10326 } 10327 } 10328 buf[len] = 0; 10329 } else { 10330 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10331 } 10332 return(buf); 10333 } 10334 10335 /** 10336 * xmlParseEncodingDecl: 10337 * @ctxt: an XML parser context 10338 * 10339 * parse the XML encoding declaration 10340 * 10341 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10342 * 10343 * this setups the conversion filters. 10344 * 10345 * Returns the encoding value or NULL 10346 */ 10347 10348 const xmlChar * 10349 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10350 xmlChar *encoding = NULL; 10351 10352 SKIP_BLANKS; 10353 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10354 SKIP(8); 10355 SKIP_BLANKS; 10356 if (RAW != '=') { 10357 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10358 return(NULL); 10359 } 10360 NEXT; 10361 SKIP_BLANKS; 10362 if (RAW == '"') { 10363 NEXT; 10364 encoding = xmlParseEncName(ctxt); 10365 if (RAW != '"') { 10366 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10367 xmlFree((xmlChar *) encoding); 10368 return(NULL); 10369 } else 10370 NEXT; 10371 } else if (RAW == '\''){ 10372 NEXT; 10373 encoding = xmlParseEncName(ctxt); 10374 if (RAW != '\'') { 10375 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10376 xmlFree((xmlChar *) encoding); 10377 return(NULL); 10378 } else 10379 NEXT; 10380 } else { 10381 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10382 } 10383 10384 /* 10385 * Non standard parsing, allowing the user to ignore encoding 10386 */ 10387 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10388 xmlFree((xmlChar *) encoding); 10389 return(NULL); 10390 } 10391 10392 /* 10393 * UTF-16 encoding switch has already taken place at this stage, 10394 * more over the little-endian/big-endian selection is already done 10395 */ 10396 if ((encoding != NULL) && 10397 
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10398 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10399 /* 10400 * If no encoding was passed to the parser, that we are 10401 * using UTF-16 and no decoder is present i.e. the 10402 * document is apparently UTF-8 compatible, then raise an 10403 * encoding mismatch fatal error 10404 */ 10405 if ((ctxt->encoding == NULL) && 10406 (ctxt->input->buf != NULL) && 10407 (ctxt->input->buf->encoder == NULL)) { 10408 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10409 "Document labelled UTF-16 but has UTF-8 content\n"); 10410 } 10411 if (ctxt->encoding != NULL) 10412 xmlFree((xmlChar *) ctxt->encoding); 10413 ctxt->encoding = encoding; 10414 } 10415 /* 10416 * UTF-8 encoding is handled natively 10417 */ 10418 else if ((encoding != NULL) && 10419 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10420 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10421 if (ctxt->encoding != NULL) 10422 xmlFree((xmlChar *) ctxt->encoding); 10423 ctxt->encoding = encoding; 10424 } 10425 else if (encoding != NULL) { 10426 xmlCharEncodingHandlerPtr handler; 10427 10428 if (ctxt->input->encoding != NULL) 10429 xmlFree((xmlChar *) ctxt->input->encoding); 10430 ctxt->input->encoding = encoding; 10431 10432 handler = xmlFindCharEncodingHandler((const char *) encoding); 10433 if (handler != NULL) { 10434 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10435 /* failed to convert */ 10436 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10437 return(NULL); 10438 } 10439 } else { 10440 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10441 "Unsupported encoding %s\n", encoding); 10442 return(NULL); 10443 } 10444 } 10445 } 10446 return(encoding); 10447 } 10448 10449 /** 10450 * xmlParseSDDecl: 10451 * @ctxt: an XML parser context 10452 * 10453 * parse the XML standalone declaration 10454 * 10455 * [32] SDDecl ::= S 'standalone' Eq 10456 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10457 * 10458 * [ VC: Standalone Document Declaration ] 10459 
* TODO The standalone document declaration must have the value "no" 10460 * if any external markup declarations contain declarations of: 10461 * - attributes with default values, if elements to which these 10462 * attributes apply appear in the document without specifications 10463 * of values for these attributes, or 10464 * - entities (other than amp, lt, gt, apos, quot), if references 10465 * to those entities appear in the document, or 10466 * - attributes with values subject to normalization, where the 10467 * attribute appears in the document with a value which will change 10468 * as a result of normalization, or 10469 * - element types with element content, if white space occurs directly 10470 * within any instance of those types. 10471 * 10472 * Returns: 10473 * 1 if standalone="yes" 10474 * 0 if standalone="no" 10475 * -2 if standalone attribute is missing or invalid 10476 * (A standalone value of -2 means that the XML declaration was found, 10477 * but no value was specified for the standalone attribute). 
10478 */ 10479 10480 int 10481 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10482 int standalone = -2; 10483 10484 SKIP_BLANKS; 10485 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10486 SKIP(10); 10487 SKIP_BLANKS; 10488 if (RAW != '=') { 10489 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10490 return(standalone); 10491 } 10492 NEXT; 10493 SKIP_BLANKS; 10494 if (RAW == '\''){ 10495 NEXT; 10496 if ((RAW == 'n') && (NXT(1) == 'o')) { 10497 standalone = 0; 10498 SKIP(2); 10499 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10500 (NXT(2) == 's')) { 10501 standalone = 1; 10502 SKIP(3); 10503 } else { 10504 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10505 } 10506 if (RAW != '\'') { 10507 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10508 } else 10509 NEXT; 10510 } else if (RAW == '"'){ 10511 NEXT; 10512 if ((RAW == 'n') && (NXT(1) == 'o')) { 10513 standalone = 0; 10514 SKIP(2); 10515 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10516 (NXT(2) == 's')) { 10517 standalone = 1; 10518 SKIP(3); 10519 } else { 10520 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10521 } 10522 if (RAW != '"') { 10523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10524 } else 10525 NEXT; 10526 } else { 10527 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10528 } 10529 } 10530 return(standalone); 10531 } 10532 10533 /** 10534 * xmlParseXMLDecl: 10535 * @ctxt: an XML parser context 10536 * 10537 * parse an XML declaration header 10538 * 10539 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10540 */ 10541 10542 void 10543 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10544 xmlChar *version; 10545 10546 /* 10547 * This value for standalone indicates that the document has an 10548 * XML declaration but it does not have a standalone attribute. 10549 * It will be overwritten later if a standalone attribute is found. 10550 */ 10551 ctxt->input->standalone = -2; 10552 10553 /* 10554 * We know that '<?xml' is here. 
10555 */ 10556 SKIP(5); 10557 10558 if (!IS_BLANK_CH(RAW)) { 10559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10560 "Blank needed after '<?xml'\n"); 10561 } 10562 SKIP_BLANKS; 10563 10564 /* 10565 * We must have the VersionInfo here. 10566 */ 10567 version = xmlParseVersionInfo(ctxt); 10568 if (version == NULL) { 10569 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10570 } else { 10571 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10572 /* 10573 * Changed here for XML-1.0 5th edition 10574 */ 10575 if (ctxt->options & XML_PARSE_OLD10) { 10576 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10577 "Unsupported version '%s'\n", 10578 version); 10579 } else { 10580 if ((version[0] == '1') && ((version[1] == '.'))) { 10581 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10582 "Unsupported version '%s'\n", 10583 version, NULL); 10584 } else { 10585 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10586 "Unsupported version '%s'\n", 10587 version); 10588 } 10589 } 10590 } 10591 if (ctxt->version != NULL) 10592 xmlFree((void *) ctxt->version); 10593 ctxt->version = version; 10594 } 10595 10596 /* 10597 * We may have the encoding declaration 10598 */ 10599 if (!IS_BLANK_CH(RAW)) { 10600 if ((RAW == '?') && (NXT(1) == '>')) { 10601 SKIP(2); 10602 return; 10603 } 10604 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10605 } 10606 xmlParseEncodingDecl(ctxt); 10607 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10608 (ctxt->instate == XML_PARSER_EOF)) { 10609 /* 10610 * The XML REC instructs us to stop parsing right here 10611 */ 10612 return; 10613 } 10614 10615 /* 10616 * We may have the standalone status. 
10617 */ 10618 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10619 if ((RAW == '?') && (NXT(1) == '>')) { 10620 SKIP(2); 10621 return; 10622 } 10623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10624 } 10625 10626 /* 10627 * We can grow the input buffer freely at that point 10628 */ 10629 GROW; 10630 10631 SKIP_BLANKS; 10632 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10633 10634 SKIP_BLANKS; 10635 if ((RAW == '?') && (NXT(1) == '>')) { 10636 SKIP(2); 10637 } else if (RAW == '>') { 10638 /* Deprecated old WD ... */ 10639 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10640 NEXT; 10641 } else { 10642 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10643 MOVETO_ENDTAG(CUR_PTR); 10644 NEXT; 10645 } 10646 } 10647 10648 /** 10649 * xmlParseMisc: 10650 * @ctxt: an XML parser context 10651 * 10652 * parse an XML Misc* optional field. 10653 * 10654 * [27] Misc ::= Comment | PI | S 10655 */ 10656 10657 void 10658 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10659 while (ctxt->instate != XML_PARSER_EOF) { 10660 SKIP_BLANKS; 10661 GROW; 10662 if ((RAW == '<') && (NXT(1) == '?')) { 10663 xmlParsePI(ctxt); 10664 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) { 10665 xmlParseComment(ctxt); 10666 } else { 10667 break; 10668 } 10669 } 10670 } 10671 10672 /** 10673 * xmlParseDocument: 10674 * @ctxt: an XML parser context 10675 * 10676 * parse an XML document (and build a tree if using the standard SAX 10677 * interface). 10678 * 10679 * [1] document ::= prolog element Misc* 10680 * 10681 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10682 * 10683 * Returns 0, -1 in case of error. the parser context is augmented 10684 * as a result of the parsing. 
10685 */ 10686 10687 int 10688 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10689 xmlChar start[4]; 10690 xmlCharEncoding enc; 10691 10692 xmlInitParser(); 10693 10694 if ((ctxt == NULL) || (ctxt->input == NULL)) 10695 return(-1); 10696 10697 GROW; 10698 10699 /* 10700 * SAX: detecting the level. 10701 */ 10702 xmlDetectSAX2(ctxt); 10703 10704 /* 10705 * SAX: beginning of the document processing. 10706 */ 10707 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10708 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10709 if (ctxt->instate == XML_PARSER_EOF) 10710 return(-1); 10711 10712 if ((ctxt->encoding == NULL) && 10713 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10714 /* 10715 * Get the 4 first bytes and decode the charset 10716 * if enc != XML_CHAR_ENCODING_NONE 10717 * plug some encoding conversion routines. 10718 */ 10719 start[0] = RAW; 10720 start[1] = NXT(1); 10721 start[2] = NXT(2); 10722 start[3] = NXT(3); 10723 enc = xmlDetectCharEncoding(&start[0], 4); 10724 if (enc != XML_CHAR_ENCODING_NONE) { 10725 xmlSwitchEncoding(ctxt, enc); 10726 } 10727 } 10728 10729 10730 if (CUR == 0) { 10731 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10732 return(-1); 10733 } 10734 10735 /* 10736 * Check for the XMLDecl in the Prolog. 10737 * do not GROW here to avoid the detected encoder to decode more 10738 * than just the first line, unless the amount of data is really 10739 * too small to hold "<?xml version="1.0" encoding="foo" 10740 */ 10741 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10742 GROW; 10743 } 10744 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10745 10746 /* 10747 * Note that we will switch encoding on the fly. 
10748 */ 10749 xmlParseXMLDecl(ctxt); 10750 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10751 (ctxt->instate == XML_PARSER_EOF)) { 10752 /* 10753 * The XML REC instructs us to stop parsing right here 10754 */ 10755 return(-1); 10756 } 10757 ctxt->standalone = ctxt->input->standalone; 10758 SKIP_BLANKS; 10759 } else { 10760 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10761 } 10762 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10763 ctxt->sax->startDocument(ctxt->userData); 10764 if (ctxt->instate == XML_PARSER_EOF) 10765 return(-1); 10766 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10767 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10768 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10769 } 10770 10771 /* 10772 * The Misc part of the Prolog 10773 */ 10774 xmlParseMisc(ctxt); 10775 10776 /* 10777 * Then possibly doc type declaration(s) and more Misc 10778 * (doctypedecl Misc*)? 10779 */ 10780 GROW; 10781 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10782 10783 ctxt->inSubset = 1; 10784 xmlParseDocTypeDecl(ctxt); 10785 if (RAW == '[') { 10786 ctxt->instate = XML_PARSER_DTD; 10787 xmlParseInternalSubset(ctxt); 10788 if (ctxt->instate == XML_PARSER_EOF) 10789 return(-1); 10790 } 10791 10792 /* 10793 * Create and update the external subset. 
10794 */ 10795 ctxt->inSubset = 2; 10796 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10797 (!ctxt->disableSAX)) 10798 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10799 ctxt->extSubSystem, ctxt->extSubURI); 10800 if (ctxt->instate == XML_PARSER_EOF) 10801 return(-1); 10802 ctxt->inSubset = 0; 10803 10804 xmlCleanSpecialAttr(ctxt); 10805 10806 ctxt->instate = XML_PARSER_PROLOG; 10807 xmlParseMisc(ctxt); 10808 } 10809 10810 /* 10811 * Time to start parsing the tree itself 10812 */ 10813 GROW; 10814 if (RAW != '<') { 10815 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10816 "Start tag expected, '<' not found\n"); 10817 } else { 10818 ctxt->instate = XML_PARSER_CONTENT; 10819 xmlParseElement(ctxt); 10820 ctxt->instate = XML_PARSER_EPILOG; 10821 10822 10823 /* 10824 * The Misc part at the end 10825 */ 10826 xmlParseMisc(ctxt); 10827 10828 if (RAW != 0) { 10829 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10830 } 10831 ctxt->instate = XML_PARSER_EOF; 10832 } 10833 10834 /* 10835 * SAX: end of the document processing. 10836 */ 10837 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10838 ctxt->sax->endDocument(ctxt->userData); 10839 10840 /* 10841 * Remove locally kept entity definitions if the tree was not built 10842 */ 10843 if ((ctxt->myDoc != NULL) && 10844 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10845 xmlFreeDoc(ctxt->myDoc); 10846 ctxt->myDoc = NULL; 10847 } 10848 10849 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10850 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10851 if (ctxt->valid) 10852 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10853 if (ctxt->nsWellFormed) 10854 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10855 if (ctxt->options & XML_PARSE_OLD10) 10856 ctxt->myDoc->properties |= XML_DOC_OLD10; 10857 } 10858 if (! 
ctxt->wellFormed) { 10859 ctxt->valid = 0; 10860 return(-1); 10861 } 10862 return(0); 10863 } 10864 10865 /** 10866 * xmlParseExtParsedEnt: 10867 * @ctxt: an XML parser context 10868 * 10869 * parse a general parsed entity 10870 * An external general parsed entity is well-formed if it matches the 10871 * production labeled extParsedEnt. 10872 * 10873 * [78] extParsedEnt ::= TextDecl? content 10874 * 10875 * Returns 0, -1 in case of error. the parser context is augmented 10876 * as a result of the parsing. 10877 */ 10878 10879 int 10880 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10881 xmlChar start[4]; 10882 xmlCharEncoding enc; 10883 10884 if ((ctxt == NULL) || (ctxt->input == NULL)) 10885 return(-1); 10886 10887 xmlDetectSAX2(ctxt); 10888 10889 GROW; 10890 10891 /* 10892 * SAX: beginning of the document processing. 10893 */ 10894 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10895 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10896 10897 /* 10898 * Get the 4 first bytes and decode the charset 10899 * if enc != XML_CHAR_ENCODING_NONE 10900 * plug some encoding conversion routines. 10901 */ 10902 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10903 start[0] = RAW; 10904 start[1] = NXT(1); 10905 start[2] = NXT(2); 10906 start[3] = NXT(3); 10907 enc = xmlDetectCharEncoding(start, 4); 10908 if (enc != XML_CHAR_ENCODING_NONE) { 10909 xmlSwitchEncoding(ctxt, enc); 10910 } 10911 } 10912 10913 10914 if (CUR == 0) { 10915 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10916 } 10917 10918 /* 10919 * Check for the XMLDecl in the Prolog. 10920 */ 10921 GROW; 10922 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10923 10924 /* 10925 * Note that we will switch encoding on the fly. 
10926 */ 10927 xmlParseXMLDecl(ctxt); 10928 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10929 /* 10930 * The XML REC instructs us to stop parsing right here 10931 */ 10932 return(-1); 10933 } 10934 SKIP_BLANKS; 10935 } else { 10936 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10937 } 10938 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10939 ctxt->sax->startDocument(ctxt->userData); 10940 if (ctxt->instate == XML_PARSER_EOF) 10941 return(-1); 10942 10943 /* 10944 * Doing validity checking on chunk doesn't make sense 10945 */ 10946 ctxt->instate = XML_PARSER_CONTENT; 10947 ctxt->validate = 0; 10948 ctxt->loadsubset = 0; 10949 ctxt->depth = 0; 10950 10951 xmlParseContent(ctxt); 10952 if (ctxt->instate == XML_PARSER_EOF) 10953 return(-1); 10954 10955 if ((RAW == '<') && (NXT(1) == '/')) { 10956 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10957 } else if (RAW != 0) { 10958 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10959 } 10960 10961 /* 10962 * SAX: end of the document processing. 10963 */ 10964 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10965 ctxt->sax->endDocument(ctxt->userData); 10966 10967 if (! ctxt->wellFormed) return(-1); 10968 return(0); 10969 } 10970 10971 #ifdef LIBXML_PUSH_ENABLED 10972 /************************************************************************ 10973 * * 10974 * Progressive parsing interfaces * 10975 * * 10976 ************************************************************************/ 10977 10978 /** 10979 * xmlParseLookupSequence: 10980 * @ctxt: an XML parser context 10981 * @first: the first char to lookup 10982 * @next: the next char to lookup or zero 10983 * @third: the next char to lookup or zero 10984 * 10985 * Try to find if a sequence (first, next, third) or just (first next) or 10986 * (first) is available in the input stream. 
10987 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10988 * to avoid rescanning sequences of bytes, it DOES change the state of the 10989 * parser, do not use liberally. 10990 * 10991 * Returns the index to the current parsing point if the full sequence 10992 * is available, -1 otherwise. 10993 */ 10994 static int 10995 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10996 xmlChar next, xmlChar third) { 10997 int base, len; 10998 xmlParserInputPtr in; 10999 const xmlChar *buf; 11000 11001 in = ctxt->input; 11002 if (in == NULL) return(-1); 11003 base = in->cur - in->base; 11004 if (base < 0) return(-1); 11005 if (ctxt->checkIndex > base) 11006 base = ctxt->checkIndex; 11007 if (in->buf == NULL) { 11008 buf = in->base; 11009 len = in->length; 11010 } else { 11011 buf = xmlBufContent(in->buf->buffer); 11012 len = xmlBufUse(in->buf->buffer); 11013 } 11014 /* take into account the sequence length */ 11015 if (third) len -= 2; 11016 else if (next) len --; 11017 for (;base < len;base++) { 11018 if (buf[base] == first) { 11019 if (third != 0) { 11020 if ((buf[base + 1] != next) || 11021 (buf[base + 2] != third)) continue; 11022 } else if (next != 0) { 11023 if (buf[base + 1] != next) continue; 11024 } 11025 ctxt->checkIndex = 0; 11026 #ifdef DEBUG_PUSH 11027 if (next == 0) 11028 xmlGenericError(xmlGenericErrorContext, 11029 "PP: lookup '%c' found at %d\n", 11030 first, base); 11031 else if (third == 0) 11032 xmlGenericError(xmlGenericErrorContext, 11033 "PP: lookup '%c%c' found at %d\n", 11034 first, next, base); 11035 else 11036 xmlGenericError(xmlGenericErrorContext, 11037 "PP: lookup '%c%c%c' found at %d\n", 11038 first, next, third, base); 11039 #endif 11040 return(base - (in->cur - in->base)); 11041 } 11042 } 11043 ctxt->checkIndex = base; 11044 #ifdef DEBUG_PUSH 11045 if (next == 0) 11046 xmlGenericError(xmlGenericErrorContext, 11047 "PP: lookup '%c' failed\n", first); 11048 else if (third == 0) 11049 
xmlGenericError(xmlGenericErrorContext, 11050 "PP: lookup '%c%c' failed\n", first, next); 11051 else 11052 xmlGenericError(xmlGenericErrorContext, 11053 "PP: lookup '%c%c%c' failed\n", first, next, third); 11054 #endif 11055 return(-1); 11056 } 11057 11058 /** 11059 * xmlParseGetLasts: 11060 * @ctxt: an XML parser context 11061 * @lastlt: pointer to store the last '<' from the input 11062 * @lastgt: pointer to store the last '>' from the input 11063 * 11064 * Lookup the last < and > in the current chunk 11065 */ 11066 static void 11067 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 11068 const xmlChar **lastgt) { 11069 const xmlChar *tmp; 11070 11071 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11072 xmlGenericError(xmlGenericErrorContext, 11073 "Internal error: xmlParseGetLasts\n"); 11074 return; 11075 } 11076 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11077 tmp = ctxt->input->end; 11078 tmp--; 11079 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11080 if (tmp < ctxt->input->base) { 11081 *lastlt = NULL; 11082 *lastgt = NULL; 11083 } else { 11084 *lastlt = tmp; 11085 tmp++; 11086 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11087 if (*tmp == '\'') { 11088 tmp++; 11089 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11090 if (tmp < ctxt->input->end) tmp++; 11091 } else if (*tmp == '"') { 11092 tmp++; 11093 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11094 if (tmp < ctxt->input->end) tmp++; 11095 } else 11096 tmp++; 11097 } 11098 if (tmp < ctxt->input->end) 11099 *lastgt = tmp; 11100 else { 11101 tmp = *lastlt; 11102 tmp--; 11103 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11104 if (tmp >= ctxt->input->base) 11105 *lastgt = tmp; 11106 else 11107 *lastgt = NULL; 11108 } 11109 } 11110 } else { 11111 *lastlt = NULL; 11112 *lastgt = NULL; 11113 } 11114 } 11115 /** 11116 * xmlCheckCdataPush: 11117 * @cur: pointer to the block of characters 11118 * @len: length of the 
block in bytes 11119 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11120 * 11121 * Check that the block of characters is okay as SCdata content [20] 11122 * 11123 * Returns the number of bytes to pass if okay, a negative index where an 11124 * UTF-8 error occurred otherwise 11125 */ 11126 static int 11127 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11128 int ix; 11129 unsigned char c; 11130 int codepoint; 11131 11132 if ((utf == NULL) || (len <= 0)) 11133 return(0); 11134 11135 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11136 c = utf[ix]; 11137 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11138 if (c >= 0x20) 11139 ix++; 11140 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11141 ix++; 11142 else 11143 return(-ix); 11144 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11145 if (ix + 2 > len) return(complete ? -ix : ix); 11146 if ((utf[ix+1] & 0xc0 ) != 0x80) 11147 return(-ix); 11148 codepoint = (utf[ix] & 0x1f) << 6; 11149 codepoint |= utf[ix+1] & 0x3f; 11150 if (!xmlIsCharQ(codepoint)) 11151 return(-ix); 11152 ix += 2; 11153 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11154 if (ix + 3 > len) return(complete ? -ix : ix); 11155 if (((utf[ix+1] & 0xc0) != 0x80) || 11156 ((utf[ix+2] & 0xc0) != 0x80)) 11157 return(-ix); 11158 codepoint = (utf[ix] & 0xf) << 12; 11159 codepoint |= (utf[ix+1] & 0x3f) << 6; 11160 codepoint |= utf[ix+2] & 0x3f; 11161 if (!xmlIsCharQ(codepoint)) 11162 return(-ix); 11163 ix += 3; 11164 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11165 if (ix + 4 > len) return(complete ? 
-ix : ix); 11166 if (((utf[ix+1] & 0xc0) != 0x80) || 11167 ((utf[ix+2] & 0xc0) != 0x80) || 11168 ((utf[ix+3] & 0xc0) != 0x80)) 11169 return(-ix); 11170 codepoint = (utf[ix] & 0x7) << 18; 11171 codepoint |= (utf[ix+1] & 0x3f) << 12; 11172 codepoint |= (utf[ix+2] & 0x3f) << 6; 11173 codepoint |= utf[ix+3] & 0x3f; 11174 if (!xmlIsCharQ(codepoint)) 11175 return(-ix); 11176 ix += 4; 11177 } else /* unknown encoding */ 11178 return(-ix); 11179 } 11180 return(ix); 11181 } 11182 11183 /** 11184 * xmlParseTryOrFinish: 11185 * @ctxt: an XML parser context 11186 * @terminate: last chunk indicator 11187 * 11188 * Try to progress on parsing 11189 * 11190 * Returns zero if no parsing was possible 11191 */ 11192 static int 11193 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11194 int ret = 0; 11195 int avail, tlen; 11196 xmlChar cur, next; 11197 const xmlChar *lastlt, *lastgt; 11198 11199 if (ctxt->input == NULL) 11200 return(0); 11201 11202 #ifdef DEBUG_PUSH 11203 switch (ctxt->instate) { 11204 case XML_PARSER_EOF: 11205 xmlGenericError(xmlGenericErrorContext, 11206 "PP: try EOF\n"); break; 11207 case XML_PARSER_START: 11208 xmlGenericError(xmlGenericErrorContext, 11209 "PP: try START\n"); break; 11210 case XML_PARSER_MISC: 11211 xmlGenericError(xmlGenericErrorContext, 11212 "PP: try MISC\n");break; 11213 case XML_PARSER_COMMENT: 11214 xmlGenericError(xmlGenericErrorContext, 11215 "PP: try COMMENT\n");break; 11216 case XML_PARSER_PROLOG: 11217 xmlGenericError(xmlGenericErrorContext, 11218 "PP: try PROLOG\n");break; 11219 case XML_PARSER_START_TAG: 11220 xmlGenericError(xmlGenericErrorContext, 11221 "PP: try START_TAG\n");break; 11222 case XML_PARSER_CONTENT: 11223 xmlGenericError(xmlGenericErrorContext, 11224 "PP: try CONTENT\n");break; 11225 case XML_PARSER_CDATA_SECTION: 11226 xmlGenericError(xmlGenericErrorContext, 11227 "PP: try CDATA_SECTION\n");break; 11228 case XML_PARSER_END_TAG: 11229 xmlGenericError(xmlGenericErrorContext, 11230 "PP: try 
END_TAG\n");break; 11231 case XML_PARSER_ENTITY_DECL: 11232 xmlGenericError(xmlGenericErrorContext, 11233 "PP: try ENTITY_DECL\n");break; 11234 case XML_PARSER_ENTITY_VALUE: 11235 xmlGenericError(xmlGenericErrorContext, 11236 "PP: try ENTITY_VALUE\n");break; 11237 case XML_PARSER_ATTRIBUTE_VALUE: 11238 xmlGenericError(xmlGenericErrorContext, 11239 "PP: try ATTRIBUTE_VALUE\n");break; 11240 case XML_PARSER_DTD: 11241 xmlGenericError(xmlGenericErrorContext, 11242 "PP: try DTD\n");break; 11243 case XML_PARSER_EPILOG: 11244 xmlGenericError(xmlGenericErrorContext, 11245 "PP: try EPILOG\n");break; 11246 case XML_PARSER_PI: 11247 xmlGenericError(xmlGenericErrorContext, 11248 "PP: try PI\n");break; 11249 case XML_PARSER_IGNORE: 11250 xmlGenericError(xmlGenericErrorContext, 11251 "PP: try IGNORE\n");break; 11252 } 11253 #endif 11254 11255 if ((ctxt->input != NULL) && 11256 (ctxt->input->cur - ctxt->input->base > 4096)) { 11257 xmlSHRINK(ctxt); 11258 ctxt->checkIndex = 0; 11259 } 11260 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11261 11262 while (ctxt->instate != XML_PARSER_EOF) { 11263 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11264 return(0); 11265 11266 if (ctxt->input == NULL) break; 11267 if (ctxt->input->buf == NULL) 11268 avail = ctxt->input->length - 11269 (ctxt->input->cur - ctxt->input->base); 11270 else { 11271 /* 11272 * If we are operating on converted input, try to flush 11273 * remaining chars to avoid them stalling in the non-converted 11274 * buffer. But do not do this in document start where 11275 * encoding="..." may not have been read and we work on a 11276 * guessed encoding. 
11277 */ 11278 if ((ctxt->instate != XML_PARSER_START) && 11279 (ctxt->input->buf->raw != NULL) && 11280 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11281 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11282 ctxt->input); 11283 size_t current = ctxt->input->cur - ctxt->input->base; 11284 11285 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11286 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11287 base, current); 11288 } 11289 avail = xmlBufUse(ctxt->input->buf->buffer) - 11290 (ctxt->input->cur - ctxt->input->base); 11291 } 11292 if (avail < 1) 11293 goto done; 11294 switch (ctxt->instate) { 11295 case XML_PARSER_EOF: 11296 /* 11297 * Document parsing is done ! 11298 */ 11299 goto done; 11300 case XML_PARSER_START: 11301 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11302 xmlChar start[4]; 11303 xmlCharEncoding enc; 11304 11305 /* 11306 * Very first chars read from the document flow. 11307 */ 11308 if (avail < 4) 11309 goto done; 11310 11311 /* 11312 * Get the 4 first bytes and decode the charset 11313 * if enc != XML_CHAR_ENCODING_NONE 11314 * plug some encoding conversion routines, 11315 * else xmlSwitchEncoding will set to (default) 11316 * UTF8. 
11317 */ 11318 start[0] = RAW; 11319 start[1] = NXT(1); 11320 start[2] = NXT(2); 11321 start[3] = NXT(3); 11322 enc = xmlDetectCharEncoding(start, 4); 11323 xmlSwitchEncoding(ctxt, enc); 11324 break; 11325 } 11326 11327 if (avail < 2) 11328 goto done; 11329 cur = ctxt->input->cur[0]; 11330 next = ctxt->input->cur[1]; 11331 if (cur == 0) { 11332 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11333 ctxt->sax->setDocumentLocator(ctxt->userData, 11334 &xmlDefaultSAXLocator); 11335 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11336 xmlHaltParser(ctxt); 11337 #ifdef DEBUG_PUSH 11338 xmlGenericError(xmlGenericErrorContext, 11339 "PP: entering EOF\n"); 11340 #endif 11341 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11342 ctxt->sax->endDocument(ctxt->userData); 11343 goto done; 11344 } 11345 if ((cur == '<') && (next == '?')) { 11346 /* PI or XML decl */ 11347 if (avail < 5) return(ret); 11348 if ((!terminate) && 11349 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11350 return(ret); 11351 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11352 ctxt->sax->setDocumentLocator(ctxt->userData, 11353 &xmlDefaultSAXLocator); 11354 if ((ctxt->input->cur[2] == 'x') && 11355 (ctxt->input->cur[3] == 'm') && 11356 (ctxt->input->cur[4] == 'l') && 11357 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11358 ret += 5; 11359 #ifdef DEBUG_PUSH 11360 xmlGenericError(xmlGenericErrorContext, 11361 "PP: Parsing XML Decl\n"); 11362 #endif 11363 xmlParseXMLDecl(ctxt); 11364 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11365 /* 11366 * The XML REC instructs us to stop parsing right 11367 * here 11368 */ 11369 xmlHaltParser(ctxt); 11370 return(0); 11371 } 11372 ctxt->standalone = ctxt->input->standalone; 11373 if ((ctxt->encoding == NULL) && 11374 (ctxt->input->encoding != NULL)) 11375 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11376 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11377 (!ctxt->disableSAX)) 11378 ctxt->sax->startDocument(ctxt->userData); 11379 
ctxt->instate = XML_PARSER_MISC; 11380 #ifdef DEBUG_PUSH 11381 xmlGenericError(xmlGenericErrorContext, 11382 "PP: entering MISC\n"); 11383 #endif 11384 } else { 11385 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11386 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11387 (!ctxt->disableSAX)) 11388 ctxt->sax->startDocument(ctxt->userData); 11389 ctxt->instate = XML_PARSER_MISC; 11390 #ifdef DEBUG_PUSH 11391 xmlGenericError(xmlGenericErrorContext, 11392 "PP: entering MISC\n"); 11393 #endif 11394 } 11395 } else { 11396 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11397 ctxt->sax->setDocumentLocator(ctxt->userData, 11398 &xmlDefaultSAXLocator); 11399 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11400 if (ctxt->version == NULL) { 11401 xmlErrMemory(ctxt, NULL); 11402 break; 11403 } 11404 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11405 (!ctxt->disableSAX)) 11406 ctxt->sax->startDocument(ctxt->userData); 11407 ctxt->instate = XML_PARSER_MISC; 11408 #ifdef DEBUG_PUSH 11409 xmlGenericError(xmlGenericErrorContext, 11410 "PP: entering MISC\n"); 11411 #endif 11412 } 11413 break; 11414 case XML_PARSER_START_TAG: { 11415 const xmlChar *name; 11416 const xmlChar *prefix = NULL; 11417 const xmlChar *URI = NULL; 11418 int line = ctxt->input->line; 11419 int nsNr = ctxt->nsNr; 11420 11421 if ((avail < 2) && (ctxt->inputNr == 1)) 11422 goto done; 11423 cur = ctxt->input->cur[0]; 11424 if (cur != '<') { 11425 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11426 xmlHaltParser(ctxt); 11427 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11428 ctxt->sax->endDocument(ctxt->userData); 11429 goto done; 11430 } 11431 if (!terminate) { 11432 if (ctxt->progressive) { 11433 /* > can be found unescaped in attribute values */ 11434 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11435 goto done; 11436 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11437 goto done; 11438 } 11439 } 11440 if (ctxt->spaceNr == 0) 11441 spacePush(ctxt, -1); 11442 
else if (*ctxt->space == -2) 11443 spacePush(ctxt, -1); 11444 else 11445 spacePush(ctxt, *ctxt->space); 11446 #ifdef LIBXML_SAX1_ENABLED 11447 if (ctxt->sax2) 11448 #endif /* LIBXML_SAX1_ENABLED */ 11449 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11450 #ifdef LIBXML_SAX1_ENABLED 11451 else 11452 name = xmlParseStartTag(ctxt); 11453 #endif /* LIBXML_SAX1_ENABLED */ 11454 if (ctxt->instate == XML_PARSER_EOF) 11455 goto done; 11456 if (name == NULL) { 11457 spacePop(ctxt); 11458 xmlHaltParser(ctxt); 11459 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11460 ctxt->sax->endDocument(ctxt->userData); 11461 goto done; 11462 } 11463 #ifdef LIBXML_VALID_ENABLED 11464 /* 11465 * [ VC: Root Element Type ] 11466 * The Name in the document type declaration must match 11467 * the element type of the root element. 11468 */ 11469 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11470 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11471 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11472 #endif /* LIBXML_VALID_ENABLED */ 11473 11474 /* 11475 * Check for an Empty Element. 
11476 */ 11477 if ((RAW == '/') && (NXT(1) == '>')) { 11478 SKIP(2); 11479 11480 if (ctxt->sax2) { 11481 if ((ctxt->sax != NULL) && 11482 (ctxt->sax->endElementNs != NULL) && 11483 (!ctxt->disableSAX)) 11484 ctxt->sax->endElementNs(ctxt->userData, name, 11485 prefix, URI); 11486 if (ctxt->nsNr - nsNr > 0) 11487 nsPop(ctxt, ctxt->nsNr - nsNr); 11488 #ifdef LIBXML_SAX1_ENABLED 11489 } else { 11490 if ((ctxt->sax != NULL) && 11491 (ctxt->sax->endElement != NULL) && 11492 (!ctxt->disableSAX)) 11493 ctxt->sax->endElement(ctxt->userData, name); 11494 #endif /* LIBXML_SAX1_ENABLED */ 11495 } 11496 if (ctxt->instate == XML_PARSER_EOF) 11497 goto done; 11498 spacePop(ctxt); 11499 if (ctxt->nameNr == 0) { 11500 ctxt->instate = XML_PARSER_EPILOG; 11501 } else { 11502 ctxt->instate = XML_PARSER_CONTENT; 11503 } 11504 ctxt->progressive = 1; 11505 break; 11506 } 11507 if (RAW == '>') { 11508 NEXT; 11509 } else { 11510 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11511 "Couldn't find end of Start Tag %s\n", 11512 name); 11513 nodePop(ctxt); 11514 spacePop(ctxt); 11515 } 11516 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); 11517 11518 ctxt->instate = XML_PARSER_CONTENT; 11519 ctxt->progressive = 1; 11520 break; 11521 } 11522 case XML_PARSER_CONTENT: { 11523 int id; 11524 unsigned long cons; 11525 if ((avail < 2) && (ctxt->inputNr == 1)) 11526 goto done; 11527 cur = ctxt->input->cur[0]; 11528 next = ctxt->input->cur[1]; 11529 11530 id = ctxt->input->id; 11531 cons = CUR_CONSUMED; 11532 if ((cur == '<') && (next == '/')) { 11533 ctxt->instate = XML_PARSER_END_TAG; 11534 break; 11535 } else if ((cur == '<') && (next == '?')) { 11536 if ((!terminate) && 11537 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11538 ctxt->progressive = XML_PARSER_PI; 11539 goto done; 11540 } 11541 xmlParsePI(ctxt); 11542 ctxt->instate = XML_PARSER_CONTENT; 11543 ctxt->progressive = 1; 11544 } else if ((cur == '<') && (next != '!')) { 11545 ctxt->instate = XML_PARSER_START_TAG; 11546 
break; 11547 } else if ((cur == '<') && (next == '!') && 11548 (ctxt->input->cur[2] == '-') && 11549 (ctxt->input->cur[3] == '-')) { 11550 int term; 11551 11552 if (avail < 4) 11553 goto done; 11554 ctxt->input->cur += 4; 11555 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11556 ctxt->input->cur -= 4; 11557 if ((!terminate) && (term < 0)) { 11558 ctxt->progressive = XML_PARSER_COMMENT; 11559 goto done; 11560 } 11561 xmlParseComment(ctxt); 11562 ctxt->instate = XML_PARSER_CONTENT; 11563 ctxt->progressive = 1; 11564 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11565 (ctxt->input->cur[2] == '[') && 11566 (ctxt->input->cur[3] == 'C') && 11567 (ctxt->input->cur[4] == 'D') && 11568 (ctxt->input->cur[5] == 'A') && 11569 (ctxt->input->cur[6] == 'T') && 11570 (ctxt->input->cur[7] == 'A') && 11571 (ctxt->input->cur[8] == '[')) { 11572 SKIP(9); 11573 ctxt->instate = XML_PARSER_CDATA_SECTION; 11574 break; 11575 } else if ((cur == '<') && (next == '!') && 11576 (avail < 9)) { 11577 goto done; 11578 } else if (cur == '&') { 11579 if ((!terminate) && 11580 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11581 goto done; 11582 xmlParseReference(ctxt); 11583 } else { 11584 /* TODO Avoid the extra copy, handle directly !!! */ 11585 /* 11586 * Goal of the following test is: 11587 * - minimize calls to the SAX 'character' callback 11588 * when they are mergeable 11589 * - handle an problem for isBlank when we only parse 11590 * a sequence of blank chars and the next one is 11591 * not available to check against '<' presence. 11592 * - tries to homogenize the differences in SAX 11593 * callbacks between the push and pull versions 11594 * of the parser. 
11595 */ 11596 if ((ctxt->inputNr == 1) && 11597 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11598 if (!terminate) { 11599 if (ctxt->progressive) { 11600 if ((lastlt == NULL) || 11601 (ctxt->input->cur > lastlt)) 11602 goto done; 11603 } else if (xmlParseLookupSequence(ctxt, 11604 '<', 0, 0) < 0) { 11605 goto done; 11606 } 11607 } 11608 } 11609 ctxt->checkIndex = 0; 11610 xmlParseCharData(ctxt, 0); 11611 } 11612 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) { 11613 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11614 "detected an error in element content\n"); 11615 xmlHaltParser(ctxt); 11616 break; 11617 } 11618 break; 11619 } 11620 case XML_PARSER_END_TAG: 11621 if (avail < 2) 11622 goto done; 11623 if (!terminate) { 11624 if (ctxt->progressive) { 11625 /* > can be found unescaped in attribute values */ 11626 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11627 goto done; 11628 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11629 goto done; 11630 } 11631 } 11632 if (ctxt->sax2) { 11633 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); 11634 nameNsPop(ctxt); 11635 } 11636 #ifdef LIBXML_SAX1_ENABLED 11637 else 11638 xmlParseEndTag1(ctxt, 0); 11639 #endif /* LIBXML_SAX1_ENABLED */ 11640 if (ctxt->instate == XML_PARSER_EOF) { 11641 /* Nothing */ 11642 } else if (ctxt->nameNr == 0) { 11643 ctxt->instate = XML_PARSER_EPILOG; 11644 } else { 11645 ctxt->instate = XML_PARSER_CONTENT; 11646 } 11647 break; 11648 case XML_PARSER_CDATA_SECTION: { 11649 /* 11650 * The Push mode need to have the SAX callback for 11651 * cdataBlock merge back contiguous callbacks. 
11652 */ 11653 int base; 11654 11655 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11656 if (base < 0) { 11657 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11658 int tmp; 11659 11660 tmp = xmlCheckCdataPush(ctxt->input->cur, 11661 XML_PARSER_BIG_BUFFER_SIZE, 0); 11662 if (tmp < 0) { 11663 tmp = -tmp; 11664 ctxt->input->cur += tmp; 11665 goto encoding_error; 11666 } 11667 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11668 if (ctxt->sax->cdataBlock != NULL) 11669 ctxt->sax->cdataBlock(ctxt->userData, 11670 ctxt->input->cur, tmp); 11671 else if (ctxt->sax->characters != NULL) 11672 ctxt->sax->characters(ctxt->userData, 11673 ctxt->input->cur, tmp); 11674 } 11675 if (ctxt->instate == XML_PARSER_EOF) 11676 goto done; 11677 SKIPL(tmp); 11678 ctxt->checkIndex = 0; 11679 } 11680 goto done; 11681 } else { 11682 int tmp; 11683 11684 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11685 if ((tmp < 0) || (tmp != base)) { 11686 tmp = -tmp; 11687 ctxt->input->cur += tmp; 11688 goto encoding_error; 11689 } 11690 if ((ctxt->sax != NULL) && (base == 0) && 11691 (ctxt->sax->cdataBlock != NULL) && 11692 (!ctxt->disableSAX)) { 11693 /* 11694 * Special case to provide identical behaviour 11695 * between pull and push parsers on enpty CDATA 11696 * sections 11697 */ 11698 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11699 (!strncmp((const char *)&ctxt->input->cur[-9], 11700 "<![CDATA[", 9))) 11701 ctxt->sax->cdataBlock(ctxt->userData, 11702 BAD_CAST "", 0); 11703 } else if ((ctxt->sax != NULL) && (base > 0) && 11704 (!ctxt->disableSAX)) { 11705 if (ctxt->sax->cdataBlock != NULL) 11706 ctxt->sax->cdataBlock(ctxt->userData, 11707 ctxt->input->cur, base); 11708 else if (ctxt->sax->characters != NULL) 11709 ctxt->sax->characters(ctxt->userData, 11710 ctxt->input->cur, base); 11711 } 11712 if (ctxt->instate == XML_PARSER_EOF) 11713 goto done; 11714 SKIPL(base + 3); 11715 ctxt->checkIndex = 0; 11716 ctxt->instate = XML_PARSER_CONTENT; 11717 #ifdef DEBUG_PUSH 11718 
xmlGenericError(xmlGenericErrorContext, 11719 "PP: entering CONTENT\n"); 11720 #endif 11721 } 11722 break; 11723 } 11724 case XML_PARSER_MISC: 11725 SKIP_BLANKS; 11726 if (ctxt->input->buf == NULL) 11727 avail = ctxt->input->length - 11728 (ctxt->input->cur - ctxt->input->base); 11729 else 11730 avail = xmlBufUse(ctxt->input->buf->buffer) - 11731 (ctxt->input->cur - ctxt->input->base); 11732 if (avail < 2) 11733 goto done; 11734 cur = ctxt->input->cur[0]; 11735 next = ctxt->input->cur[1]; 11736 if ((cur == '<') && (next == '?')) { 11737 if ((!terminate) && 11738 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11739 ctxt->progressive = XML_PARSER_PI; 11740 goto done; 11741 } 11742 #ifdef DEBUG_PUSH 11743 xmlGenericError(xmlGenericErrorContext, 11744 "PP: Parsing PI\n"); 11745 #endif 11746 xmlParsePI(ctxt); 11747 if (ctxt->instate == XML_PARSER_EOF) 11748 goto done; 11749 ctxt->instate = XML_PARSER_MISC; 11750 ctxt->progressive = 1; 11751 ctxt->checkIndex = 0; 11752 } else if ((cur == '<') && (next == '!') && 11753 (ctxt->input->cur[2] == '-') && 11754 (ctxt->input->cur[3] == '-')) { 11755 if ((!terminate) && 11756 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11757 ctxt->progressive = XML_PARSER_COMMENT; 11758 goto done; 11759 } 11760 #ifdef DEBUG_PUSH 11761 xmlGenericError(xmlGenericErrorContext, 11762 "PP: Parsing Comment\n"); 11763 #endif 11764 xmlParseComment(ctxt); 11765 if (ctxt->instate == XML_PARSER_EOF) 11766 goto done; 11767 ctxt->instate = XML_PARSER_MISC; 11768 ctxt->progressive = 1; 11769 ctxt->checkIndex = 0; 11770 } else if ((cur == '<') && (next == '!') && 11771 (ctxt->input->cur[2] == 'D') && 11772 (ctxt->input->cur[3] == 'O') && 11773 (ctxt->input->cur[4] == 'C') && 11774 (ctxt->input->cur[5] == 'T') && 11775 (ctxt->input->cur[6] == 'Y') && 11776 (ctxt->input->cur[7] == 'P') && 11777 (ctxt->input->cur[8] == 'E')) { 11778 if ((!terminate) && 11779 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11780 ctxt->progressive = XML_PARSER_DTD; 
11781 goto done; 11782 } 11783 #ifdef DEBUG_PUSH 11784 xmlGenericError(xmlGenericErrorContext, 11785 "PP: Parsing internal subset\n"); 11786 #endif 11787 ctxt->inSubset = 1; 11788 ctxt->progressive = 0; 11789 ctxt->checkIndex = 0; 11790 xmlParseDocTypeDecl(ctxt); 11791 if (ctxt->instate == XML_PARSER_EOF) 11792 goto done; 11793 if (RAW == '[') { 11794 ctxt->instate = XML_PARSER_DTD; 11795 #ifdef DEBUG_PUSH 11796 xmlGenericError(xmlGenericErrorContext, 11797 "PP: entering DTD\n"); 11798 #endif 11799 } else { 11800 /* 11801 * Create and update the external subset. 11802 */ 11803 ctxt->inSubset = 2; 11804 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11805 (ctxt->sax->externalSubset != NULL)) 11806 ctxt->sax->externalSubset(ctxt->userData, 11807 ctxt->intSubName, ctxt->extSubSystem, 11808 ctxt->extSubURI); 11809 ctxt->inSubset = 0; 11810 xmlCleanSpecialAttr(ctxt); 11811 ctxt->instate = XML_PARSER_PROLOG; 11812 #ifdef DEBUG_PUSH 11813 xmlGenericError(xmlGenericErrorContext, 11814 "PP: entering PROLOG\n"); 11815 #endif 11816 } 11817 } else if ((cur == '<') && (next == '!') && 11818 (avail < 9)) { 11819 goto done; 11820 } else { 11821 ctxt->instate = XML_PARSER_START_TAG; 11822 ctxt->progressive = XML_PARSER_START_TAG; 11823 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11824 #ifdef DEBUG_PUSH 11825 xmlGenericError(xmlGenericErrorContext, 11826 "PP: entering START_TAG\n"); 11827 #endif 11828 } 11829 break; 11830 case XML_PARSER_PROLOG: 11831 SKIP_BLANKS; 11832 if (ctxt->input->buf == NULL) 11833 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11834 else 11835 avail = xmlBufUse(ctxt->input->buf->buffer) - 11836 (ctxt->input->cur - ctxt->input->base); 11837 if (avail < 2) 11838 goto done; 11839 cur = ctxt->input->cur[0]; 11840 next = ctxt->input->cur[1]; 11841 if ((cur == '<') && (next == '?')) { 11842 if ((!terminate) && 11843 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11844 ctxt->progressive = XML_PARSER_PI; 11845 goto done; 11846 } 11847 
#ifdef DEBUG_PUSH 11848 xmlGenericError(xmlGenericErrorContext, 11849 "PP: Parsing PI\n"); 11850 #endif 11851 xmlParsePI(ctxt); 11852 if (ctxt->instate == XML_PARSER_EOF) 11853 goto done; 11854 ctxt->instate = XML_PARSER_PROLOG; 11855 ctxt->progressive = 1; 11856 } else if ((cur == '<') && (next == '!') && 11857 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11858 if ((!terminate) && 11859 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11860 ctxt->progressive = XML_PARSER_COMMENT; 11861 goto done; 11862 } 11863 #ifdef DEBUG_PUSH 11864 xmlGenericError(xmlGenericErrorContext, 11865 "PP: Parsing Comment\n"); 11866 #endif 11867 xmlParseComment(ctxt); 11868 if (ctxt->instate == XML_PARSER_EOF) 11869 goto done; 11870 ctxt->instate = XML_PARSER_PROLOG; 11871 ctxt->progressive = 1; 11872 } else if ((cur == '<') && (next == '!') && 11873 (avail < 4)) { 11874 goto done; 11875 } else { 11876 ctxt->instate = XML_PARSER_START_TAG; 11877 if (ctxt->progressive == 0) 11878 ctxt->progressive = XML_PARSER_START_TAG; 11879 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11880 #ifdef DEBUG_PUSH 11881 xmlGenericError(xmlGenericErrorContext, 11882 "PP: entering START_TAG\n"); 11883 #endif 11884 } 11885 break; 11886 case XML_PARSER_EPILOG: 11887 SKIP_BLANKS; 11888 if (ctxt->input->buf == NULL) 11889 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11890 else 11891 avail = xmlBufUse(ctxt->input->buf->buffer) - 11892 (ctxt->input->cur - ctxt->input->base); 11893 if (avail < 2) 11894 goto done; 11895 cur = ctxt->input->cur[0]; 11896 next = ctxt->input->cur[1]; 11897 if ((cur == '<') && (next == '?')) { 11898 if ((!terminate) && 11899 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11900 ctxt->progressive = XML_PARSER_PI; 11901 goto done; 11902 } 11903 #ifdef DEBUG_PUSH 11904 xmlGenericError(xmlGenericErrorContext, 11905 "PP: Parsing PI\n"); 11906 #endif 11907 xmlParsePI(ctxt); 11908 if (ctxt->instate == XML_PARSER_EOF) 11909 goto done; 11910 
ctxt->instate = XML_PARSER_EPILOG; 11911 ctxt->progressive = 1; 11912 } else if ((cur == '<') && (next == '!') && 11913 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11914 if ((!terminate) && 11915 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11916 ctxt->progressive = XML_PARSER_COMMENT; 11917 goto done; 11918 } 11919 #ifdef DEBUG_PUSH 11920 xmlGenericError(xmlGenericErrorContext, 11921 "PP: Parsing Comment\n"); 11922 #endif 11923 xmlParseComment(ctxt); 11924 if (ctxt->instate == XML_PARSER_EOF) 11925 goto done; 11926 ctxt->instate = XML_PARSER_EPILOG; 11927 ctxt->progressive = 1; 11928 } else if ((cur == '<') && (next == '!') && 11929 (avail < 4)) { 11930 goto done; 11931 } else { 11932 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11933 xmlHaltParser(ctxt); 11934 #ifdef DEBUG_PUSH 11935 xmlGenericError(xmlGenericErrorContext, 11936 "PP: entering EOF\n"); 11937 #endif 11938 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11939 ctxt->sax->endDocument(ctxt->userData); 11940 goto done; 11941 } 11942 break; 11943 case XML_PARSER_DTD: { 11944 /* 11945 * Sorry but progressive parsing of the internal subset 11946 * is not expected to be supported. We first check that 11947 * the full content of the internal subset is available and 11948 * the parsing is launched only at that point. 11949 * Internal subset ends up with "']' S? '>'" in an unescaped 11950 * section and not in a ']]>' sequence which are conditional 11951 * sections (whoever argued to keep that crap in XML deserve 11952 * a place in hell !). 
11953 */ 11954 int base, i; 11955 xmlChar *buf; 11956 xmlChar quote = 0; 11957 size_t use; 11958 11959 base = ctxt->input->cur - ctxt->input->base; 11960 if (base < 0) return(0); 11961 if (ctxt->checkIndex > base) 11962 base = ctxt->checkIndex; 11963 buf = xmlBufContent(ctxt->input->buf->buffer); 11964 use = xmlBufUse(ctxt->input->buf->buffer); 11965 for (;(unsigned int) base < use; base++) { 11966 if (quote != 0) { 11967 if (buf[base] == quote) 11968 quote = 0; 11969 continue; 11970 } 11971 if ((quote == 0) && (buf[base] == '<')) { 11972 int found = 0; 11973 /* special handling of comments */ 11974 if (((unsigned int) base + 4 < use) && 11975 (buf[base + 1] == '!') && 11976 (buf[base + 2] == '-') && 11977 (buf[base + 3] == '-')) { 11978 for (;(unsigned int) base + 3 < use; base++) { 11979 if ((buf[base] == '-') && 11980 (buf[base + 1] == '-') && 11981 (buf[base + 2] == '>')) { 11982 found = 1; 11983 base += 2; 11984 break; 11985 } 11986 } 11987 if (!found) { 11988 #if 0 11989 fprintf(stderr, "unfinished comment\n"); 11990 #endif 11991 break; /* for */ 11992 } 11993 continue; 11994 } 11995 } 11996 if (buf[base] == '"') { 11997 quote = '"'; 11998 continue; 11999 } 12000 if (buf[base] == '\'') { 12001 quote = '\''; 12002 continue; 12003 } 12004 if (buf[base] == ']') { 12005 #if 0 12006 fprintf(stderr, "%c%c%c%c: ", buf[base], 12007 buf[base + 1], buf[base + 2], buf[base + 3]); 12008 #endif 12009 if ((unsigned int) base +1 >= use) 12010 break; 12011 if (buf[base + 1] == ']') { 12012 /* conditional crap, skip both ']' ! 
*/ 12013 base++; 12014 continue; 12015 } 12016 for (i = 1; (unsigned int) base + i < use; i++) { 12017 if (buf[base + i] == '>') { 12018 #if 0 12019 fprintf(stderr, "found\n"); 12020 #endif 12021 goto found_end_int_subset; 12022 } 12023 if (!IS_BLANK_CH(buf[base + i])) { 12024 #if 0 12025 fprintf(stderr, "not found\n"); 12026 #endif 12027 goto not_end_of_int_subset; 12028 } 12029 } 12030 #if 0 12031 fprintf(stderr, "end of stream\n"); 12032 #endif 12033 break; 12034 12035 } 12036 not_end_of_int_subset: 12037 continue; /* for */ 12038 } 12039 /* 12040 * We didn't found the end of the Internal subset 12041 */ 12042 if (quote == 0) 12043 ctxt->checkIndex = base; 12044 else 12045 ctxt->checkIndex = 0; 12046 #ifdef DEBUG_PUSH 12047 if (next == 0) 12048 xmlGenericError(xmlGenericErrorContext, 12049 "PP: lookup of int subset end filed\n"); 12050 #endif 12051 goto done; 12052 12053 found_end_int_subset: 12054 ctxt->checkIndex = 0; 12055 xmlParseInternalSubset(ctxt); 12056 if (ctxt->instate == XML_PARSER_EOF) 12057 goto done; 12058 ctxt->inSubset = 2; 12059 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12060 (ctxt->sax->externalSubset != NULL)) 12061 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12062 ctxt->extSubSystem, ctxt->extSubURI); 12063 ctxt->inSubset = 0; 12064 xmlCleanSpecialAttr(ctxt); 12065 if (ctxt->instate == XML_PARSER_EOF) 12066 goto done; 12067 ctxt->instate = XML_PARSER_PROLOG; 12068 ctxt->checkIndex = 0; 12069 #ifdef DEBUG_PUSH 12070 xmlGenericError(xmlGenericErrorContext, 12071 "PP: entering PROLOG\n"); 12072 #endif 12073 break; 12074 } 12075 case XML_PARSER_COMMENT: 12076 xmlGenericError(xmlGenericErrorContext, 12077 "PP: internal error, state == COMMENT\n"); 12078 ctxt->instate = XML_PARSER_CONTENT; 12079 #ifdef DEBUG_PUSH 12080 xmlGenericError(xmlGenericErrorContext, 12081 "PP: entering CONTENT\n"); 12082 #endif 12083 break; 12084 case XML_PARSER_IGNORE: 12085 xmlGenericError(xmlGenericErrorContext, 12086 "PP: internal error, state 
== IGNORE"); 12087 ctxt->instate = XML_PARSER_DTD; 12088 #ifdef DEBUG_PUSH 12089 xmlGenericError(xmlGenericErrorContext, 12090 "PP: entering DTD\n"); 12091 #endif 12092 break; 12093 case XML_PARSER_PI: 12094 xmlGenericError(xmlGenericErrorContext, 12095 "PP: internal error, state == PI\n"); 12096 ctxt->instate = XML_PARSER_CONTENT; 12097 #ifdef DEBUG_PUSH 12098 xmlGenericError(xmlGenericErrorContext, 12099 "PP: entering CONTENT\n"); 12100 #endif 12101 break; 12102 case XML_PARSER_ENTITY_DECL: 12103 xmlGenericError(xmlGenericErrorContext, 12104 "PP: internal error, state == ENTITY_DECL\n"); 12105 ctxt->instate = XML_PARSER_DTD; 12106 #ifdef DEBUG_PUSH 12107 xmlGenericError(xmlGenericErrorContext, 12108 "PP: entering DTD\n"); 12109 #endif 12110 break; 12111 case XML_PARSER_ENTITY_VALUE: 12112 xmlGenericError(xmlGenericErrorContext, 12113 "PP: internal error, state == ENTITY_VALUE\n"); 12114 ctxt->instate = XML_PARSER_CONTENT; 12115 #ifdef DEBUG_PUSH 12116 xmlGenericError(xmlGenericErrorContext, 12117 "PP: entering DTD\n"); 12118 #endif 12119 break; 12120 case XML_PARSER_ATTRIBUTE_VALUE: 12121 xmlGenericError(xmlGenericErrorContext, 12122 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12123 ctxt->instate = XML_PARSER_START_TAG; 12124 #ifdef DEBUG_PUSH 12125 xmlGenericError(xmlGenericErrorContext, 12126 "PP: entering START_TAG\n"); 12127 #endif 12128 break; 12129 case XML_PARSER_SYSTEM_LITERAL: 12130 xmlGenericError(xmlGenericErrorContext, 12131 "PP: internal error, state == SYSTEM_LITERAL\n"); 12132 ctxt->instate = XML_PARSER_START_TAG; 12133 #ifdef DEBUG_PUSH 12134 xmlGenericError(xmlGenericErrorContext, 12135 "PP: entering START_TAG\n"); 12136 #endif 12137 break; 12138 case XML_PARSER_PUBLIC_LITERAL: 12139 xmlGenericError(xmlGenericErrorContext, 12140 "PP: internal error, state == PUBLIC_LITERAL\n"); 12141 ctxt->instate = XML_PARSER_START_TAG; 12142 #ifdef DEBUG_PUSH 12143 xmlGenericError(xmlGenericErrorContext, 12144 "PP: entering START_TAG\n"); 12145 #endif 
12146 break; 12147 } 12148 } 12149 done: 12150 #ifdef DEBUG_PUSH 12151 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12152 #endif 12153 return(ret); 12154 encoding_error: 12155 { 12156 char buffer[150]; 12157 12158 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12159 ctxt->input->cur[0], ctxt->input->cur[1], 12160 ctxt->input->cur[2], ctxt->input->cur[3]); 12161 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12162 "Input is not proper UTF-8, indicate encoding !\n%s", 12163 BAD_CAST buffer, NULL); 12164 } 12165 return(0); 12166 } 12167 12168 /** 12169 * xmlParseCheckTransition: 12170 * @ctxt: an XML parser context 12171 * @chunk: a char array 12172 * @size: the size in byte of the chunk 12173 * 12174 * Check depending on the current parser state if the chunk given must be 12175 * processed immediately or one need more data to advance on parsing. 12176 * 12177 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12178 */ 12179 static int 12180 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12181 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12182 return(-1); 12183 if (ctxt->instate == XML_PARSER_START_TAG) { 12184 if (memchr(chunk, '>', size) != NULL) 12185 return(1); 12186 return(0); 12187 } 12188 if (ctxt->progressive == XML_PARSER_COMMENT) { 12189 if (memchr(chunk, '>', size) != NULL) 12190 return(1); 12191 return(0); 12192 } 12193 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12194 if (memchr(chunk, '>', size) != NULL) 12195 return(1); 12196 return(0); 12197 } 12198 if (ctxt->progressive == XML_PARSER_PI) { 12199 if (memchr(chunk, '>', size) != NULL) 12200 return(1); 12201 return(0); 12202 } 12203 if (ctxt->instate == XML_PARSER_END_TAG) { 12204 if (memchr(chunk, '>', size) != NULL) 12205 return(1); 12206 return(0); 12207 } 12208 if ((ctxt->progressive == XML_PARSER_DTD) || 12209 (ctxt->instate == XML_PARSER_DTD)) { 12210 if (memchr(chunk, '>', size) != NULL) 12211 
return(1); 12212 return(0); 12213 } 12214 return(1); 12215 } 12216 12217 /** 12218 * xmlParseChunk: 12219 * @ctxt: an XML parser context 12220 * @chunk: an char array 12221 * @size: the size in byte of the chunk 12222 * @terminate: last chunk indicator 12223 * 12224 * Parse a Chunk of memory 12225 * 12226 * Returns zero if no error, the xmlParserErrors otherwise. 12227 */ 12228 int 12229 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12230 int terminate) { 12231 int end_in_lf = 0; 12232 int remain = 0; 12233 size_t old_avail = 0; 12234 size_t avail = 0; 12235 12236 if (ctxt == NULL) 12237 return(XML_ERR_INTERNAL_ERROR); 12238 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12239 return(ctxt->errNo); 12240 if (ctxt->instate == XML_PARSER_EOF) 12241 return(-1); 12242 if (ctxt->instate == XML_PARSER_START) 12243 xmlDetectSAX2(ctxt); 12244 if ((size > 0) && (chunk != NULL) && (!terminate) && 12245 (chunk[size - 1] == '\r')) { 12246 end_in_lf = 1; 12247 size--; 12248 } 12249 12250 xmldecl_done: 12251 12252 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12253 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12254 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12255 size_t cur = ctxt->input->cur - ctxt->input->base; 12256 int res; 12257 12258 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12259 /* 12260 * Specific handling if we autodetected an encoding, we should not 12261 * push more than the first line ... 
which depend on the encoding 12262 * And only push the rest once the final encoding was detected 12263 */ 12264 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12265 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12266 unsigned int len = 45; 12267 12268 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12269 BAD_CAST "UTF-16")) || 12270 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12271 BAD_CAST "UTF16"))) 12272 len = 90; 12273 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12274 BAD_CAST "UCS-4")) || 12275 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12276 BAD_CAST "UCS4"))) 12277 len = 180; 12278 12279 if (ctxt->input->buf->rawconsumed < len) 12280 len -= ctxt->input->buf->rawconsumed; 12281 12282 /* 12283 * Change size for reading the initial declaration only 12284 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12285 * will blindly copy extra bytes from memory. 12286 */ 12287 if ((unsigned int) size > len) { 12288 remain = size - len; 12289 size = len; 12290 } else { 12291 remain = 0; 12292 } 12293 } 12294 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12295 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12296 if (res < 0) { 12297 ctxt->errNo = XML_PARSER_EOF; 12298 xmlHaltParser(ctxt); 12299 return (XML_PARSER_EOF); 12300 } 12301 #ifdef DEBUG_PUSH 12302 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12303 #endif 12304 12305 } else if (ctxt->instate != XML_PARSER_EOF) { 12306 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12307 xmlParserInputBufferPtr in = ctxt->input->buf; 12308 if ((in->encoder != NULL) && (in->buffer != NULL) && 12309 (in->raw != NULL)) { 12310 int nbchars; 12311 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12312 size_t current = ctxt->input->cur - ctxt->input->base; 12313 12314 nbchars = xmlCharEncInput(in, terminate); 12315 
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12316 if (nbchars < 0) { 12317 /* TODO 2.6.0 */ 12318 xmlGenericError(xmlGenericErrorContext, 12319 "xmlParseChunk: encoder error\n"); 12320 xmlHaltParser(ctxt); 12321 return(XML_ERR_INVALID_ENCODING); 12322 } 12323 } 12324 } 12325 } 12326 if (remain != 0) { 12327 xmlParseTryOrFinish(ctxt, 0); 12328 } else { 12329 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12330 avail = xmlBufUse(ctxt->input->buf->buffer); 12331 /* 12332 * Depending on the current state it may not be such 12333 * a good idea to try parsing if there is nothing in the chunk 12334 * which would be worth doing a parser state transition and we 12335 * need to wait for more data 12336 */ 12337 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12338 (old_avail == 0) || (avail == 0) || 12339 (xmlParseCheckTransition(ctxt, 12340 (const char *)&ctxt->input->base[old_avail], 12341 avail - old_avail))) 12342 xmlParseTryOrFinish(ctxt, terminate); 12343 } 12344 if (ctxt->instate == XML_PARSER_EOF) 12345 return(ctxt->errNo); 12346 12347 if ((ctxt->input != NULL) && 12348 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12349 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12350 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12351 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12352 xmlHaltParser(ctxt); 12353 } 12354 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12355 return(ctxt->errNo); 12356 12357 if (remain != 0) { 12358 chunk += size; 12359 size = remain; 12360 remain = 0; 12361 goto xmldecl_done; 12362 } 12363 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12364 (ctxt->input->buf != NULL)) { 12365 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12366 ctxt->input); 12367 size_t current = ctxt->input->cur - ctxt->input->base; 12368 12369 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12370 12371 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, 
ctxt->input, 12372 base, current); 12373 } 12374 if (terminate) { 12375 /* 12376 * Check for termination 12377 */ 12378 int cur_avail = 0; 12379 12380 if (ctxt->input != NULL) { 12381 if (ctxt->input->buf == NULL) 12382 cur_avail = ctxt->input->length - 12383 (ctxt->input->cur - ctxt->input->base); 12384 else 12385 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12386 (ctxt->input->cur - ctxt->input->base); 12387 } 12388 12389 if ((ctxt->instate != XML_PARSER_EOF) && 12390 (ctxt->instate != XML_PARSER_EPILOG)) { 12391 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12392 } 12393 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12394 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12395 } 12396 if (ctxt->instate != XML_PARSER_EOF) { 12397 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12398 ctxt->sax->endDocument(ctxt->userData); 12399 } 12400 ctxt->instate = XML_PARSER_EOF; 12401 } 12402 if (ctxt->wellFormed == 0) 12403 return((xmlParserErrors) ctxt->errNo); 12404 else 12405 return(0); 12406 } 12407 12408 /************************************************************************ 12409 * * 12410 * I/O front end functions to the parser * 12411 * * 12412 ************************************************************************/ 12413 12414 /** 12415 * xmlCreatePushParserCtxt: 12416 * @sax: a SAX handler 12417 * @user_data: The user data returned on SAX callbacks 12418 * @chunk: a pointer to an array of chars 12419 * @size: number of chars in the array 12420 * @filename: an optional file name or URI 12421 * 12422 * Create a parser context for using the XML parser in push mode. 12423 * If @buffer and @size are non-NULL, the data is used to detect 12424 * the encoding. The remaining characters will be parsed so they 12425 * don't need to be fed in again through xmlParseChunk. 12426 * To allow content encoding detection, @size should be >= 4 12427 * The value of @filename is used for fetching external entities 12428 * and error/warning reports. 
12429 * 12430 * Returns the new parser context or NULL 12431 */ 12432 12433 xmlParserCtxtPtr 12434 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12435 const char *chunk, int size, const char *filename) { 12436 xmlParserCtxtPtr ctxt; 12437 xmlParserInputPtr inputStream; 12438 xmlParserInputBufferPtr buf; 12439 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12440 12441 /* 12442 * plug some encoding conversion routines 12443 */ 12444 if ((chunk != NULL) && (size >= 4)) 12445 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12446 12447 buf = xmlAllocParserInputBuffer(enc); 12448 if (buf == NULL) return(NULL); 12449 12450 ctxt = xmlNewParserCtxt(); 12451 if (ctxt == NULL) { 12452 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12453 xmlFreeParserInputBuffer(buf); 12454 return(NULL); 12455 } 12456 ctxt->dictNames = 1; 12457 if (sax != NULL) { 12458 #ifdef LIBXML_SAX1_ENABLED 12459 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12460 #endif /* LIBXML_SAX1_ENABLED */ 12461 xmlFree(ctxt->sax); 12462 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12463 if (ctxt->sax == NULL) { 12464 xmlErrMemory(ctxt, NULL); 12465 xmlFreeParserInputBuffer(buf); 12466 xmlFreeParserCtxt(ctxt); 12467 return(NULL); 12468 } 12469 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12470 if (sax->initialized == XML_SAX2_MAGIC) 12471 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12472 else 12473 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12474 if (user_data != NULL) 12475 ctxt->userData = user_data; 12476 } 12477 if (filename == NULL) { 12478 ctxt->directory = NULL; 12479 } else { 12480 ctxt->directory = xmlParserGetDirectory(filename); 12481 } 12482 12483 inputStream = xmlNewInputStream(ctxt); 12484 if (inputStream == NULL) { 12485 xmlFreeParserCtxt(ctxt); 12486 xmlFreeParserInputBuffer(buf); 12487 return(NULL); 12488 } 12489 12490 if (filename == NULL) 12491 inputStream->filename = NULL; 12492 else { 12493 inputStream->filename = 
(char *) 12494 xmlCanonicPath((const xmlChar *) filename); 12495 if (inputStream->filename == NULL) { 12496 xmlFreeParserCtxt(ctxt); 12497 xmlFreeParserInputBuffer(buf); 12498 return(NULL); 12499 } 12500 } 12501 inputStream->buf = buf; 12502 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12503 inputPush(ctxt, inputStream); 12504 12505 /* 12506 * If the caller didn't provide an initial 'chunk' for determining 12507 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12508 * that it can be automatically determined later 12509 */ 12510 if ((size == 0) || (chunk == NULL)) { 12511 ctxt->charset = XML_CHAR_ENCODING_NONE; 12512 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12513 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12514 size_t cur = ctxt->input->cur - ctxt->input->base; 12515 12516 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12517 12518 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12519 #ifdef DEBUG_PUSH 12520 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12521 #endif 12522 } 12523 12524 if (enc != XML_CHAR_ENCODING_NONE) { 12525 xmlSwitchEncoding(ctxt, enc); 12526 } 12527 12528 return(ctxt); 12529 } 12530 #endif /* LIBXML_PUSH_ENABLED */ 12531 12532 /** 12533 * xmlHaltParser: 12534 * @ctxt: an XML parser context 12535 * 12536 * Blocks further parser processing don't override error 12537 * for internal use 12538 */ 12539 static void 12540 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12541 if (ctxt == NULL) 12542 return; 12543 ctxt->instate = XML_PARSER_EOF; 12544 ctxt->disableSAX = 1; 12545 while (ctxt->inputNr > 1) 12546 xmlFreeInputStream(inputPop(ctxt)); 12547 if (ctxt->input != NULL) { 12548 /* 12549 * in case there was a specific allocation deallocate before 12550 * overriding base 12551 */ 12552 if (ctxt->input->free != NULL) { 12553 ctxt->input->free((xmlChar *) ctxt->input->base); 12554 ctxt->input->free = NULL; 12555 } 12556 if 
(ctxt->input->buf != NULL) { 12557 xmlFreeParserInputBuffer(ctxt->input->buf); 12558 ctxt->input->buf = NULL; 12559 } 12560 ctxt->input->cur = BAD_CAST""; 12561 ctxt->input->length = 0; 12562 ctxt->input->base = ctxt->input->cur; 12563 ctxt->input->end = ctxt->input->cur; 12564 } 12565 } 12566 12567 /** 12568 * xmlStopParser: 12569 * @ctxt: an XML parser context 12570 * 12571 * Blocks further parser processing 12572 */ 12573 void 12574 xmlStopParser(xmlParserCtxtPtr ctxt) { 12575 if (ctxt == NULL) 12576 return; 12577 xmlHaltParser(ctxt); 12578 ctxt->errNo = XML_ERR_USER_STOP; 12579 } 12580 12581 /** 12582 * xmlCreateIOParserCtxt: 12583 * @sax: a SAX handler 12584 * @user_data: The user data returned on SAX callbacks 12585 * @ioread: an I/O read function 12586 * @ioclose: an I/O close function 12587 * @ioctx: an I/O handler 12588 * @enc: the charset encoding if known 12589 * 12590 * Create a parser context for using the XML parser with an existing 12591 * I/O stream 12592 * 12593 * Returns the new parser context or NULL 12594 */ 12595 xmlParserCtxtPtr 12596 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12597 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12598 void *ioctx, xmlCharEncoding enc) { 12599 xmlParserCtxtPtr ctxt; 12600 xmlParserInputPtr inputStream; 12601 xmlParserInputBufferPtr buf; 12602 12603 if (ioread == NULL) return(NULL); 12604 12605 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12606 if (buf == NULL) { 12607 if (ioclose != NULL) 12608 ioclose(ioctx); 12609 return (NULL); 12610 } 12611 12612 ctxt = xmlNewParserCtxt(); 12613 if (ctxt == NULL) { 12614 xmlFreeParserInputBuffer(buf); 12615 return(NULL); 12616 } 12617 if (sax != NULL) { 12618 #ifdef LIBXML_SAX1_ENABLED 12619 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12620 #endif /* LIBXML_SAX1_ENABLED */ 12621 xmlFree(ctxt->sax); 12622 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12623 if (ctxt->sax == NULL) { 12624 
xmlFreeParserInputBuffer(buf); 12625 xmlErrMemory(ctxt, NULL); 12626 xmlFreeParserCtxt(ctxt); 12627 return(NULL); 12628 } 12629 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12630 if (sax->initialized == XML_SAX2_MAGIC) 12631 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12632 else 12633 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12634 if (user_data != NULL) 12635 ctxt->userData = user_data; 12636 } 12637 12638 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12639 if (inputStream == NULL) { 12640 xmlFreeParserCtxt(ctxt); 12641 return(NULL); 12642 } 12643 inputPush(ctxt, inputStream); 12644 12645 return(ctxt); 12646 } 12647 12648 #ifdef LIBXML_VALID_ENABLED 12649 /************************************************************************ 12650 * * 12651 * Front ends when parsing a DTD * 12652 * * 12653 ************************************************************************/ 12654 12655 /** 12656 * xmlIOParseDTD: 12657 * @sax: the SAX handler block or NULL 12658 * @input: an Input Buffer 12659 * @enc: the charset encoding if known 12660 * 12661 * Load and parse a DTD 12662 * 12663 * Returns the resulting xmlDtdPtr or NULL in case of error. 12664 * @input will be freed by the function in any case. 
12665 */ 12666 12667 xmlDtdPtr 12668 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12669 xmlCharEncoding enc) { 12670 xmlDtdPtr ret = NULL; 12671 xmlParserCtxtPtr ctxt; 12672 xmlParserInputPtr pinput = NULL; 12673 xmlChar start[4]; 12674 12675 if (input == NULL) 12676 return(NULL); 12677 12678 ctxt = xmlNewParserCtxt(); 12679 if (ctxt == NULL) { 12680 xmlFreeParserInputBuffer(input); 12681 return(NULL); 12682 } 12683 12684 /* We are loading a DTD */ 12685 ctxt->options |= XML_PARSE_DTDLOAD; 12686 12687 /* 12688 * Set-up the SAX context 12689 */ 12690 if (sax != NULL) { 12691 if (ctxt->sax != NULL) 12692 xmlFree(ctxt->sax); 12693 ctxt->sax = sax; 12694 ctxt->userData = ctxt; 12695 } 12696 xmlDetectSAX2(ctxt); 12697 12698 /* 12699 * generate a parser input from the I/O handler 12700 */ 12701 12702 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12703 if (pinput == NULL) { 12704 if (sax != NULL) ctxt->sax = NULL; 12705 xmlFreeParserInputBuffer(input); 12706 xmlFreeParserCtxt(ctxt); 12707 return(NULL); 12708 } 12709 12710 /* 12711 * plug some encoding conversion routines here. 12712 */ 12713 if (xmlPushInput(ctxt, pinput) < 0) { 12714 if (sax != NULL) ctxt->sax = NULL; 12715 xmlFreeParserCtxt(ctxt); 12716 return(NULL); 12717 } 12718 if (enc != XML_CHAR_ENCODING_NONE) { 12719 xmlSwitchEncoding(ctxt, enc); 12720 } 12721 12722 pinput->filename = NULL; 12723 pinput->line = 1; 12724 pinput->col = 1; 12725 pinput->base = ctxt->input->cur; 12726 pinput->cur = ctxt->input->cur; 12727 pinput->free = NULL; 12728 12729 /* 12730 * let's parse that entity knowing it's an external subset. 
12731 */ 12732 ctxt->inSubset = 2; 12733 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12734 if (ctxt->myDoc == NULL) { 12735 xmlErrMemory(ctxt, "New Doc failed"); 12736 return(NULL); 12737 } 12738 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12739 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12740 BAD_CAST "none", BAD_CAST "none"); 12741 12742 if ((enc == XML_CHAR_ENCODING_NONE) && 12743 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12744 /* 12745 * Get the 4 first bytes and decode the charset 12746 * if enc != XML_CHAR_ENCODING_NONE 12747 * plug some encoding conversion routines. 12748 */ 12749 start[0] = RAW; 12750 start[1] = NXT(1); 12751 start[2] = NXT(2); 12752 start[3] = NXT(3); 12753 enc = xmlDetectCharEncoding(start, 4); 12754 if (enc != XML_CHAR_ENCODING_NONE) { 12755 xmlSwitchEncoding(ctxt, enc); 12756 } 12757 } 12758 12759 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12760 12761 if (ctxt->myDoc != NULL) { 12762 if (ctxt->wellFormed) { 12763 ret = ctxt->myDoc->extSubset; 12764 ctxt->myDoc->extSubset = NULL; 12765 if (ret != NULL) { 12766 xmlNodePtr tmp; 12767 12768 ret->doc = NULL; 12769 tmp = ret->children; 12770 while (tmp != NULL) { 12771 tmp->doc = NULL; 12772 tmp = tmp->next; 12773 } 12774 } 12775 } else { 12776 ret = NULL; 12777 } 12778 xmlFreeDoc(ctxt->myDoc); 12779 ctxt->myDoc = NULL; 12780 } 12781 if (sax != NULL) ctxt->sax = NULL; 12782 xmlFreeParserCtxt(ctxt); 12783 12784 return(ret); 12785 } 12786 12787 /** 12788 * xmlSAXParseDTD: 12789 * @sax: the SAX handler block 12790 * @ExternalID: a NAME* containing the External ID of the DTD 12791 * @SystemID: a NAME* containing the URL to the DTD 12792 * 12793 * Load and parse an external subset. 12794 * 12795 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12796 */ 12797 12798 xmlDtdPtr 12799 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12800 const xmlChar *SystemID) { 12801 xmlDtdPtr ret = NULL; 12802 xmlParserCtxtPtr ctxt; 12803 xmlParserInputPtr input = NULL; 12804 xmlCharEncoding enc; 12805 xmlChar* systemIdCanonic; 12806 12807 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12808 12809 ctxt = xmlNewParserCtxt(); 12810 if (ctxt == NULL) { 12811 return(NULL); 12812 } 12813 12814 /* We are loading a DTD */ 12815 ctxt->options |= XML_PARSE_DTDLOAD; 12816 12817 /* 12818 * Set-up the SAX context 12819 */ 12820 if (sax != NULL) { 12821 if (ctxt->sax != NULL) 12822 xmlFree(ctxt->sax); 12823 ctxt->sax = sax; 12824 ctxt->userData = ctxt; 12825 } 12826 12827 /* 12828 * Canonicalise the system ID 12829 */ 12830 systemIdCanonic = xmlCanonicPath(SystemID); 12831 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12832 xmlFreeParserCtxt(ctxt); 12833 return(NULL); 12834 } 12835 12836 /* 12837 * Ask the Entity resolver to load the damn thing 12838 */ 12839 12840 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12841 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12842 systemIdCanonic); 12843 if (input == NULL) { 12844 if (sax != NULL) ctxt->sax = NULL; 12845 xmlFreeParserCtxt(ctxt); 12846 if (systemIdCanonic != NULL) 12847 xmlFree(systemIdCanonic); 12848 return(NULL); 12849 } 12850 12851 /* 12852 * plug some encoding conversion routines here. 
12853 */ 12854 if (xmlPushInput(ctxt, input) < 0) { 12855 if (sax != NULL) ctxt->sax = NULL; 12856 xmlFreeParserCtxt(ctxt); 12857 if (systemIdCanonic != NULL) 12858 xmlFree(systemIdCanonic); 12859 return(NULL); 12860 } 12861 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12862 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12863 xmlSwitchEncoding(ctxt, enc); 12864 } 12865 12866 if (input->filename == NULL) 12867 input->filename = (char *) systemIdCanonic; 12868 else 12869 xmlFree(systemIdCanonic); 12870 input->line = 1; 12871 input->col = 1; 12872 input->base = ctxt->input->cur; 12873 input->cur = ctxt->input->cur; 12874 input->free = NULL; 12875 12876 /* 12877 * let's parse that entity knowing it's an external subset. 12878 */ 12879 ctxt->inSubset = 2; 12880 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12881 if (ctxt->myDoc == NULL) { 12882 xmlErrMemory(ctxt, "New Doc failed"); 12883 if (sax != NULL) ctxt->sax = NULL; 12884 xmlFreeParserCtxt(ctxt); 12885 return(NULL); 12886 } 12887 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12888 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12889 ExternalID, SystemID); 12890 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12891 12892 if (ctxt->myDoc != NULL) { 12893 if (ctxt->wellFormed) { 12894 ret = ctxt->myDoc->extSubset; 12895 ctxt->myDoc->extSubset = NULL; 12896 if (ret != NULL) { 12897 xmlNodePtr tmp; 12898 12899 ret->doc = NULL; 12900 tmp = ret->children; 12901 while (tmp != NULL) { 12902 tmp->doc = NULL; 12903 tmp = tmp->next; 12904 } 12905 } 12906 } else { 12907 ret = NULL; 12908 } 12909 xmlFreeDoc(ctxt->myDoc); 12910 ctxt->myDoc = NULL; 12911 } 12912 if (sax != NULL) ctxt->sax = NULL; 12913 xmlFreeParserCtxt(ctxt); 12914 12915 return(ret); 12916 } 12917 12918 12919 /** 12920 * xmlParseDTD: 12921 * @ExternalID: a NAME* containing the External ID of the DTD 12922 * @SystemID: a NAME* containing the URL to the DTD 12923 * 12924 * Load and parse an external subset. 
12925 * 12926 * Returns the resulting xmlDtdPtr or NULL in case of error. 12927 */ 12928 12929 xmlDtdPtr 12930 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12931 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12932 } 12933 #endif /* LIBXML_VALID_ENABLED */ 12934 12935 /************************************************************************ 12936 * * 12937 * Front ends when parsing an Entity * 12938 * * 12939 ************************************************************************/ 12940 12941 /** 12942 * xmlParseCtxtExternalEntity: 12943 * @ctx: the existing parsing context 12944 * @URL: the URL for the entity to load 12945 * @ID: the System ID for the entity to load 12946 * @lst: the return value for the set of parsed nodes 12947 * 12948 * Parse an external general entity within an existing parsing context 12949 * An external general parsed entity is well-formed if it matches the 12950 * production labeled extParsedEnt. 12951 * 12952 * [78] extParsedEnt ::= TextDecl? 
content 12953 * 12954 * Returns 0 if the entity is well formed, -1 in case of args problem and 12955 * the parser error code otherwise 12956 */ 12957 12958 int 12959 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12960 const xmlChar *ID, xmlNodePtr *lst) { 12961 void *userData; 12962 12963 if (ctx == NULL) return(-1); 12964 /* 12965 * If the user provided their own SAX callbacks, then reuse the 12966 * userData callback field, otherwise the expected setup in a 12967 * DOM builder is to have userData == ctxt 12968 */ 12969 if (ctx->userData == ctx) 12970 userData = NULL; 12971 else 12972 userData = ctx->userData; 12973 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax, 12974 userData, ctx->depth + 1, 12975 URL, ID, lst); 12976 } 12977 12978 /** 12979 * xmlParseExternalEntityPrivate: 12980 * @doc: the document the chunk pertains to 12981 * @oldctxt: the previous parser context if available 12982 * @sax: the SAX handler block (possibly NULL) 12983 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12984 * @depth: Used for loop detection, use 0 12985 * @URL: the URL for the entity to load 12986 * @ID: the System ID for the entity to load 12987 * @list: the return value for the set of parsed nodes 12988 * 12989 * Private version of xmlParseExternalEntity() 12990 * 12991 * Returns 0 if the entity is well formed, -1 in case of args problem and 12992 * the parser error code otherwise 12993 */ 12994 12995 static xmlParserErrors 12996 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12997 xmlSAXHandlerPtr sax, 12998 void *user_data, int depth, const xmlChar *URL, 12999 const xmlChar *ID, xmlNodePtr *list) { 13000 xmlParserCtxtPtr ctxt; 13001 xmlDocPtr newDoc; 13002 xmlNodePtr newRoot; 13003 xmlSAXHandlerPtr oldsax = NULL; 13004 xmlParserErrors ret = XML_ERR_OK; 13005 xmlChar start[4]; 13006 xmlCharEncoding enc; 13007 13008 if (((depth > 40) && 13009 ((oldctxt == NULL) || (oldctxt->options & 
XML_PARSE_HUGE) == 0)) || 13010 (depth > 1024)) { 13011 return(XML_ERR_ENTITY_LOOP); 13012 } 13013 13014 if (list != NULL) 13015 *list = NULL; 13016 if ((URL == NULL) && (ID == NULL)) 13017 return(XML_ERR_INTERNAL_ERROR); 13018 if (doc == NULL) 13019 return(XML_ERR_INTERNAL_ERROR); 13020 13021 13022 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13023 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13024 ctxt->userData = ctxt; 13025 if (sax != NULL) { 13026 oldsax = ctxt->sax; 13027 ctxt->sax = sax; 13028 if (user_data != NULL) 13029 ctxt->userData = user_data; 13030 } 13031 xmlDetectSAX2(ctxt); 13032 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13033 if (newDoc == NULL) { 13034 xmlFreeParserCtxt(ctxt); 13035 return(XML_ERR_INTERNAL_ERROR); 13036 } 13037 newDoc->properties = XML_DOC_INTERNAL; 13038 if (doc) { 13039 newDoc->intSubset = doc->intSubset; 13040 newDoc->extSubset = doc->extSubset; 13041 if (doc->dict) { 13042 newDoc->dict = doc->dict; 13043 xmlDictReference(newDoc->dict); 13044 } 13045 if (doc->URL != NULL) { 13046 newDoc->URL = xmlStrdup(doc->URL); 13047 } 13048 } 13049 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13050 if (newRoot == NULL) { 13051 if (sax != NULL) 13052 ctxt->sax = oldsax; 13053 xmlFreeParserCtxt(ctxt); 13054 newDoc->intSubset = NULL; 13055 newDoc->extSubset = NULL; 13056 xmlFreeDoc(newDoc); 13057 return(XML_ERR_INTERNAL_ERROR); 13058 } 13059 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13060 nodePush(ctxt, newDoc->children); 13061 if (doc == NULL) { 13062 ctxt->myDoc = newDoc; 13063 } else { 13064 ctxt->myDoc = doc; 13065 newRoot->doc = doc; 13066 } 13067 13068 /* 13069 * Get the 4 first bytes and decode the charset 13070 * if enc != XML_CHAR_ENCODING_NONE 13071 * plug some encoding conversion routines. 
13072 */ 13073 GROW; 13074 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13075 start[0] = RAW; 13076 start[1] = NXT(1); 13077 start[2] = NXT(2); 13078 start[3] = NXT(3); 13079 enc = xmlDetectCharEncoding(start, 4); 13080 if (enc != XML_CHAR_ENCODING_NONE) { 13081 xmlSwitchEncoding(ctxt, enc); 13082 } 13083 } 13084 13085 /* 13086 * Parse a possible text declaration first 13087 */ 13088 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13089 xmlParseTextDecl(ctxt); 13090 /* 13091 * An XML-1.0 document can't reference an entity not XML-1.0 13092 */ 13093 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) && 13094 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 13095 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 13096 "Version mismatch between document and entity\n"); 13097 } 13098 } 13099 13100 ctxt->instate = XML_PARSER_CONTENT; 13101 ctxt->depth = depth; 13102 if (oldctxt != NULL) { 13103 ctxt->_private = oldctxt->_private; 13104 ctxt->loadsubset = oldctxt->loadsubset; 13105 ctxt->validate = oldctxt->validate; 13106 ctxt->valid = oldctxt->valid; 13107 ctxt->replaceEntities = oldctxt->replaceEntities; 13108 if (oldctxt->validate) { 13109 ctxt->vctxt.error = oldctxt->vctxt.error; 13110 ctxt->vctxt.warning = oldctxt->vctxt.warning; 13111 ctxt->vctxt.userData = oldctxt->vctxt.userData; 13112 } 13113 ctxt->external = oldctxt->external; 13114 if (ctxt->dict) xmlDictFree(ctxt->dict); 13115 ctxt->dict = oldctxt->dict; 13116 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13117 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13118 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13119 ctxt->dictNames = oldctxt->dictNames; 13120 ctxt->attsDefault = oldctxt->attsDefault; 13121 ctxt->attsSpecial = oldctxt->attsSpecial; 13122 ctxt->linenumbers = oldctxt->linenumbers; 13123 ctxt->record_info = oldctxt->record_info; 13124 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13125 
ctxt->node_seq.length = oldctxt->node_seq.length; 13126 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13127 } else { 13128 /* 13129 * Doing validity checking on chunk without context 13130 * doesn't make sense 13131 */ 13132 ctxt->_private = NULL; 13133 ctxt->validate = 0; 13134 ctxt->external = 2; 13135 ctxt->loadsubset = 0; 13136 } 13137 13138 xmlParseContent(ctxt); 13139 13140 if ((RAW == '<') && (NXT(1) == '/')) { 13141 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13142 } else if (RAW != 0) { 13143 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13144 } 13145 if (ctxt->node != newDoc->children) { 13146 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13147 } 13148 13149 if (!ctxt->wellFormed) { 13150 if (ctxt->errNo == 0) 13151 ret = XML_ERR_INTERNAL_ERROR; 13152 else 13153 ret = (xmlParserErrors)ctxt->errNo; 13154 } else { 13155 if (list != NULL) { 13156 xmlNodePtr cur; 13157 13158 /* 13159 * Return the newly created nodeset after unlinking it from 13160 * they pseudo parent. 13161 */ 13162 cur = newDoc->children->children; 13163 *list = cur; 13164 while (cur != NULL) { 13165 cur->parent = NULL; 13166 cur = cur->next; 13167 } 13168 newDoc->children->children = NULL; 13169 } 13170 ret = XML_ERR_OK; 13171 } 13172 13173 /* 13174 * Record in the parent context the number of entities replacement 13175 * done when parsing that reference. 
13176 */ 13177 if (oldctxt != NULL) 13178 oldctxt->nbentities += ctxt->nbentities; 13179 13180 /* 13181 * Also record the size of the entity parsed 13182 */ 13183 if (ctxt->input != NULL && oldctxt != NULL) { 13184 oldctxt->sizeentities += ctxt->input->consumed; 13185 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13186 } 13187 /* 13188 * And record the last error if any 13189 */ 13190 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK)) 13191 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13192 13193 if (sax != NULL) 13194 ctxt->sax = oldsax; 13195 if (oldctxt != NULL) { 13196 ctxt->dict = NULL; 13197 ctxt->attsDefault = NULL; 13198 ctxt->attsSpecial = NULL; 13199 oldctxt->validate = ctxt->validate; 13200 oldctxt->valid = ctxt->valid; 13201 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13202 oldctxt->node_seq.length = ctxt->node_seq.length; 13203 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13204 } 13205 ctxt->node_seq.maximum = 0; 13206 ctxt->node_seq.length = 0; 13207 ctxt->node_seq.buffer = NULL; 13208 xmlFreeParserCtxt(ctxt); 13209 newDoc->intSubset = NULL; 13210 newDoc->extSubset = NULL; 13211 xmlFreeDoc(newDoc); 13212 13213 return(ret); 13214 } 13215 13216 #ifdef LIBXML_SAX1_ENABLED 13217 /** 13218 * xmlParseExternalEntity: 13219 * @doc: the document the chunk pertains to 13220 * @sax: the SAX handler block (possibly NULL) 13221 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13222 * @depth: Used for loop detection, use 0 13223 * @URL: the URL for the entity to load 13224 * @ID: the System ID for the entity to load 13225 * @lst: the return value for the set of parsed nodes 13226 * 13227 * Parse an external general entity 13228 * An external general parsed entity is well-formed if it matches the 13229 * production labeled extParsedEnt. 13230 * 13231 * [78] extParsedEnt ::= TextDecl? 
content 13232 * 13233 * Returns 0 if the entity is well formed, -1 in case of args problem and 13234 * the parser error code otherwise 13235 */ 13236 13237 int 13238 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13239 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13240 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13241 ID, lst)); 13242 } 13243 13244 /** 13245 * xmlParseBalancedChunkMemory: 13246 * @doc: the document the chunk pertains to (must not be NULL) 13247 * @sax: the SAX handler block (possibly NULL) 13248 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13249 * @depth: Used for loop detection, use 0 13250 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13251 * @lst: the return value for the set of parsed nodes 13252 * 13253 * Parse a well-balanced chunk of an XML document 13254 * called by the parser 13255 * The allowed sequence for the Well Balanced Chunk is the one defined by 13256 * the content production in the XML grammar: 13257 * 13258 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13259 * 13260 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13261 * the parser error code otherwise 13262 */ 13263 13264 int 13265 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13266 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13267 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13268 depth, string, lst, 0 ); 13269 } 13270 #endif /* LIBXML_SAX1_ENABLED */ 13271 13272 /** 13273 * xmlParseBalancedChunkMemoryInternal: 13274 * @oldctxt: the existing parsing context 13275 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13276 * @user_data: the user data field for the parser context 13277 * @lst: the return value for the set of parsed nodes 13278 * 13279 * 13280 * Parse a well-balanced chunk of an XML document 13281 * 
called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise
 *
 * This function has no recovery mode: @lst is only filled in when the
 * chunk parsed without error, otherwise it is left set to NULL.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /* Nesting limit: 40 levels unless XML_PARSE_HUGE is set, 1024 always. */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
        ctxt->userData = user_data;
    else
        ctxt->userData = ctxt;
    /* Borrow the parent's dictionary; it is detached again before free. */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->input_id = oldctxt->input_id + 1;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /* Borrow the parent's SAX handler; oldsax is restored before free. */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* No parent document: parse into a scratch document freed below. */
        newDoc = xmlNewDoc(BAD_CAST "1.0");
        if (newDoc == NULL) {
            ctxt->sax = oldsax;
            ctxt->dict = NULL;
            xmlFreeParserCtxt(ctxt);
            return(XML_ERR_INTERNAL_ERROR);
        }
        newDoc->properties = XML_DOC_INTERNAL;
        newDoc->dict = ctxt->dict;
        xmlDictReference(newDoc->dict);
        ctxt->myDoc = newDoc;
    } else {
        /*
         * Parse inside the parent's document; remember its child list so
         * it can be put back once the pseudoroot is removed.
         */
        ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
        last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        ctxt->sax = oldsax;
        ctxt->dict = NULL;
        xmlFreeParserCtxt(ctxt);
        if (newDoc != NULL) {
            xmlFreeDoc(newDoc);
        }
        return(XML_ERR_INTERNAL_ERROR);
    }
    /* Temporarily make the pseudoroot the only child of the document. */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }
    ctxt->dictNames = oldctxt->dictNames;
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    if (ctxt->node != ctxt->myDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
        ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = ctxt->myDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
            if ((oldctxt->validate) && (oldctxt->wellFormed) &&
                (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
                (cur->type == XML_ELEMENT_NODE)) {
                oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
                        oldctxt->myDoc, cur);
            }
#endif /* LIBXML_VALID_ENABLED */
            cur->parent = NULL;
            cur = cur->next;
        }
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /* Drop the pseudoroot and restore the document's saved children. */
        xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* Detach everything borrowed from oldctxt before freeing ctxt. */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
        xmlFreeDoc(newDoc);
    }

    return(ret);
}

/**
 * xmlParseInNodeContext:
 * @node:  the context node
 * @data:  the input string
 * @datalen:  the input string length in bytes
 * @options:  a combination of xmlParserOption
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document
 * within the context (DTD, namespaces, etc ...) of the given node.
 *
 * The allowed sequence for the data is a Well Balanced Chunk defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise
 */
xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
                      int options, xmlNodePtr *lst) {
#ifdef SAX2
    xmlParserCtxtPtr ctxt;
    xmlDocPtr doc = NULL;
    xmlNodePtr fake, cur;
    int nsnr = 0;

    xmlParserErrors ret = XML_ERR_OK;

    /*
     * check all input parameters, grab the document
     */
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
        return(XML_ERR_INTERNAL_ERROR);
    switch (node->type) {
        case XML_ELEMENT_NODE:
        case XML_ATTRIBUTE_NODE:
        case XML_TEXT_NODE:
        case XML_CDATA_SECTION_NODE:
        case XML_ENTITY_REF_NODE:
        case XML_PI_NODE:
        case XML_COMMENT_NODE:
        case
XML_DOCUMENT_NODE: 13511 case XML_HTML_DOCUMENT_NODE: 13512 break; 13513 default: 13514 return(XML_ERR_INTERNAL_ERROR); 13515 13516 } 13517 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13518 (node->type != XML_DOCUMENT_NODE) && 13519 (node->type != XML_HTML_DOCUMENT_NODE)) 13520 node = node->parent; 13521 if (node == NULL) 13522 return(XML_ERR_INTERNAL_ERROR); 13523 if (node->type == XML_ELEMENT_NODE) 13524 doc = node->doc; 13525 else 13526 doc = (xmlDocPtr) node; 13527 if (doc == NULL) 13528 return(XML_ERR_INTERNAL_ERROR); 13529 13530 /* 13531 * allocate a context and set-up everything not related to the 13532 * node position in the tree 13533 */ 13534 if (doc->type == XML_DOCUMENT_NODE) 13535 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13536 #ifdef LIBXML_HTML_ENABLED 13537 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13538 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13539 /* 13540 * When parsing in context, it makes no sense to add implied 13541 * elements like html/body/etc... 13542 */ 13543 options |= HTML_PARSE_NOIMPLIED; 13544 } 13545 #endif 13546 else 13547 return(XML_ERR_INTERNAL_ERROR); 13548 13549 if (ctxt == NULL) 13550 return(XML_ERR_NO_MEMORY); 13551 13552 /* 13553 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13554 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13555 * we must wait until the last moment to free the original one. 
13556 */ 13557 if (doc->dict != NULL) { 13558 if (ctxt->dict != NULL) 13559 xmlDictFree(ctxt->dict); 13560 ctxt->dict = doc->dict; 13561 } else 13562 options |= XML_PARSE_NODICT; 13563 13564 if (doc->encoding != NULL) { 13565 xmlCharEncodingHandlerPtr hdlr; 13566 13567 if (ctxt->encoding != NULL) 13568 xmlFree((xmlChar *) ctxt->encoding); 13569 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13570 13571 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13572 if (hdlr != NULL) { 13573 xmlSwitchToEncoding(ctxt, hdlr); 13574 } else { 13575 return(XML_ERR_UNSUPPORTED_ENCODING); 13576 } 13577 } 13578 13579 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13580 xmlDetectSAX2(ctxt); 13581 ctxt->myDoc = doc; 13582 /* parsing in context, i.e. as within existing content */ 13583 ctxt->input_id = 2; 13584 ctxt->instate = XML_PARSER_CONTENT; 13585 13586 fake = xmlNewDocComment(node->doc, NULL); 13587 if (fake == NULL) { 13588 xmlFreeParserCtxt(ctxt); 13589 return(XML_ERR_NO_MEMORY); 13590 } 13591 xmlAddChild(node, fake); 13592 13593 if (node->type == XML_ELEMENT_NODE) { 13594 nodePush(ctxt, node); 13595 /* 13596 * initialize the SAX2 namespaces stack 13597 */ 13598 cur = node; 13599 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13600 xmlNsPtr ns = cur->nsDef; 13601 const xmlChar *iprefix, *ihref; 13602 13603 while (ns != NULL) { 13604 if (ctxt->dict) { 13605 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13606 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13607 } else { 13608 iprefix = ns->prefix; 13609 ihref = ns->href; 13610 } 13611 13612 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13613 nsPush(ctxt, iprefix, ihref); 13614 nsnr++; 13615 } 13616 ns = ns->next; 13617 } 13618 cur = cur->parent; 13619 } 13620 } 13621 13622 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13623 /* 13624 * ID/IDREF registration will be done in xmlValidateElement below 13625 */ 13626 ctxt->loadsubset |= XML_SKIP_IDS; 13627 } 13628 13629 
#ifdef LIBXML_HTML_ENABLED 13630 if (doc->type == XML_HTML_DOCUMENT_NODE) 13631 __htmlParseContent(ctxt); 13632 else 13633 #endif 13634 xmlParseContent(ctxt); 13635 13636 nsPop(ctxt, nsnr); 13637 if ((RAW == '<') && (NXT(1) == '/')) { 13638 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13639 } else if (RAW != 0) { 13640 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13641 } 13642 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13643 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13644 ctxt->wellFormed = 0; 13645 } 13646 13647 if (!ctxt->wellFormed) { 13648 if (ctxt->errNo == 0) 13649 ret = XML_ERR_INTERNAL_ERROR; 13650 else 13651 ret = (xmlParserErrors)ctxt->errNo; 13652 } else { 13653 ret = XML_ERR_OK; 13654 } 13655 13656 /* 13657 * Return the newly created nodeset after unlinking it from 13658 * the pseudo sibling. 13659 */ 13660 13661 cur = fake->next; 13662 fake->next = NULL; 13663 node->last = fake; 13664 13665 if (cur != NULL) { 13666 cur->prev = NULL; 13667 } 13668 13669 *lst = cur; 13670 13671 while (cur != NULL) { 13672 cur->parent = NULL; 13673 cur = cur->next; 13674 } 13675 13676 xmlUnlinkNode(fake); 13677 xmlFreeNode(fake); 13678 13679 13680 if (ret != XML_ERR_OK) { 13681 xmlFreeNodeList(*lst); 13682 *lst = NULL; 13683 } 13684 13685 if (doc->dict != NULL) 13686 ctxt->dict = NULL; 13687 xmlFreeParserCtxt(ctxt); 13688 13689 return(ret); 13690 #else /* !SAX2 */ 13691 return(XML_ERR_INTERNAL_ERROR); 13692 #endif 13693 } 13694 13695 #ifdef LIBXML_SAX1_ENABLED 13696 /** 13697 * xmlParseBalancedChunkMemoryRecover: 13698 * @doc: the document the chunk pertains to (must not be NULL) 13699 * @sax: the SAX handler block (possibly NULL) 13700 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13701 * @depth: Used for loop detection, use 0 13702 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13703 * @lst: the return value for the set of parsed nodes 13704 * @recover: return nodes even if the data is broken (use 
0) 13705 * 13706 * 13707 * Parse a well-balanced chunk of an XML document 13708 * called by the parser 13709 * The allowed sequence for the Well Balanced Chunk is the one defined by 13710 * the content production in the XML grammar: 13711 * 13712 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13713 * 13714 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13715 * the parser error code otherwise 13716 * 13717 * In case recover is set to 1, the nodelist will not be empty even if 13718 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13719 * some extent. 13720 */ 13721 int 13722 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13723 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13724 int recover) { 13725 xmlParserCtxtPtr ctxt; 13726 xmlDocPtr newDoc; 13727 xmlSAXHandlerPtr oldsax = NULL; 13728 xmlNodePtr content, newRoot; 13729 int size; 13730 int ret = 0; 13731 13732 if (depth > 40) { 13733 return(XML_ERR_ENTITY_LOOP); 13734 } 13735 13736 13737 if (lst != NULL) 13738 *lst = NULL; 13739 if (string == NULL) 13740 return(-1); 13741 13742 size = xmlStrlen(string); 13743 13744 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13745 if (ctxt == NULL) return(-1); 13746 ctxt->userData = ctxt; 13747 if (sax != NULL) { 13748 oldsax = ctxt->sax; 13749 ctxt->sax = sax; 13750 if (user_data != NULL) 13751 ctxt->userData = user_data; 13752 } 13753 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13754 if (newDoc == NULL) { 13755 xmlFreeParserCtxt(ctxt); 13756 return(-1); 13757 } 13758 newDoc->properties = XML_DOC_INTERNAL; 13759 if ((doc != NULL) && (doc->dict != NULL)) { 13760 xmlDictFree(ctxt->dict); 13761 ctxt->dict = doc->dict; 13762 xmlDictReference(ctxt->dict); 13763 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13764 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13765 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, 
XML_XML_NAMESPACE, 36); 13766 ctxt->dictNames = 1; 13767 } else { 13768 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13769 } 13770 /* doc == NULL is only supported for historic reasons */ 13771 if (doc != NULL) { 13772 newDoc->intSubset = doc->intSubset; 13773 newDoc->extSubset = doc->extSubset; 13774 } 13775 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13776 if (newRoot == NULL) { 13777 if (sax != NULL) 13778 ctxt->sax = oldsax; 13779 xmlFreeParserCtxt(ctxt); 13780 newDoc->intSubset = NULL; 13781 newDoc->extSubset = NULL; 13782 xmlFreeDoc(newDoc); 13783 return(-1); 13784 } 13785 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13786 nodePush(ctxt, newRoot); 13787 /* doc == NULL is only supported for historic reasons */ 13788 if (doc == NULL) { 13789 ctxt->myDoc = newDoc; 13790 } else { 13791 ctxt->myDoc = newDoc; 13792 newDoc->children->doc = doc; 13793 /* Ensure that doc has XML spec namespace */ 13794 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13795 newDoc->oldNs = doc->oldNs; 13796 } 13797 ctxt->instate = XML_PARSER_CONTENT; 13798 ctxt->input_id = 2; 13799 ctxt->depth = depth; 13800 13801 /* 13802 * Doing validity checking on chunk doesn't make sense 13803 */ 13804 ctxt->validate = 0; 13805 ctxt->loadsubset = 0; 13806 xmlDetectSAX2(ctxt); 13807 13808 if ( doc != NULL ){ 13809 content = doc->children; 13810 doc->children = NULL; 13811 xmlParseContent(ctxt); 13812 doc->children = content; 13813 } 13814 else { 13815 xmlParseContent(ctxt); 13816 } 13817 if ((RAW == '<') && (NXT(1) == '/')) { 13818 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13819 } else if (RAW != 0) { 13820 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13821 } 13822 if (ctxt->node != newDoc->children) { 13823 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13824 } 13825 13826 if (!ctxt->wellFormed) { 13827 if (ctxt->errNo == 0) 13828 ret = 1; 13829 else 13830 ret = ctxt->errNo; 13831 } else { 13832 ret = 0; 13833 } 13834 13835 if ((lst != 
NULL) && ((ret == 0) || (recover == 1))) { 13836 xmlNodePtr cur; 13837 13838 /* 13839 * Return the newly created nodeset after unlinking it from 13840 * they pseudo parent. 13841 */ 13842 cur = newDoc->children->children; 13843 *lst = cur; 13844 while (cur != NULL) { 13845 xmlSetTreeDoc(cur, doc); 13846 cur->parent = NULL; 13847 cur = cur->next; 13848 } 13849 newDoc->children->children = NULL; 13850 } 13851 13852 if (sax != NULL) 13853 ctxt->sax = oldsax; 13854 xmlFreeParserCtxt(ctxt); 13855 newDoc->intSubset = NULL; 13856 newDoc->extSubset = NULL; 13857 /* This leaks the namespace list if doc == NULL */ 13858 newDoc->oldNs = NULL; 13859 xmlFreeDoc(newDoc); 13860 13861 return(ret); 13862 } 13863 13864 /** 13865 * xmlSAXParseEntity: 13866 * @sax: the SAX handler block 13867 * @filename: the filename 13868 * 13869 * parse an XML external entity out of context and build a tree. 13870 * It use the given SAX function block to handle the parsing callback. 13871 * If sax is NULL, fallback to the default DOM tree building routines. 13872 * 13873 * [78] extParsedEnt ::= TextDecl? 
content 13874 * 13875 * This correspond to a "Well Balanced" chunk 13876 * 13877 * Returns the resulting document tree 13878 */ 13879 13880 xmlDocPtr 13881 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13882 xmlDocPtr ret; 13883 xmlParserCtxtPtr ctxt; 13884 13885 ctxt = xmlCreateFileParserCtxt(filename); 13886 if (ctxt == NULL) { 13887 return(NULL); 13888 } 13889 if (sax != NULL) { 13890 if (ctxt->sax != NULL) 13891 xmlFree(ctxt->sax); 13892 ctxt->sax = sax; 13893 ctxt->userData = NULL; 13894 } 13895 13896 xmlParseExtParsedEnt(ctxt); 13897 13898 if (ctxt->wellFormed) 13899 ret = ctxt->myDoc; 13900 else { 13901 ret = NULL; 13902 xmlFreeDoc(ctxt->myDoc); 13903 ctxt->myDoc = NULL; 13904 } 13905 if (sax != NULL) 13906 ctxt->sax = NULL; 13907 xmlFreeParserCtxt(ctxt); 13908 13909 return(ret); 13910 } 13911 13912 /** 13913 * xmlParseEntity: 13914 * @filename: the filename 13915 * 13916 * parse an XML external entity out of context and build a tree. 13917 * 13918 * [78] extParsedEnt ::= TextDecl? content 13919 * 13920 * This correspond to a "Well Balanced" chunk 13921 * 13922 * Returns the resulting document tree 13923 */ 13924 13925 xmlDocPtr 13926 xmlParseEntity(const char *filename) { 13927 return(xmlSAXParseEntity(NULL, filename)); 13928 } 13929 #endif /* LIBXML_SAX1_ENABLED */ 13930 13931 /** 13932 * xmlCreateEntityParserCtxtInternal: 13933 * @URL: the entity URL 13934 * @ID: the entity PUBLIC ID 13935 * @base: a possible base for the target URI 13936 * @pctx: parser context used to set options on new context 13937 * 13938 * Create a parser context for an external entity 13939 * Automatic support for ZLIB/Compress compressed document is provided 13940 * by default if found at compile-time. 
13941 * 13942 * Returns the new parser context or NULL 13943 */ 13944 static xmlParserCtxtPtr 13945 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13946 const xmlChar *base, xmlParserCtxtPtr pctx) { 13947 xmlParserCtxtPtr ctxt; 13948 xmlParserInputPtr inputStream; 13949 char *directory = NULL; 13950 xmlChar *uri; 13951 13952 ctxt = xmlNewParserCtxt(); 13953 if (ctxt == NULL) { 13954 return(NULL); 13955 } 13956 13957 if (pctx != NULL) { 13958 ctxt->options = pctx->options; 13959 ctxt->_private = pctx->_private; 13960 /* 13961 * this is a subparser of pctx, so the input_id should be 13962 * incremented to distinguish from main entity 13963 */ 13964 ctxt->input_id = pctx->input_id + 1; 13965 } 13966 13967 /* Don't read from stdin. */ 13968 if (xmlStrcmp(URL, BAD_CAST "-") == 0) 13969 URL = BAD_CAST "./-"; 13970 13971 uri = xmlBuildURI(URL, base); 13972 13973 if (uri == NULL) { 13974 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13975 if (inputStream == NULL) { 13976 xmlFreeParserCtxt(ctxt); 13977 return(NULL); 13978 } 13979 13980 inputPush(ctxt, inputStream); 13981 13982 if ((ctxt->directory == NULL) && (directory == NULL)) 13983 directory = xmlParserGetDirectory((char *)URL); 13984 if ((ctxt->directory == NULL) && (directory != NULL)) 13985 ctxt->directory = directory; 13986 } else { 13987 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13988 if (inputStream == NULL) { 13989 xmlFree(uri); 13990 xmlFreeParserCtxt(ctxt); 13991 return(NULL); 13992 } 13993 13994 inputPush(ctxt, inputStream); 13995 13996 if ((ctxt->directory == NULL) && (directory == NULL)) 13997 directory = xmlParserGetDirectory((char *)uri); 13998 if ((ctxt->directory == NULL) && (directory != NULL)) 13999 ctxt->directory = directory; 14000 xmlFree(uri); 14001 } 14002 return(ctxt); 14003 } 14004 14005 /** 14006 * xmlCreateEntityParserCtxt: 14007 * @URL: the entity URL 14008 * @ID: the entity PUBLIC ID 14009 * @base: a possible base for 
the target URI 14010 * 14011 * Create a parser context for an external entity 14012 * Automatic support for ZLIB/Compress compressed document is provided 14013 * by default if found at compile-time. 14014 * 14015 * Returns the new parser context or NULL 14016 */ 14017 xmlParserCtxtPtr 14018 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14019 const xmlChar *base) { 14020 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14021 14022 } 14023 14024 /************************************************************************ 14025 * * 14026 * Front ends when parsing from a file * 14027 * * 14028 ************************************************************************/ 14029 14030 /** 14031 * xmlCreateURLParserCtxt: 14032 * @filename: the filename or URL 14033 * @options: a combination of xmlParserOption 14034 * 14035 * Create a parser context for a file or URL content. 14036 * Automatic support for ZLIB/Compress compressed document is provided 14037 * by default if found at compile-time and for file accesses 14038 * 14039 * Returns the new parser context or NULL 14040 */ 14041 xmlParserCtxtPtr 14042 xmlCreateURLParserCtxt(const char *filename, int options) 14043 { 14044 xmlParserCtxtPtr ctxt; 14045 xmlParserInputPtr inputStream; 14046 char *directory = NULL; 14047 14048 ctxt = xmlNewParserCtxt(); 14049 if (ctxt == NULL) { 14050 xmlErrMemory(NULL, "cannot allocate parser context"); 14051 return(NULL); 14052 } 14053 14054 if (options) 14055 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14056 ctxt->linenumbers = 1; 14057 14058 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14059 if (inputStream == NULL) { 14060 xmlFreeParserCtxt(ctxt); 14061 return(NULL); 14062 } 14063 14064 inputPush(ctxt, inputStream); 14065 if ((ctxt->directory == NULL) && (directory == NULL)) 14066 directory = xmlParserGetDirectory(filename); 14067 if ((ctxt->directory == NULL) && (directory != NULL)) 14068 ctxt->directory = directory; 14069 14070 
return(ctxt); 14071 } 14072 14073 /** 14074 * xmlCreateFileParserCtxt: 14075 * @filename: the filename 14076 * 14077 * Create a parser context for a file content. 14078 * Automatic support for ZLIB/Compress compressed document is provided 14079 * by default if found at compile-time. 14080 * 14081 * Returns the new parser context or NULL 14082 */ 14083 xmlParserCtxtPtr 14084 xmlCreateFileParserCtxt(const char *filename) 14085 { 14086 return(xmlCreateURLParserCtxt(filename, 0)); 14087 } 14088 14089 #ifdef LIBXML_SAX1_ENABLED 14090 /** 14091 * xmlSAXParseFileWithData: 14092 * @sax: the SAX handler block 14093 * @filename: the filename 14094 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14095 * documents 14096 * @data: the userdata 14097 * 14098 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14099 * compressed document is provided by default if found at compile-time. 14100 * It use the given SAX function block to handle the parsing callback. 14101 * If sax is NULL, fallback to the default DOM tree building routines. 
14102 * 14103 * User data (void *) is stored within the parser context in the 14104 * context's _private member, so it is available nearly everywhere in libxml 14105 * 14106 * Returns the resulting document tree 14107 */ 14108 14109 xmlDocPtr 14110 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14111 int recovery, void *data) { 14112 xmlDocPtr ret; 14113 xmlParserCtxtPtr ctxt; 14114 14115 xmlInitParser(); 14116 14117 ctxt = xmlCreateFileParserCtxt(filename); 14118 if (ctxt == NULL) { 14119 return(NULL); 14120 } 14121 if (sax != NULL) { 14122 if (ctxt->sax != NULL) 14123 xmlFree(ctxt->sax); 14124 ctxt->sax = sax; 14125 } 14126 xmlDetectSAX2(ctxt); 14127 if (data!=NULL) { 14128 ctxt->_private = data; 14129 } 14130 14131 if (ctxt->directory == NULL) 14132 ctxt->directory = xmlParserGetDirectory(filename); 14133 14134 ctxt->recovery = recovery; 14135 14136 xmlParseDocument(ctxt); 14137 14138 if ((ctxt->wellFormed) || recovery) { 14139 ret = ctxt->myDoc; 14140 if ((ret != NULL) && (ctxt->input->buf != NULL)) { 14141 if (ctxt->input->buf->compressed > 0) 14142 ret->compression = 9; 14143 else 14144 ret->compression = ctxt->input->buf->compressed; 14145 } 14146 } 14147 else { 14148 ret = NULL; 14149 xmlFreeDoc(ctxt->myDoc); 14150 ctxt->myDoc = NULL; 14151 } 14152 if (sax != NULL) 14153 ctxt->sax = NULL; 14154 xmlFreeParserCtxt(ctxt); 14155 14156 return(ret); 14157 } 14158 14159 /** 14160 * xmlSAXParseFile: 14161 * @sax: the SAX handler block 14162 * @filename: the filename 14163 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14164 * documents 14165 * 14166 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14167 * compressed document is provided by default if found at compile-time. 14168 * It use the given SAX function block to handle the parsing callback. 14169 * If sax is NULL, fallback to the default DOM tree building routines. 
14170 * 14171 * Returns the resulting document tree 14172 */ 14173 14174 xmlDocPtr 14175 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14176 int recovery) { 14177 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14178 } 14179 14180 /** 14181 * xmlRecoverDoc: 14182 * @cur: a pointer to an array of xmlChar 14183 * 14184 * parse an XML in-memory document and build a tree. 14185 * In the case the document is not Well Formed, a attempt to build a 14186 * tree is tried anyway 14187 * 14188 * Returns the resulting document tree or NULL in case of failure 14189 */ 14190 14191 xmlDocPtr 14192 xmlRecoverDoc(const xmlChar *cur) { 14193 return(xmlSAXParseDoc(NULL, cur, 1)); 14194 } 14195 14196 /** 14197 * xmlParseFile: 14198 * @filename: the filename 14199 * 14200 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14201 * compressed document is provided by default if found at compile-time. 14202 * 14203 * Returns the resulting document tree if the file was wellformed, 14204 * NULL otherwise. 14205 */ 14206 14207 xmlDocPtr 14208 xmlParseFile(const char *filename) { 14209 return(xmlSAXParseFile(NULL, filename, 0)); 14210 } 14211 14212 /** 14213 * xmlRecoverFile: 14214 * @filename: the filename 14215 * 14216 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14217 * compressed document is provided by default if found at compile-time. 
14218 * In the case the document is not Well Formed, it attempts to build 14219 * a tree anyway 14220 * 14221 * Returns the resulting document tree or NULL in case of failure 14222 */ 14223 14224 xmlDocPtr 14225 xmlRecoverFile(const char *filename) { 14226 return(xmlSAXParseFile(NULL, filename, 1)); 14227 } 14228 14229 14230 /** 14231 * xmlSetupParserForBuffer: 14232 * @ctxt: an XML parser context 14233 * @buffer: a xmlChar * buffer 14234 * @filename: a file name 14235 * 14236 * Setup the parser context to parse a new buffer; Clears any prior 14237 * contents from the parser context. The buffer parameter must not be 14238 * NULL, but the filename parameter can be 14239 */ 14240 void 14241 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14242 const char* filename) 14243 { 14244 xmlParserInputPtr input; 14245 14246 if ((ctxt == NULL) || (buffer == NULL)) 14247 return; 14248 14249 input = xmlNewInputStream(ctxt); 14250 if (input == NULL) { 14251 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14252 xmlClearParserCtxt(ctxt); 14253 return; 14254 } 14255 14256 xmlClearParserCtxt(ctxt); 14257 if (filename != NULL) 14258 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14259 input->base = buffer; 14260 input->cur = buffer; 14261 input->end = &buffer[xmlStrlen(buffer)]; 14262 inputPush(ctxt, input); 14263 } 14264 14265 /** 14266 * xmlSAXUserParseFile: 14267 * @sax: a SAX handler 14268 * @user_data: The user data returned on SAX callbacks 14269 * @filename: a file name 14270 * 14271 * parse an XML file and call the given SAX handler routines. 
14272 * Automatic support for ZLIB/Compress compressed document is provided 14273 * 14274 * Returns 0 in case of success or a error number otherwise 14275 */ 14276 int 14277 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14278 const char *filename) { 14279 int ret = 0; 14280 xmlParserCtxtPtr ctxt; 14281 14282 ctxt = xmlCreateFileParserCtxt(filename); 14283 if (ctxt == NULL) return -1; 14284 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14285 xmlFree(ctxt->sax); 14286 ctxt->sax = sax; 14287 xmlDetectSAX2(ctxt); 14288 14289 if (user_data != NULL) 14290 ctxt->userData = user_data; 14291 14292 xmlParseDocument(ctxt); 14293 14294 if (ctxt->wellFormed) 14295 ret = 0; 14296 else { 14297 if (ctxt->errNo != 0) 14298 ret = ctxt->errNo; 14299 else 14300 ret = -1; 14301 } 14302 if (sax != NULL) 14303 ctxt->sax = NULL; 14304 if (ctxt->myDoc != NULL) { 14305 xmlFreeDoc(ctxt->myDoc); 14306 ctxt->myDoc = NULL; 14307 } 14308 xmlFreeParserCtxt(ctxt); 14309 14310 return ret; 14311 } 14312 #endif /* LIBXML_SAX1_ENABLED */ 14313 14314 /************************************************************************ 14315 * * 14316 * Front ends when parsing from memory * 14317 * * 14318 ************************************************************************/ 14319 14320 /** 14321 * xmlCreateMemoryParserCtxt: 14322 * @buffer: a pointer to a char array 14323 * @size: the size of the array 14324 * 14325 * Create a parser context for an XML in-memory document. 
14326 * 14327 * Returns the new parser context or NULL 14328 */ 14329 xmlParserCtxtPtr 14330 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14331 xmlParserCtxtPtr ctxt; 14332 xmlParserInputPtr input; 14333 xmlParserInputBufferPtr buf; 14334 14335 if (buffer == NULL) 14336 return(NULL); 14337 if (size <= 0) 14338 return(NULL); 14339 14340 ctxt = xmlNewParserCtxt(); 14341 if (ctxt == NULL) 14342 return(NULL); 14343 14344 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14345 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14346 if (buf == NULL) { 14347 xmlFreeParserCtxt(ctxt); 14348 return(NULL); 14349 } 14350 14351 input = xmlNewInputStream(ctxt); 14352 if (input == NULL) { 14353 xmlFreeParserInputBuffer(buf); 14354 xmlFreeParserCtxt(ctxt); 14355 return(NULL); 14356 } 14357 14358 input->filename = NULL; 14359 input->buf = buf; 14360 xmlBufResetInput(input->buf->buffer, input); 14361 14362 inputPush(ctxt, input); 14363 return(ctxt); 14364 } 14365 14366 #ifdef LIBXML_SAX1_ENABLED 14367 /** 14368 * xmlSAXParseMemoryWithData: 14369 * @sax: the SAX handler block 14370 * @buffer: an pointer to a char array 14371 * @size: the size of the array 14372 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14373 * documents 14374 * @data: the userdata 14375 * 14376 * parse an XML in-memory block and use the given SAX function block 14377 * to handle the parsing callback. If sax is NULL, fallback to the default 14378 * DOM tree building routines. 
14379 * 14380 * User data (void *) is stored within the parser context in the 14381 * context's _private member, so it is available nearly everywhere in libxml 14382 * 14383 * Returns the resulting document tree 14384 */ 14385 14386 xmlDocPtr 14387 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14388 int size, int recovery, void *data) { 14389 xmlDocPtr ret; 14390 xmlParserCtxtPtr ctxt; 14391 14392 xmlInitParser(); 14393 14394 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14395 if (ctxt == NULL) return(NULL); 14396 if (sax != NULL) { 14397 if (ctxt->sax != NULL) 14398 xmlFree(ctxt->sax); 14399 ctxt->sax = sax; 14400 } 14401 xmlDetectSAX2(ctxt); 14402 if (data!=NULL) { 14403 ctxt->_private=data; 14404 } 14405 14406 ctxt->recovery = recovery; 14407 14408 xmlParseDocument(ctxt); 14409 14410 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14411 else { 14412 ret = NULL; 14413 xmlFreeDoc(ctxt->myDoc); 14414 ctxt->myDoc = NULL; 14415 } 14416 if (sax != NULL) 14417 ctxt->sax = NULL; 14418 xmlFreeParserCtxt(ctxt); 14419 14420 return(ret); 14421 } 14422 14423 /** 14424 * xmlSAXParseMemory: 14425 * @sax: the SAX handler block 14426 * @buffer: an pointer to a char array 14427 * @size: the size of the array 14428 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14429 * documents 14430 * 14431 * parse an XML in-memory block and use the given SAX function block 14432 * to handle the parsing callback. If sax is NULL, fallback to the default 14433 * DOM tree building routines. 14434 * 14435 * Returns the resulting document tree 14436 */ 14437 xmlDocPtr 14438 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14439 int size, int recovery) { 14440 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14441 } 14442 14443 /** 14444 * xmlParseMemory: 14445 * @buffer: an pointer to a char array 14446 * @size: the size of the array 14447 * 14448 * parse an XML in-memory block and build a tree. 
14449 * 14450 * Returns the resulting document tree 14451 */ 14452 14453 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14454 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14455 } 14456 14457 /** 14458 * xmlRecoverMemory: 14459 * @buffer: an pointer to a char array 14460 * @size: the size of the array 14461 * 14462 * parse an XML in-memory block and build a tree. 14463 * In the case the document is not Well Formed, an attempt to 14464 * build a tree is tried anyway 14465 * 14466 * Returns the resulting document tree or NULL in case of error 14467 */ 14468 14469 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14470 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14471 } 14472 14473 /** 14474 * xmlSAXUserParseMemory: 14475 * @sax: a SAX handler 14476 * @user_data: The user data returned on SAX callbacks 14477 * @buffer: an in-memory XML document input 14478 * @size: the length of the XML document in bytes 14479 * 14480 * A better SAX parsing routine. 14481 * parse an XML in-memory buffer and call the given SAX handler routines. 
14482 * 14483 * Returns 0 in case of success or a error number otherwise 14484 */ 14485 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14486 const char *buffer, int size) { 14487 int ret = 0; 14488 xmlParserCtxtPtr ctxt; 14489 14490 xmlInitParser(); 14491 14492 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14493 if (ctxt == NULL) return -1; 14494 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14495 xmlFree(ctxt->sax); 14496 ctxt->sax = sax; 14497 xmlDetectSAX2(ctxt); 14498 14499 if (user_data != NULL) 14500 ctxt->userData = user_data; 14501 14502 xmlParseDocument(ctxt); 14503 14504 if (ctxt->wellFormed) 14505 ret = 0; 14506 else { 14507 if (ctxt->errNo != 0) 14508 ret = ctxt->errNo; 14509 else 14510 ret = -1; 14511 } 14512 if (sax != NULL) 14513 ctxt->sax = NULL; 14514 if (ctxt->myDoc != NULL) { 14515 xmlFreeDoc(ctxt->myDoc); 14516 ctxt->myDoc = NULL; 14517 } 14518 xmlFreeParserCtxt(ctxt); 14519 14520 return ret; 14521 } 14522 #endif /* LIBXML_SAX1_ENABLED */ 14523 14524 /** 14525 * xmlCreateDocParserCtxt: 14526 * @cur: a pointer to an array of xmlChar 14527 * 14528 * Creates a parser context for an XML in-memory document. 14529 * 14530 * Returns the new parser context or NULL 14531 */ 14532 xmlParserCtxtPtr 14533 xmlCreateDocParserCtxt(const xmlChar *cur) { 14534 int len; 14535 14536 if (cur == NULL) 14537 return(NULL); 14538 len = xmlStrlen(cur); 14539 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14540 } 14541 14542 #ifdef LIBXML_SAX1_ENABLED 14543 /** 14544 * xmlSAXParseDoc: 14545 * @sax: the SAX handler block 14546 * @cur: a pointer to an array of xmlChar 14547 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14548 * documents 14549 * 14550 * parse an XML in-memory document and build a tree. 14551 * It use the given SAX function block to handle the parsing callback. 14552 * If sax is NULL, fallback to the default DOM tree building routines. 
14553 * 14554 * Returns the resulting document tree 14555 */ 14556 14557 xmlDocPtr 14558 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14559 xmlDocPtr ret; 14560 xmlParserCtxtPtr ctxt; 14561 xmlSAXHandlerPtr oldsax = NULL; 14562 14563 if (cur == NULL) return(NULL); 14564 14565 14566 ctxt = xmlCreateDocParserCtxt(cur); 14567 if (ctxt == NULL) return(NULL); 14568 if (sax != NULL) { 14569 oldsax = ctxt->sax; 14570 ctxt->sax = sax; 14571 ctxt->userData = NULL; 14572 } 14573 xmlDetectSAX2(ctxt); 14574 14575 xmlParseDocument(ctxt); 14576 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14577 else { 14578 ret = NULL; 14579 xmlFreeDoc(ctxt->myDoc); 14580 ctxt->myDoc = NULL; 14581 } 14582 if (sax != NULL) 14583 ctxt->sax = oldsax; 14584 xmlFreeParserCtxt(ctxt); 14585 14586 return(ret); 14587 } 14588 14589 /** 14590 * xmlParseDoc: 14591 * @cur: a pointer to an array of xmlChar 14592 * 14593 * parse an XML in-memory document and build a tree. 14594 * 14595 * Returns the resulting document tree 14596 */ 14597 14598 xmlDocPtr 14599 xmlParseDoc(const xmlChar *cur) { 14600 return(xmlSAXParseDoc(NULL, cur, 0)); 14601 } 14602 #endif /* LIBXML_SAX1_ENABLED */ 14603 14604 #ifdef LIBXML_LEGACY_ENABLED 14605 /************************************************************************ 14606 * * 14607 * Specific function to keep track of entities references * 14608 * and used by the XSLT debugger * 14609 * * 14610 ************************************************************************/ 14611 14612 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14613 14614 /** 14615 * xmlAddEntityReference: 14616 * @ent : A valid entity 14617 * @firstNode : A valid first node for children of entity 14618 * @lastNode : A valid last node of children entity 14619 * 14620 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14621 */ 14622 static void 14623 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14624 xmlNodePtr lastNode) 
14625 { 14626 if (xmlEntityRefFunc != NULL) { 14627 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14628 } 14629 } 14630 14631 14632 /** 14633 * xmlSetEntityReferenceFunc: 14634 * @func: A valid function 14635 * 14636 * Set the function to call call back when a xml reference has been made 14637 */ 14638 void 14639 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14640 { 14641 xmlEntityRefFunc = func; 14642 } 14643 #endif /* LIBXML_LEGACY_ENABLED */ 14644 14645 /************************************************************************ 14646 * * 14647 * Miscellaneous * 14648 * * 14649 ************************************************************************/ 14650 14651 #ifdef LIBXML_XPATH_ENABLED 14652 #include <libxml/xpath.h> 14653 #endif 14654 14655 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14656 static int xmlParserInitialized = 0; 14657 14658 /** 14659 * xmlInitParser: 14660 * 14661 * Initialization function for the XML parser. 14662 * This is not reentrant. Call once before processing in case of 14663 * use in multithreaded programs. 
 */

void
xmlInitParser(void) {
    /* Nothing to do when a previous call already completed. */
    if (xmlParserInitialized != 0)
	return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /*
     * Register xmlCleanupParser() to run at process exit, but only while
     * xmlFree is still the C library free().
     */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    /*
     * Take the global init mutex and test the flag again, since it may have
     * been set between the unlocked test above and acquiring the lock.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
	xmlInitThreads();
	xmlInitGlobals();
	/* Install the default generic error handler if none (or the default) is set. */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitMemory();
        xmlInitializeDict();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	/* Mark the library as initialized only after every step above ran. */
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}

/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
14719 * 14720 * WARNING: if your application is multithreaded or has plugin support 14721 * calling this may crash the application if another thread or 14722 * a plugin is still using libxml2. It's sometimes very hard to 14723 * guess if libxml2 is in use in the application, some libraries 14724 * or plugins may use it without notice. In case of doubt abstain 14725 * from calling this function or do it just before calling exit() 14726 * to avoid leak reports from valgrind ! 14727 */ 14728 14729 void 14730 xmlCleanupParser(void) { 14731 if (!xmlParserInitialized) 14732 return; 14733 14734 xmlCleanupCharEncodingHandlers(); 14735 #ifdef LIBXML_CATALOG_ENABLED 14736 xmlCatalogCleanup(); 14737 #endif 14738 xmlDictCleanup(); 14739 xmlCleanupInputCallbacks(); 14740 #ifdef LIBXML_OUTPUT_ENABLED 14741 xmlCleanupOutputCallbacks(); 14742 #endif 14743 #ifdef LIBXML_SCHEMAS_ENABLED 14744 xmlSchemaCleanupTypes(); 14745 xmlRelaxNGCleanupTypes(); 14746 #endif 14747 xmlCleanupGlobals(); 14748 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14749 xmlCleanupMemory(); 14750 xmlParserInitialized = 0; 14751 } 14752 14753 #if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \ 14754 !defined(_WIN32) 14755 static void 14756 ATTRIBUTE_DESTRUCTOR 14757 xmlDestructor(void) { 14758 /* 14759 * Calling custom deallocation functions in a destructor can cause 14760 * problems, for example with Nokogiri. 
14761 */ 14762 if (xmlFree == free) 14763 xmlCleanupParser(); 14764 } 14765 #endif 14766 14767 /************************************************************************ 14768 * * 14769 * New set (2.6.0) of simpler and more flexible APIs * 14770 * * 14771 ************************************************************************/ 14772 14773 /** 14774 * DICT_FREE: 14775 * @str: a string 14776 * 14777 * Free a string if it is not owned by the "dict" dictionary in the 14778 * current scope 14779 */ 14780 #define DICT_FREE(str) \ 14781 if ((str) && ((!dict) || \ 14782 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14783 xmlFree((char *)(str)); 14784 14785 /** 14786 * xmlCtxtReset: 14787 * @ctxt: an XML parser context 14788 * 14789 * Reset a parser context 14790 */ 14791 void 14792 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14793 { 14794 xmlParserInputPtr input; 14795 xmlDictPtr dict; 14796 14797 if (ctxt == NULL) 14798 return; 14799 14800 dict = ctxt->dict; 14801 14802 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14803 xmlFreeInputStream(input); 14804 } 14805 ctxt->inputNr = 0; 14806 ctxt->input = NULL; 14807 14808 ctxt->spaceNr = 0; 14809 if (ctxt->spaceTab != NULL) { 14810 ctxt->spaceTab[0] = -1; 14811 ctxt->space = &ctxt->spaceTab[0]; 14812 } else { 14813 ctxt->space = NULL; 14814 } 14815 14816 14817 ctxt->nodeNr = 0; 14818 ctxt->node = NULL; 14819 14820 ctxt->nameNr = 0; 14821 ctxt->name = NULL; 14822 14823 ctxt->nsNr = 0; 14824 14825 DICT_FREE(ctxt->version); 14826 ctxt->version = NULL; 14827 DICT_FREE(ctxt->encoding); 14828 ctxt->encoding = NULL; 14829 DICT_FREE(ctxt->directory); 14830 ctxt->directory = NULL; 14831 DICT_FREE(ctxt->extSubURI); 14832 ctxt->extSubURI = NULL; 14833 DICT_FREE(ctxt->extSubSystem); 14834 ctxt->extSubSystem = NULL; 14835 if (ctxt->myDoc != NULL) 14836 xmlFreeDoc(ctxt->myDoc); 14837 ctxt->myDoc = NULL; 14838 14839 ctxt->standalone = -1; 14840 ctxt->hasExternalSubset = 0; 14841 ctxt->hasPErefs = 0; 14842 ctxt->html = 0; 
14843 ctxt->external = 0; 14844 ctxt->instate = XML_PARSER_START; 14845 ctxt->token = 0; 14846 14847 ctxt->wellFormed = 1; 14848 ctxt->nsWellFormed = 1; 14849 ctxt->disableSAX = 0; 14850 ctxt->valid = 1; 14851 #if 0 14852 ctxt->vctxt.userData = ctxt; 14853 ctxt->vctxt.error = xmlParserValidityError; 14854 ctxt->vctxt.warning = xmlParserValidityWarning; 14855 #endif 14856 ctxt->record_info = 0; 14857 ctxt->checkIndex = 0; 14858 ctxt->inSubset = 0; 14859 ctxt->errNo = XML_ERR_OK; 14860 ctxt->depth = 0; 14861 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14862 ctxt->catalogs = NULL; 14863 ctxt->nbentities = 0; 14864 ctxt->sizeentities = 0; 14865 ctxt->sizeentcopy = 0; 14866 xmlInitNodeInfoSeq(&ctxt->node_seq); 14867 14868 if (ctxt->attsDefault != NULL) { 14869 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 14870 ctxt->attsDefault = NULL; 14871 } 14872 if (ctxt->attsSpecial != NULL) { 14873 xmlHashFree(ctxt->attsSpecial, NULL); 14874 ctxt->attsSpecial = NULL; 14875 } 14876 14877 #ifdef LIBXML_CATALOG_ENABLED 14878 if (ctxt->catalogs != NULL) 14879 xmlCatalogFreeLocal(ctxt->catalogs); 14880 #endif 14881 if (ctxt->lastError.code != XML_ERR_OK) 14882 xmlResetError(&ctxt->lastError); 14883 } 14884 14885 /** 14886 * xmlCtxtResetPush: 14887 * @ctxt: an XML parser context 14888 * @chunk: a pointer to an array of chars 14889 * @size: number of chars in the array 14890 * @filename: an optional file name or URI 14891 * @encoding: the document encoding, or NULL 14892 * 14893 * Reset a push parser context 14894 * 14895 * Returns 0 in case of success and 1 in case of error 14896 */ 14897 int 14898 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14899 int size, const char *filename, const char *encoding) 14900 { 14901 xmlParserInputPtr inputStream; 14902 xmlParserInputBufferPtr buf; 14903 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14904 14905 if (ctxt == NULL) 14906 return(1); 14907 14908 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14909 enc = 
xmlDetectCharEncoding((const xmlChar *) chunk, size); 14910 14911 buf = xmlAllocParserInputBuffer(enc); 14912 if (buf == NULL) 14913 return(1); 14914 14915 if (ctxt == NULL) { 14916 xmlFreeParserInputBuffer(buf); 14917 return(1); 14918 } 14919 14920 xmlCtxtReset(ctxt); 14921 14922 if (filename == NULL) { 14923 ctxt->directory = NULL; 14924 } else { 14925 ctxt->directory = xmlParserGetDirectory(filename); 14926 } 14927 14928 inputStream = xmlNewInputStream(ctxt); 14929 if (inputStream == NULL) { 14930 xmlFreeParserInputBuffer(buf); 14931 return(1); 14932 } 14933 14934 if (filename == NULL) 14935 inputStream->filename = NULL; 14936 else 14937 inputStream->filename = (char *) 14938 xmlCanonicPath((const xmlChar *) filename); 14939 inputStream->buf = buf; 14940 xmlBufResetInput(buf->buffer, inputStream); 14941 14942 inputPush(ctxt, inputStream); 14943 14944 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14945 (ctxt->input->buf != NULL)) { 14946 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14947 size_t cur = ctxt->input->cur - ctxt->input->base; 14948 14949 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14950 14951 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14952 #ifdef DEBUG_PUSH 14953 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14954 #endif 14955 } 14956 14957 if (encoding != NULL) { 14958 xmlCharEncodingHandlerPtr hdlr; 14959 14960 if (ctxt->encoding != NULL) 14961 xmlFree((xmlChar *) ctxt->encoding); 14962 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14963 14964 hdlr = xmlFindCharEncodingHandler(encoding); 14965 if (hdlr != NULL) { 14966 xmlSwitchToEncoding(ctxt, hdlr); 14967 } else { 14968 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14969 "Unsupported encoding %s\n", BAD_CAST encoding); 14970 } 14971 } else if (enc != XML_CHAR_ENCODING_NONE) { 14972 xmlSwitchEncoding(ctxt, enc); 14973 } 14974 14975 return(0); 14976 } 14977 14978 14979 /** 
14980 * xmlCtxtUseOptionsInternal: 14981 * @ctxt: an XML parser context 14982 * @options: a combination of xmlParserOption 14983 * @encoding: the user provided encoding to use 14984 * 14985 * Applies the options to the parser context 14986 * 14987 * Returns 0 in case of success, the set of unknown or unimplemented options 14988 * in case of error. 14989 */ 14990 static int 14991 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 14992 { 14993 if (ctxt == NULL) 14994 return(-1); 14995 if (encoding != NULL) { 14996 if (ctxt->encoding != NULL) 14997 xmlFree((xmlChar *) ctxt->encoding); 14998 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14999 } 15000 if (options & XML_PARSE_RECOVER) { 15001 ctxt->recovery = 1; 15002 options -= XML_PARSE_RECOVER; 15003 ctxt->options |= XML_PARSE_RECOVER; 15004 } else 15005 ctxt->recovery = 0; 15006 if (options & XML_PARSE_DTDLOAD) { 15007 ctxt->loadsubset = XML_DETECT_IDS; 15008 options -= XML_PARSE_DTDLOAD; 15009 ctxt->options |= XML_PARSE_DTDLOAD; 15010 } else 15011 ctxt->loadsubset = 0; 15012 if (options & XML_PARSE_DTDATTR) { 15013 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15014 options -= XML_PARSE_DTDATTR; 15015 ctxt->options |= XML_PARSE_DTDATTR; 15016 } 15017 if (options & XML_PARSE_NOENT) { 15018 ctxt->replaceEntities = 1; 15019 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15020 options -= XML_PARSE_NOENT; 15021 ctxt->options |= XML_PARSE_NOENT; 15022 } else 15023 ctxt->replaceEntities = 0; 15024 if (options & XML_PARSE_PEDANTIC) { 15025 ctxt->pedantic = 1; 15026 options -= XML_PARSE_PEDANTIC; 15027 ctxt->options |= XML_PARSE_PEDANTIC; 15028 } else 15029 ctxt->pedantic = 0; 15030 if (options & XML_PARSE_NOBLANKS) { 15031 ctxt->keepBlanks = 0; 15032 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15033 options -= XML_PARSE_NOBLANKS; 15034 ctxt->options |= XML_PARSE_NOBLANKS; 15035 } else 15036 ctxt->keepBlanks = 1; 15037 if (options & XML_PARSE_DTDVALID) { 15038 ctxt->validate 
= 1; 15039 if (options & XML_PARSE_NOWARNING) 15040 ctxt->vctxt.warning = NULL; 15041 if (options & XML_PARSE_NOERROR) 15042 ctxt->vctxt.error = NULL; 15043 options -= XML_PARSE_DTDVALID; 15044 ctxt->options |= XML_PARSE_DTDVALID; 15045 } else 15046 ctxt->validate = 0; 15047 if (options & XML_PARSE_NOWARNING) { 15048 ctxt->sax->warning = NULL; 15049 options -= XML_PARSE_NOWARNING; 15050 } 15051 if (options & XML_PARSE_NOERROR) { 15052 ctxt->sax->error = NULL; 15053 ctxt->sax->fatalError = NULL; 15054 options -= XML_PARSE_NOERROR; 15055 } 15056 #ifdef LIBXML_SAX1_ENABLED 15057 if (options & XML_PARSE_SAX1) { 15058 ctxt->sax->startElement = xmlSAX2StartElement; 15059 ctxt->sax->endElement = xmlSAX2EndElement; 15060 ctxt->sax->startElementNs = NULL; 15061 ctxt->sax->endElementNs = NULL; 15062 ctxt->sax->initialized = 1; 15063 options -= XML_PARSE_SAX1; 15064 ctxt->options |= XML_PARSE_SAX1; 15065 } 15066 #endif /* LIBXML_SAX1_ENABLED */ 15067 if (options & XML_PARSE_NODICT) { 15068 ctxt->dictNames = 0; 15069 options -= XML_PARSE_NODICT; 15070 ctxt->options |= XML_PARSE_NODICT; 15071 } else { 15072 ctxt->dictNames = 1; 15073 } 15074 if (options & XML_PARSE_NOCDATA) { 15075 ctxt->sax->cdataBlock = NULL; 15076 options -= XML_PARSE_NOCDATA; 15077 ctxt->options |= XML_PARSE_NOCDATA; 15078 } 15079 if (options & XML_PARSE_NSCLEAN) { 15080 ctxt->options |= XML_PARSE_NSCLEAN; 15081 options -= XML_PARSE_NSCLEAN; 15082 } 15083 if (options & XML_PARSE_NONET) { 15084 ctxt->options |= XML_PARSE_NONET; 15085 options -= XML_PARSE_NONET; 15086 } 15087 if (options & XML_PARSE_COMPACT) { 15088 ctxt->options |= XML_PARSE_COMPACT; 15089 options -= XML_PARSE_COMPACT; 15090 } 15091 if (options & XML_PARSE_OLD10) { 15092 ctxt->options |= XML_PARSE_OLD10; 15093 options -= XML_PARSE_OLD10; 15094 } 15095 if (options & XML_PARSE_NOBASEFIX) { 15096 ctxt->options |= XML_PARSE_NOBASEFIX; 15097 options -= XML_PARSE_NOBASEFIX; 15098 } 15099 if (options & XML_PARSE_HUGE) { 15100 ctxt->options |= 
XML_PARSE_HUGE; 15101 options -= XML_PARSE_HUGE; 15102 if (ctxt->dict != NULL) 15103 xmlDictSetLimit(ctxt->dict, 0); 15104 } 15105 if (options & XML_PARSE_OLDSAX) { 15106 ctxt->options |= XML_PARSE_OLDSAX; 15107 options -= XML_PARSE_OLDSAX; 15108 } 15109 if (options & XML_PARSE_IGNORE_ENC) { 15110 ctxt->options |= XML_PARSE_IGNORE_ENC; 15111 options -= XML_PARSE_IGNORE_ENC; 15112 } 15113 if (options & XML_PARSE_BIG_LINES) { 15114 ctxt->options |= XML_PARSE_BIG_LINES; 15115 options -= XML_PARSE_BIG_LINES; 15116 } 15117 ctxt->linenumbers = 1; 15118 return (options); 15119 } 15120 15121 /** 15122 * xmlCtxtUseOptions: 15123 * @ctxt: an XML parser context 15124 * @options: a combination of xmlParserOption 15125 * 15126 * Applies the options to the parser context 15127 * 15128 * Returns 0 in case of success, the set of unknown or unimplemented options 15129 * in case of error. 15130 */ 15131 int 15132 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15133 { 15134 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15135 } 15136 15137 /** 15138 * xmlDoRead: 15139 * @ctxt: an XML parser context 15140 * @URL: the base URL to use for the document 15141 * @encoding: the document encoding, or NULL 15142 * @options: a combination of xmlParserOption 15143 * @reuse: keep the context for reuse 15144 * 15145 * Common front-end for the xmlRead functions 15146 * 15147 * Returns the resulting document tree or NULL 15148 */ 15149 static xmlDocPtr 15150 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15151 int options, int reuse) 15152 { 15153 xmlDocPtr ret; 15154 15155 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15156 if (encoding != NULL) { 15157 xmlCharEncodingHandlerPtr hdlr; 15158 15159 hdlr = xmlFindCharEncodingHandler(encoding); 15160 if (hdlr != NULL) 15161 xmlSwitchToEncoding(ctxt, hdlr); 15162 } 15163 if ((URL != NULL) && (ctxt->input != NULL) && 15164 (ctxt->input->filename == NULL)) 15165 ctxt->input->filename = (char *) 
xmlStrdup((const xmlChar *) URL); 15166 xmlParseDocument(ctxt); 15167 if ((ctxt->wellFormed) || ctxt->recovery) 15168 ret = ctxt->myDoc; 15169 else { 15170 ret = NULL; 15171 if (ctxt->myDoc != NULL) { 15172 xmlFreeDoc(ctxt->myDoc); 15173 } 15174 } 15175 ctxt->myDoc = NULL; 15176 if (!reuse) { 15177 xmlFreeParserCtxt(ctxt); 15178 } 15179 15180 return (ret); 15181 } 15182 15183 /** 15184 * xmlReadDoc: 15185 * @cur: a pointer to a zero terminated string 15186 * @URL: the base URL to use for the document 15187 * @encoding: the document encoding, or NULL 15188 * @options: a combination of xmlParserOption 15189 * 15190 * parse an XML in-memory document and build a tree. 15191 * 15192 * Returns the resulting document tree 15193 */ 15194 xmlDocPtr 15195 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15196 { 15197 xmlParserCtxtPtr ctxt; 15198 15199 if (cur == NULL) 15200 return (NULL); 15201 xmlInitParser(); 15202 15203 ctxt = xmlCreateDocParserCtxt(cur); 15204 if (ctxt == NULL) 15205 return (NULL); 15206 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15207 } 15208 15209 /** 15210 * xmlReadFile: 15211 * @filename: a file or URL 15212 * @encoding: the document encoding, or NULL 15213 * @options: a combination of xmlParserOption 15214 * 15215 * parse an XML file from the filesystem or the network. 
15216 * 15217 * Returns the resulting document tree 15218 */ 15219 xmlDocPtr 15220 xmlReadFile(const char *filename, const char *encoding, int options) 15221 { 15222 xmlParserCtxtPtr ctxt; 15223 15224 xmlInitParser(); 15225 ctxt = xmlCreateURLParserCtxt(filename, options); 15226 if (ctxt == NULL) 15227 return (NULL); 15228 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15229 } 15230 15231 /** 15232 * xmlReadMemory: 15233 * @buffer: a pointer to a char array 15234 * @size: the size of the array 15235 * @URL: the base URL to use for the document 15236 * @encoding: the document encoding, or NULL 15237 * @options: a combination of xmlParserOption 15238 * 15239 * parse an XML in-memory document and build a tree. 15240 * 15241 * Returns the resulting document tree 15242 */ 15243 xmlDocPtr 15244 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15245 { 15246 xmlParserCtxtPtr ctxt; 15247 15248 xmlInitParser(); 15249 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15250 if (ctxt == NULL) 15251 return (NULL); 15252 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15253 } 15254 15255 /** 15256 * xmlReadFd: 15257 * @fd: an open file descriptor 15258 * @URL: the base URL to use for the document 15259 * @encoding: the document encoding, or NULL 15260 * @options: a combination of xmlParserOption 15261 * 15262 * parse an XML from a file descriptor and build a tree. 15263 * NOTE that the file descriptor will not be closed when the 15264 * reader is closed or reset. 
15265 * 15266 * Returns the resulting document tree 15267 */ 15268 xmlDocPtr 15269 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15270 { 15271 xmlParserCtxtPtr ctxt; 15272 xmlParserInputBufferPtr input; 15273 xmlParserInputPtr stream; 15274 15275 if (fd < 0) 15276 return (NULL); 15277 xmlInitParser(); 15278 15279 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15280 if (input == NULL) 15281 return (NULL); 15282 input->closecallback = NULL; 15283 ctxt = xmlNewParserCtxt(); 15284 if (ctxt == NULL) { 15285 xmlFreeParserInputBuffer(input); 15286 return (NULL); 15287 } 15288 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15289 if (stream == NULL) { 15290 xmlFreeParserInputBuffer(input); 15291 xmlFreeParserCtxt(ctxt); 15292 return (NULL); 15293 } 15294 inputPush(ctxt, stream); 15295 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15296 } 15297 15298 /** 15299 * xmlReadIO: 15300 * @ioread: an I/O read function 15301 * @ioclose: an I/O close function 15302 * @ioctx: an I/O handler 15303 * @URL: the base URL to use for the document 15304 * @encoding: the document encoding, or NULL 15305 * @options: a combination of xmlParserOption 15306 * 15307 * parse an XML document from I/O functions and source and build a tree. 
15308 * 15309 * Returns the resulting document tree 15310 */ 15311 xmlDocPtr 15312 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15313 void *ioctx, const char *URL, const char *encoding, int options) 15314 { 15315 xmlParserCtxtPtr ctxt; 15316 xmlParserInputBufferPtr input; 15317 xmlParserInputPtr stream; 15318 15319 if (ioread == NULL) 15320 return (NULL); 15321 xmlInitParser(); 15322 15323 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15324 XML_CHAR_ENCODING_NONE); 15325 if (input == NULL) { 15326 if (ioclose != NULL) 15327 ioclose(ioctx); 15328 return (NULL); 15329 } 15330 ctxt = xmlNewParserCtxt(); 15331 if (ctxt == NULL) { 15332 xmlFreeParserInputBuffer(input); 15333 return (NULL); 15334 } 15335 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15336 if (stream == NULL) { 15337 xmlFreeParserInputBuffer(input); 15338 xmlFreeParserCtxt(ctxt); 15339 return (NULL); 15340 } 15341 inputPush(ctxt, stream); 15342 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15343 } 15344 15345 /** 15346 * xmlCtxtReadDoc: 15347 * @ctxt: an XML parser context 15348 * @cur: a pointer to a zero terminated string 15349 * @URL: the base URL to use for the document 15350 * @encoding: the document encoding, or NULL 15351 * @options: a combination of xmlParserOption 15352 * 15353 * parse an XML in-memory document and build a tree. 
15354 * This reuses the existing @ctxt parser context 15355 * 15356 * Returns the resulting document tree 15357 */ 15358 xmlDocPtr 15359 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15360 const char *URL, const char *encoding, int options) 15361 { 15362 xmlParserInputPtr stream; 15363 15364 if (cur == NULL) 15365 return (NULL); 15366 if (ctxt == NULL) 15367 return (NULL); 15368 xmlInitParser(); 15369 15370 xmlCtxtReset(ctxt); 15371 15372 stream = xmlNewStringInputStream(ctxt, cur); 15373 if (stream == NULL) { 15374 return (NULL); 15375 } 15376 inputPush(ctxt, stream); 15377 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15378 } 15379 15380 /** 15381 * xmlCtxtReadFile: 15382 * @ctxt: an XML parser context 15383 * @filename: a file or URL 15384 * @encoding: the document encoding, or NULL 15385 * @options: a combination of xmlParserOption 15386 * 15387 * parse an XML file from the filesystem or the network. 15388 * This reuses the existing @ctxt parser context 15389 * 15390 * Returns the resulting document tree 15391 */ 15392 xmlDocPtr 15393 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15394 const char *encoding, int options) 15395 { 15396 xmlParserInputPtr stream; 15397 15398 if (filename == NULL) 15399 return (NULL); 15400 if (ctxt == NULL) 15401 return (NULL); 15402 xmlInitParser(); 15403 15404 xmlCtxtReset(ctxt); 15405 15406 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15407 if (stream == NULL) { 15408 return (NULL); 15409 } 15410 inputPush(ctxt, stream); 15411 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15412 } 15413 15414 /** 15415 * xmlCtxtReadMemory: 15416 * @ctxt: an XML parser context 15417 * @buffer: a pointer to a char array 15418 * @size: the size of the array 15419 * @URL: the base URL to use for the document 15420 * @encoding: the document encoding, or NULL 15421 * @options: a combination of xmlParserOption 15422 * 15423 * parse an XML in-memory document and build a tree. 
15424 * This reuses the existing @ctxt parser context 15425 * 15426 * Returns the resulting document tree 15427 */ 15428 xmlDocPtr 15429 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15430 const char *URL, const char *encoding, int options) 15431 { 15432 xmlParserInputBufferPtr input; 15433 xmlParserInputPtr stream; 15434 15435 if (ctxt == NULL) 15436 return (NULL); 15437 if (buffer == NULL) 15438 return (NULL); 15439 xmlInitParser(); 15440 15441 xmlCtxtReset(ctxt); 15442 15443 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15444 if (input == NULL) { 15445 return(NULL); 15446 } 15447 15448 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15449 if (stream == NULL) { 15450 xmlFreeParserInputBuffer(input); 15451 return(NULL); 15452 } 15453 15454 inputPush(ctxt, stream); 15455 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15456 } 15457 15458 /** 15459 * xmlCtxtReadFd: 15460 * @ctxt: an XML parser context 15461 * @fd: an open file descriptor 15462 * @URL: the base URL to use for the document 15463 * @encoding: the document encoding, or NULL 15464 * @options: a combination of xmlParserOption 15465 * 15466 * parse an XML from a file descriptor and build a tree. 15467 * This reuses the existing @ctxt parser context 15468 * NOTE that the file descriptor will not be closed when the 15469 * reader is closed or reset. 
15470 * 15471 * Returns the resulting document tree 15472 */ 15473 xmlDocPtr 15474 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15475 const char *URL, const char *encoding, int options) 15476 { 15477 xmlParserInputBufferPtr input; 15478 xmlParserInputPtr stream; 15479 15480 if (fd < 0) 15481 return (NULL); 15482 if (ctxt == NULL) 15483 return (NULL); 15484 xmlInitParser(); 15485 15486 xmlCtxtReset(ctxt); 15487 15488 15489 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15490 if (input == NULL) 15491 return (NULL); 15492 input->closecallback = NULL; 15493 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15494 if (stream == NULL) { 15495 xmlFreeParserInputBuffer(input); 15496 return (NULL); 15497 } 15498 inputPush(ctxt, stream); 15499 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15500 } 15501 15502 /** 15503 * xmlCtxtReadIO: 15504 * @ctxt: an XML parser context 15505 * @ioread: an I/O read function 15506 * @ioclose: an I/O close function 15507 * @ioctx: an I/O handler 15508 * @URL: the base URL to use for the document 15509 * @encoding: the document encoding, or NULL 15510 * @options: a combination of xmlParserOption 15511 * 15512 * parse an XML document from I/O functions and source and build a tree. 
15513 * This reuses the existing @ctxt parser context 15514 * 15515 * Returns the resulting document tree 15516 */ 15517 xmlDocPtr 15518 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15519 xmlInputCloseCallback ioclose, void *ioctx, 15520 const char *URL, 15521 const char *encoding, int options) 15522 { 15523 xmlParserInputBufferPtr input; 15524 xmlParserInputPtr stream; 15525 15526 if (ioread == NULL) 15527 return (NULL); 15528 if (ctxt == NULL) 15529 return (NULL); 15530 xmlInitParser(); 15531 15532 xmlCtxtReset(ctxt); 15533 15534 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15535 XML_CHAR_ENCODING_NONE); 15536 if (input == NULL) { 15537 if (ioclose != NULL) 15538 ioclose(ioctx); 15539 return (NULL); 15540 } 15541 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15542 if (stream == NULL) { 15543 xmlFreeParserInputBuffer(input); 15544 return (NULL); 15545 } 15546 inputPush(ctxt, stream); 15547 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15548 } 15549 15550