/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
*
 * daniel@veillard.com
 */

/* To avoid EBCDIC trouble when parsing on zOS */
#if defined(__MVS__)
#pragma convert("ISO8859-1")
#endif

#define IN_LIBXML
#include "libxml.h"

/* Platform directory separator character. */
#if defined(_WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <stdarg.h>
#include <stddef.h>
#include <libxml/xmlmemory.h>
#include <libxml/threads.h>
#include <libxml/globals.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
#include <libxml/xmlschemastypes.h>
#include <libxml/relaxng.h>
#endif
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include "buf.h"
#include "enc.h"

/*
 * Per start-tag bookkeeping record.
 * NOTE(review): the field meanings below are inferred from their names
 * only; confirm against the code that fills in this structure.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the namespace prefix of the tag */
    const xmlChar *URI;     /* presumably the namespace URI of the tag */
    int line;               /* presumably the input line of the tag */
    int nsNr;               /* presumably a namespace count for the tag */
};

/*
 * Forward declarations of static helpers that are referenced before
 * their definitions.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);

static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);

static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);

/************************************************************************
 *                                                                      *
 *              Arbitrary
limits set in the parser. See XML_PARSE_HUGE        *
 *                                                                      *
 ************************************************************************/

/* Entity replacement sizes below this are never considered suspicious. */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in byte of the input indicates that you have
 * an exponential behaviour. A value of 10 correspond to at least 3 entity
 * replacement per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10

/*
 * xmlParserEntityCheck
 * @ctxt:  the parser context; when NULL nothing is checked
 * @size:  the replacement size of the entity, or 0 if not known
 * @ent:  the entity being referenced, or NULL
 * @replacement:  the volume of entity copy done so far, or 0
 *
 * Function to check non-linear entity expansion behaviour
 * This is here to detect and stop exponential entity expansion
 * This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * The arguments select which heuristic is applied: @replacement is used
 * when non-zero, else @size when non-zero, else the entity count cached
 * in @ent->checked, else only the global entity counter.
 *
 * Returns 1 if an abuse was detected (XML_ERR_ENTITY_LOOP is raised or was
 * already the last error), 0 otherwise, including when @ctxt is NULL or
 * XML_PARSE_HUGE is set.
 */
static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
                     xmlEntityPtr ent, size_t replacement)
{
    size_t consumed = 0;
    int i;

    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
        return (0);
    /* A loop was already reported: keep reporting failure. */
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
        return (1);

    /*
     * This may look absurd but is needed to detect
     * entities problems
     *
     * First reference to a not yet evaluated entity: expand its content
     * once to count how many entities the expansion involves.
     */
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
        (ent->content != NULL) && (ent->checked == 0) &&
        (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
        unsigned long oldnbent = ctxt->nbentities, diff;
        xmlChar *rep;

        /* Mark as being checked before expanding the content. */
        ent->checked = 1;

        ++ctxt->depth;
        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                  XML_SUBSTITUTE_REF, 0, 0, 0);
        --ctxt->depth;
        if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
            /* Expansion failed: empty the entity content in place. */
            ent->content[0] = 0;
        }

        /*
         * Cache twice the (capped) number of entities seen during the
         * expansion; the low bit is reserved as a flag set below.
         */
        diff = ctxt->nbentities - oldnbent + 1;
        if (diff > INT_MAX / 2)
            diff = INT_MAX / 2;
        ent->checked = diff * 2;
        if (rep != NULL) {
            /* Low bit records that the replacement text contains '<'. */
            if (xmlStrchr(rep, '<'))
                ent->checked |= 1;
            xmlFree(rep);
            rep = NULL;
        }
    }

    /*
     * Prevent entity exponential check, not just replacement while
     * parsing the DTD
     * The check is potentially costly so do that only once in a thousand
     */
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
        (ctxt->nbentities % 1024 == 0)) {
        /* Sum the bytes consumed over the whole input stack. */
        for (i = 0;i < ctxt->inputNr;i++) {
            consumed += ctxt->inputTab[i]->consumed +
                       (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
        }
        if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            ctxt->instate = XML_PARSER_EOF;
            return (1);
        }
        /* The checks below recompute consumed from the current input. */
        consumed = 0;
    }



    if (replacement != 0) {
        if (replacement < XML_MAX_TEXT_LENGTH)
            return(0);

        /*
         * If the volume of entity copy reaches 10 times the
         * amount of parsed data and over the large text threshold
         * then that's very likely to be an abuse.
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                       (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        if (replacement < XML_PARSER_NON_LINEAR * consumed)
            return(0);
    } else if (size != 0) {
        /*
         * Do the check based on the replacement size of the entity
         */
        if (size < XML_PARSER_BIG_ENTITY)
            return(0);

        /*
         * A limit on the amount of text data reasonably used
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
            (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
            return (0);
    } else if (ent != NULL) {
        /*
         * use the number of parsed entities in the replacement
         * (halving drops the flag kept in the low bit)
         */
        size = ent->checked / 2;

        /*
         * The amount of data parsed counting entities size only once
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        /*
         * Check the density of entities for the amount of data
         * knowing an entity reference will take at least 3 bytes
         */
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
            return (0);
    } else {
        /*
         * strange we got no data for checking
         * Only fail when the last error was an undeclared entity and
         * more than 10000 entities have been counted.
         */
        if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
             (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
            (ctxt->nbentities <= 10000))
            return (0);
    }
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
    return (1);
}

/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;



#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expected to have received.
It is not a hard 295 * limit but an optimization when reading strings like Names 296 * It is not strictly needed as long as inputs available characters 297 * are followed by 0, which should be provided by the I/O level 298 */ 299 #define XML_PARSER_CHUNK_SIZE 100 300 301 /* 302 * List of XML prefixed PI allowed by W3C specs 303 */ 304 305 static const char *xmlW3CPIs[] = { 306 "xml-stylesheet", 307 "xml-model", 308 NULL 309 }; 310 311 312 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 313 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 314 const xmlChar **str); 315 316 static xmlParserErrors 317 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 318 xmlSAXHandlerPtr sax, 319 void *user_data, int depth, const xmlChar *URL, 320 const xmlChar *ID, xmlNodePtr *list); 321 322 static int 323 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 324 const char *encoding); 325 #ifdef LIBXML_LEGACY_ENABLED 326 static void 327 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 328 xmlNodePtr lastNode); 329 #endif /* LIBXML_LEGACY_ENABLED */ 330 331 static xmlParserErrors 332 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 333 const xmlChar *string, void *user_data, xmlNodePtr *lst); 334 335 static int 336 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 337 338 /************************************************************************ 339 * * 340 * Some factorized error routines * 341 * * 342 ************************************************************************/ 343 344 /** 345 * xmlErrAttributeDup: 346 * @ctxt: an XML parser context 347 * @prefix: the attribute prefix 348 * @localname: the attribute localname 349 * 350 * Handle a redefinition of attribute error 351 */ 352 static void 353 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 354 const xmlChar * localname) 355 { 356 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 357 (ctxt->instate == 
XML_PARSER_EOF)) 358 return; 359 if (ctxt != NULL) 360 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 361 362 if (prefix == NULL) 363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 364 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 365 (const char *) localname, NULL, NULL, 0, 0, 366 "Attribute %s redefined\n", localname); 367 else 368 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 369 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 370 (const char *) prefix, (const char *) localname, 371 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 372 localname); 373 if (ctxt != NULL) { 374 ctxt->wellFormed = 0; 375 if (ctxt->recovery == 0) 376 ctxt->disableSAX = 1; 377 } 378 } 379 380 /** 381 * xmlFatalErr: 382 * @ctxt: an XML parser context 383 * @error: the error number 384 * @extra: extra information string 385 * 386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 387 */ 388 static void 389 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 390 { 391 const char *errmsg; 392 393 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 394 (ctxt->instate == XML_PARSER_EOF)) 395 return; 396 switch (error) { 397 case XML_ERR_INVALID_HEX_CHARREF: 398 errmsg = "CharRef: invalid hexadecimal value"; 399 break; 400 case XML_ERR_INVALID_DEC_CHARREF: 401 errmsg = "CharRef: invalid decimal value"; 402 break; 403 case XML_ERR_INVALID_CHARREF: 404 errmsg = "CharRef: invalid value"; 405 break; 406 case XML_ERR_INTERNAL_ERROR: 407 errmsg = "internal error"; 408 break; 409 case XML_ERR_PEREF_AT_EOF: 410 errmsg = "PEReference at end of document"; 411 break; 412 case XML_ERR_PEREF_IN_PROLOG: 413 errmsg = "PEReference in prolog"; 414 break; 415 case XML_ERR_PEREF_IN_EPILOG: 416 errmsg = "PEReference in epilog"; 417 break; 418 case XML_ERR_PEREF_NO_NAME: 419 errmsg = "PEReference: no name"; 420 break; 421 case XML_ERR_PEREF_SEMICOL_MISSING: 422 errmsg = "PEReference: expecting ';'"; 423 break; 424 case 
XML_ERR_ENTITY_LOOP: 425 errmsg = "Detected an entity reference loop"; 426 break; 427 case XML_ERR_ENTITY_NOT_STARTED: 428 errmsg = "EntityValue: \" or ' expected"; 429 break; 430 case XML_ERR_ENTITY_PE_INTERNAL: 431 errmsg = "PEReferences forbidden in internal subset"; 432 break; 433 case XML_ERR_ENTITY_NOT_FINISHED: 434 errmsg = "EntityValue: \" or ' expected"; 435 break; 436 case XML_ERR_ATTRIBUTE_NOT_STARTED: 437 errmsg = "AttValue: \" or ' expected"; 438 break; 439 case XML_ERR_LT_IN_ATTRIBUTE: 440 errmsg = "Unescaped '<' not allowed in attributes values"; 441 break; 442 case XML_ERR_LITERAL_NOT_STARTED: 443 errmsg = "SystemLiteral \" or ' expected"; 444 break; 445 case XML_ERR_LITERAL_NOT_FINISHED: 446 errmsg = "Unfinished System or Public ID \" or ' expected"; 447 break; 448 case XML_ERR_MISPLACED_CDATA_END: 449 errmsg = "Sequence ']]>' not allowed in content"; 450 break; 451 case XML_ERR_URI_REQUIRED: 452 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 453 break; 454 case XML_ERR_PUBID_REQUIRED: 455 errmsg = "PUBLIC, the Public Identifier is missing"; 456 break; 457 case XML_ERR_HYPHEN_IN_COMMENT: 458 errmsg = "Comment must not contain '--' (double-hyphen)"; 459 break; 460 case XML_ERR_PI_NOT_STARTED: 461 errmsg = "xmlParsePI : no target name"; 462 break; 463 case XML_ERR_RESERVED_XML_NAME: 464 errmsg = "Invalid PI name"; 465 break; 466 case XML_ERR_NOTATION_NOT_STARTED: 467 errmsg = "NOTATION: Name expected here"; 468 break; 469 case XML_ERR_NOTATION_NOT_FINISHED: 470 errmsg = "'>' required to close NOTATION declaration"; 471 break; 472 case XML_ERR_VALUE_REQUIRED: 473 errmsg = "Entity value required"; 474 break; 475 case XML_ERR_URI_FRAGMENT: 476 errmsg = "Fragment not allowed"; 477 break; 478 case XML_ERR_ATTLIST_NOT_STARTED: 479 errmsg = "'(' required to start ATTLIST enumeration"; 480 break; 481 case XML_ERR_NMTOKEN_REQUIRED: 482 errmsg = "NmToken expected in ATTLIST enumeration"; 483 break; 484 case XML_ERR_ATTLIST_NOT_FINISHED: 485 errmsg = "')' 
required to finish ATTLIST enumeration"; 486 break; 487 case XML_ERR_MIXED_NOT_STARTED: 488 errmsg = "MixedContentDecl : '|' or ')*' expected"; 489 break; 490 case XML_ERR_PCDATA_REQUIRED: 491 errmsg = "MixedContentDecl : '#PCDATA' expected"; 492 break; 493 case XML_ERR_ELEMCONTENT_NOT_STARTED: 494 errmsg = "ContentDecl : Name or '(' expected"; 495 break; 496 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 497 errmsg = "ContentDecl : ',' '|' or ')' expected"; 498 break; 499 case XML_ERR_PEREF_IN_INT_SUBSET: 500 errmsg = 501 "PEReference: forbidden within markup decl in internal subset"; 502 break; 503 case XML_ERR_GT_REQUIRED: 504 errmsg = "expected '>'"; 505 break; 506 case XML_ERR_CONDSEC_INVALID: 507 errmsg = "XML conditional section '[' expected"; 508 break; 509 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 510 errmsg = "Content error in the external subset"; 511 break; 512 case XML_ERR_CONDSEC_INVALID_KEYWORD: 513 errmsg = 514 "conditional section INCLUDE or IGNORE keyword expected"; 515 break; 516 case XML_ERR_CONDSEC_NOT_FINISHED: 517 errmsg = "XML conditional section not closed"; 518 break; 519 case XML_ERR_XMLDECL_NOT_STARTED: 520 errmsg = "Text declaration '<?xml' required"; 521 break; 522 case XML_ERR_XMLDECL_NOT_FINISHED: 523 errmsg = "parsing XML declaration: '?>' expected"; 524 break; 525 case XML_ERR_EXT_ENTITY_STANDALONE: 526 errmsg = "external parsed entities cannot be standalone"; 527 break; 528 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 529 errmsg = "EntityRef: expecting ';'"; 530 break; 531 case XML_ERR_DOCTYPE_NOT_FINISHED: 532 errmsg = "DOCTYPE improperly terminated"; 533 break; 534 case XML_ERR_LTSLASH_REQUIRED: 535 errmsg = "EndTag: '</' not found"; 536 break; 537 case XML_ERR_EQUAL_REQUIRED: 538 errmsg = "expected '='"; 539 break; 540 case XML_ERR_STRING_NOT_CLOSED: 541 errmsg = "String not closed expecting \" or '"; 542 break; 543 case XML_ERR_STRING_NOT_STARTED: 544 errmsg = "String not started expecting ' or \""; 545 break; 546 case XML_ERR_ENCODING_NAME: 
547 errmsg = "Invalid XML encoding name"; 548 break; 549 case XML_ERR_STANDALONE_VALUE: 550 errmsg = "standalone accepts only 'yes' or 'no'"; 551 break; 552 case XML_ERR_DOCUMENT_EMPTY: 553 errmsg = "Document is empty"; 554 break; 555 case XML_ERR_DOCUMENT_END: 556 errmsg = "Extra content at the end of the document"; 557 break; 558 case XML_ERR_NOT_WELL_BALANCED: 559 errmsg = "chunk is not well balanced"; 560 break; 561 case XML_ERR_EXTRA_CONTENT: 562 errmsg = "extra content at the end of well balanced chunk"; 563 break; 564 case XML_ERR_VERSION_MISSING: 565 errmsg = "Malformed declaration expecting version"; 566 break; 567 case XML_ERR_NAME_TOO_LONG: 568 errmsg = "Name too long use XML_PARSE_HUGE option"; 569 break; 570 #if 0 571 case: 572 errmsg = ""; 573 break; 574 #endif 575 default: 576 errmsg = "Unregistered error message"; 577 } 578 if (ctxt != NULL) 579 ctxt->errNo = error; 580 if (info == NULL) { 581 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 582 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 583 errmsg); 584 } else { 585 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 586 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 587 errmsg, info); 588 } 589 if (ctxt != NULL) { 590 ctxt->wellFormed = 0; 591 if (ctxt->recovery == 0) 592 ctxt->disableSAX = 1; 593 } 594 } 595 596 /** 597 * xmlFatalErrMsg: 598 * @ctxt: an XML parser context 599 * @error: the error number 600 * @msg: the error message 601 * 602 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 603 */ 604 static void LIBXML_ATTR_FORMAT(3,0) 605 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 606 const char *msg) 607 { 608 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 609 (ctxt->instate == XML_PARSER_EOF)) 610 return; 611 if (ctxt != NULL) 612 ctxt->errNo = error; 613 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 614 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 615 if (ctxt != NULL) { 616 ctxt->wellFormed = 0; 617 if (ctxt->recovery == 0) 618 ctxt->disableSAX = 1; 619 } 620 } 621 622 /** 623 * xmlWarningMsg: 624 * @ctxt: an XML parser context 625 * @error: the error number 626 * @msg: the error message 627 * @str1: extra data 628 * @str2: extra data 629 * 630 * Handle a warning. 631 */ 632 static void LIBXML_ATTR_FORMAT(3,0) 633 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 634 const char *msg, const xmlChar *str1, const xmlChar *str2) 635 { 636 xmlStructuredErrorFunc schannel = NULL; 637 638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 639 (ctxt->instate == XML_PARSER_EOF)) 640 return; 641 if ((ctxt != NULL) && (ctxt->sax != NULL) && 642 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 643 schannel = ctxt->sax->serror; 644 if (ctxt != NULL) { 645 __xmlRaiseError(schannel, 646 (ctxt->sax) ? ctxt->sax->warning : NULL, 647 ctxt->userData, 648 ctxt, NULL, XML_FROM_PARSER, error, 649 XML_ERR_WARNING, NULL, 0, 650 (const char *) str1, (const char *) str2, NULL, 0, 0, 651 msg, (const char *) str1, (const char *) str2); 652 } else { 653 __xmlRaiseError(schannel, NULL, NULL, 654 ctxt, NULL, XML_FROM_PARSER, error, 655 XML_ERR_WARNING, NULL, 0, 656 (const char *) str1, (const char *) str2, NULL, 0, 0, 657 msg, (const char *) str1, (const char *) str2); 658 } 659 } 660 661 /** 662 * xmlValidityError: 663 * @ctxt: an XML parser context 664 * @error: the error number 665 * @msg: the error message 666 * @str1: extra data 667 * 668 * Handle a validity error. 
669 */ 670 static void LIBXML_ATTR_FORMAT(3,0) 671 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 672 const char *msg, const xmlChar *str1, const xmlChar *str2) 673 { 674 xmlStructuredErrorFunc schannel = NULL; 675 676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 677 (ctxt->instate == XML_PARSER_EOF)) 678 return; 679 if (ctxt != NULL) { 680 ctxt->errNo = error; 681 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 682 schannel = ctxt->sax->serror; 683 } 684 if (ctxt != NULL) { 685 __xmlRaiseError(schannel, 686 ctxt->vctxt.error, ctxt->vctxt.userData, 687 ctxt, NULL, XML_FROM_DTD, error, 688 XML_ERR_ERROR, NULL, 0, (const char *) str1, 689 (const char *) str2, NULL, 0, 0, 690 msg, (const char *) str1, (const char *) str2); 691 ctxt->valid = 0; 692 } else { 693 __xmlRaiseError(schannel, NULL, NULL, 694 ctxt, NULL, XML_FROM_DTD, error, 695 XML_ERR_ERROR, NULL, 0, (const char *) str1, 696 (const char *) str2, NULL, 0, 0, 697 msg, (const char *) str1, (const char *) str2); 698 } 699 } 700 701 /** 702 * xmlFatalErrMsgInt: 703 * @ctxt: an XML parser context 704 * @error: the error number 705 * @msg: the error message 706 * @val: an integer value 707 * 708 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 709 */ 710 static void LIBXML_ATTR_FORMAT(3,0) 711 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 712 const char *msg, int val) 713 { 714 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 715 (ctxt->instate == XML_PARSER_EOF)) 716 return; 717 if (ctxt != NULL) 718 ctxt->errNo = error; 719 __xmlRaiseError(NULL, NULL, NULL, 720 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 721 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 722 if (ctxt != NULL) { 723 ctxt->wellFormed = 0; 724 if (ctxt->recovery == 0) 725 ctxt->disableSAX = 1; 726 } 727 } 728 729 /** 730 * xmlFatalErrMsgStrIntStr: 731 * @ctxt: an XML parser context 732 * @error: the error number 733 * @msg: the error message 734 * @str1: an string info 735 * @val: an integer value 736 * @str2: an string info 737 * 738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 739 */ 740 static void LIBXML_ATTR_FORMAT(3,0) 741 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 742 const char *msg, const xmlChar *str1, int val, 743 const xmlChar *str2) 744 { 745 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 746 (ctxt->instate == XML_PARSER_EOF)) 747 return; 748 if (ctxt != NULL) 749 ctxt->errNo = error; 750 __xmlRaiseError(NULL, NULL, NULL, 751 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 752 NULL, 0, (const char *) str1, (const char *) str2, 753 NULL, val, 0, msg, str1, val, str2); 754 if (ctxt != NULL) { 755 ctxt->wellFormed = 0; 756 if (ctxt->recovery == 0) 757 ctxt->disableSAX = 1; 758 } 759 } 760 761 /** 762 * xmlFatalErrMsgStr: 763 * @ctxt: an XML parser context 764 * @error: the error number 765 * @msg: the error message 766 * @val: a string value 767 * 768 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 769 */ 770 static void LIBXML_ATTR_FORMAT(3,0) 771 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 772 const char *msg, const xmlChar * val) 773 { 774 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 775 (ctxt->instate == XML_PARSER_EOF)) 776 return; 777 if (ctxt != NULL) 778 ctxt->errNo = error; 779 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 780 XML_FROM_PARSER, error, XML_ERR_FATAL, 781 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 782 val); 783 if (ctxt != NULL) { 784 ctxt->wellFormed = 0; 785 if (ctxt->recovery == 0) 786 ctxt->disableSAX = 1; 787 } 788 } 789 790 /** 791 * xmlErrMsgStr: 792 * @ctxt: an XML parser context 793 * @error: the error number 794 * @msg: the error message 795 * @val: a string value 796 * 797 * Handle a non fatal parser error 798 */ 799 static void LIBXML_ATTR_FORMAT(3,0) 800 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 801 const char *msg, const xmlChar * val) 802 { 803 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 804 (ctxt->instate == XML_PARSER_EOF)) 805 return; 806 if (ctxt != NULL) 807 ctxt->errNo = error; 808 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 809 XML_FROM_PARSER, error, XML_ERR_ERROR, 810 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 811 val); 812 } 813 814 /** 815 * xmlNsErr: 816 * @ctxt: an XML parser context 817 * @error: the error number 818 * @msg: the message 819 * @info1: extra information string 820 * @info2: extra information string 821 * 822 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 823 */ 824 static void LIBXML_ATTR_FORMAT(3,0) 825 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 826 const char *msg, 827 const xmlChar * info1, const xmlChar * info2, 828 const xmlChar * info3) 829 { 830 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 831 (ctxt->instate == XML_PARSER_EOF)) 832 return; 833 if (ctxt != NULL) 834 ctxt->errNo = error; 835 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 836 XML_ERR_ERROR, NULL, 0, (const char *) info1, 837 (const char *) info2, (const char *) info3, 0, 0, msg, 838 info1, info2, info3); 839 if (ctxt != NULL) 840 ctxt->nsWellFormed = 0; 841 } 842 843 /** 844 * xmlNsWarn 845 * @ctxt: an XML parser context 846 * @error: the error number 847 * @msg: the message 848 * @info1: extra information string 849 * @info2: extra information string 850 * 851 * Handle a namespace warning error 852 */ 853 static void LIBXML_ATTR_FORMAT(3,0) 854 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 855 const char *msg, 856 const xmlChar * info1, const xmlChar * info2, 857 const xmlChar * info3) 858 { 859 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 860 (ctxt->instate == XML_PARSER_EOF)) 861 return; 862 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 863 XML_ERR_WARNING, NULL, 0, (const char *) info1, 864 (const char *) info2, (const char *) info3, 0, 0, msg, 865 info1, info2, info3); 866 } 867 868 /************************************************************************ 869 * * 870 * Library wide options * 871 * * 872 ************************************************************************/ 873 874 /** 875 * xmlHasFeature: 876 * @feature: the feature to be examined 877 * 878 * Examines if the library has been compiled with a given feature. 879 * 880 * Returns a non-zero value if the feature exist, otherwise zero. 881 * Returns zero (0) if the feature does not exist or an unknown 882 * unknown feature is requested, non-zero otherwise. 
883 */ 884 int 885 xmlHasFeature(xmlFeature feature) 886 { 887 switch (feature) { 888 case XML_WITH_THREAD: 889 #ifdef LIBXML_THREAD_ENABLED 890 return(1); 891 #else 892 return(0); 893 #endif 894 case XML_WITH_TREE: 895 #ifdef LIBXML_TREE_ENABLED 896 return(1); 897 #else 898 return(0); 899 #endif 900 case XML_WITH_OUTPUT: 901 #ifdef LIBXML_OUTPUT_ENABLED 902 return(1); 903 #else 904 return(0); 905 #endif 906 case XML_WITH_PUSH: 907 #ifdef LIBXML_PUSH_ENABLED 908 return(1); 909 #else 910 return(0); 911 #endif 912 case XML_WITH_READER: 913 #ifdef LIBXML_READER_ENABLED 914 return(1); 915 #else 916 return(0); 917 #endif 918 case XML_WITH_PATTERN: 919 #ifdef LIBXML_PATTERN_ENABLED 920 return(1); 921 #else 922 return(0); 923 #endif 924 case XML_WITH_WRITER: 925 #ifdef LIBXML_WRITER_ENABLED 926 return(1); 927 #else 928 return(0); 929 #endif 930 case XML_WITH_SAX1: 931 #ifdef LIBXML_SAX1_ENABLED 932 return(1); 933 #else 934 return(0); 935 #endif 936 case XML_WITH_FTP: 937 #ifdef LIBXML_FTP_ENABLED 938 return(1); 939 #else 940 return(0); 941 #endif 942 case XML_WITH_HTTP: 943 #ifdef LIBXML_HTTP_ENABLED 944 return(1); 945 #else 946 return(0); 947 #endif 948 case XML_WITH_VALID: 949 #ifdef LIBXML_VALID_ENABLED 950 return(1); 951 #else 952 return(0); 953 #endif 954 case XML_WITH_HTML: 955 #ifdef LIBXML_HTML_ENABLED 956 return(1); 957 #else 958 return(0); 959 #endif 960 case XML_WITH_LEGACY: 961 #ifdef LIBXML_LEGACY_ENABLED 962 return(1); 963 #else 964 return(0); 965 #endif 966 case XML_WITH_C14N: 967 #ifdef LIBXML_C14N_ENABLED 968 return(1); 969 #else 970 return(0); 971 #endif 972 case XML_WITH_CATALOG: 973 #ifdef LIBXML_CATALOG_ENABLED 974 return(1); 975 #else 976 return(0); 977 #endif 978 case XML_WITH_XPATH: 979 #ifdef LIBXML_XPATH_ENABLED 980 return(1); 981 #else 982 return(0); 983 #endif 984 case XML_WITH_XPTR: 985 #ifdef LIBXML_XPTR_ENABLED 986 return(1); 987 #else 988 return(0); 989 #endif 990 case XML_WITH_XINCLUDE: 991 #ifdef LIBXML_XINCLUDE_ENABLED 992 return(1); 993 
#else 994 return(0); 995 #endif 996 case XML_WITH_ICONV: 997 #ifdef LIBXML_ICONV_ENABLED 998 return(1); 999 #else 1000 return(0); 1001 #endif 1002 case XML_WITH_ISO8859X: 1003 #ifdef LIBXML_ISO8859X_ENABLED 1004 return(1); 1005 #else 1006 return(0); 1007 #endif 1008 case XML_WITH_UNICODE: 1009 #ifdef LIBXML_UNICODE_ENABLED 1010 return(1); 1011 #else 1012 return(0); 1013 #endif 1014 case XML_WITH_REGEXP: 1015 #ifdef LIBXML_REGEXP_ENABLED 1016 return(1); 1017 #else 1018 return(0); 1019 #endif 1020 case XML_WITH_AUTOMATA: 1021 #ifdef LIBXML_AUTOMATA_ENABLED 1022 return(1); 1023 #else 1024 return(0); 1025 #endif 1026 case XML_WITH_EXPR: 1027 #ifdef LIBXML_EXPR_ENABLED 1028 return(1); 1029 #else 1030 return(0); 1031 #endif 1032 case XML_WITH_SCHEMAS: 1033 #ifdef LIBXML_SCHEMAS_ENABLED 1034 return(1); 1035 #else 1036 return(0); 1037 #endif 1038 case XML_WITH_SCHEMATRON: 1039 #ifdef LIBXML_SCHEMATRON_ENABLED 1040 return(1); 1041 #else 1042 return(0); 1043 #endif 1044 case XML_WITH_MODULES: 1045 #ifdef LIBXML_MODULES_ENABLED 1046 return(1); 1047 #else 1048 return(0); 1049 #endif 1050 case XML_WITH_DEBUG: 1051 #ifdef LIBXML_DEBUG_ENABLED 1052 return(1); 1053 #else 1054 return(0); 1055 #endif 1056 case XML_WITH_DEBUG_MEM: 1057 #ifdef DEBUG_MEMORY_LOCATION 1058 return(1); 1059 #else 1060 return(0); 1061 #endif 1062 case XML_WITH_DEBUG_RUN: 1063 #ifdef LIBXML_DEBUG_RUNTIME 1064 return(1); 1065 #else 1066 return(0); 1067 #endif 1068 case XML_WITH_ZLIB: 1069 #ifdef LIBXML_ZLIB_ENABLED 1070 return(1); 1071 #else 1072 return(0); 1073 #endif 1074 case XML_WITH_LZMA: 1075 #ifdef LIBXML_LZMA_ENABLED 1076 return(1); 1077 #else 1078 return(0); 1079 #endif 1080 case XML_WITH_ICU: 1081 #ifdef LIBXML_ICU_ENABLED 1082 return(1); 1083 #else 1084 return(0); 1085 #endif 1086 default: 1087 break; 1088 } 1089 return(0); 1090 } 1091 1092 /************************************************************************ 1093 * * 1094 * SAX2 defaulted attributes handling * 1095 * * 1096 
************************************************************************/ 1097 1098 /** 1099 * xmlDetectSAX2: 1100 * @ctxt: an XML parser context 1101 * 1102 * Do the SAX2 detection and specific initialization 1103 */ 1104 static void 1105 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1106 xmlSAXHandlerPtr sax; 1107 if (ctxt == NULL) return; 1108 sax = ctxt->sax; 1109 #ifdef LIBXML_SAX1_ENABLED 1110 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) && 1111 ((sax->startElementNs != NULL) || 1112 (sax->endElementNs != NULL) || 1113 ((sax->startElement == NULL) && (sax->endElement == NULL)))) 1114 ctxt->sax2 = 1; 1115 #else 1116 ctxt->sax2 = 1; 1117 #endif /* LIBXML_SAX1_ENABLED */ 1118 1119 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1120 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1121 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1122 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1123 (ctxt->str_xml_ns == NULL)) { 1124 xmlErrMemory(ctxt, NULL); 1125 } 1126 } 1127 1128 typedef struct _xmlDefAttrs xmlDefAttrs; 1129 typedef xmlDefAttrs *xmlDefAttrsPtr; 1130 struct _xmlDefAttrs { 1131 int nbAttrs; /* number of defaulted attributes on that element */ 1132 int maxAttrs; /* the size of the array */ 1133 #if __STDC_VERSION__ >= 199901L 1134 /* Using a C99 flexible array member avoids UBSan errors. */ 1135 const xmlChar *values[]; /* array of localname/prefix/values/external */ 1136 #else 1137 const xmlChar *values[5]; 1138 #endif 1139 }; 1140 1141 /** 1142 * xmlAttrNormalizeSpace: 1143 * @src: the source string 1144 * @dst: the target string 1145 * 1146 * Normalize the space in non CDATA attribute values: 1147 * If the attribute type is not CDATA, then the XML processor MUST further 1148 * process the normalized attribute value by discarding any leading and 1149 * trailing space (#x20) characters, and by replacing sequences of space 1150 * (#x20) characters by a single space (#x20) character. 
1151 * Note that the size of dst need to be at least src, and if one doesn't need 1152 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1153 * passing src as dst is just fine. 1154 * 1155 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1156 * is needed. 1157 */ 1158 static xmlChar * 1159 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1160 { 1161 if ((src == NULL) || (dst == NULL)) 1162 return(NULL); 1163 1164 while (*src == 0x20) src++; 1165 while (*src != 0) { 1166 if (*src == 0x20) { 1167 while (*src == 0x20) src++; 1168 if (*src != 0) 1169 *dst++ = 0x20; 1170 } else { 1171 *dst++ = *src++; 1172 } 1173 } 1174 *dst = 0; 1175 if (dst == src) 1176 return(NULL); 1177 return(dst); 1178 } 1179 1180 /** 1181 * xmlAttrNormalizeSpace2: 1182 * @src: the source string 1183 * 1184 * Normalize the space in non CDATA attribute values, a slightly more complex 1185 * front end to avoid allocation problems when running on attribute values 1186 * coming from the input. 1187 * 1188 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1189 * is needed. 
1190 */ 1191 static const xmlChar * 1192 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1193 { 1194 int i; 1195 int remove_head = 0; 1196 int need_realloc = 0; 1197 const xmlChar *cur; 1198 1199 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1200 return(NULL); 1201 i = *len; 1202 if (i <= 0) 1203 return(NULL); 1204 1205 cur = src; 1206 while (*cur == 0x20) { 1207 cur++; 1208 remove_head++; 1209 } 1210 while (*cur != 0) { 1211 if (*cur == 0x20) { 1212 cur++; 1213 if ((*cur == 0x20) || (*cur == 0)) { 1214 need_realloc = 1; 1215 break; 1216 } 1217 } else 1218 cur++; 1219 } 1220 if (need_realloc) { 1221 xmlChar *ret; 1222 1223 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1224 if (ret == NULL) { 1225 xmlErrMemory(ctxt, NULL); 1226 return(NULL); 1227 } 1228 xmlAttrNormalizeSpace(ret, ret); 1229 *len = (int) strlen((const char *)ret); 1230 return(ret); 1231 } else if (remove_head) { 1232 *len -= remove_head; 1233 memmove(src, src + remove_head, 1 + *len); 1234 return(src); 1235 } 1236 return(NULL); 1237 } 1238 1239 /** 1240 * xmlAddDefAttrs: 1241 * @ctxt: an XML parser context 1242 * @fullname: the element fullname 1243 * @fullattr: the attribute fullname 1244 * @value: the attribute value 1245 * 1246 * Add a defaulted attribute for an element 1247 */ 1248 static void 1249 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1250 const xmlChar *fullname, 1251 const xmlChar *fullattr, 1252 const xmlChar *value) { 1253 xmlDefAttrsPtr defaults; 1254 int len; 1255 const xmlChar *name; 1256 const xmlChar *prefix; 1257 1258 /* 1259 * Allows to detect attribute redefinitions 1260 */ 1261 if (ctxt->attsSpecial != NULL) { 1262 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1263 return; 1264 } 1265 1266 if (ctxt->attsDefault == NULL) { 1267 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1268 if (ctxt->attsDefault == NULL) 1269 goto mem_error; 1270 } 1271 1272 /* 1273 * split the element name into prefix:localname , the string 
found 1274 * are within the DTD and then not associated to namespace names. 1275 */ 1276 name = xmlSplitQName3(fullname, &len); 1277 if (name == NULL) { 1278 name = xmlDictLookup(ctxt->dict, fullname, -1); 1279 prefix = NULL; 1280 } else { 1281 name = xmlDictLookup(ctxt->dict, name, -1); 1282 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1283 } 1284 1285 /* 1286 * make sure there is some storage 1287 */ 1288 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1289 if (defaults == NULL) { 1290 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1291 (4 * 5) * sizeof(const xmlChar *)); 1292 if (defaults == NULL) 1293 goto mem_error; 1294 defaults->nbAttrs = 0; 1295 defaults->maxAttrs = 4; 1296 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1297 defaults, NULL) < 0) { 1298 xmlFree(defaults); 1299 goto mem_error; 1300 } 1301 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1302 xmlDefAttrsPtr temp; 1303 1304 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1305 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1306 if (temp == NULL) 1307 goto mem_error; 1308 defaults = temp; 1309 defaults->maxAttrs *= 2; 1310 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1311 defaults, NULL) < 0) { 1312 xmlFree(defaults); 1313 goto mem_error; 1314 } 1315 } 1316 1317 /* 1318 * Split the element name into prefix:localname , the string found 1319 * are within the DTD and hen not associated to namespace names. 
1320 */ 1321 name = xmlSplitQName3(fullattr, &len); 1322 if (name == NULL) { 1323 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1324 prefix = NULL; 1325 } else { 1326 name = xmlDictLookup(ctxt->dict, name, -1); 1327 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1328 } 1329 1330 defaults->values[5 * defaults->nbAttrs] = name; 1331 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1332 /* intern the string and precompute the end */ 1333 len = xmlStrlen(value); 1334 value = xmlDictLookup(ctxt->dict, value, len); 1335 defaults->values[5 * defaults->nbAttrs + 2] = value; 1336 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1337 if (ctxt->external) 1338 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1339 else 1340 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1341 defaults->nbAttrs++; 1342 1343 return; 1344 1345 mem_error: 1346 xmlErrMemory(ctxt, NULL); 1347 return; 1348 } 1349 1350 /** 1351 * xmlAddSpecialAttr: 1352 * @ctxt: an XML parser context 1353 * @fullname: the element fullname 1354 * @fullattr: the attribute fullname 1355 * @type: the attribute type 1356 * 1357 * Register this attribute type 1358 */ 1359 static void 1360 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1361 const xmlChar *fullname, 1362 const xmlChar *fullattr, 1363 int type) 1364 { 1365 if (ctxt->attsSpecial == NULL) { 1366 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1367 if (ctxt->attsSpecial == NULL) 1368 goto mem_error; 1369 } 1370 1371 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1372 return; 1373 1374 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1375 (void *) (ptrdiff_t) type); 1376 return; 1377 1378 mem_error: 1379 xmlErrMemory(ctxt, NULL); 1380 return; 1381 } 1382 1383 /** 1384 * xmlCleanSpecialAttrCallback: 1385 * 1386 * Removes CDATA attributes from the special attribute table 1387 */ 1388 static void 1389 xmlCleanSpecialAttrCallback(void *payload, void *data, 1390 const xmlChar *fullname, const xmlChar 
*fullattr, 1391 const xmlChar *unused ATTRIBUTE_UNUSED) { 1392 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1393 1394 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { 1395 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1396 } 1397 } 1398 1399 /** 1400 * xmlCleanSpecialAttr: 1401 * @ctxt: an XML parser context 1402 * 1403 * Trim the list of attributes defined to remove all those of type 1404 * CDATA as they are not special. This call should be done when finishing 1405 * to parse the DTD and before starting to parse the document root. 1406 */ 1407 static void 1408 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1409 { 1410 if (ctxt->attsSpecial == NULL) 1411 return; 1412 1413 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1414 1415 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1416 xmlHashFree(ctxt->attsSpecial, NULL); 1417 ctxt->attsSpecial = NULL; 1418 } 1419 return; 1420 } 1421 1422 /** 1423 * xmlCheckLanguageID: 1424 * @lang: pointer to the string value 1425 * 1426 * Checks that the value conforms to the LanguageID production: 1427 * 1428 * NOTE: this is somewhat deprecated, those productions were removed from 1429 * the XML Second edition. 
1430 * 1431 * [33] LanguageID ::= Langcode ('-' Subcode)* 1432 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1433 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1434 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1435 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1436 * [38] Subcode ::= ([a-z] | [A-Z])+ 1437 * 1438 * The current REC reference the successors of RFC 1766, currently 5646 1439 * 1440 * http://www.rfc-editor.org/rfc/rfc5646.txt 1441 * langtag = language 1442 * ["-" script] 1443 * ["-" region] 1444 * *("-" variant) 1445 * *("-" extension) 1446 * ["-" privateuse] 1447 * language = 2*3ALPHA ; shortest ISO 639 code 1448 * ["-" extlang] ; sometimes followed by 1449 * ; extended language subtags 1450 * / 4ALPHA ; or reserved for future use 1451 * / 5*8ALPHA ; or registered language subtag 1452 * 1453 * extlang = 3ALPHA ; selected ISO 639 codes 1454 * *2("-" 3ALPHA) ; permanently reserved 1455 * 1456 * script = 4ALPHA ; ISO 15924 code 1457 * 1458 * region = 2ALPHA ; ISO 3166-1 code 1459 * / 3DIGIT ; UN M.49 code 1460 * 1461 * variant = 5*8alphanum ; registered variants 1462 * / (DIGIT 3alphanum) 1463 * 1464 * extension = singleton 1*("-" (2*8alphanum)) 1465 * 1466 * ; Single alphanumerics 1467 * ; "x" reserved for private use 1468 * singleton = DIGIT ; 0 - 9 1469 * / %x41-57 ; A - W 1470 * / %x59-5A ; Y - Z 1471 * / %x61-77 ; a - w 1472 * / %x79-7A ; y - z 1473 * 1474 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1475 * The parser below doesn't try to cope with extension or privateuse 1476 * that could be added but that's not interoperable anyway 1477 * 1478 * Returns 1 if correct 0 otherwise 1479 **/ 1480 int 1481 xmlCheckLanguageID(const xmlChar * lang) 1482 { 1483 const xmlChar *cur = lang, *nxt; 1484 1485 if (cur == NULL) 1486 return (0); 1487 if (((cur[0] == 'i') && (cur[1] == '-')) || 1488 ((cur[0] == 'I') && (cur[1] == '-')) || 1489 ((cur[0] == 'x') && (cur[1] == '-')) || 1490 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1491 /* 1492 * Still allow IANA code and user code which were coming 1493 * from the previous version of the XML-1.0 specification 1494 * it's deprecated but we should not fail 1495 */ 1496 cur += 2; 1497 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1498 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1499 cur++; 1500 return(cur[0] == 0); 1501 } 1502 nxt = cur; 1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1505 nxt++; 1506 if (nxt - cur >= 4) { 1507 /* 1508 * Reserved 1509 */ 1510 if ((nxt - cur > 8) || (nxt[0] != 0)) 1511 return(0); 1512 return(1); 1513 } 1514 if (nxt - cur < 2) 1515 return(0); 1516 /* we got an ISO 639 code */ 1517 if (nxt[0] == 0) 1518 return(1); 1519 if (nxt[0] != '-') 1520 return(0); 1521 1522 nxt++; 1523 cur = nxt; 1524 /* now we can have extlang or script or region or variant */ 1525 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1526 goto region_m49; 1527 1528 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1529 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1530 nxt++; 1531 if (nxt - cur == 4) 1532 goto script; 1533 if (nxt - cur == 2) 1534 goto region; 1535 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1536 goto variant; 1537 if (nxt - cur != 3) 1538 return(0); 1539 /* we parsed an extlang */ 1540 if (nxt[0] == 0) 1541 return(1); 1542 if (nxt[0] != '-') 1543 return(0); 1544 1545 nxt++; 1546 cur = nxt; 1547 /* now we can have script or region or variant */ 1548 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1549 goto region_m49; 1550 1551 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1552 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1553 nxt++; 1554 if (nxt - cur == 2) 1555 goto region; 1556 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1557 goto variant; 1558 if (nxt - cur != 4) 1559 return(0); 1560 /* we parsed a script */ 1561 script: 1562 if (nxt[0] == 0) 1563 return(1); 1564 if (nxt[0] != '-') 1565 return(0); 1566 1567 nxt++; 1568 cur = nxt; 1569 /* now we can have region or variant */ 1570 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1571 goto region_m49; 1572 1573 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1574 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1575 nxt++; 1576 1577 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1578 goto variant; 1579 if (nxt - cur != 2) 1580 return(0); 1581 /* we parsed a region */ 1582 region: 1583 if (nxt[0] == 0) 1584 return(1); 1585 if (nxt[0] != '-') 1586 return(0); 1587 1588 nxt++; 1589 cur = nxt; 1590 /* now we can just have a variant */ 1591 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1592 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1593 nxt++; 1594 1595 if ((nxt - cur < 5) || (nxt - cur > 8)) 1596 return(0); 1597 1598 /* we parsed a variant */ 1599 variant: 1600 if (nxt[0] == 0) 1601 return(1); 1602 if (nxt[0] != '-') 1603 return(0); 1604 /* extensions and private use subtags not checked */ 1605 return (1); 1606 1607 region_m49: 1608 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1609 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1610 nxt += 3; 1611 goto region; 1612 } 1613 return(0); 1614 } 1615 1616 /************************************************************************ 1617 * * 1618 * Parser stacks related functions and macros * 1619 * * 1620 ************************************************************************/ 1621 1622 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1623 const xmlChar ** str); 1624 1625 #ifdef SAX2 1626 /** 1627 * nsPush: 1628 * @ctxt: an XML parser context 1629 * @prefix: the namespace prefix or NULL 1630 * @URL: the namespace name 1631 * 1632 * Pushes a new parser namespace on top of the ns stack 1633 * 1634 * Returns -1 in case of error, -2 if the namespace should be discarded 1635 * and the index in the stack otherwise. 
1636 */ 1637 static int 1638 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1639 { 1640 if (ctxt->options & XML_PARSE_NSCLEAN) { 1641 int i; 1642 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1643 if (ctxt->nsTab[i] == prefix) { 1644 /* in scope */ 1645 if (ctxt->nsTab[i + 1] == URL) 1646 return(-2); 1647 /* out of scope keep it */ 1648 break; 1649 } 1650 } 1651 } 1652 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1653 ctxt->nsMax = 10; 1654 ctxt->nsNr = 0; 1655 ctxt->nsTab = (const xmlChar **) 1656 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1657 if (ctxt->nsTab == NULL) { 1658 xmlErrMemory(ctxt, NULL); 1659 ctxt->nsMax = 0; 1660 return (-1); 1661 } 1662 } else if (ctxt->nsNr >= ctxt->nsMax) { 1663 const xmlChar ** tmp; 1664 ctxt->nsMax *= 2; 1665 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1666 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1667 if (tmp == NULL) { 1668 xmlErrMemory(ctxt, NULL); 1669 ctxt->nsMax /= 2; 1670 return (-1); 1671 } 1672 ctxt->nsTab = tmp; 1673 } 1674 ctxt->nsTab[ctxt->nsNr++] = prefix; 1675 ctxt->nsTab[ctxt->nsNr++] = URL; 1676 return (ctxt->nsNr); 1677 } 1678 /** 1679 * nsPop: 1680 * @ctxt: an XML parser context 1681 * @nr: the number to pop 1682 * 1683 * Pops the top @nr parser prefix/namespace from the ns stack 1684 * 1685 * Returns the number of namespaces removed 1686 */ 1687 static int 1688 nsPop(xmlParserCtxtPtr ctxt, int nr) 1689 { 1690 int i; 1691 1692 if (ctxt->nsTab == NULL) return(0); 1693 if (ctxt->nsNr < nr) { 1694 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1695 nr = ctxt->nsNr; 1696 } 1697 if (ctxt->nsNr <= 0) 1698 return (0); 1699 1700 for (i = 0;i < nr;i++) { 1701 ctxt->nsNr--; 1702 ctxt->nsTab[ctxt->nsNr] = NULL; 1703 } 1704 return(nr); 1705 } 1706 #endif 1707 1708 static int 1709 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1710 const xmlChar **atts; 1711 int *attallocs; 1712 int maxatts; 1713 1714 if (ctxt->atts == NULL) { 1715 maxatts = 55; /* allow for 
10 attrs by default */ 1716 atts = (const xmlChar **) 1717 xmlMalloc(maxatts * sizeof(xmlChar *)); 1718 if (atts == NULL) goto mem_error; 1719 ctxt->atts = atts; 1720 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1721 if (attallocs == NULL) goto mem_error; 1722 ctxt->attallocs = attallocs; 1723 ctxt->maxatts = maxatts; 1724 } else if (nr + 5 > ctxt->maxatts) { 1725 maxatts = (nr + 5) * 2; 1726 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1727 maxatts * sizeof(const xmlChar *)); 1728 if (atts == NULL) goto mem_error; 1729 ctxt->atts = atts; 1730 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1731 (maxatts / 5) * sizeof(int)); 1732 if (attallocs == NULL) goto mem_error; 1733 ctxt->attallocs = attallocs; 1734 ctxt->maxatts = maxatts; 1735 } 1736 return(ctxt->maxatts); 1737 mem_error: 1738 xmlErrMemory(ctxt, NULL); 1739 return(-1); 1740 } 1741 1742 /** 1743 * inputPush: 1744 * @ctxt: an XML parser context 1745 * @value: the parser input 1746 * 1747 * Pushes a new parser input on top of the input stack 1748 * 1749 * Returns -1 in case of error, the index in the stack otherwise 1750 */ 1751 int 1752 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1753 { 1754 if ((ctxt == NULL) || (value == NULL)) 1755 return(-1); 1756 if (ctxt->inputNr >= ctxt->inputMax) { 1757 ctxt->inputMax *= 2; 1758 ctxt->inputTab = 1759 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1760 ctxt->inputMax * 1761 sizeof(ctxt->inputTab[0])); 1762 if (ctxt->inputTab == NULL) { 1763 xmlErrMemory(ctxt, NULL); 1764 xmlFreeInputStream(value); 1765 ctxt->inputMax /= 2; 1766 value = NULL; 1767 return (-1); 1768 } 1769 } 1770 ctxt->inputTab[ctxt->inputNr] = value; 1771 ctxt->input = value; 1772 return (ctxt->inputNr++); 1773 } 1774 /** 1775 * inputPop: 1776 * @ctxt: an XML parser context 1777 * 1778 * Pops the top parser input from the input stack 1779 * 1780 * Returns the input just removed 1781 */ 1782 xmlParserInputPtr 1783 inputPop(xmlParserCtxtPtr ctxt) 1784 
{ 1785 xmlParserInputPtr ret; 1786 1787 if (ctxt == NULL) 1788 return(NULL); 1789 if (ctxt->inputNr <= 0) 1790 return (NULL); 1791 ctxt->inputNr--; 1792 if (ctxt->inputNr > 0) 1793 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1794 else 1795 ctxt->input = NULL; 1796 ret = ctxt->inputTab[ctxt->inputNr]; 1797 ctxt->inputTab[ctxt->inputNr] = NULL; 1798 return (ret); 1799 } 1800 /** 1801 * nodePush: 1802 * @ctxt: an XML parser context 1803 * @value: the element node 1804 * 1805 * Pushes a new element node on top of the node stack 1806 * 1807 * Returns -1 in case of error, the index in the stack otherwise 1808 */ 1809 int 1810 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1811 { 1812 if (ctxt == NULL) return(0); 1813 if (ctxt->nodeNr >= ctxt->nodeMax) { 1814 xmlNodePtr *tmp; 1815 1816 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1817 ctxt->nodeMax * 2 * 1818 sizeof(ctxt->nodeTab[0])); 1819 if (tmp == NULL) { 1820 xmlErrMemory(ctxt, NULL); 1821 return (-1); 1822 } 1823 ctxt->nodeTab = tmp; 1824 ctxt->nodeMax *= 2; 1825 } 1826 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1827 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1828 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1829 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1830 xmlParserMaxDepth); 1831 xmlHaltParser(ctxt); 1832 return(-1); 1833 } 1834 ctxt->nodeTab[ctxt->nodeNr] = value; 1835 ctxt->node = value; 1836 return (ctxt->nodeNr++); 1837 } 1838 1839 /** 1840 * nodePop: 1841 * @ctxt: an XML parser context 1842 * 1843 * Pops the top element node from the node stack 1844 * 1845 * Returns the node just removed 1846 */ 1847 xmlNodePtr 1848 nodePop(xmlParserCtxtPtr ctxt) 1849 { 1850 xmlNodePtr ret; 1851 1852 if (ctxt == NULL) return(NULL); 1853 if (ctxt->nodeNr <= 0) 1854 return (NULL); 1855 ctxt->nodeNr--; 1856 if (ctxt->nodeNr > 0) 1857 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1858 else 1859 ctxt->node = NULL; 1860 ret = ctxt->nodeTab[ctxt->nodeNr]; 1861 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1862 return (ret); 1863 } 1864 1865 /** 1866 * nameNsPush: 1867 * @ctxt: an XML parser context 1868 * @value: the element name 1869 * @prefix: the element prefix 1870 * @URI: the element namespace name 1871 * @line: the current line number for error messages 1872 * @nsNr: the number of namespaces pushed on the namespace table 1873 * 1874 * Pushes a new element name/prefix/URL on top of the name stack 1875 * 1876 * Returns -1 in case of error, the index in the stack otherwise 1877 */ 1878 static int 1879 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1880 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr) 1881 { 1882 xmlStartTag *tag; 1883 1884 if (ctxt->nameNr >= ctxt->nameMax) { 1885 const xmlChar * *tmp; 1886 xmlStartTag *tmp2; 1887 ctxt->nameMax *= 2; 1888 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1889 ctxt->nameMax * 1890 sizeof(ctxt->nameTab[0])); 1891 if (tmp == NULL) { 1892 ctxt->nameMax /= 2; 1893 goto mem_error; 1894 } 1895 ctxt->nameTab = tmp; 1896 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab, 1897 ctxt->nameMax * 1898 sizeof(ctxt->pushTab[0])); 1899 if (tmp2 == NULL) { 1900 ctxt->nameMax /= 2; 1901 goto mem_error; 1902 } 1903 ctxt->pushTab = tmp2; 1904 } else if (ctxt->pushTab == NULL) { 1905 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax * 1906 sizeof(ctxt->pushTab[0])); 1907 if (ctxt->pushTab == NULL) 1908 goto mem_error; 1909 } 1910 ctxt->nameTab[ctxt->nameNr] = value; 1911 ctxt->name = value; 1912 tag = &ctxt->pushTab[ctxt->nameNr]; 1913 tag->prefix = prefix; 1914 tag->URI = URI; 1915 tag->line = line; 1916 tag->nsNr = nsNr; 1917 return (ctxt->nameNr++); 1918 mem_error: 1919 xmlErrMemory(ctxt, NULL); 1920 return (-1); 1921 } 1922 #ifdef LIBXML_PUSH_ENABLED 1923 /** 1924 * nameNsPop: 1925 * @ctxt: an XML parser context 1926 * 1927 * Pops the top element/prefix/URI name from the name stack 1928 * 1929 * Returns the name just removed 1930 */ 1931 
static const xmlChar * 1932 nameNsPop(xmlParserCtxtPtr ctxt) 1933 { 1934 const xmlChar *ret; 1935 1936 if (ctxt->nameNr <= 0) 1937 return (NULL); 1938 ctxt->nameNr--; 1939 if (ctxt->nameNr > 0) 1940 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1941 else 1942 ctxt->name = NULL; 1943 ret = ctxt->nameTab[ctxt->nameNr]; 1944 ctxt->nameTab[ctxt->nameNr] = NULL; 1945 return (ret); 1946 } 1947 #endif /* LIBXML_PUSH_ENABLED */ 1948 1949 /** 1950 * namePush: 1951 * @ctxt: an XML parser context 1952 * @value: the element name 1953 * 1954 * Pushes a new element name on top of the name stack 1955 * 1956 * Returns -1 in case of error, the index in the stack otherwise 1957 */ 1958 int 1959 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1960 { 1961 if (ctxt == NULL) return (-1); 1962 1963 if (ctxt->nameNr >= ctxt->nameMax) { 1964 const xmlChar * *tmp; 1965 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1966 ctxt->nameMax * 2 * 1967 sizeof(ctxt->nameTab[0])); 1968 if (tmp == NULL) { 1969 goto mem_error; 1970 } 1971 ctxt->nameTab = tmp; 1972 ctxt->nameMax *= 2; 1973 } 1974 ctxt->nameTab[ctxt->nameNr] = value; 1975 ctxt->name = value; 1976 return (ctxt->nameNr++); 1977 mem_error: 1978 xmlErrMemory(ctxt, NULL); 1979 return (-1); 1980 } 1981 /** 1982 * namePop: 1983 * @ctxt: an XML parser context 1984 * 1985 * Pops the top element name from the name stack 1986 * 1987 * Returns the name just removed 1988 */ 1989 const xmlChar * 1990 namePop(xmlParserCtxtPtr ctxt) 1991 { 1992 const xmlChar *ret; 1993 1994 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1995 return (NULL); 1996 ctxt->nameNr--; 1997 if (ctxt->nameNr > 0) 1998 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1999 else 2000 ctxt->name = NULL; 2001 ret = ctxt->nameTab[ctxt->nameNr]; 2002 ctxt->nameTab[ctxt->nameNr] = NULL; 2003 return (ret); 2004 } 2005 2006 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 2007 if (ctxt->spaceNr >= ctxt->spaceMax) { 2008 int *tmp; 2009 2010 ctxt->spaceMax *= 2; 2011 
tmp = (int *) xmlRealloc(ctxt->spaceTab, 2012 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 2013 if (tmp == NULL) { 2014 xmlErrMemory(ctxt, NULL); 2015 ctxt->spaceMax /=2; 2016 return(-1); 2017 } 2018 ctxt->spaceTab = tmp; 2019 } 2020 ctxt->spaceTab[ctxt->spaceNr] = val; 2021 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 2022 return(ctxt->spaceNr++); 2023 } 2024 2025 static int spacePop(xmlParserCtxtPtr ctxt) { 2026 int ret; 2027 if (ctxt->spaceNr <= 0) return(0); 2028 ctxt->spaceNr--; 2029 if (ctxt->spaceNr > 0) 2030 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 2031 else 2032 ctxt->space = &ctxt->spaceTab[0]; 2033 ret = ctxt->spaceTab[ctxt->spaceNr]; 2034 ctxt->spaceTab[ctxt->spaceNr] = -1; 2035 return(ret); 2036 } 2037 2038 /* 2039 * Macros for accessing the content. Those should be used only by the parser, 2040 * and not exported. 2041 * 2042 * Dirty macros, i.e. one often need to make assumption on the context to 2043 * use them 2044 * 2045 * CUR_PTR return the current pointer to the xmlChar to be parsed. 2046 * To be used with extreme caution since operations consuming 2047 * characters may move the input buffer to a different location ! 2048 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 2049 * This should be used internally by the parser 2050 * only to compare to ASCII values otherwise it would break when 2051 * running with UTF-8 encoding. 2052 * RAW same as CUR but in the input buffer, bypass any token 2053 * extraction that may have been done 2054 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 2055 * to compare on ASCII based substring. 2056 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 2057 * strings without newlines within the parser. 2058 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2059 * defined char within the parser. 
2060 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2061 * 2062 * NEXT Skip to the next character, this does the proper decoding 2063 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2064 * NEXTL(l) Skip the current unicode character of l xmlChars long. 2065 * CUR_CHAR(l) returns the current unicode character (int), set l 2066 * to the number of xmlChars used for the encoding [0-5]. 2067 * CUR_SCHAR same but operate on a string instead of the context 2068 * COPY_BUF copy the current unicode char to the target buffer, increment 2069 * the index 2070 * GROW, SHRINK handling of input buffers 2071 */ 2072 2073 #define RAW (*ctxt->input->cur) 2074 #define CUR (*ctxt->input->cur) 2075 #define NXT(val) ctxt->input->cur[(val)] 2076 #define CUR_PTR ctxt->input->cur 2077 #define BASE_PTR ctxt->input->base 2078 2079 #define CMP4( s, c1, c2, c3, c4 ) \ 2080 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2081 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2082 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 2083 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2084 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2085 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2086 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2087 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2088 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2089 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2090 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2091 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2092 ((unsigned char *) s)[ 8 ] == c9 ) 2093 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2094 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2095 ((unsigned char *) s)[ 9 ] == c10 ) 2096 2097 #define SKIP(val) do { \ 2098 ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2099 if 
(*ctxt->input->cur == 0) \ 2100 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2101 } while (0) 2102 2103 #define SKIPL(val) do { \ 2104 int skipl; \ 2105 for(skipl=0; skipl<val; skipl++) { \ 2106 if (*(ctxt->input->cur) == '\n') { \ 2107 ctxt->input->line++; ctxt->input->col = 1; \ 2108 } else ctxt->input->col++; \ 2109 ctxt->input->cur++; \ 2110 } \ 2111 if (*ctxt->input->cur == 0) \ 2112 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2113 } while (0) 2114 2115 #define SHRINK if ((ctxt->progressive == 0) && \ 2116 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2117 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2118 xmlSHRINK (ctxt); 2119 2120 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2121 xmlParserInputShrink(ctxt->input); 2122 if (*ctxt->input->cur == 0) 2123 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2124 } 2125 2126 #define GROW if ((ctxt->progressive == 0) && \ 2127 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2128 xmlGROW (ctxt); 2129 2130 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2131 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur; 2132 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base; 2133 2134 if (((curEnd > XML_MAX_LOOKUP_LIMIT) || 2135 (curBase > XML_MAX_LOOKUP_LIMIT)) && 2136 ((ctxt->input->buf) && 2137 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) && 2138 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2139 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2140 xmlHaltParser(ctxt); 2141 return; 2142 } 2143 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2144 if ((ctxt->input->cur > ctxt->input->end) || 2145 (ctxt->input->cur < ctxt->input->base)) { 2146 xmlHaltParser(ctxt); 2147 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); 2148 return; 2149 } 2150 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0)) 2151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2152 } 2153 2154 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2155 2156 #define NEXT 
xmlNextChar(ctxt) 2157 2158 #define NEXT1 { \ 2159 ctxt->input->col++; \ 2160 ctxt->input->cur++; \ 2161 if (*ctxt->input->cur == 0) \ 2162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2163 } 2164 2165 #define NEXTL(l) do { \ 2166 if (*(ctxt->input->cur) == '\n') { \ 2167 ctxt->input->line++; ctxt->input->col = 1; \ 2168 } else ctxt->input->col++; \ 2169 ctxt->input->cur += l; \ 2170 } while (0) 2171 2172 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2173 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2174 2175 #define COPY_BUF(l,b,i,v) \ 2176 if (l == 1) b[i++] = (xmlChar) v; \ 2177 else i += xmlCopyCharMultiByte(&b[i],v) 2178 2179 /** 2180 * xmlSkipBlankChars: 2181 * @ctxt: the XML parser context 2182 * 2183 * skip all blanks character found at that point in the input streams. 2184 * It pops up finished entities in the process if allowable at that point. 2185 * 2186 * Returns the number of space chars skipped 2187 */ 2188 2189 int 2190 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2191 int res = 0; 2192 2193 /* 2194 * It's Okay to use CUR/NEXT here since all the blanks are on 2195 * the ASCII range. 
2196 */ 2197 if (ctxt->instate != XML_PARSER_DTD) { 2198 const xmlChar *cur; 2199 /* 2200 * if we are in the document content, go really fast 2201 */ 2202 cur = ctxt->input->cur; 2203 while (IS_BLANK_CH(*cur)) { 2204 if (*cur == '\n') { 2205 ctxt->input->line++; ctxt->input->col = 1; 2206 } else { 2207 ctxt->input->col++; 2208 } 2209 cur++; 2210 res++; 2211 if (*cur == 0) { 2212 ctxt->input->cur = cur; 2213 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2214 cur = ctxt->input->cur; 2215 } 2216 } 2217 ctxt->input->cur = cur; 2218 } else { 2219 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1)); 2220 2221 while (1) { 2222 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */ 2223 NEXT; 2224 } else if (CUR == '%') { 2225 /* 2226 * Need to handle support of entities branching here 2227 */ 2228 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0)) 2229 break; 2230 xmlParsePEReference(ctxt); 2231 } else if (CUR == 0) { 2232 if (ctxt->inputNr <= 1) 2233 break; 2234 xmlPopInput(ctxt); 2235 } else { 2236 break; 2237 } 2238 2239 /* 2240 * Also increase the counter when entering or exiting a PERef. 2241 * The spec says: "When a parameter-entity reference is recognized 2242 * in the DTD and included, its replacement text MUST be enlarged 2243 * by the attachment of one leading and one following space (#x20) 2244 * character." 2245 */ 2246 res++; 2247 } 2248 } 2249 return(res); 2250 } 2251 2252 /************************************************************************ 2253 * * 2254 * Commodity functions to handle entities * 2255 * * 2256 ************************************************************************/ 2257 2258 /** 2259 * xmlPopInput: 2260 * @ctxt: an XML parser context 2261 * 2262 * xmlPopInput: the current input pointed by ctxt->input came to an end 2263 * pop it and return the next char. 
2264 * 2265 * Returns the current xmlChar in the parser context 2266 */ 2267 xmlChar 2268 xmlPopInput(xmlParserCtxtPtr ctxt) { 2269 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2270 if (xmlParserDebugEntities) 2271 xmlGenericError(xmlGenericErrorContext, 2272 "Popping input %d\n", ctxt->inputNr); 2273 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) && 2274 (ctxt->instate != XML_PARSER_EOF)) 2275 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2276 "Unfinished entity outside the DTD"); 2277 xmlFreeInputStream(inputPop(ctxt)); 2278 if (*ctxt->input->cur == 0) 2279 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2280 return(CUR); 2281 } 2282 2283 /** 2284 * xmlPushInput: 2285 * @ctxt: an XML parser context 2286 * @input: an XML parser input fragment (entity, XML fragment ...). 2287 * 2288 * xmlPushInput: switch to a new input stream which is stacked on top 2289 * of the previous one(s). 2290 * Returns -1 in case of error or the index in the input stack 2291 */ 2292 int 2293 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2294 int ret; 2295 if (input == NULL) return(-1); 2296 2297 if (xmlParserDebugEntities) { 2298 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2299 xmlGenericError(xmlGenericErrorContext, 2300 "%s(%d): ", ctxt->input->filename, 2301 ctxt->input->line); 2302 xmlGenericError(xmlGenericErrorContext, 2303 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2304 } 2305 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2306 (ctxt->inputNr > 1024)) { 2307 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2308 while (ctxt->inputNr > 1) 2309 xmlFreeInputStream(inputPop(ctxt)); 2310 return(-1); 2311 } 2312 ret = inputPush(ctxt, input); 2313 if (ctxt->instate == XML_PARSER_EOF) 2314 return(-1); 2315 GROW; 2316 return(ret); 2317 } 2318 2319 /** 2320 * xmlParseCharRef: 2321 * @ctxt: an XML parser context 2322 * 2323 * parse Reference declarations 2324 * 2325 * [66] CharRef ::= '&#' [0-9]+ ';' | 2326 * '&#x' 
[0-9a-fA-F]+ ';' 2327 * 2328 * [ WFC: Legal Character ] 2329 * Characters referred to using character references must match the 2330 * production for Char. 2331 * 2332 * Returns the value parsed (as an int), 0 in case of error 2333 */ 2334 int 2335 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2336 int val = 0; 2337 int count = 0; 2338 2339 /* 2340 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2341 */ 2342 if ((RAW == '&') && (NXT(1) == '#') && 2343 (NXT(2) == 'x')) { 2344 SKIP(3); 2345 GROW; 2346 while (RAW != ';') { /* loop blocked by count */ 2347 if (count++ > 20) { 2348 count = 0; 2349 GROW; 2350 if (ctxt->instate == XML_PARSER_EOF) 2351 return(0); 2352 } 2353 if ((RAW >= '0') && (RAW <= '9')) 2354 val = val * 16 + (CUR - '0'); 2355 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2356 val = val * 16 + (CUR - 'a') + 10; 2357 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2358 val = val * 16 + (CUR - 'A') + 10; 2359 else { 2360 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2361 val = 0; 2362 break; 2363 } 2364 if (val > 0x110000) 2365 val = 0x110000; 2366 2367 NEXT; 2368 count++; 2369 } 2370 if (RAW == ';') { 2371 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2372 ctxt->input->col++; 2373 ctxt->input->cur++; 2374 } 2375 } else if ((RAW == '&') && (NXT(1) == '#')) { 2376 SKIP(2); 2377 GROW; 2378 while (RAW != ';') { /* loop blocked by count */ 2379 if (count++ > 20) { 2380 count = 0; 2381 GROW; 2382 if (ctxt->instate == XML_PARSER_EOF) 2383 return(0); 2384 } 2385 if ((RAW >= '0') && (RAW <= '9')) 2386 val = val * 10 + (CUR - '0'); 2387 else { 2388 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2389 val = 0; 2390 break; 2391 } 2392 if (val > 0x110000) 2393 val = 0x110000; 2394 2395 NEXT; 2396 count++; 2397 } 2398 if (RAW == ';') { 2399 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2400 ctxt->input->col++; 2401 ctxt->input->cur++; 2402 } 2403 } else { 2404 xmlFatalErr(ctxt, 
XML_ERR_INVALID_CHARREF, NULL); 2405 } 2406 2407 /* 2408 * [ WFC: Legal Character ] 2409 * Characters referred to using character references must match the 2410 * production for Char. 2411 */ 2412 if (val >= 0x110000) { 2413 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2414 "xmlParseCharRef: character reference out of bounds\n", 2415 val); 2416 } else if (IS_CHAR(val)) { 2417 return(val); 2418 } else { 2419 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2420 "xmlParseCharRef: invalid xmlChar value %d\n", 2421 val); 2422 } 2423 return(0); 2424 } 2425 2426 /** 2427 * xmlParseStringCharRef: 2428 * @ctxt: an XML parser context 2429 * @str: a pointer to an index in the string 2430 * 2431 * parse Reference declarations, variant parsing from a string rather 2432 * than an an input flow. 2433 * 2434 * [66] CharRef ::= '&#' [0-9]+ ';' | 2435 * '&#x' [0-9a-fA-F]+ ';' 2436 * 2437 * [ WFC: Legal Character ] 2438 * Characters referred to using character references must match the 2439 * production for Char. 
2440 * 2441 * Returns the value parsed (as an int), 0 in case of error, str will be 2442 * updated to the current value of the index 2443 */ 2444 static int 2445 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2446 const xmlChar *ptr; 2447 xmlChar cur; 2448 int val = 0; 2449 2450 if ((str == NULL) || (*str == NULL)) return(0); 2451 ptr = *str; 2452 cur = *ptr; 2453 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2454 ptr += 3; 2455 cur = *ptr; 2456 while (cur != ';') { /* Non input consuming loop */ 2457 if ((cur >= '0') && (cur <= '9')) 2458 val = val * 16 + (cur - '0'); 2459 else if ((cur >= 'a') && (cur <= 'f')) 2460 val = val * 16 + (cur - 'a') + 10; 2461 else if ((cur >= 'A') && (cur <= 'F')) 2462 val = val * 16 + (cur - 'A') + 10; 2463 else { 2464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2465 val = 0; 2466 break; 2467 } 2468 if (val > 0x110000) 2469 val = 0x110000; 2470 2471 ptr++; 2472 cur = *ptr; 2473 } 2474 if (cur == ';') 2475 ptr++; 2476 } else if ((cur == '&') && (ptr[1] == '#')){ 2477 ptr += 2; 2478 cur = *ptr; 2479 while (cur != ';') { /* Non input consuming loops */ 2480 if ((cur >= '0') && (cur <= '9')) 2481 val = val * 10 + (cur - '0'); 2482 else { 2483 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2484 val = 0; 2485 break; 2486 } 2487 if (val > 0x110000) 2488 val = 0x110000; 2489 2490 ptr++; 2491 cur = *ptr; 2492 } 2493 if (cur == ';') 2494 ptr++; 2495 } else { 2496 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2497 return(0); 2498 } 2499 *str = ptr; 2500 2501 /* 2502 * [ WFC: Legal Character ] 2503 * Characters referred to using character references must match the 2504 * production for Char. 
2505 */ 2506 if (val >= 0x110000) { 2507 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2508 "xmlParseStringCharRef: character reference out of bounds\n", 2509 val); 2510 } else if (IS_CHAR(val)) { 2511 return(val); 2512 } else { 2513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2514 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2515 val); 2516 } 2517 return(0); 2518 } 2519 2520 /** 2521 * xmlParserHandlePEReference: 2522 * @ctxt: the parser context 2523 * 2524 * [69] PEReference ::= '%' Name ';' 2525 * 2526 * [ WFC: No Recursion ] 2527 * A parsed entity must not contain a recursive 2528 * reference to itself, either directly or indirectly. 2529 * 2530 * [ WFC: Entity Declared ] 2531 * In a document without any DTD, a document with only an internal DTD 2532 * subset which contains no parameter entity references, or a document 2533 * with "standalone='yes'", ... ... The declaration of a parameter 2534 * entity must precede any reference to it... 2535 * 2536 * [ VC: Entity Declared ] 2537 * In a document with an external subset or external parameter entities 2538 * with "standalone='no'", ... ... The declaration of a parameter entity 2539 * must precede any reference to it... 2540 * 2541 * [ WFC: In DTD ] 2542 * Parameter-entity references may only appear in the DTD. 2543 * NOTE: misleading but this is handled. 2544 * 2545 * A PEReference may have been detected in the current input stream 2546 * the handling is done accordingly to 2547 * http://www.w3.org/TR/REC-xml#entproc 2548 * i.e. 
2549 * - Included in literal in entity values 2550 * - Included as Parameter Entity reference within DTDs 2551 */ 2552 void 2553 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2554 switch(ctxt->instate) { 2555 case XML_PARSER_CDATA_SECTION: 2556 return; 2557 case XML_PARSER_COMMENT: 2558 return; 2559 case XML_PARSER_START_TAG: 2560 return; 2561 case XML_PARSER_END_TAG: 2562 return; 2563 case XML_PARSER_EOF: 2564 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2565 return; 2566 case XML_PARSER_PROLOG: 2567 case XML_PARSER_START: 2568 case XML_PARSER_MISC: 2569 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2570 return; 2571 case XML_PARSER_ENTITY_DECL: 2572 case XML_PARSER_CONTENT: 2573 case XML_PARSER_ATTRIBUTE_VALUE: 2574 case XML_PARSER_PI: 2575 case XML_PARSER_SYSTEM_LITERAL: 2576 case XML_PARSER_PUBLIC_LITERAL: 2577 /* we just ignore it there */ 2578 return; 2579 case XML_PARSER_EPILOG: 2580 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2581 return; 2582 case XML_PARSER_ENTITY_VALUE: 2583 /* 2584 * NOTE: in the case of entity values, we don't do the 2585 * substitution here since we need the literal 2586 * entity value to be able to save the internal 2587 * subset of the document. 2588 * This will be handled by xmlStringDecodeEntities 2589 */ 2590 return; 2591 case XML_PARSER_DTD: 2592 /* 2593 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2594 * In the internal DTD subset, parameter-entity references 2595 * can occur only where markup declarations can occur, not 2596 * within markup declarations. 2597 * In that case this is handled in xmlParseMarkupDecl 2598 */ 2599 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2600 return; 2601 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2602 return; 2603 break; 2604 case XML_PARSER_IGNORE: 2605 return; 2606 } 2607 2608 xmlParsePEReference(ctxt); 2609 } 2610 2611 /* 2612 * Macro used to grow the current buffer. 
2613 * buffer##_size is expected to be a size_t 2614 * mem_error: is expected to handle memory allocation failures 2615 */ 2616 #define growBuffer(buffer, n) { \ 2617 xmlChar *tmp; \ 2618 size_t new_size = buffer##_size * 2 + n; \ 2619 if (new_size < buffer##_size) goto mem_error; \ 2620 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2621 if (tmp == NULL) goto mem_error; \ 2622 buffer = tmp; \ 2623 buffer##_size = new_size; \ 2624 } 2625 2626 /** 2627 * xmlStringLenDecodeEntities: 2628 * @ctxt: the parser context 2629 * @str: the input string 2630 * @len: the string length 2631 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2632 * @end: an end marker xmlChar, 0 if none 2633 * @end2: an end marker xmlChar, 0 if none 2634 * @end3: an end marker xmlChar, 0 if none 2635 * 2636 * Takes a entity string content and process to do the adequate substitutions. 2637 * 2638 * [67] Reference ::= EntityRef | CharRef 2639 * 2640 * [69] PEReference ::= '%' Name ';' 2641 * 2642 * Returns A newly allocated string with the substitution done. The caller 2643 * must deallocate it ! 2644 */ 2645 xmlChar * 2646 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2647 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2648 xmlChar *buffer = NULL; 2649 size_t buffer_size = 0; 2650 size_t nbchars = 0; 2651 2652 xmlChar *current = NULL; 2653 xmlChar *rep = NULL; 2654 const xmlChar *last; 2655 xmlEntityPtr ent; 2656 int c,l; 2657 2658 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2659 return(NULL); 2660 last = str + len; 2661 2662 if (((ctxt->depth > 40) && 2663 ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2664 (ctxt->depth > 1024)) { 2665 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2666 return(NULL); 2667 } 2668 2669 /* 2670 * allocate a translation buffer. 
2671 */ 2672 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2673 buffer = (xmlChar *) xmlMallocAtomic(buffer_size); 2674 if (buffer == NULL) goto mem_error; 2675 2676 /* 2677 * OK loop until we reach one of the ending char or a size limit. 2678 * we are operating on already parsed values. 2679 */ 2680 if (str < last) 2681 c = CUR_SCHAR(str, l); 2682 else 2683 c = 0; 2684 while ((c != 0) && (c != end) && /* non input consuming loop */ 2685 (c != end2) && (c != end3) && 2686 (ctxt->instate != XML_PARSER_EOF)) { 2687 2688 if (c == 0) break; 2689 if ((c == '&') && (str[1] == '#')) { 2690 int val = xmlParseStringCharRef(ctxt, &str); 2691 if (val == 0) 2692 goto int_error; 2693 COPY_BUF(0,buffer,nbchars,val); 2694 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2695 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2696 } 2697 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2698 if (xmlParserDebugEntities) 2699 xmlGenericError(xmlGenericErrorContext, 2700 "String decoding Entity Reference: %.30s\n", 2701 str); 2702 ent = xmlParseStringEntityRef(ctxt, &str); 2703 xmlParserEntityCheck(ctxt, 0, ent, 0); 2704 if (ent != NULL) 2705 ctxt->nbentities += ent->checked / 2; 2706 if ((ent != NULL) && 2707 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2708 if (ent->content != NULL) { 2709 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2710 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2711 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2712 } 2713 } else { 2714 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2715 "predefined entity has no content\n"); 2716 goto int_error; 2717 } 2718 } else if ((ent != NULL) && (ent->content != NULL)) { 2719 ctxt->depth++; 2720 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2721 0, 0, 0); 2722 ctxt->depth--; 2723 if (rep == NULL) { 2724 ent->content[0] = 0; 2725 goto int_error; 2726 } 2727 2728 current = rep; 2729 while (*current != 0) { /* non input consuming loop */ 2730 buffer[nbchars++] = *current++; 2731 if (nbchars + 
XML_PARSER_BUFFER_SIZE > buffer_size) { 2732 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2733 goto int_error; 2734 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2735 } 2736 } 2737 xmlFree(rep); 2738 rep = NULL; 2739 } else if (ent != NULL) { 2740 int i = xmlStrlen(ent->name); 2741 const xmlChar *cur = ent->name; 2742 2743 buffer[nbchars++] = '&'; 2744 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { 2745 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2746 } 2747 for (;i > 0;i--) 2748 buffer[nbchars++] = *cur++; 2749 buffer[nbchars++] = ';'; 2750 } 2751 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2752 if (xmlParserDebugEntities) 2753 xmlGenericError(xmlGenericErrorContext, 2754 "String decoding PE Reference: %.30s\n", str); 2755 ent = xmlParseStringPEReference(ctxt, &str); 2756 xmlParserEntityCheck(ctxt, 0, ent, 0); 2757 if (ent != NULL) 2758 ctxt->nbentities += ent->checked / 2; 2759 if (ent != NULL) { 2760 if (ent->content == NULL) { 2761 /* 2762 * Note: external parsed entities will not be loaded, 2763 * it is not required for a non-validating parser to 2764 * complete external PEReferences coming from the 2765 * internal subset 2766 */ 2767 if (((ctxt->options & XML_PARSE_NOENT) != 0) || 2768 ((ctxt->options & XML_PARSE_DTDVALID) != 0) || 2769 (ctxt->validate != 0)) { 2770 xmlLoadEntityContent(ctxt, ent); 2771 } else { 2772 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING, 2773 "not validating will not read content for PE entity %s\n", 2774 ent->name, NULL); 2775 } 2776 } 2777 ctxt->depth++; 2778 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2779 0, 0, 0); 2780 ctxt->depth--; 2781 if (rep == NULL) { 2782 if (ent->content != NULL) 2783 ent->content[0] = 0; 2784 goto int_error; 2785 } 2786 current = rep; 2787 while (*current != 0) { /* non input consuming loop */ 2788 buffer[nbchars++] = *current++; 2789 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2790 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2791 goto int_error; 
2792 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2793 } 2794 } 2795 xmlFree(rep); 2796 rep = NULL; 2797 } 2798 } else { 2799 COPY_BUF(l,buffer,nbchars,c); 2800 str += l; 2801 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2802 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2803 } 2804 } 2805 if (str < last) 2806 c = CUR_SCHAR(str, l); 2807 else 2808 c = 0; 2809 } 2810 buffer[nbchars] = 0; 2811 return(buffer); 2812 2813 mem_error: 2814 xmlErrMemory(ctxt, NULL); 2815 int_error: 2816 if (rep != NULL) 2817 xmlFree(rep); 2818 if (buffer != NULL) 2819 xmlFree(buffer); 2820 return(NULL); 2821 } 2822 2823 /** 2824 * xmlStringDecodeEntities: 2825 * @ctxt: the parser context 2826 * @str: the input string 2827 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2828 * @end: an end marker xmlChar, 0 if none 2829 * @end2: an end marker xmlChar, 0 if none 2830 * @end3: an end marker xmlChar, 0 if none 2831 * 2832 * Takes a entity string content and process to do the adequate substitutions. 2833 * 2834 * [67] Reference ::= EntityRef | CharRef 2835 * 2836 * [69] PEReference ::= '%' Name ';' 2837 * 2838 * Returns A newly allocated string with the substitution done. The caller 2839 * must deallocate it ! 2840 */ 2841 xmlChar * 2842 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2843 xmlChar end, xmlChar end2, xmlChar end3) { 2844 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2845 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2846 end, end2, end3)); 2847 } 2848 2849 /************************************************************************ 2850 * * 2851 * Commodity functions, cleanup needed ? 
* 2852 * * 2853 ************************************************************************/ 2854 2855 /** 2856 * areBlanks: 2857 * @ctxt: an XML parser context 2858 * @str: a xmlChar * 2859 * @len: the size of @str 2860 * @blank_chars: we know the chars are blanks 2861 * 2862 * Is this a sequence of blank chars that one can ignore ? 2863 * 2864 * Returns 1 if ignorable 0 otherwise. 2865 */ 2866 2867 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2868 int blank_chars) { 2869 int i, ret; 2870 xmlNodePtr lastChild; 2871 2872 /* 2873 * Don't spend time trying to differentiate them, the same callback is 2874 * used ! 2875 */ 2876 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2877 return(0); 2878 2879 /* 2880 * Check for xml:space value. 2881 */ 2882 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2883 (*(ctxt->space) == -2)) 2884 return(0); 2885 2886 /* 2887 * Check that the string is made of blanks 2888 */ 2889 if (blank_chars == 0) { 2890 for (i = 0;i < len;i++) 2891 if (!(IS_BLANK_CH(str[i]))) return(0); 2892 } 2893 2894 /* 2895 * Look if the element is mixed content in the DTD if available 2896 */ 2897 if (ctxt->node == NULL) return(0); 2898 if (ctxt->myDoc != NULL) { 2899 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2900 if (ret == 0) return(1); 2901 if (ret == 1) return(0); 2902 } 2903 2904 /* 2905 * Otherwise, heuristic :-\ 2906 */ 2907 if ((RAW != '<') && (RAW != 0xD)) return(0); 2908 if ((ctxt->node->children == NULL) && 2909 (RAW == '<') && (NXT(1) == '/')) return(0); 2910 2911 lastChild = xmlGetLastChild(ctxt->node); 2912 if (lastChild == NULL) { 2913 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2914 (ctxt->node->content != NULL)) return(0); 2915 } else if (xmlNodeIsText(lastChild)) 2916 return(0); 2917 else if ((ctxt->node->children != NULL) && 2918 (xmlNodeIsText(ctxt->node->children))) 2919 return(0); 2920 return(1); 2921 } 2922 2923 
/************************************************************************ 2924 * * 2925 * Extra stuff for namespace support * 2926 * Relates to http://www.w3.org/TR/WD-xml-names * 2927 * * 2928 ************************************************************************/ 2929 2930 /** 2931 * xmlSplitQName: 2932 * @ctxt: an XML parser context 2933 * @name: an XML parser context 2934 * @prefix: a xmlChar ** 2935 * 2936 * parse an UTF8 encoded XML qualified name string 2937 * 2938 * [NS 5] QName ::= (Prefix ':')? LocalPart 2939 * 2940 * [NS 6] Prefix ::= NCName 2941 * 2942 * [NS 7] LocalPart ::= NCName 2943 * 2944 * Returns the local part, and prefix is updated 2945 * to get the Prefix if any. 2946 */ 2947 2948 xmlChar * 2949 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2950 xmlChar buf[XML_MAX_NAMELEN + 5]; 2951 xmlChar *buffer = NULL; 2952 int len = 0; 2953 int max = XML_MAX_NAMELEN; 2954 xmlChar *ret = NULL; 2955 const xmlChar *cur = name; 2956 int c; 2957 2958 if (prefix == NULL) return(NULL); 2959 *prefix = NULL; 2960 2961 if (cur == NULL) return(NULL); 2962 2963 #ifndef XML_XML_NAMESPACE 2964 /* xml: prefix is not really a namespace */ 2965 if ((cur[0] == 'x') && (cur[1] == 'm') && 2966 (cur[2] == 'l') && (cur[3] == ':')) 2967 return(xmlStrdup(name)); 2968 #endif 2969 2970 /* nasty but well=formed */ 2971 if (cur[0] == ':') 2972 return(xmlStrdup(name)); 2973 2974 c = *cur++; 2975 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2976 buf[len++] = c; 2977 c = *cur++; 2978 } 2979 if (len >= max) { 2980 /* 2981 * Okay someone managed to make a huge name, so he's ready to pay 2982 * for the processing speed. 
2983 */ 2984 max = len * 2; 2985 2986 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2987 if (buffer == NULL) { 2988 xmlErrMemory(ctxt, NULL); 2989 return(NULL); 2990 } 2991 memcpy(buffer, buf, len); 2992 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2993 if (len + 10 > max) { 2994 xmlChar *tmp; 2995 2996 max *= 2; 2997 tmp = (xmlChar *) xmlRealloc(buffer, 2998 max * sizeof(xmlChar)); 2999 if (tmp == NULL) { 3000 xmlFree(buffer); 3001 xmlErrMemory(ctxt, NULL); 3002 return(NULL); 3003 } 3004 buffer = tmp; 3005 } 3006 buffer[len++] = c; 3007 c = *cur++; 3008 } 3009 buffer[len] = 0; 3010 } 3011 3012 if ((c == ':') && (*cur == 0)) { 3013 if (buffer != NULL) 3014 xmlFree(buffer); 3015 *prefix = NULL; 3016 return(xmlStrdup(name)); 3017 } 3018 3019 if (buffer == NULL) 3020 ret = xmlStrndup(buf, len); 3021 else { 3022 ret = buffer; 3023 buffer = NULL; 3024 max = XML_MAX_NAMELEN; 3025 } 3026 3027 3028 if (c == ':') { 3029 c = *cur; 3030 *prefix = ret; 3031 if (c == 0) { 3032 return(xmlStrndup(BAD_CAST "", 0)); 3033 } 3034 len = 0; 3035 3036 /* 3037 * Check that the first character is proper to start 3038 * a new name 3039 */ 3040 if (!(((c >= 0x61) && (c <= 0x7A)) || 3041 ((c >= 0x41) && (c <= 0x5A)) || 3042 (c == '_') || (c == ':'))) { 3043 int l; 3044 int first = CUR_SCHAR(cur, l); 3045 3046 if (!IS_LETTER(first) && (first != '_')) { 3047 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3048 "Name %s is not XML Namespace compliant\n", 3049 name); 3050 } 3051 } 3052 cur++; 3053 3054 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3055 buf[len++] = c; 3056 c = *cur++; 3057 } 3058 if (len >= max) { 3059 /* 3060 * Okay someone managed to make a huge name, so he's ready to pay 3061 * for the processing speed. 
3062 */ 3063 max = len * 2; 3064 3065 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3066 if (buffer == NULL) { 3067 xmlErrMemory(ctxt, NULL); 3068 return(NULL); 3069 } 3070 memcpy(buffer, buf, len); 3071 while (c != 0) { /* tested bigname2.xml */ 3072 if (len + 10 > max) { 3073 xmlChar *tmp; 3074 3075 max *= 2; 3076 tmp = (xmlChar *) xmlRealloc(buffer, 3077 max * sizeof(xmlChar)); 3078 if (tmp == NULL) { 3079 xmlErrMemory(ctxt, NULL); 3080 xmlFree(buffer); 3081 return(NULL); 3082 } 3083 buffer = tmp; 3084 } 3085 buffer[len++] = c; 3086 c = *cur++; 3087 } 3088 buffer[len] = 0; 3089 } 3090 3091 if (buffer == NULL) 3092 ret = xmlStrndup(buf, len); 3093 else { 3094 ret = buffer; 3095 } 3096 } 3097 3098 return(ret); 3099 } 3100 3101 /************************************************************************ 3102 * * 3103 * The parser itself * 3104 * Relates to http://www.w3.org/TR/REC-xml * 3105 * * 3106 ************************************************************************/ 3107 3108 /************************************************************************ 3109 * * 3110 * Routines to parse Name, NCName and NmToken * 3111 * * 3112 ************************************************************************/ 3113 #ifdef DEBUG 3114 static unsigned long nbParseName = 0; 3115 static unsigned long nbParseNmToken = 0; 3116 static unsigned long nbParseNCName = 0; 3117 static unsigned long nbParseNCNameComplex = 0; 3118 static unsigned long nbParseNameComplex = 0; 3119 static unsigned long nbParseStringName = 0; 3120 #endif 3121 3122 /* 3123 * The two following functions are related to the change of accepted 3124 * characters for Name and NmToken in the Revision 5 of XML-1.0 3125 * They correspond to the modified production [4] and the new production [4a] 3126 * changes in that revision. Also note that the macros used for the 3127 * productions Letter, Digit, CombiningChar and Extender are not needed 3128 * anymore. 
3129 * We still keep compatibility to pre-revision5 parsing semantic if the 3130 * new XML_PARSE_OLD10 option is given to the parser. 3131 */ 3132 static int 3133 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3134 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3135 /* 3136 * Use the new checks of production [4] [4a] amd [5] of the 3137 * Update 5 of XML-1.0 3138 */ 3139 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3140 (((c >= 'a') && (c <= 'z')) || 3141 ((c >= 'A') && (c <= 'Z')) || 3142 (c == '_') || (c == ':') || 3143 ((c >= 0xC0) && (c <= 0xD6)) || 3144 ((c >= 0xD8) && (c <= 0xF6)) || 3145 ((c >= 0xF8) && (c <= 0x2FF)) || 3146 ((c >= 0x370) && (c <= 0x37D)) || 3147 ((c >= 0x37F) && (c <= 0x1FFF)) || 3148 ((c >= 0x200C) && (c <= 0x200D)) || 3149 ((c >= 0x2070) && (c <= 0x218F)) || 3150 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3151 ((c >= 0x3001) && (c <= 0xD7FF)) || 3152 ((c >= 0xF900) && (c <= 0xFDCF)) || 3153 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3154 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3155 return(1); 3156 } else { 3157 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3158 return(1); 3159 } 3160 return(0); 3161 } 3162 3163 static int 3164 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3166 /* 3167 * Use the new checks of production [4] [4a] amd [5] of the 3168 * Update 5 of XML-1.0 3169 */ 3170 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3171 (((c >= 'a') && (c <= 'z')) || 3172 ((c >= 'A') && (c <= 'Z')) || 3173 ((c >= '0') && (c <= '9')) || /* !start */ 3174 (c == '_') || (c == ':') || 3175 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3176 ((c >= 0xC0) && (c <= 0xD6)) || 3177 ((c >= 0xD8) && (c <= 0xF6)) || 3178 ((c >= 0xF8) && (c <= 0x2FF)) || 3179 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3180 ((c >= 0x370) && (c <= 0x37D)) || 3181 ((c >= 0x37F) && (c <= 0x1FFF)) || 3182 ((c >= 0x200C) && (c <= 0x200D)) || 3183 ((c >= 0x203F) && (c <= 0x2040)) || /* !start 
*/ 3184 ((c >= 0x2070) && (c <= 0x218F)) || 3185 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3186 ((c >= 0x3001) && (c <= 0xD7FF)) || 3187 ((c >= 0xF900) && (c <= 0xFDCF)) || 3188 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3189 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3190 return(1); 3191 } else { 3192 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3193 (c == '.') || (c == '-') || 3194 (c == '_') || (c == ':') || 3195 (IS_COMBINING(c)) || 3196 (IS_EXTENDER(c))) 3197 return(1); 3198 } 3199 return(0); 3200 } 3201 3202 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3203 int *len, int *alloc, int normalize); 3204 3205 static const xmlChar * 3206 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3207 int len = 0, l; 3208 int c; 3209 int count = 0; 3210 3211 #ifdef DEBUG 3212 nbParseNameComplex++; 3213 #endif 3214 3215 /* 3216 * Handler for more complex cases 3217 */ 3218 GROW; 3219 if (ctxt->instate == XML_PARSER_EOF) 3220 return(NULL); 3221 c = CUR_CHAR(l); 3222 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3223 /* 3224 * Use the new checks of production [4] [4a] amd [5] of the 3225 * Update 5 of XML-1.0 3226 */ 3227 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3228 (!(((c >= 'a') && (c <= 'z')) || 3229 ((c >= 'A') && (c <= 'Z')) || 3230 (c == '_') || (c == ':') || 3231 ((c >= 0xC0) && (c <= 0xD6)) || 3232 ((c >= 0xD8) && (c <= 0xF6)) || 3233 ((c >= 0xF8) && (c <= 0x2FF)) || 3234 ((c >= 0x370) && (c <= 0x37D)) || 3235 ((c >= 0x37F) && (c <= 0x1FFF)) || 3236 ((c >= 0x200C) && (c <= 0x200D)) || 3237 ((c >= 0x2070) && (c <= 0x218F)) || 3238 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3239 ((c >= 0x3001) && (c <= 0xD7FF)) || 3240 ((c >= 0xF900) && (c <= 0xFDCF)) || 3241 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3242 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3243 return(NULL); 3244 } 3245 len += l; 3246 NEXTL(l); 3247 c = CUR_CHAR(l); 3248 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3249 (((c >= 'a') && (c <= 'z')) || 3250 ((c >= 'A') && (c <= 'Z')) || 3251 
((c >= '0') && (c <= '9')) || /* !start */ 3252 (c == '_') || (c == ':') || 3253 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3254 ((c >= 0xC0) && (c <= 0xD6)) || 3255 ((c >= 0xD8) && (c <= 0xF6)) || 3256 ((c >= 0xF8) && (c <= 0x2FF)) || 3257 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3258 ((c >= 0x370) && (c <= 0x37D)) || 3259 ((c >= 0x37F) && (c <= 0x1FFF)) || 3260 ((c >= 0x200C) && (c <= 0x200D)) || 3261 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3262 ((c >= 0x2070) && (c <= 0x218F)) || 3263 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3264 ((c >= 0x3001) && (c <= 0xD7FF)) || 3265 ((c >= 0xF900) && (c <= 0xFDCF)) || 3266 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3267 ((c >= 0x10000) && (c <= 0xEFFFF)) 3268 )) { 3269 if (count++ > XML_PARSER_CHUNK_SIZE) { 3270 count = 0; 3271 GROW; 3272 if (ctxt->instate == XML_PARSER_EOF) 3273 return(NULL); 3274 } 3275 len += l; 3276 NEXTL(l); 3277 c = CUR_CHAR(l); 3278 } 3279 } else { 3280 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3281 (!IS_LETTER(c) && (c != '_') && 3282 (c != ':'))) { 3283 return(NULL); 3284 } 3285 len += l; 3286 NEXTL(l); 3287 c = CUR_CHAR(l); 3288 3289 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3290 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3291 (c == '.') || (c == '-') || 3292 (c == '_') || (c == ':') || 3293 (IS_COMBINING(c)) || 3294 (IS_EXTENDER(c)))) { 3295 if (count++ > XML_PARSER_CHUNK_SIZE) { 3296 count = 0; 3297 GROW; 3298 if (ctxt->instate == XML_PARSER_EOF) 3299 return(NULL); 3300 } 3301 len += l; 3302 NEXTL(l); 3303 c = CUR_CHAR(l); 3304 } 3305 } 3306 if ((len > XML_MAX_NAME_LENGTH) && 3307 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3308 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3309 return(NULL); 3310 } 3311 if (ctxt->input->cur - ctxt->input->base < len) { 3312 /* 3313 * There were a couple of bugs where PERefs lead to to a change 3314 * of the buffer. Check the buffer size to avoid passing an invalid 3315 * pointer to xmlDictLookup. 
3316 */ 3317 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 3318 "unexpected change of input buffer"); 3319 return (NULL); 3320 } 3321 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3322 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3323 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3324 } 3325 3326 /** 3327 * xmlParseName: 3328 * @ctxt: an XML parser context 3329 * 3330 * parse an XML name. 3331 * 3332 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3333 * CombiningChar | Extender 3334 * 3335 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3336 * 3337 * [6] Names ::= Name (#x20 Name)* 3338 * 3339 * Returns the Name parsed or NULL 3340 */ 3341 3342 const xmlChar * 3343 xmlParseName(xmlParserCtxtPtr ctxt) { 3344 const xmlChar *in; 3345 const xmlChar *ret; 3346 int count = 0; 3347 3348 GROW; 3349 3350 #ifdef DEBUG 3351 nbParseName++; 3352 #endif 3353 3354 /* 3355 * Accelerator for simple ASCII names 3356 */ 3357 in = ctxt->input->cur; 3358 if (((*in >= 0x61) && (*in <= 0x7A)) || 3359 ((*in >= 0x41) && (*in <= 0x5A)) || 3360 (*in == '_') || (*in == ':')) { 3361 in++; 3362 while (((*in >= 0x61) && (*in <= 0x7A)) || 3363 ((*in >= 0x41) && (*in <= 0x5A)) || 3364 ((*in >= 0x30) && (*in <= 0x39)) || 3365 (*in == '_') || (*in == '-') || 3366 (*in == ':') || (*in == '.')) 3367 in++; 3368 if ((*in > 0) && (*in < 0x80)) { 3369 count = in - ctxt->input->cur; 3370 if ((count > XML_MAX_NAME_LENGTH) && 3371 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3372 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3373 return(NULL); 3374 } 3375 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3376 ctxt->input->cur = in; 3377 ctxt->input->col += count; 3378 if (ret == NULL) 3379 xmlErrMemory(ctxt, NULL); 3380 return(ret); 3381 } 3382 } 3383 /* accelerator for special cases */ 3384 return(xmlParseNameComplex(ctxt)); 3385 } 3386 3387 static const xmlChar * 3388 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3389 int 
len = 0, l; 3390 int c; 3391 int count = 0; 3392 size_t startPosition = 0; 3393 3394 #ifdef DEBUG 3395 nbParseNCNameComplex++; 3396 #endif 3397 3398 /* 3399 * Handler for more complex cases 3400 */ 3401 GROW; 3402 startPosition = CUR_PTR - BASE_PTR; 3403 c = CUR_CHAR(l); 3404 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3405 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3406 return(NULL); 3407 } 3408 3409 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3410 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3411 if (count++ > XML_PARSER_CHUNK_SIZE) { 3412 if ((len > XML_MAX_NAME_LENGTH) && 3413 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3414 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3415 return(NULL); 3416 } 3417 count = 0; 3418 GROW; 3419 if (ctxt->instate == XML_PARSER_EOF) 3420 return(NULL); 3421 } 3422 len += l; 3423 NEXTL(l); 3424 c = CUR_CHAR(l); 3425 if (c == 0) { 3426 count = 0; 3427 /* 3428 * when shrinking to extend the buffer we really need to preserve 3429 * the part of the name we already parsed. Hence rolling back 3430 * by current length. 3431 */ 3432 ctxt->input->cur -= l; 3433 GROW; 3434 if (ctxt->instate == XML_PARSER_EOF) 3435 return(NULL); 3436 ctxt->input->cur += l; 3437 c = CUR_CHAR(l); 3438 } 3439 } 3440 if ((len > XML_MAX_NAME_LENGTH) && 3441 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3442 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3443 return(NULL); 3444 } 3445 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); 3446 } 3447 3448 /** 3449 * xmlParseNCName: 3450 * @ctxt: an XML parser context 3451 * @len: length of the string parsed 3452 * 3453 * parse an XML name. 3454 * 3455 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3456 * CombiningChar | Extender 3457 * 3458 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3459 * 3460 * Returns the Name parsed or NULL 3461 */ 3462 3463 static const xmlChar * 3464 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3465 const xmlChar *in, *e; 3466 const xmlChar *ret; 3467 int count = 0; 3468 3469 #ifdef DEBUG 3470 nbParseNCName++; 3471 #endif 3472 3473 /* 3474 * Accelerator for simple ASCII names 3475 */ 3476 in = ctxt->input->cur; 3477 e = ctxt->input->end; 3478 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3479 ((*in >= 0x41) && (*in <= 0x5A)) || 3480 (*in == '_')) && (in < e)) { 3481 in++; 3482 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3483 ((*in >= 0x41) && (*in <= 0x5A)) || 3484 ((*in >= 0x30) && (*in <= 0x39)) || 3485 (*in == '_') || (*in == '-') || 3486 (*in == '.')) && (in < e)) 3487 in++; 3488 if (in >= e) 3489 goto complex; 3490 if ((*in > 0) && (*in < 0x80)) { 3491 count = in - ctxt->input->cur; 3492 if ((count > XML_MAX_NAME_LENGTH) && 3493 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3494 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3495 return(NULL); 3496 } 3497 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3498 ctxt->input->cur = in; 3499 ctxt->input->col += count; 3500 if (ret == NULL) { 3501 xmlErrMemory(ctxt, NULL); 3502 } 3503 return(ret); 3504 } 3505 } 3506 complex: 3507 return(xmlParseNCNameComplex(ctxt)); 3508 } 3509 3510 /** 3511 * xmlParseNameAndCompare: 3512 * @ctxt: an XML parser context 3513 * 3514 * parse an XML name and compares for match 3515 * (specialized for endtag parsing) 3516 * 3517 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3518 * and the name for mismatch 3519 */ 3520 3521 static const xmlChar * 3522 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3523 register const xmlChar *cmp = other; 3524 register const xmlChar *in; 3525 const xmlChar *ret; 3526 3527 GROW; 3528 if (ctxt->instate == XML_PARSER_EOF) 3529 return(NULL); 3530 3531 in = 
ctxt->input->cur; 3532 while (*in != 0 && *in == *cmp) { 3533 ++in; 3534 ++cmp; 3535 } 3536 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3537 /* success */ 3538 ctxt->input->col += in - ctxt->input->cur; 3539 ctxt->input->cur = in; 3540 return (const xmlChar*) 1; 3541 } 3542 /* failure (or end of input buffer), check with full function */ 3543 ret = xmlParseName (ctxt); 3544 /* strings coming from the dictionary direct compare possible */ 3545 if (ret == other) { 3546 return (const xmlChar*) 1; 3547 } 3548 return ret; 3549 } 3550 3551 /** 3552 * xmlParseStringName: 3553 * @ctxt: an XML parser context 3554 * @str: a pointer to the string pointer (IN/OUT) 3555 * 3556 * parse an XML name. 3557 * 3558 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3559 * CombiningChar | Extender 3560 * 3561 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3562 * 3563 * [6] Names ::= Name (#x20 Name)* 3564 * 3565 * Returns the Name parsed or NULL. The @str pointer 3566 * is updated to the current location in the string. 3567 */ 3568 3569 static xmlChar * 3570 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3571 xmlChar buf[XML_MAX_NAMELEN + 5]; 3572 const xmlChar *cur = *str; 3573 int len = 0, l; 3574 int c; 3575 3576 #ifdef DEBUG 3577 nbParseStringName++; 3578 #endif 3579 3580 c = CUR_SCHAR(cur, l); 3581 if (!xmlIsNameStartChar(ctxt, c)) { 3582 return(NULL); 3583 } 3584 3585 COPY_BUF(l,buf,len,c); 3586 cur += l; 3587 c = CUR_SCHAR(cur, l); 3588 while (xmlIsNameChar(ctxt, c)) { 3589 COPY_BUF(l,buf,len,c); 3590 cur += l; 3591 c = CUR_SCHAR(cur, l); 3592 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3593 /* 3594 * Okay someone managed to make a huge name, so he's ready to pay 3595 * for the processing speed. 
3596 */ 3597 xmlChar *buffer; 3598 int max = len * 2; 3599 3600 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3601 if (buffer == NULL) { 3602 xmlErrMemory(ctxt, NULL); 3603 return(NULL); 3604 } 3605 memcpy(buffer, buf, len); 3606 while (xmlIsNameChar(ctxt, c)) { 3607 if (len + 10 > max) { 3608 xmlChar *tmp; 3609 3610 if ((len > XML_MAX_NAME_LENGTH) && 3611 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3612 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3613 xmlFree(buffer); 3614 return(NULL); 3615 } 3616 max *= 2; 3617 tmp = (xmlChar *) xmlRealloc(buffer, 3618 max * sizeof(xmlChar)); 3619 if (tmp == NULL) { 3620 xmlErrMemory(ctxt, NULL); 3621 xmlFree(buffer); 3622 return(NULL); 3623 } 3624 buffer = tmp; 3625 } 3626 COPY_BUF(l,buffer,len,c); 3627 cur += l; 3628 c = CUR_SCHAR(cur, l); 3629 } 3630 buffer[len] = 0; 3631 *str = cur; 3632 return(buffer); 3633 } 3634 } 3635 if ((len > XML_MAX_NAME_LENGTH) && 3636 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3637 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3638 return(NULL); 3639 } 3640 *str = cur; 3641 return(xmlStrndup(buf, len)); 3642 } 3643 3644 /** 3645 * xmlParseNmtoken: 3646 * @ctxt: an XML parser context 3647 * 3648 * parse an XML Nmtoken. 
3649 * 3650 * [7] Nmtoken ::= (NameChar)+ 3651 * 3652 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3653 * 3654 * Returns the Nmtoken parsed or NULL 3655 */ 3656 3657 xmlChar * 3658 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3659 xmlChar buf[XML_MAX_NAMELEN + 5]; 3660 int len = 0, l; 3661 int c; 3662 int count = 0; 3663 3664 #ifdef DEBUG 3665 nbParseNmToken++; 3666 #endif 3667 3668 GROW; 3669 if (ctxt->instate == XML_PARSER_EOF) 3670 return(NULL); 3671 c = CUR_CHAR(l); 3672 3673 while (xmlIsNameChar(ctxt, c)) { 3674 if (count++ > XML_PARSER_CHUNK_SIZE) { 3675 count = 0; 3676 GROW; 3677 } 3678 COPY_BUF(l,buf,len,c); 3679 NEXTL(l); 3680 c = CUR_CHAR(l); 3681 if (c == 0) { 3682 count = 0; 3683 GROW; 3684 if (ctxt->instate == XML_PARSER_EOF) 3685 return(NULL); 3686 c = CUR_CHAR(l); 3687 } 3688 if (len >= XML_MAX_NAMELEN) { 3689 /* 3690 * Okay someone managed to make a huge token, so he's ready to pay 3691 * for the processing speed. 3692 */ 3693 xmlChar *buffer; 3694 int max = len * 2; 3695 3696 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3697 if (buffer == NULL) { 3698 xmlErrMemory(ctxt, NULL); 3699 return(NULL); 3700 } 3701 memcpy(buffer, buf, len); 3702 while (xmlIsNameChar(ctxt, c)) { 3703 if (count++ > XML_PARSER_CHUNK_SIZE) { 3704 count = 0; 3705 GROW; 3706 if (ctxt->instate == XML_PARSER_EOF) { 3707 xmlFree(buffer); 3708 return(NULL); 3709 } 3710 } 3711 if (len + 10 > max) { 3712 xmlChar *tmp; 3713 3714 if ((max > XML_MAX_NAME_LENGTH) && 3715 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3716 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3717 xmlFree(buffer); 3718 return(NULL); 3719 } 3720 max *= 2; 3721 tmp = (xmlChar *) xmlRealloc(buffer, 3722 max * sizeof(xmlChar)); 3723 if (tmp == NULL) { 3724 xmlErrMemory(ctxt, NULL); 3725 xmlFree(buffer); 3726 return(NULL); 3727 } 3728 buffer = tmp; 3729 } 3730 COPY_BUF(l,buffer,len,c); 3731 NEXTL(l); 3732 c = CUR_CHAR(l); 3733 } 3734 buffer[len] = 0; 3735 return(buffer); 3736 } 3737 } 3738 if (len == 0) 
3739 return(NULL); 3740 if ((len > XML_MAX_NAME_LENGTH) && 3741 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3742 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3743 return(NULL); 3744 } 3745 return(xmlStrndup(buf, len)); 3746 } 3747 3748 /** 3749 * xmlParseEntityValue: 3750 * @ctxt: an XML parser context 3751 * @orig: if non-NULL store a copy of the original entity value 3752 * 3753 * parse a value for ENTITY declarations 3754 * 3755 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3756 * "'" ([^%&'] | PEReference | Reference)* "'" 3757 * 3758 * Returns the EntityValue parsed with reference substituted or NULL 3759 */ 3760 3761 xmlChar * 3762 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3763 xmlChar *buf = NULL; 3764 int len = 0; 3765 int size = XML_PARSER_BUFFER_SIZE; 3766 int c, l; 3767 xmlChar stop; 3768 xmlChar *ret = NULL; 3769 const xmlChar *cur = NULL; 3770 xmlParserInputPtr input; 3771 3772 if (RAW == '"') stop = '"'; 3773 else if (RAW == '\'') stop = '\''; 3774 else { 3775 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3776 return(NULL); 3777 } 3778 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3779 if (buf == NULL) { 3780 xmlErrMemory(ctxt, NULL); 3781 return(NULL); 3782 } 3783 3784 /* 3785 * The content of the entity definition is copied in a buffer. 3786 */ 3787 3788 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3789 input = ctxt->input; 3790 GROW; 3791 if (ctxt->instate == XML_PARSER_EOF) 3792 goto error; 3793 NEXT; 3794 c = CUR_CHAR(l); 3795 /* 3796 * NOTE: 4.4.5 Included in Literal 3797 * When a parameter entity reference appears in a literal entity 3798 * value, ... a single or double quote character in the replacement 3799 * text is always treated as a normal data character and will not 3800 * terminate the literal. 
3801 * In practice it means we stop the loop only when back at parsing 3802 * the initial entity and the quote is found 3803 */ 3804 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3805 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3806 if (len + 5 >= size) { 3807 xmlChar *tmp; 3808 3809 size *= 2; 3810 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3811 if (tmp == NULL) { 3812 xmlErrMemory(ctxt, NULL); 3813 goto error; 3814 } 3815 buf = tmp; 3816 } 3817 COPY_BUF(l,buf,len,c); 3818 NEXTL(l); 3819 3820 GROW; 3821 c = CUR_CHAR(l); 3822 if (c == 0) { 3823 GROW; 3824 c = CUR_CHAR(l); 3825 } 3826 } 3827 buf[len] = 0; 3828 if (ctxt->instate == XML_PARSER_EOF) 3829 goto error; 3830 if (c != stop) { 3831 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3832 goto error; 3833 } 3834 NEXT; 3835 3836 /* 3837 * Raise problem w.r.t. '&' and '%' being used in non-entities 3838 * reference constructs. Note Charref will be handled in 3839 * xmlStringDecodeEntities() 3840 */ 3841 cur = buf; 3842 while (*cur != 0) { /* non input consuming */ 3843 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3844 xmlChar *name; 3845 xmlChar tmp = *cur; 3846 int nameOk = 0; 3847 3848 cur++; 3849 name = xmlParseStringName(ctxt, &cur); 3850 if (name != NULL) { 3851 nameOk = 1; 3852 xmlFree(name); 3853 } 3854 if ((nameOk == 0) || (*cur != ';')) { 3855 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3856 "EntityValue: '%c' forbidden except for entities references\n", 3857 tmp); 3858 goto error; 3859 } 3860 if ((tmp == '%') && (ctxt->inSubset == 1) && 3861 (ctxt->inputNr == 1)) { 3862 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3863 goto error; 3864 } 3865 if (*cur == 0) 3866 break; 3867 } 3868 cur++; 3869 } 3870 3871 /* 3872 * Then PEReference entities are substituted. 3873 * 3874 * NOTE: 4.4.7 Bypassed 3875 * When a general entity reference appears in the EntityValue in 3876 * an entity declaration, it is bypassed and left as is. 
3877 * so XML_SUBSTITUTE_REF is not set here. 3878 */ 3879 ++ctxt->depth; 3880 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3881 0, 0, 0); 3882 --ctxt->depth; 3883 if (orig != NULL) { 3884 *orig = buf; 3885 buf = NULL; 3886 } 3887 3888 error: 3889 if (buf != NULL) 3890 xmlFree(buf); 3891 return(ret); 3892 } 3893 3894 /** 3895 * xmlParseAttValueComplex: 3896 * @ctxt: an XML parser context 3897 * @len: the resulting attribute len 3898 * @normalize: whether to apply the inner normalization 3899 * 3900 * parse a value for an attribute, this is the fallback function 3901 * of xmlParseAttValue() when the attribute parsing requires handling 3902 * of non-ASCII characters, or normalization compaction. 3903 * 3904 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3905 */ 3906 static xmlChar * 3907 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3908 xmlChar limit = 0; 3909 xmlChar *buf = NULL; 3910 xmlChar *rep = NULL; 3911 size_t len = 0; 3912 size_t buf_size = 0; 3913 int c, l, in_space = 0; 3914 xmlChar *current = NULL; 3915 xmlEntityPtr ent; 3916 3917 if (NXT(0) == '"') { 3918 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3919 limit = '"'; 3920 NEXT; 3921 } else if (NXT(0) == '\'') { 3922 limit = '\''; 3923 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3924 NEXT; 3925 } else { 3926 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3927 return(NULL); 3928 } 3929 3930 /* 3931 * allocate a translation buffer. 3932 */ 3933 buf_size = XML_PARSER_BUFFER_SIZE; 3934 buf = (xmlChar *) xmlMallocAtomic(buf_size); 3935 if (buf == NULL) goto mem_error; 3936 3937 /* 3938 * OK loop until we reach one of the ending char or a size limit. 
3939 */ 3940 c = CUR_CHAR(l); 3941 while (((NXT(0) != limit) && /* checked */ 3942 (IS_CHAR(c)) && (c != '<')) && 3943 (ctxt->instate != XML_PARSER_EOF)) { 3944 /* 3945 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 3946 * special option is given 3947 */ 3948 if ((len > XML_MAX_TEXT_LENGTH) && 3949 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3950 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3951 "AttValue length too long\n"); 3952 goto mem_error; 3953 } 3954 if (c == '&') { 3955 in_space = 0; 3956 if (NXT(1) == '#') { 3957 int val = xmlParseCharRef(ctxt); 3958 3959 if (val == '&') { 3960 if (ctxt->replaceEntities) { 3961 if (len + 10 > buf_size) { 3962 growBuffer(buf, 10); 3963 } 3964 buf[len++] = '&'; 3965 } else { 3966 /* 3967 * The reparsing will be done in xmlStringGetNodeList() 3968 * called by the attribute() function in SAX.c 3969 */ 3970 if (len + 10 > buf_size) { 3971 growBuffer(buf, 10); 3972 } 3973 buf[len++] = '&'; 3974 buf[len++] = '#'; 3975 buf[len++] = '3'; 3976 buf[len++] = '8'; 3977 buf[len++] = ';'; 3978 } 3979 } else if (val != 0) { 3980 if (len + 10 > buf_size) { 3981 growBuffer(buf, 10); 3982 } 3983 len += xmlCopyChar(0, &buf[len], val); 3984 } 3985 } else { 3986 ent = xmlParseEntityRef(ctxt); 3987 ctxt->nbentities++; 3988 if (ent != NULL) 3989 ctxt->nbentities += ent->owner; 3990 if ((ent != NULL) && 3991 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3992 if (len + 10 > buf_size) { 3993 growBuffer(buf, 10); 3994 } 3995 if ((ctxt->replaceEntities == 0) && 3996 (ent->content[0] == '&')) { 3997 buf[len++] = '&'; 3998 buf[len++] = '#'; 3999 buf[len++] = '3'; 4000 buf[len++] = '8'; 4001 buf[len++] = ';'; 4002 } else { 4003 buf[len++] = ent->content[0]; 4004 } 4005 } else if ((ent != NULL) && 4006 (ctxt->replaceEntities != 0)) { 4007 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4008 ++ctxt->depth; 4009 rep = xmlStringDecodeEntities(ctxt, ent->content, 4010 XML_SUBSTITUTE_REF, 4011 0, 0, 0); 4012 --ctxt->depth; 
4013 if (rep != NULL) { 4014 current = rep; 4015 while (*current != 0) { /* non input consuming */ 4016 if ((*current == 0xD) || (*current == 0xA) || 4017 (*current == 0x9)) { 4018 buf[len++] = 0x20; 4019 current++; 4020 } else 4021 buf[len++] = *current++; 4022 if (len + 10 > buf_size) { 4023 growBuffer(buf, 10); 4024 } 4025 } 4026 xmlFree(rep); 4027 rep = NULL; 4028 } 4029 } else { 4030 if (len + 10 > buf_size) { 4031 growBuffer(buf, 10); 4032 } 4033 if (ent->content != NULL) 4034 buf[len++] = ent->content[0]; 4035 } 4036 } else if (ent != NULL) { 4037 int i = xmlStrlen(ent->name); 4038 const xmlChar *cur = ent->name; 4039 4040 /* 4041 * This may look absurd but is needed to detect 4042 * entities problems 4043 */ 4044 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4045 (ent->content != NULL) && (ent->checked == 0)) { 4046 unsigned long oldnbent = ctxt->nbentities, diff; 4047 4048 ++ctxt->depth; 4049 rep = xmlStringDecodeEntities(ctxt, ent->content, 4050 XML_SUBSTITUTE_REF, 0, 0, 0); 4051 --ctxt->depth; 4052 4053 diff = ctxt->nbentities - oldnbent + 1; 4054 if (diff > INT_MAX / 2) 4055 diff = INT_MAX / 2; 4056 ent->checked = diff * 2; 4057 if (rep != NULL) { 4058 if (xmlStrchr(rep, '<')) 4059 ent->checked |= 1; 4060 xmlFree(rep); 4061 rep = NULL; 4062 } else { 4063 ent->content[0] = 0; 4064 } 4065 } 4066 4067 /* 4068 * Just output the reference 4069 */ 4070 buf[len++] = '&'; 4071 while (len + i + 10 > buf_size) { 4072 growBuffer(buf, i + 10); 4073 } 4074 for (;i > 0;i--) 4075 buf[len++] = *cur++; 4076 buf[len++] = ';'; 4077 } 4078 } 4079 } else { 4080 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4081 if ((len != 0) || (!normalize)) { 4082 if ((!normalize) || (!in_space)) { 4083 COPY_BUF(l,buf,len,0x20); 4084 while (len + 10 > buf_size) { 4085 growBuffer(buf, 10); 4086 } 4087 } 4088 in_space = 1; 4089 } 4090 } else { 4091 in_space = 0; 4092 COPY_BUF(l,buf,len,c); 4093 if (len + 10 > buf_size) { 4094 growBuffer(buf, 10); 4095 } 4096 } 4097 
NEXTL(l); 4098 } 4099 GROW; 4100 c = CUR_CHAR(l); 4101 } 4102 if (ctxt->instate == XML_PARSER_EOF) 4103 goto error; 4104 4105 if ((in_space) && (normalize)) { 4106 while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4107 } 4108 buf[len] = 0; 4109 if (RAW == '<') { 4110 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4111 } else if (RAW != limit) { 4112 if ((c != 0) && (!IS_CHAR(c))) { 4113 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4114 "invalid character in attribute value\n"); 4115 } else { 4116 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4117 "AttValue: ' expected\n"); 4118 } 4119 } else 4120 NEXT; 4121 4122 /* 4123 * There we potentially risk an overflow, don't allow attribute value of 4124 * length more than INT_MAX it is a very reasonable assumption ! 4125 */ 4126 if (len >= INT_MAX) { 4127 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4128 "AttValue length too long\n"); 4129 goto mem_error; 4130 } 4131 4132 if (attlen != NULL) *attlen = (int) len; 4133 return(buf); 4134 4135 mem_error: 4136 xmlErrMemory(ctxt, NULL); 4137 error: 4138 if (buf != NULL) 4139 xmlFree(buf); 4140 if (rep != NULL) 4141 xmlFree(rep); 4142 return(NULL); 4143 } 4144 4145 /** 4146 * xmlParseAttValue: 4147 * @ctxt: an XML parser context 4148 * 4149 * parse a value for an attribute 4150 * Note: the parser won't do substitution of entities here, this 4151 * will be handled later in xmlStringGetNodeList 4152 * 4153 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4154 * "'" ([^<&'] | Reference)* "'" 4155 * 4156 * 3.3.3 Attribute-Value Normalization: 4157 * Before the value of an attribute is passed to the application or 4158 * checked for validity, the XML processor must normalize it as follows: 4159 * - a character reference is processed by appending the referenced 4160 * character to the attribute value 4161 * - an entity reference is processed by recursively processing the 4162 * replacement text of the entity 4163 * - a whitespace character (#x20, #xD, #xA, #x9) is 
processed by 4164 * appending #x20 to the normalized value, except that only a single 4165 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4166 * parsed entity or the literal entity value of an internal parsed entity 4167 * - other characters are processed by appending them to the normalized value 4168 * If the declared value is not CDATA, then the XML processor must further 4169 * process the normalized attribute value by discarding any leading and 4170 * trailing space (#x20) characters, and by replacing sequences of space 4171 * (#x20) characters by a single space (#x20) character. 4172 * All attributes for which no declaration has been read should be treated 4173 * by a non-validating parser as if declared CDATA. 4174 * 4175 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4176 */ 4177 4178 4179 xmlChar * 4180 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4181 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4182 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4183 } 4184 4185 /** 4186 * xmlParseSystemLiteral: 4187 * @ctxt: an XML parser context 4188 * 4189 * parse an XML Literal 4190 * 4191 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4192 * 4193 * Returns the SystemLiteral parsed or NULL 4194 */ 4195 4196 xmlChar * 4197 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4198 xmlChar *buf = NULL; 4199 int len = 0; 4200 int size = XML_PARSER_BUFFER_SIZE; 4201 int cur, l; 4202 xmlChar stop; 4203 int state = ctxt->instate; 4204 int count = 0; 4205 4206 SHRINK; 4207 if (RAW == '"') { 4208 NEXT; 4209 stop = '"'; 4210 } else if (RAW == '\'') { 4211 NEXT; 4212 stop = '\''; 4213 } else { 4214 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4215 return(NULL); 4216 } 4217 4218 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4219 if (buf == NULL) { 4220 xmlErrMemory(ctxt, NULL); 4221 return(NULL); 4222 } 4223 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4224 cur = CUR_CHAR(l); 
4225 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4226 if (len + 5 >= size) { 4227 xmlChar *tmp; 4228 4229 if ((size > XML_MAX_NAME_LENGTH) && 4230 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4231 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4232 xmlFree(buf); 4233 ctxt->instate = (xmlParserInputState) state; 4234 return(NULL); 4235 } 4236 size *= 2; 4237 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4238 if (tmp == NULL) { 4239 xmlFree(buf); 4240 xmlErrMemory(ctxt, NULL); 4241 ctxt->instate = (xmlParserInputState) state; 4242 return(NULL); 4243 } 4244 buf = tmp; 4245 } 4246 count++; 4247 if (count > 50) { 4248 SHRINK; 4249 GROW; 4250 count = 0; 4251 if (ctxt->instate == XML_PARSER_EOF) { 4252 xmlFree(buf); 4253 return(NULL); 4254 } 4255 } 4256 COPY_BUF(l,buf,len,cur); 4257 NEXTL(l); 4258 cur = CUR_CHAR(l); 4259 if (cur == 0) { 4260 GROW; 4261 SHRINK; 4262 cur = CUR_CHAR(l); 4263 } 4264 } 4265 buf[len] = 0; 4266 ctxt->instate = (xmlParserInputState) state; 4267 if (!IS_CHAR(cur)) { 4268 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4269 } else { 4270 NEXT; 4271 } 4272 return(buf); 4273 } 4274 4275 /** 4276 * xmlParsePubidLiteral: 4277 * @ctxt: an XML parser context 4278 * 4279 * parse an XML public literal 4280 * 4281 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4282 * 4283 * Returns the PubidLiteral parsed or NULL. 
4284 */ 4285 4286 xmlChar * 4287 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4288 xmlChar *buf = NULL; 4289 int len = 0; 4290 int size = XML_PARSER_BUFFER_SIZE; 4291 xmlChar cur; 4292 xmlChar stop; 4293 int count = 0; 4294 xmlParserInputState oldstate = ctxt->instate; 4295 4296 SHRINK; 4297 if (RAW == '"') { 4298 NEXT; 4299 stop = '"'; 4300 } else if (RAW == '\'') { 4301 NEXT; 4302 stop = '\''; 4303 } else { 4304 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4305 return(NULL); 4306 } 4307 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4308 if (buf == NULL) { 4309 xmlErrMemory(ctxt, NULL); 4310 return(NULL); 4311 } 4312 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4313 cur = CUR; 4314 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4315 if (len + 1 >= size) { 4316 xmlChar *tmp; 4317 4318 if ((size > XML_MAX_NAME_LENGTH) && 4319 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4320 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4321 xmlFree(buf); 4322 return(NULL); 4323 } 4324 size *= 2; 4325 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4326 if (tmp == NULL) { 4327 xmlErrMemory(ctxt, NULL); 4328 xmlFree(buf); 4329 return(NULL); 4330 } 4331 buf = tmp; 4332 } 4333 buf[len++] = cur; 4334 count++; 4335 if (count > 50) { 4336 SHRINK; 4337 GROW; 4338 count = 0; 4339 if (ctxt->instate == XML_PARSER_EOF) { 4340 xmlFree(buf); 4341 return(NULL); 4342 } 4343 } 4344 NEXT; 4345 cur = CUR; 4346 if (cur == 0) { 4347 GROW; 4348 SHRINK; 4349 cur = CUR; 4350 } 4351 } 4352 buf[len] = 0; 4353 if (cur != stop) { 4354 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4355 } else { 4356 NEXT; 4357 } 4358 ctxt->instate = oldstate; 4359 return(buf); 4360 } 4361 4362 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4363 4364 /* 4365 * used for the test in the inner loop of the char data testing 4366 */ 4367 static const unsigned char test_char_data[256] = { 4368 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4369 
0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4370 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4371 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4372 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4373 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4374 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4375 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4376 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4377 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4378 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4379 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4380 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4381 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4382 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4383 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4384 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4385 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4386 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4388 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4389 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4391 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4392 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4393 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4394 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4395 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4396 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4397 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4398 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4399 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4400 }; 4401 4402 /** 4403 * xmlParseCharData: 4404 * @ctxt: an XML parser context 4405 * @cdata: int indicating whether we are within a CDATA section 4406 * 4407 * parse a CharData section. 4408 * if we are within a CDATA section ']]>' marks an end of section. 
4409 * 4410 * The right angle bracket (>) may be represented using the string ">", 4411 * and must, for compatibility, be escaped using ">" or a character 4412 * reference when it appears in the string "]]>" in content, when that 4413 * string is not marking the end of a CDATA section. 4414 * 4415 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4416 */ 4417 4418 void 4419 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4420 const xmlChar *in; 4421 int nbchar = 0; 4422 int line = ctxt->input->line; 4423 int col = ctxt->input->col; 4424 int ccol; 4425 4426 SHRINK; 4427 GROW; 4428 /* 4429 * Accelerated common case where input don't need to be 4430 * modified before passing it to the handler. 4431 */ 4432 if (!cdata) { 4433 in = ctxt->input->cur; 4434 do { 4435 get_more_space: 4436 while (*in == 0x20) { in++; ctxt->input->col++; } 4437 if (*in == 0xA) { 4438 do { 4439 ctxt->input->line++; ctxt->input->col = 1; 4440 in++; 4441 } while (*in == 0xA); 4442 goto get_more_space; 4443 } 4444 if (*in == '<') { 4445 nbchar = in - ctxt->input->cur; 4446 if (nbchar > 0) { 4447 const xmlChar *tmp = ctxt->input->cur; 4448 ctxt->input->cur = in; 4449 4450 if ((ctxt->sax != NULL) && 4451 (ctxt->sax->ignorableWhitespace != 4452 ctxt->sax->characters)) { 4453 if (areBlanks(ctxt, tmp, nbchar, 1)) { 4454 if (ctxt->sax->ignorableWhitespace != NULL) 4455 ctxt->sax->ignorableWhitespace(ctxt->userData, 4456 tmp, nbchar); 4457 } else { 4458 if (ctxt->sax->characters != NULL) 4459 ctxt->sax->characters(ctxt->userData, 4460 tmp, nbchar); 4461 if (*ctxt->space == -1) 4462 *ctxt->space = -2; 4463 } 4464 } else if ((ctxt->sax != NULL) && 4465 (ctxt->sax->characters != NULL)) { 4466 ctxt->sax->characters(ctxt->userData, 4467 tmp, nbchar); 4468 } 4469 } 4470 return; 4471 } 4472 4473 get_more: 4474 ccol = ctxt->input->col; 4475 while (test_char_data[*in]) { 4476 in++; 4477 ccol++; 4478 } 4479 ctxt->input->col = ccol; 4480 if (*in == 0xA) { 4481 do { 4482 ctxt->input->line++; ctxt->input->col 
= 1; 4483 in++; 4484 } while (*in == 0xA); 4485 goto get_more; 4486 } 4487 if (*in == ']') { 4488 if ((in[1] == ']') && (in[2] == '>')) { 4489 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4490 ctxt->input->cur = in + 1; 4491 return; 4492 } 4493 in++; 4494 ctxt->input->col++; 4495 goto get_more; 4496 } 4497 nbchar = in - ctxt->input->cur; 4498 if (nbchar > 0) { 4499 if ((ctxt->sax != NULL) && 4500 (ctxt->sax->ignorableWhitespace != 4501 ctxt->sax->characters) && 4502 (IS_BLANK_CH(*ctxt->input->cur))) { 4503 const xmlChar *tmp = ctxt->input->cur; 4504 ctxt->input->cur = in; 4505 4506 if (areBlanks(ctxt, tmp, nbchar, 0)) { 4507 if (ctxt->sax->ignorableWhitespace != NULL) 4508 ctxt->sax->ignorableWhitespace(ctxt->userData, 4509 tmp, nbchar); 4510 } else { 4511 if (ctxt->sax->characters != NULL) 4512 ctxt->sax->characters(ctxt->userData, 4513 tmp, nbchar); 4514 if (*ctxt->space == -1) 4515 *ctxt->space = -2; 4516 } 4517 line = ctxt->input->line; 4518 col = ctxt->input->col; 4519 } else if (ctxt->sax != NULL) { 4520 if (ctxt->sax->characters != NULL) 4521 ctxt->sax->characters(ctxt->userData, 4522 ctxt->input->cur, nbchar); 4523 line = ctxt->input->line; 4524 col = ctxt->input->col; 4525 } 4526 /* something really bad happened in the SAX callback */ 4527 if (ctxt->instate != XML_PARSER_CONTENT) 4528 return; 4529 } 4530 ctxt->input->cur = in; 4531 if (*in == 0xD) { 4532 in++; 4533 if (*in == 0xA) { 4534 ctxt->input->cur = in; 4535 in++; 4536 ctxt->input->line++; ctxt->input->col = 1; 4537 continue; /* while */ 4538 } 4539 in--; 4540 } 4541 if (*in == '<') { 4542 return; 4543 } 4544 if (*in == '&') { 4545 return; 4546 } 4547 SHRINK; 4548 GROW; 4549 if (ctxt->instate == XML_PARSER_EOF) 4550 return; 4551 in = ctxt->input->cur; 4552 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); 4553 nbchar = 0; 4554 } 4555 ctxt->input->line = line; 4556 ctxt->input->col = col; 4557 xmlParseCharDataComplex(ctxt, cdata); 4558 } 4559 4560 /** 4561 * 
xmlParseCharDataComplex: 4562 * @ctxt: an XML parser context 4563 * @cdata: int indicating whether we are within a CDATA section 4564 * 4565 * parse a CharData section.this is the fallback function 4566 * of xmlParseCharData() when the parsing requires handling 4567 * of non-ASCII characters. 4568 */ 4569 static void 4570 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4571 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4572 int nbchar = 0; 4573 int cur, l; 4574 int count = 0; 4575 4576 SHRINK; 4577 GROW; 4578 cur = CUR_CHAR(l); 4579 while ((cur != '<') && /* checked */ 4580 (cur != '&') && 4581 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4582 if ((cur == ']') && (NXT(1) == ']') && 4583 (NXT(2) == '>')) { 4584 if (cdata) break; 4585 else { 4586 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4587 } 4588 } 4589 COPY_BUF(l,buf,nbchar,cur); 4590 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4591 buf[nbchar] = 0; 4592 4593 /* 4594 * OK the segment is to be consumed as chars. 4595 */ 4596 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4597 if (areBlanks(ctxt, buf, nbchar, 0)) { 4598 if (ctxt->sax->ignorableWhitespace != NULL) 4599 ctxt->sax->ignorableWhitespace(ctxt->userData, 4600 buf, nbchar); 4601 } else { 4602 if (ctxt->sax->characters != NULL) 4603 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4604 if ((ctxt->sax->characters != 4605 ctxt->sax->ignorableWhitespace) && 4606 (*ctxt->space == -1)) 4607 *ctxt->space = -2; 4608 } 4609 } 4610 nbchar = 0; 4611 /* something really bad happened in the SAX callback */ 4612 if (ctxt->instate != XML_PARSER_CONTENT) 4613 return; 4614 } 4615 count++; 4616 if (count > 50) { 4617 SHRINK; 4618 GROW; 4619 count = 0; 4620 if (ctxt->instate == XML_PARSER_EOF) 4621 return; 4622 } 4623 NEXTL(l); 4624 cur = CUR_CHAR(l); 4625 } 4626 if (nbchar != 0) { 4627 buf[nbchar] = 0; 4628 /* 4629 * OK the segment is to be consumed as chars. 
4630 */ 4631 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4632 if (areBlanks(ctxt, buf, nbchar, 0)) { 4633 if (ctxt->sax->ignorableWhitespace != NULL) 4634 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4635 } else { 4636 if (ctxt->sax->characters != NULL) 4637 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4638 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4639 (*ctxt->space == -1)) 4640 *ctxt->space = -2; 4641 } 4642 } 4643 } 4644 if ((cur != 0) && (!IS_CHAR(cur))) { 4645 /* Generate the error and skip the offending character */ 4646 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4647 "PCDATA invalid Char value %d\n", 4648 cur); 4649 NEXTL(l); 4650 } 4651 } 4652 4653 /** 4654 * xmlParseExternalID: 4655 * @ctxt: an XML parser context 4656 * @publicID: a xmlChar** receiving PubidLiteral 4657 * @strict: indicate whether we should restrict parsing to only 4658 * production [75], see NOTE below 4659 * 4660 * Parse an External ID or a Public ID 4661 * 4662 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4663 * 'PUBLIC' S PubidLiteral S SystemLiteral 4664 * 4665 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4666 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4667 * 4668 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4669 * 4670 * Returns the function returns SystemLiteral and in the second 4671 * case publicID receives PubidLiteral, is strict is off 4672 * it is possible to return NULL and have publicID set. 
4673 */ 4674 4675 xmlChar * 4676 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4677 xmlChar *URI = NULL; 4678 4679 SHRINK; 4680 4681 *publicID = NULL; 4682 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4683 SKIP(6); 4684 if (SKIP_BLANKS == 0) { 4685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4686 "Space required after 'SYSTEM'\n"); 4687 } 4688 URI = xmlParseSystemLiteral(ctxt); 4689 if (URI == NULL) { 4690 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4691 } 4692 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4693 SKIP(6); 4694 if (SKIP_BLANKS == 0) { 4695 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4696 "Space required after 'PUBLIC'\n"); 4697 } 4698 *publicID = xmlParsePubidLiteral(ctxt); 4699 if (*publicID == NULL) { 4700 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4701 } 4702 if (strict) { 4703 /* 4704 * We don't handle [83] so "S SystemLiteral" is required. 4705 */ 4706 if (SKIP_BLANKS == 0) { 4707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4708 "Space required after the Public Identifier\n"); 4709 } 4710 } else { 4711 /* 4712 * We handle [83] so we return immediately, if 4713 * "S SystemLiteral" is not detected. We skip blanks if no 4714 * system literal was found, but this is harmless since we must 4715 * be at the end of a NotationDecl. 4716 */ 4717 if (SKIP_BLANKS == 0) return(NULL); 4718 if ((CUR != '\'') && (CUR != '"')) return(NULL); 4719 } 4720 URI = xmlParseSystemLiteral(ctxt); 4721 if (URI == NULL) { 4722 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4723 } 4724 } 4725 return(URI); 4726 } 4727 4728 /** 4729 * xmlParseCommentComplex: 4730 * @ctxt: an XML parser context 4731 * @buf: the already parsed part of the buffer 4732 * @len: number of bytes in the buffer 4733 * @size: allocated size of the buffer 4734 * 4735 * Skip an XML (SGML) comment <!-- .... --> 4736 * The spec says that "For compatibility, the string "--" (double-hyphen) 4737 * must not occur within comments. 
" 4738 * This is the slow routine in case the accelerator for ascii didn't work 4739 * 4740 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4741 */ 4742 static void 4743 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4744 size_t len, size_t size) { 4745 int q, ql; 4746 int r, rl; 4747 int cur, l; 4748 size_t count = 0; 4749 int inputid; 4750 4751 inputid = ctxt->input->id; 4752 4753 if (buf == NULL) { 4754 len = 0; 4755 size = XML_PARSER_BUFFER_SIZE; 4756 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4757 if (buf == NULL) { 4758 xmlErrMemory(ctxt, NULL); 4759 return; 4760 } 4761 } 4762 GROW; /* Assure there's enough input data */ 4763 q = CUR_CHAR(ql); 4764 if (q == 0) 4765 goto not_terminated; 4766 if (!IS_CHAR(q)) { 4767 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4768 "xmlParseComment: invalid xmlChar value %d\n", 4769 q); 4770 xmlFree (buf); 4771 return; 4772 } 4773 NEXTL(ql); 4774 r = CUR_CHAR(rl); 4775 if (r == 0) 4776 goto not_terminated; 4777 if (!IS_CHAR(r)) { 4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4779 "xmlParseComment: invalid xmlChar value %d\n", 4780 q); 4781 xmlFree (buf); 4782 return; 4783 } 4784 NEXTL(rl); 4785 cur = CUR_CHAR(l); 4786 if (cur == 0) 4787 goto not_terminated; 4788 while (IS_CHAR(cur) && /* checked */ 4789 ((cur != '>') || 4790 (r != '-') || (q != '-'))) { 4791 if ((r == '-') && (q == '-')) { 4792 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4793 } 4794 if ((len > XML_MAX_TEXT_LENGTH) && 4795 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4796 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4797 "Comment too big found", NULL); 4798 xmlFree (buf); 4799 return; 4800 } 4801 if (len + 5 >= size) { 4802 xmlChar *new_buf; 4803 size_t new_size; 4804 4805 new_size = size * 2; 4806 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4807 if (new_buf == NULL) { 4808 xmlFree (buf); 4809 xmlErrMemory(ctxt, NULL); 4810 return; 4811 } 4812 buf = new_buf; 4813 size = new_size; 4814 } 
4815 COPY_BUF(ql,buf,len,q); 4816 q = r; 4817 ql = rl; 4818 r = cur; 4819 rl = l; 4820 4821 count++; 4822 if (count > 50) { 4823 SHRINK; 4824 GROW; 4825 count = 0; 4826 if (ctxt->instate == XML_PARSER_EOF) { 4827 xmlFree(buf); 4828 return; 4829 } 4830 } 4831 NEXTL(l); 4832 cur = CUR_CHAR(l); 4833 if (cur == 0) { 4834 SHRINK; 4835 GROW; 4836 cur = CUR_CHAR(l); 4837 } 4838 } 4839 buf[len] = 0; 4840 if (cur == 0) { 4841 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4842 "Comment not terminated \n<!--%.50s\n", buf); 4843 } else if (!IS_CHAR(cur)) { 4844 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4845 "xmlParseComment: invalid xmlChar value %d\n", 4846 cur); 4847 } else { 4848 if (inputid != ctxt->input->id) { 4849 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4850 "Comment doesn't start and stop in the same" 4851 " entity\n"); 4852 } 4853 NEXT; 4854 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4855 (!ctxt->disableSAX)) 4856 ctxt->sax->comment(ctxt->userData, buf); 4857 } 4858 xmlFree(buf); 4859 return; 4860 not_terminated: 4861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4862 "Comment not terminated\n", NULL); 4863 xmlFree(buf); 4864 return; 4865 } 4866 4867 /** 4868 * xmlParseComment: 4869 * @ctxt: an XML parser context 4870 * 4871 * Skip an XML (SGML) comment <!-- .... --> 4872 * The spec says that "For compatibility, the string "--" (double-hyphen) 4873 * must not occur within comments. " 4874 * 4875 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4876 */ 4877 void 4878 xmlParseComment(xmlParserCtxtPtr ctxt) { 4879 xmlChar *buf = NULL; 4880 size_t size = XML_PARSER_BUFFER_SIZE; 4881 size_t len = 0; 4882 xmlParserInputState state; 4883 const xmlChar *in; 4884 size_t nbchar = 0; 4885 int ccol; 4886 int inputid; 4887 4888 /* 4889 * Check that there is a comment right here. 
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Parse an XML (SGML) comment <!-- .... --> and report it through the SAX
 * comment callback.
 * The spec says that "For compatibility, the string "--" (double-hyphen)
 * must not occur within comments. "
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 * Nothing is consumed when the input is not positioned on "<!--".
 * Otherwise the parser state is switched to XML_PARSER_COMMENT while the
 * comment is read. A fast path scans ASCII text directly in the input
 * buffer; on any other byte the accumulated buffer is handed to
 * xmlParseCommentComplex(), which finishes the job.
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    xmlParserInputState state;
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;
    int inputid;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    /* remember the caller's state so it can be restored on the way out */
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        /* scan over tab and every ASCII byte from 0x20 to 0x7F except '-' */
        ccol = ctxt->input->col;
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
                    in++;
                    ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            /* the text is only accumulated when a comment handler exists */
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /* when "--" follows, size the buffer to fit exactly
                     * this run plus the terminator */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    xmlChar *new_buf;
                    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /* NOTE(review): unlike the allocation failures above, this exit
         * leaves ctxt->instate set to XML_PARSER_COMMENT -- confirm that is
         * intended. */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                         "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }
        /* everything up to 'in' has been consumed */
        ctxt->input->cur = in;
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            /* CR LF: step over the CR so the LF starts the next run; a
             * lone CR is left for the loop condition to reject */
            in++;
            if (*in == 0xA) {
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            in--;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF) {
            xmlFree(buf);
            return;
        }
        /* SHRINK/GROW may have moved the buffer: reload the cursor */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->": the comment is complete */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                       "comment doesn't start and stop in the"
                                       " same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        /* an empty comment is reported as "" */
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    if (ctxt->instate != XML_PARSER_EOF)
                        ctxt->instate = state;
                    return;
                }
                /* "--" without '>' is an error; parsing goes on past it */
                if (buf != NULL) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
                                      buf);
                } else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment\n", NULL);
                if (ctxt->instate == XML_PARSER_EOF) {
                    xmlFree(buf);
                    return;
                }
                in++;
                ctxt->input->col++;
            }
            /* step over the hyphen; it becomes part of the next run since
             * input->cur is not advanced here */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /* the fast path cannot handle this byte: hand over; the complex routine
     * releases buf on every path */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
name or NULL 5051 */ 5052 5053 const xmlChar * 5054 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5055 const xmlChar *name; 5056 5057 name = xmlParseName(ctxt); 5058 if ((name != NULL) && 5059 ((name[0] == 'x') || (name[0] == 'X')) && 5060 ((name[1] == 'm') || (name[1] == 'M')) && 5061 ((name[2] == 'l') || (name[2] == 'L'))) { 5062 int i; 5063 if ((name[0] == 'x') && (name[1] == 'm') && 5064 (name[2] == 'l') && (name[3] == 0)) { 5065 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5066 "XML declaration allowed only at the start of the document\n"); 5067 return(name); 5068 } else if (name[3] == 0) { 5069 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5070 return(name); 5071 } 5072 for (i = 0;;i++) { 5073 if (xmlW3CPIs[i] == NULL) break; 5074 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5075 return(name); 5076 } 5077 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5078 "xmlParsePITarget: invalid name prefix 'xml'\n", 5079 NULL, NULL); 5080 } 5081 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5082 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5083 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5084 } 5085 return(name); 5086 } 5087 5088 #ifdef LIBXML_CATALOG_ENABLED 5089 /** 5090 * xmlParseCatalogPI: 5091 * @ctxt: an XML parser context 5092 * @catalog: the PI value string 5093 * 5094 * parse an XML Catalog Processing Instruction. 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises the XML_WAR_CATALOG_PI warning, except when the
 * "catalog" keyword is not followed by '=', in which case the PI is
 * silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *location = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto malformed;
    p += 7;

    /* '=' sign; its absence is not reported */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto malformed;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto malformed;
    location = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto malformed;

    if (location != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, location);
        xmlFree(location);
    }
    return;

malformed:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (location != NULL)
        xmlFree(location);
}
#endif
/**
 * xmlParsePI:
 * @ctxt:  an XML parser context
 *
 * Parse an XML Processing Instruction.
 *
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
 *
 * The processing is transferred to SAX once parsed.
 *
 * Nothing is consumed when the input is not positioned on "<?". While the
 * PI is read the parser state is XML_PARSER_PI; the previous state is
 * restored on the way out, except on the exits taken once the state has
 * become XML_PARSER_EOF.
 */

void
xmlParsePI(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t len = 0;
    size_t size = XML_PARSER_BUFFER_SIZE;
    int cur, l;
    const xmlChar *target;
    xmlParserInputState state;
    int count = 0;

    if ((RAW == '<') && (NXT(1) == '?')) {
        int inputid = ctxt->input->id;
        state = ctxt->instate;
        ctxt->instate = XML_PARSER_PI;
        /*
         * this is a Processing Instruction.
         */
        SKIP(2);
        SHRINK;

        /*
         * Parse the target name and check for special support like
         * namespace.
         */
        target = xmlParsePITarget(ctxt);
        if (target != NULL) {
            /* a PI without data: "<?target?>" is reported with NULL data */
            if ((RAW == '?') && (NXT(1) == '>')) {
                if (inputid != ctxt->input->id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "PI declaration doesn't start and stop in"
                                   " the same entity\n");
                }
                SKIP(2);

                /*
                 * SAX: PI detected.
                 */
                if ((ctxt->sax) && (!ctxt->disableSAX) &&
                    (ctxt->sax->processingInstruction != NULL))
                    ctxt->sax->processingInstruction(ctxt->userData,
                                                     target, NULL);
                if (ctxt->instate != XML_PARSER_EOF)
                    ctxt->instate = state;
                return;
            }
            buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
            if (buf == NULL) {
                xmlErrMemory(ctxt, NULL);
                ctxt->instate = state;
                return;
            }
            /* the missing blank is reported but parsing goes on */
            if (SKIP_BLANKS == 0) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
                          "ParsePI: PI %s space expected\n", target);
            }
            cur = CUR_CHAR(l);
            /* copy the data up to "?>" or to the first non-Char value */
            while (IS_CHAR(cur) && /* checked */
                   ((cur != '?') || (NXT(1) != '>'))) {
                /* presumably the 5 byte margin leaves room for one
                 * multi-byte character plus the terminator -- confirm */
                if (len + 5 >= size) {
                    xmlChar *tmp;
                    size_t new_size = size * 2;
                    tmp = (xmlChar *) xmlRealloc(buf, new_size);
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buf);
                        ctxt->instate = state;
                        return;
                    }
                    buf = tmp;
                    size = new_size;
                }
                count++;
                if (count > 50) {
                    /* every 50 characters: refresh the input window and
                     * enforce the length limit */
                    SHRINK;
                    GROW;
                    if (ctxt->instate == XML_PARSER_EOF) {
                        xmlFree(buf);
                        return;
                    }
                    count = 0;
                    if ((len > XML_MAX_TEXT_LENGTH) &&
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
                                          "PI %s too big found", target);
                        xmlFree(buf);
                        ctxt->instate = state;
                        return;
                    }
                }
                COPY_BUF(l,buf,len,cur);
                NEXTL(l);
                cur = CUR_CHAR(l);
                if (cur == 0) {
                    /* possibly just the end of the window: refill, retry */
                    SHRINK;
                    GROW;
                    cur = CUR_CHAR(l);
                }
            }
            /* the in-loop check only runs every 50 characters, so the
             * limit is checked once more here */
            if ((len > XML_MAX_TEXT_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
                                  "PI %s too big found", target);
                xmlFree(buf);
                ctxt->instate = state;
                return;
            }
            buf[len] = 0;
            if (cur != '?') {
                /* the loop stopped on something other than "?>" */
                xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
                      "ParsePI: PI %s never end ...\n", target);
            } else {
                if (inputid != ctxt->input->id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "PI declaration doesn't start and stop in"
                                   " the same entity\n");
                }
                SKIP(2);

#ifdef LIBXML_CATALOG_ENABLED
                /* a catalog PI is only honoured when the PI was met in the
                 * START or MISC state and the catalog defaults allow it */
                if (((state == XML_PARSER_MISC) ||
                     (state == XML_PARSER_START)) &&
                    (xmlStrEqual(target, XML_CATALOG_PI))) {
                    xmlCatalogAllow allow = xmlCatalogGetDefaults();
                    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
                        (allow == XML_CATA_ALLOW_ALL))
                        xmlParseCatalogPI(ctxt, buf);
                }
#endif


                /*
                 * SAX: PI detected.
                 */
                if ((ctxt->sax) && (!ctxt->disableSAX) &&
                    (ctxt->sax->processingInstruction != NULL))
                    ctxt->sax->processingInstruction(ctxt->userData,
                                                     target, buf);
            }
            xmlFree(buf);
        } else {
            xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
        }
        if (ctxt->instate != XML_PARSER_EOF)
            ctxt->instate = state;
    }
}
5293 */ 5294 if ((ctxt->sax) && (!ctxt->disableSAX) && 5295 (ctxt->sax->processingInstruction != NULL)) 5296 ctxt->sax->processingInstruction(ctxt->userData, 5297 target, buf); 5298 } 5299 xmlFree(buf); 5300 } else { 5301 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5302 } 5303 if (ctxt->instate != XML_PARSER_EOF) 5304 ctxt->instate = state; 5305 } 5306 } 5307 5308 /** 5309 * xmlParseNotationDecl: 5310 * @ctxt: an XML parser context 5311 * 5312 * parse a notation declaration 5313 * 5314 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5315 * 5316 * Hence there is actually 3 choices: 5317 * 'PUBLIC' S PubidLiteral 5318 * 'PUBLIC' S PubidLiteral S SystemLiteral 5319 * and 'SYSTEM' S SystemLiteral 5320 * 5321 * See the NOTE on xmlParseExternalID(). 5322 */ 5323 5324 void 5325 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5326 const xmlChar *name; 5327 xmlChar *Pubid; 5328 xmlChar *Systemid; 5329 5330 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5331 int inputid = ctxt->input->id; 5332 SHRINK; 5333 SKIP(10); 5334 if (SKIP_BLANKS == 0) { 5335 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5336 "Space required after '<!NOTATION'\n"); 5337 return; 5338 } 5339 5340 name = xmlParseName(ctxt); 5341 if (name == NULL) { 5342 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5343 return; 5344 } 5345 if (xmlStrchr(name, ':') != NULL) { 5346 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5347 "colons are forbidden from notation names '%s'\n", 5348 name, NULL, NULL); 5349 } 5350 if (SKIP_BLANKS == 0) { 5351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5352 "Space required after the NOTATION name'\n"); 5353 return; 5354 } 5355 5356 /* 5357 * Parse the IDs. 
5358 */ 5359 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5360 SKIP_BLANKS; 5361 5362 if (RAW == '>') { 5363 if (inputid != ctxt->input->id) { 5364 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5365 "Notation declaration doesn't start and stop" 5366 " in the same entity\n"); 5367 } 5368 NEXT; 5369 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5370 (ctxt->sax->notationDecl != NULL)) 5371 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5372 } else { 5373 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5374 } 5375 if (Systemid != NULL) xmlFree(Systemid); 5376 if (Pubid != NULL) xmlFree(Pubid); 5377 } 5378 } 5379 5380 /** 5381 * xmlParseEntityDecl: 5382 * @ctxt: an XML parser context 5383 * 5384 * parse <!ENTITY declarations 5385 * 5386 * [70] EntityDecl ::= GEDecl | PEDecl 5387 * 5388 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5389 * 5390 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5391 * 5392 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5393 * 5394 * [74] PEDef ::= EntityValue | ExternalID 5395 * 5396 * [76] NDataDecl ::= S 'NDATA' S Name 5397 * 5398 * [ VC: Notation Declared ] 5399 * The Name must match the declared name of a notation. 
5400 */ 5401 5402 void 5403 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5404 const xmlChar *name = NULL; 5405 xmlChar *value = NULL; 5406 xmlChar *URI = NULL, *literal = NULL; 5407 const xmlChar *ndata = NULL; 5408 int isParameter = 0; 5409 xmlChar *orig = NULL; 5410 5411 /* GROW; done in the caller */ 5412 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5413 int inputid = ctxt->input->id; 5414 SHRINK; 5415 SKIP(8); 5416 if (SKIP_BLANKS == 0) { 5417 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5418 "Space required after '<!ENTITY'\n"); 5419 } 5420 5421 if (RAW == '%') { 5422 NEXT; 5423 if (SKIP_BLANKS == 0) { 5424 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5425 "Space required after '%%'\n"); 5426 } 5427 isParameter = 1; 5428 } 5429 5430 name = xmlParseName(ctxt); 5431 if (name == NULL) { 5432 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5433 "xmlParseEntityDecl: no name\n"); 5434 return; 5435 } 5436 if (xmlStrchr(name, ':') != NULL) { 5437 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5438 "colons are forbidden from entities names '%s'\n", 5439 name, NULL, NULL); 5440 } 5441 if (SKIP_BLANKS == 0) { 5442 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5443 "Space required after the entity name\n"); 5444 } 5445 5446 ctxt->instate = XML_PARSER_ENTITY_DECL; 5447 /* 5448 * handle the various case of definitions... 
5449 */ 5450 if (isParameter) { 5451 if ((RAW == '"') || (RAW == '\'')) { 5452 value = xmlParseEntityValue(ctxt, &orig); 5453 if (value) { 5454 if ((ctxt->sax != NULL) && 5455 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5456 ctxt->sax->entityDecl(ctxt->userData, name, 5457 XML_INTERNAL_PARAMETER_ENTITY, 5458 NULL, NULL, value); 5459 } 5460 } else { 5461 URI = xmlParseExternalID(ctxt, &literal, 1); 5462 if ((URI == NULL) && (literal == NULL)) { 5463 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5464 } 5465 if (URI) { 5466 xmlURIPtr uri; 5467 5468 uri = xmlParseURI((const char *) URI); 5469 if (uri == NULL) { 5470 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5471 "Invalid URI: %s\n", URI); 5472 /* 5473 * This really ought to be a well formedness error 5474 * but the XML Core WG decided otherwise c.f. issue 5475 * E26 of the XML erratas. 5476 */ 5477 } else { 5478 if (uri->fragment != NULL) { 5479 /* 5480 * Okay this is foolish to block those but not 5481 * invalid URIs. 5482 */ 5483 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5484 } else { 5485 if ((ctxt->sax != NULL) && 5486 (!ctxt->disableSAX) && 5487 (ctxt->sax->entityDecl != NULL)) 5488 ctxt->sax->entityDecl(ctxt->userData, name, 5489 XML_EXTERNAL_PARAMETER_ENTITY, 5490 literal, URI, NULL); 5491 } 5492 xmlFreeURI(uri); 5493 } 5494 } 5495 } 5496 } else { 5497 if ((RAW == '"') || (RAW == '\'')) { 5498 value = xmlParseEntityValue(ctxt, &orig); 5499 if ((ctxt->sax != NULL) && 5500 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5501 ctxt->sax->entityDecl(ctxt->userData, name, 5502 XML_INTERNAL_GENERAL_ENTITY, 5503 NULL, NULL, value); 5504 /* 5505 * For expat compatibility in SAX mode. 
5506 */ 5507 if ((ctxt->myDoc == NULL) || 5508 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5509 if (ctxt->myDoc == NULL) { 5510 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5511 if (ctxt->myDoc == NULL) { 5512 xmlErrMemory(ctxt, "New Doc failed"); 5513 return; 5514 } 5515 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5516 } 5517 if (ctxt->myDoc->intSubset == NULL) 5518 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5519 BAD_CAST "fake", NULL, NULL); 5520 5521 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5522 NULL, NULL, value); 5523 } 5524 } else { 5525 URI = xmlParseExternalID(ctxt, &literal, 1); 5526 if ((URI == NULL) && (literal == NULL)) { 5527 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5528 } 5529 if (URI) { 5530 xmlURIPtr uri; 5531 5532 uri = xmlParseURI((const char *)URI); 5533 if (uri == NULL) { 5534 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5535 "Invalid URI: %s\n", URI); 5536 /* 5537 * This really ought to be a well formedness error 5538 * but the XML Core WG decided otherwise c.f. issue 5539 * E26 of the XML erratas. 5540 */ 5541 } else { 5542 if (uri->fragment != NULL) { 5543 /* 5544 * Okay this is foolish to block those but not 5545 * invalid URIs. 
5546 */ 5547 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5548 } 5549 xmlFreeURI(uri); 5550 } 5551 } 5552 if ((RAW != '>') && (SKIP_BLANKS == 0)) { 5553 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5554 "Space required before 'NDATA'\n"); 5555 } 5556 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5557 SKIP(5); 5558 if (SKIP_BLANKS == 0) { 5559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5560 "Space required after 'NDATA'\n"); 5561 } 5562 ndata = xmlParseName(ctxt); 5563 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5564 (ctxt->sax->unparsedEntityDecl != NULL)) 5565 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5566 literal, URI, ndata); 5567 } else { 5568 if ((ctxt->sax != NULL) && 5569 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5570 ctxt->sax->entityDecl(ctxt->userData, name, 5571 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5572 literal, URI, NULL); 5573 /* 5574 * For expat compatibility in SAX mode. 5575 * assuming the entity replacement was asked for 5576 */ 5577 if ((ctxt->replaceEntities != 0) && 5578 ((ctxt->myDoc == NULL) || 5579 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5580 if (ctxt->myDoc == NULL) { 5581 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5582 if (ctxt->myDoc == NULL) { 5583 xmlErrMemory(ctxt, "New Doc failed"); 5584 return; 5585 } 5586 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5587 } 5588 5589 if (ctxt->myDoc->intSubset == NULL) 5590 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5591 BAD_CAST "fake", NULL, NULL); 5592 xmlSAX2EntityDecl(ctxt, name, 5593 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5594 literal, URI, NULL); 5595 } 5596 } 5597 } 5598 } 5599 if (ctxt->instate == XML_PARSER_EOF) 5600 goto done; 5601 SKIP_BLANKS; 5602 if (RAW != '>') { 5603 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5604 "xmlParseEntityDecl: entity %s not terminated\n", name); 5605 xmlHaltParser(ctxt); 5606 } else { 5607 if (inputid != ctxt->input->id) { 5608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5609 "Entity declaration 
doesn't start and stop in" 5610 " the same entity\n"); 5611 } 5612 NEXT; 5613 } 5614 if (orig != NULL) { 5615 /* 5616 * Ugly mechanism to save the raw entity value. 5617 */ 5618 xmlEntityPtr cur = NULL; 5619 5620 if (isParameter) { 5621 if ((ctxt->sax != NULL) && 5622 (ctxt->sax->getParameterEntity != NULL)) 5623 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5624 } else { 5625 if ((ctxt->sax != NULL) && 5626 (ctxt->sax->getEntity != NULL)) 5627 cur = ctxt->sax->getEntity(ctxt->userData, name); 5628 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5629 cur = xmlSAX2GetEntity(ctxt, name); 5630 } 5631 } 5632 if ((cur != NULL) && (cur->orig == NULL)) { 5633 cur->orig = orig; 5634 orig = NULL; 5635 } 5636 } 5637 5638 done: 5639 if (value != NULL) xmlFree(value); 5640 if (URI != NULL) xmlFree(URI); 5641 if (literal != NULL) xmlFree(literal); 5642 if (orig != NULL) xmlFree(orig); 5643 } 5644 } 5645 5646 /** 5647 * xmlParseDefaultDecl: 5648 * @ctxt: an XML parser context 5649 * @value: Receive a possible fixed default value for the attribute 5650 * 5651 * Parse an attribute default declaration 5652 * 5653 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5654 * 5655 * [ VC: Required Attribute ] 5656 * if the default declaration is the keyword #REQUIRED, then the 5657 * attribute must be specified for all elements of the type in the 5658 * attribute-list declaration. 5659 * 5660 * [ VC: Attribute Default Legal ] 5661 * The declared default value must meet the lexical constraints of 5662 * the declared attribute type c.f. xmlValidateAttributeDecl() 5663 * 5664 * [ VC: Fixed Attribute Default ] 5665 * if an attribute has a default value declared with the #FIXED 5666 * keyword, instances of that attribute must match the default value. 5667 * 5668 * [ WFC: No < in Attribute Values ] 5669 * handled in xmlParseAttValue() 5670 * 5671 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5672 * or XML_ATTRIBUTE_FIXED. 
5673 */ 5674 5675 int 5676 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5677 int val; 5678 xmlChar *ret; 5679 5680 *value = NULL; 5681 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5682 SKIP(9); 5683 return(XML_ATTRIBUTE_REQUIRED); 5684 } 5685 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5686 SKIP(8); 5687 return(XML_ATTRIBUTE_IMPLIED); 5688 } 5689 val = XML_ATTRIBUTE_NONE; 5690 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5691 SKIP(6); 5692 val = XML_ATTRIBUTE_FIXED; 5693 if (SKIP_BLANKS == 0) { 5694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5695 "Space required after '#FIXED'\n"); 5696 } 5697 } 5698 ret = xmlParseAttValue(ctxt); 5699 ctxt->instate = XML_PARSER_DTD; 5700 if (ret == NULL) { 5701 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5702 "Attribute default value declaration error\n"); 5703 } else 5704 *value = ret; 5705 return(val); 5706 } 5707 5708 /** 5709 * xmlParseNotationType: 5710 * @ctxt: an XML parser context 5711 * 5712 * parse an Notation attribute type. 5713 * 5714 * Note: the leading 'NOTATION' S part has already being parsed... 5715 * 5716 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5717 * 5718 * [ VC: Notation Attributes ] 5719 * Values of this type must match one of the notation names included 5720 * in the declaration; all notation names in the declaration must be declared. 
5721 * 5722 * Returns: the notation attribute tree built while parsing 5723 */ 5724 5725 xmlEnumerationPtr 5726 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5727 const xmlChar *name; 5728 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5729 5730 if (RAW != '(') { 5731 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5732 return(NULL); 5733 } 5734 SHRINK; 5735 do { 5736 NEXT; 5737 SKIP_BLANKS; 5738 name = xmlParseName(ctxt); 5739 if (name == NULL) { 5740 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5741 "Name expected in NOTATION declaration\n"); 5742 xmlFreeEnumeration(ret); 5743 return(NULL); 5744 } 5745 tmp = ret; 5746 while (tmp != NULL) { 5747 if (xmlStrEqual(name, tmp->name)) { 5748 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5749 "standalone: attribute notation value token %s duplicated\n", 5750 name, NULL); 5751 if (!xmlDictOwns(ctxt->dict, name)) 5752 xmlFree((xmlChar *) name); 5753 break; 5754 } 5755 tmp = tmp->next; 5756 } 5757 if (tmp == NULL) { 5758 cur = xmlCreateEnumeration(name); 5759 if (cur == NULL) { 5760 xmlFreeEnumeration(ret); 5761 return(NULL); 5762 } 5763 if (last == NULL) ret = last = cur; 5764 else { 5765 last->next = cur; 5766 last = cur; 5767 } 5768 } 5769 SKIP_BLANKS; 5770 } while (RAW == '|'); 5771 if (RAW != ')') { 5772 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5773 xmlFreeEnumeration(ret); 5774 return(NULL); 5775 } 5776 NEXT; 5777 return(ret); 5778 } 5779 5780 /** 5781 * xmlParseEnumerationType: 5782 * @ctxt: an XML parser context 5783 * 5784 * parse an Enumeration attribute type. 5785 * 5786 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5787 * 5788 * [ VC: Enumeration ] 5789 * Values of this type must match one of the Nmtoken tokens in 5790 * the declaration 5791 * 5792 * Returns: the enumeration attribute tree built while parsing 5793 */ 5794 5795 xmlEnumerationPtr 5796 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5797 xmlChar *name; 5798 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5799 5800 if (RAW != '(') { 5801 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5802 return(NULL); 5803 } 5804 SHRINK; 5805 do { 5806 NEXT; 5807 SKIP_BLANKS; 5808 name = xmlParseNmtoken(ctxt); 5809 if (name == NULL) { 5810 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5811 return(ret); 5812 } 5813 tmp = ret; 5814 while (tmp != NULL) { 5815 if (xmlStrEqual(name, tmp->name)) { 5816 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5817 "standalone: attribute enumeration value token %s duplicated\n", 5818 name, NULL); 5819 if (!xmlDictOwns(ctxt->dict, name)) 5820 xmlFree(name); 5821 break; 5822 } 5823 tmp = tmp->next; 5824 } 5825 if (tmp == NULL) { 5826 cur = xmlCreateEnumeration(name); 5827 if (!xmlDictOwns(ctxt->dict, name)) 5828 xmlFree(name); 5829 if (cur == NULL) { 5830 xmlFreeEnumeration(ret); 5831 return(NULL); 5832 } 5833 if (last == NULL) ret = last = cur; 5834 else { 5835 last->next = cur; 5836 last = cur; 5837 } 5838 } 5839 SKIP_BLANKS; 5840 } while (RAW == '|'); 5841 if (RAW != ')') { 5842 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5843 return(ret); 5844 } 5845 NEXT; 5846 return(ret); 5847 } 5848 5849 /** 5850 * xmlParseEnumeratedType: 5851 * @ctxt: an XML parser context 5852 * @tree: the enumeration tree built while parsing 5853 * 5854 * parse an Enumerated attribute type. 5855 * 5856 * [57] EnumeratedType ::= NotationType | Enumeration 5857 * 5858 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5859 * 5860 * 5861 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5862 */ 5863 5864 int 5865 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5866 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5867 SKIP(8); 5868 if (SKIP_BLANKS == 0) { 5869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5870 "Space required after 'NOTATION'\n"); 5871 return(0); 5872 } 5873 *tree = xmlParseNotationType(ctxt); 5874 if (*tree == NULL) return(0); 5875 return(XML_ATTRIBUTE_NOTATION); 5876 } 5877 *tree = xmlParseEnumerationType(ctxt); 5878 if (*tree == NULL) return(0); 5879 return(XML_ATTRIBUTE_ENUMERATION); 5880 } 5881 5882 /** 5883 * xmlParseAttributeType: 5884 * @ctxt: an XML parser context 5885 * @tree: the enumeration tree built while parsing 5886 * 5887 * parse the Attribute list def for an element 5888 * 5889 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5890 * 5891 * [55] StringType ::= 'CDATA' 5892 * 5893 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5894 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5895 * 5896 * Validity constraints for attribute values syntax are checked in 5897 * xmlValidateAttributeValue() 5898 * 5899 * [ VC: ID ] 5900 * Values of type ID must match the Name production. A name must not 5901 * appear more than once in an XML document as a value of this type; 5902 * i.e., ID values must uniquely identify the elements which bear them. 5903 * 5904 * [ VC: One ID per Element Type ] 5905 * No element type may have more than one ID attribute specified. 5906 * 5907 * [ VC: ID Attribute Default ] 5908 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5909 * 5910 * [ VC: IDREF ] 5911 * Values of type IDREF must match the Name production, and values 5912 * of type IDREFS must match Names; each IDREF Name must match the value 5913 * of an ID attribute on some element in the XML document; i.e. IDREF 5914 * values must match the value of some ID attribute. 
5915 * 5916 * [ VC: Entity Name ] 5917 * Values of type ENTITY must match the Name production, values 5918 * of type ENTITIES must match Names; each Entity Name must match the 5919 * name of an unparsed entity declared in the DTD. 5920 * 5921 * [ VC: Name Token ] 5922 * Values of type NMTOKEN must match the Nmtoken production; values 5923 * of type NMTOKENS must match Nmtokens. 5924 * 5925 * Returns the attribute type 5926 */ 5927 int 5928 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5929 SHRINK; 5930 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5931 SKIP(5); 5932 return(XML_ATTRIBUTE_CDATA); 5933 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5934 SKIP(6); 5935 return(XML_ATTRIBUTE_IDREFS); 5936 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5937 SKIP(5); 5938 return(XML_ATTRIBUTE_IDREF); 5939 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5940 SKIP(2); 5941 return(XML_ATTRIBUTE_ID); 5942 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5943 SKIP(6); 5944 return(XML_ATTRIBUTE_ENTITY); 5945 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5946 SKIP(8); 5947 return(XML_ATTRIBUTE_ENTITIES); 5948 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5949 SKIP(8); 5950 return(XML_ATTRIBUTE_NMTOKENS); 5951 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5952 SKIP(7); 5953 return(XML_ATTRIBUTE_NMTOKEN); 5954 } 5955 return(xmlParseEnumeratedType(ctxt, tree)); 5956 } 5957 5958 /** 5959 * xmlParseAttributeListDecl: 5960 * @ctxt: an XML parser context 5961 * 5962 * : parse the Attribute list def for an element 5963 * 5964 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5965 * 5966 * [53] AttDef ::= S Name S AttType S DefaultDecl 5967 * 5968 */ 5969 void 5970 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5971 const xmlChar *elemName; 5972 const xmlChar *attrName; 5973 xmlEnumerationPtr tree; 5974 5975 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5976 int inputid = ctxt->input->id; 5977 5978 SKIP(9); 5979 if (SKIP_BLANKS == 0) { 5980 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5981 "Space required after '<!ATTLIST'\n"); 5982 } 5983 elemName = xmlParseName(ctxt); 5984 if (elemName == NULL) { 5985 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5986 "ATTLIST: no name for Element\n"); 5987 return; 5988 } 5989 SKIP_BLANKS; 5990 GROW; 5991 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 5992 int type; 5993 int def; 5994 xmlChar *defaultValue = NULL; 5995 5996 GROW; 5997 tree = NULL; 5998 attrName = xmlParseName(ctxt); 5999 if (attrName == NULL) { 6000 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6001 "ATTLIST: no name for Attribute\n"); 6002 break; 6003 } 6004 GROW; 6005 if (SKIP_BLANKS == 0) { 6006 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6007 "Space required after the attribute name\n"); 6008 break; 6009 } 6010 6011 type = xmlParseAttributeType(ctxt, &tree); 6012 if (type <= 0) { 6013 break; 6014 } 6015 6016 GROW; 6017 if (SKIP_BLANKS == 0) { 6018 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6019 "Space required after the attribute type\n"); 6020 if (tree != NULL) 6021 xmlFreeEnumeration(tree); 6022 break; 6023 } 6024 6025 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6026 if (def <= 0) { 6027 if (defaultValue != NULL) 6028 xmlFree(defaultValue); 6029 if (tree != NULL) 6030 xmlFreeEnumeration(tree); 6031 break; 6032 } 6033 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6034 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6035 6036 GROW; 6037 if (RAW != '>') { 6038 if (SKIP_BLANKS == 0) { 6039 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6040 "Space required after the 
attribute default value\n"); 6041 if (defaultValue != NULL) 6042 xmlFree(defaultValue); 6043 if (tree != NULL) 6044 xmlFreeEnumeration(tree); 6045 break; 6046 } 6047 } 6048 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6049 (ctxt->sax->attributeDecl != NULL)) 6050 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6051 type, def, defaultValue, tree); 6052 else if (tree != NULL) 6053 xmlFreeEnumeration(tree); 6054 6055 if ((ctxt->sax2) && (defaultValue != NULL) && 6056 (def != XML_ATTRIBUTE_IMPLIED) && 6057 (def != XML_ATTRIBUTE_REQUIRED)) { 6058 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6059 } 6060 if (ctxt->sax2) { 6061 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6062 } 6063 if (defaultValue != NULL) 6064 xmlFree(defaultValue); 6065 GROW; 6066 } 6067 if (RAW == '>') { 6068 if (inputid != ctxt->input->id) { 6069 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6070 "Attribute list declaration doesn't start and" 6071 " stop in the same entity\n"); 6072 } 6073 NEXT; 6074 } 6075 } 6076 } 6077 6078 /** 6079 * xmlParseElementMixedContentDecl: 6080 * @ctxt: an XML parser context 6081 * @inputchk: the input used for the current entity, needed for boundary checks 6082 * 6083 * parse the declaration for a Mixed Element content 6084 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6085 * 6086 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6087 * '(' S? '#PCDATA' S? ')' 6088 * 6089 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6090 * 6091 * [ VC: No Duplicate Types ] 6092 * The same name must not appear more than once in a single 6093 * mixed-content declaration. 
6094 * 6095 * returns: the list of the xmlElementContentPtr describing the element choices 6096 */ 6097 xmlElementContentPtr 6098 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6099 xmlElementContentPtr ret = NULL, cur = NULL, n; 6100 const xmlChar *elem = NULL; 6101 6102 GROW; 6103 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6104 SKIP(7); 6105 SKIP_BLANKS; 6106 SHRINK; 6107 if (RAW == ')') { 6108 if (ctxt->input->id != inputchk) { 6109 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6110 "Element content declaration doesn't start and" 6111 " stop in the same entity\n"); 6112 } 6113 NEXT; 6114 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6115 if (ret == NULL) 6116 return(NULL); 6117 if (RAW == '*') { 6118 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6119 NEXT; 6120 } 6121 return(ret); 6122 } 6123 if ((RAW == '(') || (RAW == '|')) { 6124 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6125 if (ret == NULL) return(NULL); 6126 } 6127 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6128 NEXT; 6129 if (elem == NULL) { 6130 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6131 if (ret == NULL) { 6132 xmlFreeDocElementContent(ctxt->myDoc, cur); 6133 return(NULL); 6134 } 6135 ret->c1 = cur; 6136 if (cur != NULL) 6137 cur->parent = ret; 6138 cur = ret; 6139 } else { 6140 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6141 if (n == NULL) { 6142 xmlFreeDocElementContent(ctxt->myDoc, ret); 6143 return(NULL); 6144 } 6145 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6146 if (n->c1 != NULL) 6147 n->c1->parent = n; 6148 cur->c2 = n; 6149 if (n != NULL) 6150 n->parent = cur; 6151 cur = n; 6152 } 6153 SKIP_BLANKS; 6154 elem = xmlParseName(ctxt); 6155 if (elem == NULL) { 6156 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6157 "xmlParseElementMixedContentDecl : Name expected\n"); 6158 
xmlFreeDocElementContent(ctxt->myDoc, ret); 6159 return(NULL); 6160 } 6161 SKIP_BLANKS; 6162 GROW; 6163 } 6164 if ((RAW == ')') && (NXT(1) == '*')) { 6165 if (elem != NULL) { 6166 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6167 XML_ELEMENT_CONTENT_ELEMENT); 6168 if (cur->c2 != NULL) 6169 cur->c2->parent = cur; 6170 } 6171 if (ret != NULL) 6172 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6173 if (ctxt->input->id != inputchk) { 6174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6175 "Element content declaration doesn't start and" 6176 " stop in the same entity\n"); 6177 } 6178 SKIP(2); 6179 } else { 6180 xmlFreeDocElementContent(ctxt->myDoc, ret); 6181 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6182 return(NULL); 6183 } 6184 6185 } else { 6186 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6187 } 6188 return(ret); 6189 } 6190 6191 /** 6192 * xmlParseElementChildrenContentDeclPriv: 6193 * @ctxt: an XML parser context 6194 * @inputchk: the input used for the current entity, needed for boundary checks 6195 * @depth: the level of recursion 6196 * 6197 * parse the declaration for a Mixed Element content 6198 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6199 * 6200 * 6201 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6202 * 6203 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6204 * 6205 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6206 * 6207 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6208 * 6209 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6210 * TODO Parameter-entity replacement text must be properly nested 6211 * with parenthesized groups. That is to say, if either of the 6212 * opening or closing parentheses in a choice, seq, or Mixed 6213 * construct is contained in the replacement text for a parameter 6214 * entity, both must be contained in the same replacement text. 
For 6215 * interoperability, if a parameter-entity reference appears in a 6216 * choice, seq, or Mixed construct, its replacement text should not 6217 * be empty, and neither the first nor last non-blank character of 6218 * the replacement text should be a connector (| or ,). 6219 * 6220 * Returns the tree of xmlElementContentPtr describing the element 6221 * hierarchy. 6222 */ 6223 static xmlElementContentPtr 6224 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6225 int depth) { 6226 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6227 const xmlChar *elem; 6228 xmlChar type = 0; 6229 6230 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6231 (depth > 2048)) { 6232 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6233 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6234 depth); 6235 return(NULL); 6236 } 6237 SKIP_BLANKS; 6238 GROW; 6239 if (RAW == '(') { 6240 int inputid = ctxt->input->id; 6241 6242 /* Recurse on first child */ 6243 NEXT; 6244 SKIP_BLANKS; 6245 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6246 depth + 1); 6247 if (cur == NULL) 6248 return(NULL); 6249 SKIP_BLANKS; 6250 GROW; 6251 } else { 6252 elem = xmlParseName(ctxt); 6253 if (elem == NULL) { 6254 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6255 return(NULL); 6256 } 6257 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6258 if (cur == NULL) { 6259 xmlErrMemory(ctxt, NULL); 6260 return(NULL); 6261 } 6262 GROW; 6263 if (RAW == '?') { 6264 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6265 NEXT; 6266 } else if (RAW == '*') { 6267 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6268 NEXT; 6269 } else if (RAW == '+') { 6270 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 6271 NEXT; 6272 } else { 6273 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6274 } 6275 GROW; 6276 } 6277 SKIP_BLANKS; 6278 SHRINK; 6279 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 
6280 /* 6281 * Each loop we parse one separator and one element. 6282 */ 6283 if (RAW == ',') { 6284 if (type == 0) type = CUR; 6285 6286 /* 6287 * Detect "Name | Name , Name" error 6288 */ 6289 else if (type != CUR) { 6290 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6291 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6292 type); 6293 if ((last != NULL) && (last != ret)) 6294 xmlFreeDocElementContent(ctxt->myDoc, last); 6295 if (ret != NULL) 6296 xmlFreeDocElementContent(ctxt->myDoc, ret); 6297 return(NULL); 6298 } 6299 NEXT; 6300 6301 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6302 if (op == NULL) { 6303 if ((last != NULL) && (last != ret)) 6304 xmlFreeDocElementContent(ctxt->myDoc, last); 6305 xmlFreeDocElementContent(ctxt->myDoc, ret); 6306 return(NULL); 6307 } 6308 if (last == NULL) { 6309 op->c1 = ret; 6310 if (ret != NULL) 6311 ret->parent = op; 6312 ret = cur = op; 6313 } else { 6314 cur->c2 = op; 6315 if (op != NULL) 6316 op->parent = cur; 6317 op->c1 = last; 6318 if (last != NULL) 6319 last->parent = op; 6320 cur =op; 6321 last = NULL; 6322 } 6323 } else if (RAW == '|') { 6324 if (type == 0) type = CUR; 6325 6326 /* 6327 * Detect "Name , Name | Name" error 6328 */ 6329 else if (type != CUR) { 6330 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6331 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6332 type); 6333 if ((last != NULL) && (last != ret)) 6334 xmlFreeDocElementContent(ctxt->myDoc, last); 6335 if (ret != NULL) 6336 xmlFreeDocElementContent(ctxt->myDoc, ret); 6337 return(NULL); 6338 } 6339 NEXT; 6340 6341 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6342 if (op == NULL) { 6343 if ((last != NULL) && (last != ret)) 6344 xmlFreeDocElementContent(ctxt->myDoc, last); 6345 if (ret != NULL) 6346 xmlFreeDocElementContent(ctxt->myDoc, ret); 6347 return(NULL); 6348 } 6349 if (last == NULL) { 6350 op->c1 = ret; 6351 if (ret != NULL) 6352 ret->parent = op; 6353 ret = cur = 
op; 6354 } else { 6355 cur->c2 = op; 6356 if (op != NULL) 6357 op->parent = cur; 6358 op->c1 = last; 6359 if (last != NULL) 6360 last->parent = op; 6361 cur =op; 6362 last = NULL; 6363 } 6364 } else { 6365 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6366 if ((last != NULL) && (last != ret)) 6367 xmlFreeDocElementContent(ctxt->myDoc, last); 6368 if (ret != NULL) 6369 xmlFreeDocElementContent(ctxt->myDoc, ret); 6370 return(NULL); 6371 } 6372 GROW; 6373 SKIP_BLANKS; 6374 GROW; 6375 if (RAW == '(') { 6376 int inputid = ctxt->input->id; 6377 /* Recurse on second child */ 6378 NEXT; 6379 SKIP_BLANKS; 6380 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6381 depth + 1); 6382 if (last == NULL) { 6383 if (ret != NULL) 6384 xmlFreeDocElementContent(ctxt->myDoc, ret); 6385 return(NULL); 6386 } 6387 SKIP_BLANKS; 6388 } else { 6389 elem = xmlParseName(ctxt); 6390 if (elem == NULL) { 6391 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6392 if (ret != NULL) 6393 xmlFreeDocElementContent(ctxt->myDoc, ret); 6394 return(NULL); 6395 } 6396 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6397 if (last == NULL) { 6398 if (ret != NULL) 6399 xmlFreeDocElementContent(ctxt->myDoc, ret); 6400 return(NULL); 6401 } 6402 if (RAW == '?') { 6403 last->ocur = XML_ELEMENT_CONTENT_OPT; 6404 NEXT; 6405 } else if (RAW == '*') { 6406 last->ocur = XML_ELEMENT_CONTENT_MULT; 6407 NEXT; 6408 } else if (RAW == '+') { 6409 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6410 NEXT; 6411 } else { 6412 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6413 } 6414 } 6415 SKIP_BLANKS; 6416 GROW; 6417 } 6418 if ((cur != NULL) && (last != NULL)) { 6419 cur->c2 = last; 6420 if (last != NULL) 6421 last->parent = cur; 6422 } 6423 if (ctxt->input->id != inputchk) { 6424 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6425 "Element content declaration doesn't start and stop in" 6426 " the same entity\n"); 6427 } 6428 NEXT; 6429 if (RAW == '?') { 6430 if (ret != NULL) { 
6431 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6432 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6433 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6434 else 6435 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6436 } 6437 NEXT; 6438 } else if (RAW == '*') { 6439 if (ret != NULL) { 6440 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6441 cur = ret; 6442 /* 6443 * Some normalization: 6444 * (a | b* | c?)* == (a | b | c)* 6445 */ 6446 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6447 if ((cur->c1 != NULL) && 6448 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6449 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6450 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6451 if ((cur->c2 != NULL) && 6452 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6453 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6454 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6455 cur = cur->c2; 6456 } 6457 } 6458 NEXT; 6459 } else if (RAW == '+') { 6460 if (ret != NULL) { 6461 int found = 0; 6462 6463 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6464 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6465 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6466 else 6467 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6468 /* 6469 * Some normalization: 6470 * (a | b*)+ == (a | b)* 6471 * (a | b?)+ == (a | b)* 6472 */ 6473 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6474 if ((cur->c1 != NULL) && 6475 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6476 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6477 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6478 found = 1; 6479 } 6480 if ((cur->c2 != NULL) && 6481 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6482 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6483 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6484 found = 1; 6485 } 6486 cur = cur->c2; 6487 } 6488 if (found) 6489 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6490 } 6491 NEXT; 6492 } 6493 return(ret); 6494 } 6495 6496 /** 6497 * xmlParseElementChildrenContentDecl: 6498 * @ctxt: an XML parser context 6499 * @inputchk: the input used for 
the current entity, needed for boundary checks 6500 * 6501 * parse the declaration for a Mixed Element content 6502 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6503 * 6504 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6505 * 6506 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6507 * 6508 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6509 * 6510 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6511 * 6512 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6513 * TODO Parameter-entity replacement text must be properly nested 6514 * with parenthesized groups. That is to say, if either of the 6515 * opening or closing parentheses in a choice, seq, or Mixed 6516 * construct is contained in the replacement text for a parameter 6517 * entity, both must be contained in the same replacement text. For 6518 * interoperability, if a parameter-entity reference appears in a 6519 * choice, seq, or Mixed construct, its replacement text should not 6520 * be empty, and neither the first nor last non-blank character of 6521 * the replacement text should be a connector (| or ,). 6522 * 6523 * Returns the tree of xmlElementContentPtr describing the element 6524 * hierarchy. 6525 */ 6526 xmlElementContentPtr 6527 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6528 /* stub left for API/ABI compat */ 6529 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6530 } 6531 6532 /** 6533 * xmlParseElementContentDecl: 6534 * @ctxt: an XML parser context 6535 * @name: the name of the element being defined. 
6536 * @result: the Element Content pointer will be stored here if any 6537 * 6538 * parse the declaration for an Element content either Mixed or Children, 6539 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6540 * 6541 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6542 * 6543 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6544 */ 6545 6546 int 6547 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6548 xmlElementContentPtr *result) { 6549 6550 xmlElementContentPtr tree = NULL; 6551 int inputid = ctxt->input->id; 6552 int res; 6553 6554 *result = NULL; 6555 6556 if (RAW != '(') { 6557 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6558 "xmlParseElementContentDecl : %s '(' expected\n", name); 6559 return(-1); 6560 } 6561 NEXT; 6562 GROW; 6563 if (ctxt->instate == XML_PARSER_EOF) 6564 return(-1); 6565 SKIP_BLANKS; 6566 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6567 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6568 res = XML_ELEMENT_TYPE_MIXED; 6569 } else { 6570 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6571 res = XML_ELEMENT_TYPE_ELEMENT; 6572 } 6573 SKIP_BLANKS; 6574 *result = tree; 6575 return(res); 6576 } 6577 6578 /** 6579 * xmlParseElementDecl: 6580 * @ctxt: an XML parser context 6581 * 6582 * parse an Element declaration. 6583 * 6584 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6585 * 6586 * [ VC: Unique Element Type Declaration ] 6587 * No element type may be declared more than once 6588 * 6589 * Returns the type of the element, or -1 in case of error 6590 */ 6591 int 6592 xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6593 const xmlChar *name; 6594 int ret = -1; 6595 xmlElementContentPtr content = NULL; 6596 6597 /* GROW; done in the caller */ 6598 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6599 int inputid = ctxt->input->id; 6600 6601 SKIP(9); 6602 if (SKIP_BLANKS == 0) { 6603 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6604 "Space required after 'ELEMENT'\n"); 6605 return(-1); 6606 } 6607 name = xmlParseName(ctxt); 6608 if (name == NULL) { 6609 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6610 "xmlParseElementDecl: no name for Element\n"); 6611 return(-1); 6612 } 6613 if (SKIP_BLANKS == 0) { 6614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6615 "Space required after the element name\n"); 6616 } 6617 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6618 SKIP(5); 6619 /* 6620 * Element must always be empty. 6621 */ 6622 ret = XML_ELEMENT_TYPE_EMPTY; 6623 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6624 (NXT(2) == 'Y')) { 6625 SKIP(3); 6626 /* 6627 * Element is a generic container. 6628 */ 6629 ret = XML_ELEMENT_TYPE_ANY; 6630 } else if (RAW == '(') { 6631 ret = xmlParseElementContentDecl(ctxt, name, &content); 6632 } else { 6633 /* 6634 * [ WFC: PEs in Internal Subset ] error handling. 
6635 */ 6636 if ((RAW == '%') && (ctxt->external == 0) && 6637 (ctxt->inputNr == 1)) { 6638 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6639 "PEReference: forbidden within markup decl in internal subset\n"); 6640 } else { 6641 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6642 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6643 } 6644 return(-1); 6645 } 6646 6647 SKIP_BLANKS; 6648 6649 if (RAW != '>') { 6650 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6651 if (content != NULL) { 6652 xmlFreeDocElementContent(ctxt->myDoc, content); 6653 } 6654 } else { 6655 if (inputid != ctxt->input->id) { 6656 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6657 "Element declaration doesn't start and stop in" 6658 " the same entity\n"); 6659 } 6660 6661 NEXT; 6662 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6663 (ctxt->sax->elementDecl != NULL)) { 6664 if (content != NULL) 6665 content->parent = NULL; 6666 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6667 content); 6668 if ((content != NULL) && (content->parent == NULL)) { 6669 /* 6670 * this is a trick: if xmlAddElementDecl is called, 6671 * instead of copying the full tree it is plugged directly 6672 * if called from the parser. Avoid duplicating the 6673 * interfaces or change the API/ABI 6674 */ 6675 xmlFreeDocElementContent(ctxt->myDoc, content); 6676 } 6677 } else if (content != NULL) { 6678 xmlFreeDocElementContent(ctxt->myDoc, content); 6679 } 6680 } 6681 } 6682 return(ret); 6683 } 6684 6685 /** 6686 * xmlParseConditionalSections 6687 * @ctxt: an XML parser context 6688 * 6689 * [61] conditionalSect ::= includeSect | ignoreSect 6690 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6691 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6692 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6693 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6694 */ 6695 6696 static void 6697 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6698 int *inputIds = NULL; 6699 size_t inputIdsSize = 0; 6700 size_t depth = 0; 6701 6702 while (ctxt->instate != XML_PARSER_EOF) { 6703 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6704 int id = ctxt->input->id; 6705 6706 SKIP(3); 6707 SKIP_BLANKS; 6708 6709 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6710 SKIP(7); 6711 SKIP_BLANKS; 6712 if (RAW != '[') { 6713 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6714 xmlHaltParser(ctxt); 6715 goto error; 6716 } 6717 if (ctxt->input->id != id) { 6718 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6719 "All markup of the conditional section is" 6720 " not in the same entity\n"); 6721 } 6722 NEXT; 6723 6724 if (inputIdsSize <= depth) { 6725 int *tmp; 6726 6727 inputIdsSize = (inputIdsSize == 0 ? 
4 : inputIdsSize * 2); 6728 tmp = (int *) xmlRealloc(inputIds, 6729 inputIdsSize * sizeof(int)); 6730 if (tmp == NULL) { 6731 xmlErrMemory(ctxt, NULL); 6732 goto error; 6733 } 6734 inputIds = tmp; 6735 } 6736 inputIds[depth] = id; 6737 depth++; 6738 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6739 int state; 6740 xmlParserInputState instate; 6741 size_t ignoreDepth = 0; 6742 6743 SKIP(6); 6744 SKIP_BLANKS; 6745 if (RAW != '[') { 6746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6747 xmlHaltParser(ctxt); 6748 goto error; 6749 } 6750 if (ctxt->input->id != id) { 6751 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6752 "All markup of the conditional section is" 6753 " not in the same entity\n"); 6754 } 6755 NEXT; 6756 6757 /* 6758 * Parse up to the end of the conditional section but disable 6759 * SAX event generating DTD building in the meantime 6760 */ 6761 state = ctxt->disableSAX; 6762 instate = ctxt->instate; 6763 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6764 ctxt->instate = XML_PARSER_IGNORE; 6765 6766 while (RAW != 0) { 6767 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6768 SKIP(3); 6769 ignoreDepth++; 6770 /* Check for integer overflow */ 6771 if (ignoreDepth == 0) { 6772 xmlErrMemory(ctxt, NULL); 6773 goto error; 6774 } 6775 } else if ((RAW == ']') && (NXT(1) == ']') && 6776 (NXT(2) == '>')) { 6777 if (ignoreDepth == 0) 6778 break; 6779 SKIP(3); 6780 ignoreDepth--; 6781 } else { 6782 NEXT; 6783 } 6784 } 6785 6786 ctxt->disableSAX = state; 6787 ctxt->instate = instate; 6788 6789 if (RAW == 0) { 6790 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6791 goto error; 6792 } 6793 if (ctxt->input->id != id) { 6794 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6795 "All markup of the conditional section is" 6796 " not in the same entity\n"); 6797 } 6798 SKIP(3); 6799 } else { 6800 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6801 xmlHaltParser(ctxt); 6802 goto error; 6803 } 6804 } else if ((depth > 0) && 6805 
(RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6806 depth--; 6807 if (ctxt->input->id != inputIds[depth]) { 6808 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6809 "All markup of the conditional section is not" 6810 " in the same entity\n"); 6811 } 6812 SKIP(3); 6813 } else { 6814 const xmlChar *check = CUR_PTR; 6815 unsigned int cons = ctxt->input->consumed; 6816 6817 xmlParseMarkupDecl(ctxt); 6818 6819 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6820 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6821 xmlHaltParser(ctxt); 6822 goto error; 6823 } 6824 } 6825 6826 if (depth == 0) 6827 break; 6828 6829 SKIP_BLANKS; 6830 GROW; 6831 } 6832 6833 error: 6834 xmlFree(inputIds); 6835 } 6836 6837 /** 6838 * xmlParseMarkupDecl: 6839 * @ctxt: an XML parser context 6840 * 6841 * parse Markup declarations 6842 * 6843 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6844 * NotationDecl | PI | Comment 6845 * 6846 * [ VC: Proper Declaration/PE Nesting ] 6847 * Parameter-entity replacement text must be properly nested with 6848 * markup declarations. That is to say, if either the first character 6849 * or the last character of a markup declaration (markupdecl above) is 6850 * contained in the replacement text for a parameter-entity reference, 6851 * both must be contained in the same replacement text. 6852 * 6853 * [ WFC: PEs in Internal Subset ] 6854 * In the internal DTD subset, parameter-entity references can occur 6855 * only where markup declarations can occur, not within markup declarations. 6856 * (This does not apply to references that occur in external parameter 6857 * entities or to the external subset.) 
6858 */ 6859 void 6860 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6861 GROW; 6862 if (CUR == '<') { 6863 if (NXT(1) == '!') { 6864 switch (NXT(2)) { 6865 case 'E': 6866 if (NXT(3) == 'L') 6867 xmlParseElementDecl(ctxt); 6868 else if (NXT(3) == 'N') 6869 xmlParseEntityDecl(ctxt); 6870 break; 6871 case 'A': 6872 xmlParseAttributeListDecl(ctxt); 6873 break; 6874 case 'N': 6875 xmlParseNotationDecl(ctxt); 6876 break; 6877 case '-': 6878 xmlParseComment(ctxt); 6879 break; 6880 default: 6881 /* there is an error but it will be detected later */ 6882 break; 6883 } 6884 } else if (NXT(1) == '?') { 6885 xmlParsePI(ctxt); 6886 } 6887 } 6888 6889 /* 6890 * detect requirement to exit there and act accordingly 6891 * and avoid having instate overridden later on 6892 */ 6893 if (ctxt->instate == XML_PARSER_EOF) 6894 return; 6895 6896 ctxt->instate = XML_PARSER_DTD; 6897 } 6898 6899 /** 6900 * xmlParseTextDecl: 6901 * @ctxt: an XML parser context 6902 * 6903 * parse an XML declaration header for external entities 6904 * 6905 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6906 */ 6907 6908 void 6909 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6910 xmlChar *version; 6911 const xmlChar *encoding; 6912 int oldstate; 6913 6914 /* 6915 * We know that '<?xml' is here. 6916 */ 6917 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6918 SKIP(5); 6919 } else { 6920 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6921 return; 6922 } 6923 6924 /* Avoid expansion of parameter entities when skipping blanks. */ 6925 oldstate = ctxt->instate; 6926 ctxt->instate = XML_PARSER_START; 6927 6928 if (SKIP_BLANKS == 0) { 6929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6930 "Space needed after '<?xml'\n"); 6931 } 6932 6933 /* 6934 * We may have the VersionInfo here. 
6935 */ 6936 version = xmlParseVersionInfo(ctxt); 6937 if (version == NULL) 6938 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6939 else { 6940 if (SKIP_BLANKS == 0) { 6941 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6942 "Space needed here\n"); 6943 } 6944 } 6945 ctxt->input->version = version; 6946 6947 /* 6948 * We must have the encoding declaration 6949 */ 6950 encoding = xmlParseEncodingDecl(ctxt); 6951 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6952 /* 6953 * The XML REC instructs us to stop parsing right here 6954 */ 6955 ctxt->instate = oldstate; 6956 return; 6957 } 6958 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6959 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6960 "Missing encoding in text declaration\n"); 6961 } 6962 6963 SKIP_BLANKS; 6964 if ((RAW == '?') && (NXT(1) == '>')) { 6965 SKIP(2); 6966 } else if (RAW == '>') { 6967 /* Deprecated old WD ... */ 6968 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6969 NEXT; 6970 } else { 6971 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6972 MOVETO_ENDTAG(CUR_PTR); 6973 NEXT; 6974 } 6975 6976 ctxt->instate = oldstate; 6977 } 6978 6979 /** 6980 * xmlParseExternalSubset: 6981 * @ctxt: an XML parser context 6982 * @ExternalID: the external identifier 6983 * @SystemID: the system identifier (or URL) 6984 * 6985 * parse Markup declarations from an external subset 6986 * 6987 * [30] extSubset ::= textDecl? 
extSubsetDecl 6988 * 6989 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6990 */ 6991 void 6992 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6993 const xmlChar *SystemID) { 6994 xmlDetectSAX2(ctxt); 6995 GROW; 6996 6997 if ((ctxt->encoding == NULL) && 6998 (ctxt->input->end - ctxt->input->cur >= 4)) { 6999 xmlChar start[4]; 7000 xmlCharEncoding enc; 7001 7002 start[0] = RAW; 7003 start[1] = NXT(1); 7004 start[2] = NXT(2); 7005 start[3] = NXT(3); 7006 enc = xmlDetectCharEncoding(start, 4); 7007 if (enc != XML_CHAR_ENCODING_NONE) 7008 xmlSwitchEncoding(ctxt, enc); 7009 } 7010 7011 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7012 xmlParseTextDecl(ctxt); 7013 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7014 /* 7015 * The XML REC instructs us to stop parsing right here 7016 */ 7017 xmlHaltParser(ctxt); 7018 return; 7019 } 7020 } 7021 if (ctxt->myDoc == NULL) { 7022 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7023 if (ctxt->myDoc == NULL) { 7024 xmlErrMemory(ctxt, "New Doc failed"); 7025 return; 7026 } 7027 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7028 } 7029 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7030 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7031 7032 ctxt->instate = XML_PARSER_DTD; 7033 ctxt->external = 1; 7034 SKIP_BLANKS; 7035 while (((RAW == '<') && (NXT(1) == '?')) || 7036 ((RAW == '<') && (NXT(1) == '!')) || 7037 (RAW == '%')) { 7038 const xmlChar *check = CUR_PTR; 7039 unsigned int cons = ctxt->input->consumed; 7040 7041 GROW; 7042 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7043 xmlParseConditionalSections(ctxt); 7044 } else 7045 xmlParseMarkupDecl(ctxt); 7046 SKIP_BLANKS; 7047 7048 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7049 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7050 break; 7051 } 7052 } 7053 7054 if (RAW != 0) { 7055 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7056 } 7057 7058 } 7059 
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * Parse a reference found in content and hand the result over to the
 * application. Depending on the kind of reference and on the parser
 * settings this ends up as a characters() SAX callback (character
 * references and predefined entities), as a reference() SAX callback,
 * or as nodes added under ctxt->node when entities are substituted.
 *
 * Nothing is done if the current character is not '&'.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[16];
        /*
         * Character following "&#", read before xmlParseCharRef() is
         * called; used below to tell the hexadecimal notation apart.
         */
        int hex = NXT(2);
        int value = xmlParseCharRef(ctxt);

        /* A value of 0 means there is nothing to report */
        if (value == 0)
            return;
        if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
            /*
             * So we are using non-UTF-8 buffers
             * Check that the char fit on 8bits, if not
             * generate a CharRef.
             */
            if (value <= 0xFF) {
                out[0] = value;
                out[1] = 0;
                if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->characters(ctxt->userData, out, 1);
            } else {
                /* Rebuild the reference using the notation that was read */
                if ((hex == 'x') || (hex == 'X'))
                    snprintf((char *)out, sizeof(out), "#x%X", value);
                else
                    snprintf((char *)out, sizeof(out), "#%d", value);
                if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->reference(ctxt->userData, out);
            }
        } else {
            /*
             * Just encode the value in UTF-8
             */
            COPY_BUF(0 ,out, i, value);
            out[i] = 0;
            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->characters(ctxt->userData, out, i);
        }
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    if (!ctxt->wellFormed)
        return;
    /* Remember whether the entity was already checked before this call */
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity triggers a parsing phase
     * where ent->children is filled with the result of the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if (((ent->checked == 0) ||
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
        /* Entity reference counter before parsing the entity content */
        unsigned long oldnbent = ctxt->nbentities, diff;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                           user_data, ctxt->depth, ent->URI,
                                           ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Store the number of entities needing parsing for this entity
         * content and do checkings.
         * ent->checked is set to twice that number (plus one, capped at
         * INT_MAX / 2); its lowest bit is then set when the raw content
         * contains a '<'.
         */
        diff = ctxt->nbentities - oldnbent + 1;
        if (diff > INT_MAX / 2)
            diff = INT_MAX / 2;
        ent->checked = diff * 2;
        if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
            ent->checked |= 1;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlHaltParser(ctxt);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
             (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
                (ent->children == NULL)) {
                ent->children = list;
                /*
                 * Prune it directly in the generated document
                 * except for single text nodes.
                 */
                if ((ctxt->replaceEntities == 0) ||
                    (ctxt->parseMode == XML_PARSE_READER) ||
                    ((list->type == XML_TEXT_NODE) &&
                     (list->next == NULL))) {
                    /* The entity keeps the nodes and is their parent */
                    ent->owner = 1;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ent;
                        xmlSetTreeDoc(list, ent->doc);
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                    list = NULL;
                } else {
                    /*
                     * The nodes are parented to the current node of the
                     * document; list is reset to the first of them.
                     */
                    ent->owner = 0;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ctxt->node;
                        list->doc = ctxt->myDoc;
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                    list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                      xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
                }
            } else {
                /* The entity already has children: drop the new list */
                xmlFreeNodeList(list);
                list = NULL;
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                     "Entity '%s' failed to parse\n", ent->name);
            /* Empty the content of the entity which failed to parse */
            if (ent->content != NULL)
                ent->content[0] = 0;
            xmlParserEntityCheck(ctxt, 0, ent, 0);
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }
        /* Make sure the entity is flagged as checked from now on */
        if (ent->checked == 0)
            ent->checked = 2;

        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
        was_checked = 0;
    } else if (ent->checked != 1) {
        /*
         * Entity checked previously: account for the entity references
         * recorded in ent->checked.
         */
        ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * through parsing for first checking, go through the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            /* Same parsing as above, but without collecting a node list */
            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * From here on the entity has children (the case without children
     * returned above). When entities are not substituted, only report
     * the reference to the application.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL)) {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if ((ctxt->node != NULL) && (ent->children != NULL)) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                if (ctxt->parseMode == XML_PARSE_READER)
                    ent->owner = 1;
                 */

                /*
                 * Add a copy of each child of the entity under the
                 * current node; the entity keeps the originals.
                 */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if ((list == NULL) || (ctxt->inputNr > 0)) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* Detach the original node before moving it */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        /*
                         * The copy goes to the entity, the original
                         * goes under the current node.
                         */
                        xmlAddChild((xmlNodePtr) ent, nw);
                        xmlAddChild(ctxt->node, cur);
                    }
                    if (cur == last)
                        break;
                    cur = next;
                }
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}

/**
 * xmlParseEntityRef:
 * @ctxt:  an XML parser context
 *
 * parse ENTITY references declarations
 *
 * [68] EntityRef ::= '&' Name ';'
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", the Name given in the entity reference
 * must match that in an entity declaration, except that
well-formed 7508 * documents need not declare any of the following entities: amp, lt, 7509 * gt, apos, quot. The declaration of a parameter entity must precede 7510 * any reference to it. Similarly, the declaration of a general entity 7511 * must precede any reference to it which appears in a default value in an 7512 * attribute-list declaration. Note that if entities are declared in the 7513 * external subset or in external parameter entities, a non-validating 7514 * processor is not obligated to read and process their declarations; 7515 * for such documents, the rule that an entity must be declared is a 7516 * well-formedness constraint only if standalone='yes'. 7517 * 7518 * [ WFC: Parsed Entity ] 7519 * An entity reference must not contain the name of an unparsed entity 7520 * 7521 * Returns the xmlEntityPtr if found, or NULL otherwise. 7522 */ 7523 xmlEntityPtr 7524 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7525 const xmlChar *name; 7526 xmlEntityPtr ent = NULL; 7527 7528 GROW; 7529 if (ctxt->instate == XML_PARSER_EOF) 7530 return(NULL); 7531 7532 if (RAW != '&') 7533 return(NULL); 7534 NEXT; 7535 name = xmlParseName(ctxt); 7536 if (name == NULL) { 7537 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7538 "xmlParseEntityRef: no name\n"); 7539 return(NULL); 7540 } 7541 if (RAW != ';') { 7542 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7543 return(NULL); 7544 } 7545 NEXT; 7546 7547 /* 7548 * Predefined entities override any extra definition 7549 */ 7550 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7551 ent = xmlGetPredefinedEntity(name); 7552 if (ent != NULL) 7553 return(ent); 7554 } 7555 7556 /* 7557 * Increase the number of entity references parsed 7558 */ 7559 ctxt->nbentities++; 7560 7561 /* 7562 * Ask first SAX for entity resolution, otherwise try the 7563 * entities which may have stored in the parser context. 
7564 */ 7565 if (ctxt->sax != NULL) { 7566 if (ctxt->sax->getEntity != NULL) 7567 ent = ctxt->sax->getEntity(ctxt->userData, name); 7568 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7569 (ctxt->options & XML_PARSE_OLDSAX)) 7570 ent = xmlGetPredefinedEntity(name); 7571 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7572 (ctxt->userData==ctxt)) { 7573 ent = xmlSAX2GetEntity(ctxt, name); 7574 } 7575 } 7576 if (ctxt->instate == XML_PARSER_EOF) 7577 return(NULL); 7578 /* 7579 * [ WFC: Entity Declared ] 7580 * In a document without any DTD, a document with only an 7581 * internal DTD subset which contains no parameter entity 7582 * references, or a document with "standalone='yes'", the 7583 * Name given in the entity reference must match that in an 7584 * entity declaration, except that well-formed documents 7585 * need not declare any of the following entities: amp, lt, 7586 * gt, apos, quot. 7587 * The declaration of a parameter entity must precede any 7588 * reference to it. 7589 * Similarly, the declaration of a general entity must 7590 * precede any reference to it which appears in a default 7591 * value in an attribute-list declaration. Note that if 7592 * entities are declared in the external subset or in 7593 * external parameter entities, a non-validating processor 7594 * is not obligated to read and process their declarations; 7595 * for such documents, the rule that an entity must be 7596 * declared is a well-formedness constraint only if 7597 * standalone='yes'. 
7598 */ 7599 if (ent == NULL) { 7600 if ((ctxt->standalone == 1) || 7601 ((ctxt->hasExternalSubset == 0) && 7602 (ctxt->hasPErefs == 0))) { 7603 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7604 "Entity '%s' not defined\n", name); 7605 } else { 7606 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7607 "Entity '%s' not defined\n", name); 7608 if ((ctxt->inSubset == 0) && 7609 (ctxt->sax != NULL) && 7610 (ctxt->sax->reference != NULL)) { 7611 ctxt->sax->reference(ctxt->userData, name); 7612 } 7613 } 7614 xmlParserEntityCheck(ctxt, 0, ent, 0); 7615 ctxt->valid = 0; 7616 } 7617 7618 /* 7619 * [ WFC: Parsed Entity ] 7620 * An entity reference must not contain the name of an 7621 * unparsed entity 7622 */ 7623 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7624 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7625 "Entity reference to unparsed entity %s\n", name); 7626 } 7627 7628 /* 7629 * [ WFC: No External Entity References ] 7630 * Attribute values cannot contain direct or indirect 7631 * entity references to external entities. 7632 */ 7633 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7634 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7635 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7636 "Attribute references external entity '%s'\n", name); 7637 } 7638 /* 7639 * [ WFC: No < in Attribute Values ] 7640 * The replacement text of any entity referred to directly or 7641 * indirectly in an attribute value (other than "<") must 7642 * not contain a <. 7643 */ 7644 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7645 (ent != NULL) && 7646 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7647 if (((ent->checked & 1) || (ent->checked == 0)) && 7648 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7649 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7650 "'<' in entity '%s' is not allowed in attributes values\n", name); 7651 } 7652 } 7653 7654 /* 7655 * Internal check, no parameter entities here ... 
7656 */ 7657 else { 7658 switch (ent->etype) { 7659 case XML_INTERNAL_PARAMETER_ENTITY: 7660 case XML_EXTERNAL_PARAMETER_ENTITY: 7661 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7662 "Attempt to reference the parameter entity '%s'\n", 7663 name); 7664 break; 7665 default: 7666 break; 7667 } 7668 } 7669 7670 /* 7671 * [ WFC: No Recursion ] 7672 * A parsed entity must not contain a recursive reference 7673 * to itself, either directly or indirectly. 7674 * Done somewhere else 7675 */ 7676 return(ent); 7677 } 7678 7679 /** 7680 * xmlParseStringEntityRef: 7681 * @ctxt: an XML parser context 7682 * @str: a pointer to an index in the string 7683 * 7684 * parse ENTITY references declarations, but this version parses it from 7685 * a string value. 7686 * 7687 * [68] EntityRef ::= '&' Name ';' 7688 * 7689 * [ WFC: Entity Declared ] 7690 * In a document without any DTD, a document with only an internal DTD 7691 * subset which contains no parameter entity references, or a document 7692 * with "standalone='yes'", the Name given in the entity reference 7693 * must match that in an entity declaration, except that well-formed 7694 * documents need not declare any of the following entities: amp, lt, 7695 * gt, apos, quot. The declaration of a parameter entity must precede 7696 * any reference to it. Similarly, the declaration of a general entity 7697 * must precede any reference to it which appears in a default value in an 7698 * attribute-list declaration. Note that if entities are declared in the 7699 * external subset or in external parameter entities, a non-validating 7700 * processor is not obligated to read and process their declarations; 7701 * for such documents, the rule that an entity must be declared is a 7702 * well-formedness constraint only if standalone='yes'. 7703 * 7704 * [ WFC: Parsed Entity ] 7705 * An entity reference must not contain the name of an unparsed entity 7706 * 7707 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7708 * is updated to the current location in the string. 7709 */ 7710 static xmlEntityPtr 7711 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7712 xmlChar *name; 7713 const xmlChar *ptr; 7714 xmlChar cur; 7715 xmlEntityPtr ent = NULL; 7716 7717 if ((str == NULL) || (*str == NULL)) 7718 return(NULL); 7719 ptr = *str; 7720 cur = *ptr; 7721 if (cur != '&') 7722 return(NULL); 7723 7724 ptr++; 7725 name = xmlParseStringName(ctxt, &ptr); 7726 if (name == NULL) { 7727 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7728 "xmlParseStringEntityRef: no name\n"); 7729 *str = ptr; 7730 return(NULL); 7731 } 7732 if (*ptr != ';') { 7733 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7734 xmlFree(name); 7735 *str = ptr; 7736 return(NULL); 7737 } 7738 ptr++; 7739 7740 7741 /* 7742 * Predefined entities override any extra definition 7743 */ 7744 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7745 ent = xmlGetPredefinedEntity(name); 7746 if (ent != NULL) { 7747 xmlFree(name); 7748 *str = ptr; 7749 return(ent); 7750 } 7751 } 7752 7753 /* 7754 * Increase the number of entity references parsed 7755 */ 7756 ctxt->nbentities++; 7757 7758 /* 7759 * Ask first SAX for entity resolution, otherwise try the 7760 * entities which may have stored in the parser context. 
7761 */ 7762 if (ctxt->sax != NULL) { 7763 if (ctxt->sax->getEntity != NULL) 7764 ent = ctxt->sax->getEntity(ctxt->userData, name); 7765 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7766 ent = xmlGetPredefinedEntity(name); 7767 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7768 ent = xmlSAX2GetEntity(ctxt, name); 7769 } 7770 } 7771 if (ctxt->instate == XML_PARSER_EOF) { 7772 xmlFree(name); 7773 return(NULL); 7774 } 7775 7776 /* 7777 * [ WFC: Entity Declared ] 7778 * In a document without any DTD, a document with only an 7779 * internal DTD subset which contains no parameter entity 7780 * references, or a document with "standalone='yes'", the 7781 * Name given in the entity reference must match that in an 7782 * entity declaration, except that well-formed documents 7783 * need not declare any of the following entities: amp, lt, 7784 * gt, apos, quot. 7785 * The declaration of a parameter entity must precede any 7786 * reference to it. 7787 * Similarly, the declaration of a general entity must 7788 * precede any reference to it which appears in a default 7789 * value in an attribute-list declaration. Note that if 7790 * entities are declared in the external subset or in 7791 * external parameter entities, a non-validating processor 7792 * is not obligated to read and process their declarations; 7793 * for such documents, the rule that an entity must be 7794 * declared is a well-formedness constraint only if 7795 * standalone='yes'. 7796 */ 7797 if (ent == NULL) { 7798 if ((ctxt->standalone == 1) || 7799 ((ctxt->hasExternalSubset == 0) && 7800 (ctxt->hasPErefs == 0))) { 7801 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7802 "Entity '%s' not defined\n", name); 7803 } else { 7804 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7805 "Entity '%s' not defined\n", 7806 name); 7807 } 7808 xmlParserEntityCheck(ctxt, 0, ent, 0); 7809 /* TODO ? 
check regressions ctxt->valid = 0; */ 7810 } 7811 7812 /* 7813 * [ WFC: Parsed Entity ] 7814 * An entity reference must not contain the name of an 7815 * unparsed entity 7816 */ 7817 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7818 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7819 "Entity reference to unparsed entity %s\n", name); 7820 } 7821 7822 /* 7823 * [ WFC: No External Entity References ] 7824 * Attribute values cannot contain direct or indirect 7825 * entity references to external entities. 7826 */ 7827 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7828 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7829 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7830 "Attribute references external entity '%s'\n", name); 7831 } 7832 /* 7833 * [ WFC: No < in Attribute Values ] 7834 * The replacement text of any entity referred to directly or 7835 * indirectly in an attribute value (other than "<") must 7836 * not contain a <. 7837 */ 7838 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7839 (ent != NULL) && (ent->content != NULL) && 7840 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7841 (xmlStrchr(ent->content, '<'))) { 7842 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7843 "'<' in entity '%s' is not allowed in attributes values\n", 7844 name); 7845 } 7846 7847 /* 7848 * Internal check, no parameter entities here ... 7849 */ 7850 else { 7851 switch (ent->etype) { 7852 case XML_INTERNAL_PARAMETER_ENTITY: 7853 case XML_EXTERNAL_PARAMETER_ENTITY: 7854 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7855 "Attempt to reference the parameter entity '%s'\n", 7856 name); 7857 break; 7858 default: 7859 break; 7860 } 7861 } 7862 7863 /* 7864 * [ WFC: No Recursion ] 7865 * A parsed entity must not contain a recursive reference 7866 * to itself, either directly or indirectly. 
7867 * Done somewhere else 7868 */ 7869 7870 xmlFree(name); 7871 *str = ptr; 7872 return(ent); 7873 } 7874 7875 /** 7876 * xmlParsePEReference: 7877 * @ctxt: an XML parser context 7878 * 7879 * parse PEReference declarations 7880 * The entity content is handled directly by pushing it's content as 7881 * a new input stream. 7882 * 7883 * [69] PEReference ::= '%' Name ';' 7884 * 7885 * [ WFC: No Recursion ] 7886 * A parsed entity must not contain a recursive 7887 * reference to itself, either directly or indirectly. 7888 * 7889 * [ WFC: Entity Declared ] 7890 * In a document without any DTD, a document with only an internal DTD 7891 * subset which contains no parameter entity references, or a document 7892 * with "standalone='yes'", ... ... The declaration of a parameter 7893 * entity must precede any reference to it... 7894 * 7895 * [ VC: Entity Declared ] 7896 * In a document with an external subset or external parameter entities 7897 * with "standalone='no'", ... ... The declaration of a parameter entity 7898 * must precede any reference to it... 7899 * 7900 * [ WFC: In DTD ] 7901 * Parameter-entity references may only appear in the DTD. 7902 * NOTE: misleading but this is handled. 
7903 */ 7904 void 7905 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7906 { 7907 const xmlChar *name; 7908 xmlEntityPtr entity = NULL; 7909 xmlParserInputPtr input; 7910 7911 if (RAW != '%') 7912 return; 7913 NEXT; 7914 name = xmlParseName(ctxt); 7915 if (name == NULL) { 7916 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n"); 7917 return; 7918 } 7919 if (xmlParserDebugEntities) 7920 xmlGenericError(xmlGenericErrorContext, 7921 "PEReference: %s\n", name); 7922 if (RAW != ';') { 7923 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 7924 return; 7925 } 7926 7927 NEXT; 7928 7929 /* 7930 * Increase the number of entity references parsed 7931 */ 7932 ctxt->nbentities++; 7933 7934 /* 7935 * Request the entity from SAX 7936 */ 7937 if ((ctxt->sax != NULL) && 7938 (ctxt->sax->getParameterEntity != NULL)) 7939 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7940 if (ctxt->instate == XML_PARSER_EOF) 7941 return; 7942 if (entity == NULL) { 7943 /* 7944 * [ WFC: Entity Declared ] 7945 * In a document without any DTD, a document with only an 7946 * internal DTD subset which contains no parameter entity 7947 * references, or a document with "standalone='yes'", ... 7948 * ... The declaration of a parameter entity must precede 7949 * any reference to it... 7950 */ 7951 if ((ctxt->standalone == 1) || 7952 ((ctxt->hasExternalSubset == 0) && 7953 (ctxt->hasPErefs == 0))) { 7954 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7955 "PEReference: %%%s; not found\n", 7956 name); 7957 } else { 7958 /* 7959 * [ VC: Entity Declared ] 7960 * In a document with an external subset or external 7961 * parameter entities with "standalone='no'", ... 7962 * ... The declaration of a parameter entity must 7963 * precede any reference to it... 
7964 */ 7965 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 7966 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 7967 "PEReference: %%%s; not found\n", 7968 name, NULL); 7969 } else 7970 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7971 "PEReference: %%%s; not found\n", 7972 name, NULL); 7973 ctxt->valid = 0; 7974 } 7975 xmlParserEntityCheck(ctxt, 0, NULL, 0); 7976 } else { 7977 /* 7978 * Internal checking in case the entity quest barfed 7979 */ 7980 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7981 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7982 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7983 "Internal: %%%s; is not a parameter entity\n", 7984 name, NULL); 7985 } else { 7986 xmlChar start[4]; 7987 xmlCharEncoding enc; 7988 7989 if (xmlParserEntityCheck(ctxt, 0, entity, 0)) 7990 return; 7991 7992 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7993 ((ctxt->options & XML_PARSE_NOENT) == 0) && 7994 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 7995 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 7996 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 7997 (ctxt->replaceEntities == 0) && 7998 (ctxt->validate == 0)) 7999 return; 8000 8001 input = xmlNewEntityInputStream(ctxt, entity); 8002 if (xmlPushInput(ctxt, input) < 0) { 8003 xmlFreeInputStream(input); 8004 return; 8005 } 8006 8007 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { 8008 /* 8009 * Get the 4 first bytes and decode the charset 8010 * if enc != XML_CHAR_ENCODING_NONE 8011 * plug some encoding conversion routines. 8012 * Note that, since we may have some non-UTF8 8013 * encoding (like UTF16, bug 135229), the 'length' 8014 * is not known, but we can calculate based upon 8015 * the amount of data in the buffer. 
8016 */ 8017 GROW 8018 if (ctxt->instate == XML_PARSER_EOF) 8019 return; 8020 if ((ctxt->input->end - ctxt->input->cur)>=4) { 8021 start[0] = RAW; 8022 start[1] = NXT(1); 8023 start[2] = NXT(2); 8024 start[3] = NXT(3); 8025 enc = xmlDetectCharEncoding(start, 4); 8026 if (enc != XML_CHAR_ENCODING_NONE) { 8027 xmlSwitchEncoding(ctxt, enc); 8028 } 8029 } 8030 8031 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8032 (IS_BLANK_CH(NXT(5)))) { 8033 xmlParseTextDecl(ctxt); 8034 } 8035 } 8036 } 8037 } 8038 ctxt->hasPErefs = 1; 8039 } 8040 8041 /** 8042 * xmlLoadEntityContent: 8043 * @ctxt: an XML parser context 8044 * @entity: an unloaded system entity 8045 * 8046 * Load the original content of the given system entity from the 8047 * ExternalID/SystemID given. This is to be used for Included in Literal 8048 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8049 * 8050 * Returns 0 in case of success and -1 in case of failure 8051 */ 8052 static int 8053 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8054 xmlParserInputPtr input; 8055 xmlBufferPtr buf; 8056 int l, c; 8057 int count = 0; 8058 8059 if ((ctxt == NULL) || (entity == NULL) || 8060 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8061 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8062 (entity->content != NULL)) { 8063 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8064 "xmlLoadEntityContent parameter error"); 8065 return(-1); 8066 } 8067 8068 if (xmlParserDebugEntities) 8069 xmlGenericError(xmlGenericErrorContext, 8070 "Reading %s entity content input\n", entity->name); 8071 8072 buf = xmlBufferCreate(); 8073 if (buf == NULL) { 8074 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8075 "xmlLoadEntityContent parameter error"); 8076 return(-1); 8077 } 8078 8079 input = xmlNewEntityInputStream(ctxt, entity); 8080 if (input == NULL) { 8081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8082 "xmlLoadEntityContent input error"); 8083 xmlBufferFree(buf); 8084 return(-1); 8085 
} 8086 8087 /* 8088 * Push the entity as the current input, read char by char 8089 * saving to the buffer until the end of the entity or an error 8090 */ 8091 if (xmlPushInput(ctxt, input) < 0) { 8092 xmlBufferFree(buf); 8093 return(-1); 8094 } 8095 8096 GROW; 8097 c = CUR_CHAR(l); 8098 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8099 (IS_CHAR(c))) { 8100 xmlBufferAdd(buf, ctxt->input->cur, l); 8101 if (count++ > XML_PARSER_CHUNK_SIZE) { 8102 count = 0; 8103 GROW; 8104 if (ctxt->instate == XML_PARSER_EOF) { 8105 xmlBufferFree(buf); 8106 return(-1); 8107 } 8108 } 8109 NEXTL(l); 8110 c = CUR_CHAR(l); 8111 if (c == 0) { 8112 count = 0; 8113 GROW; 8114 if (ctxt->instate == XML_PARSER_EOF) { 8115 xmlBufferFree(buf); 8116 return(-1); 8117 } 8118 c = CUR_CHAR(l); 8119 } 8120 } 8121 8122 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8123 xmlPopInput(ctxt); 8124 } else if (!IS_CHAR(c)) { 8125 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8126 "xmlLoadEntityContent: invalid char value %d\n", 8127 c); 8128 xmlBufferFree(buf); 8129 return(-1); 8130 } 8131 entity->content = buf->content; 8132 buf->content = NULL; 8133 xmlBufferFree(buf); 8134 8135 return(0); 8136 } 8137 8138 /** 8139 * xmlParseStringPEReference: 8140 * @ctxt: an XML parser context 8141 * @str: a pointer to an index in the string 8142 * 8143 * parse PEReference declarations 8144 * 8145 * [69] PEReference ::= '%' Name ';' 8146 * 8147 * [ WFC: No Recursion ] 8148 * A parsed entity must not contain a recursive 8149 * reference to itself, either directly or indirectly. 8150 * 8151 * [ WFC: Entity Declared ] 8152 * In a document without any DTD, a document with only an internal DTD 8153 * subset which contains no parameter entity references, or a document 8154 * with "standalone='yes'", ... ... The declaration of a parameter 8155 * entity must precede any reference to it... 
8156 * 8157 * [ VC: Entity Declared ] 8158 * In a document with an external subset or external parameter entities 8159 * with "standalone='no'", ... ... The declaration of a parameter entity 8160 * must precede any reference to it... 8161 * 8162 * [ WFC: In DTD ] 8163 * Parameter-entity references may only appear in the DTD. 8164 * NOTE: misleading but this is handled. 8165 * 8166 * Returns the string of the entity content. 8167 * str is updated to the current value of the index 8168 */ 8169 static xmlEntityPtr 8170 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8171 const xmlChar *ptr; 8172 xmlChar cur; 8173 xmlChar *name; 8174 xmlEntityPtr entity = NULL; 8175 8176 if ((str == NULL) || (*str == NULL)) return(NULL); 8177 ptr = *str; 8178 cur = *ptr; 8179 if (cur != '%') 8180 return(NULL); 8181 ptr++; 8182 name = xmlParseStringName(ctxt, &ptr); 8183 if (name == NULL) { 8184 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8185 "xmlParseStringPEReference: no name\n"); 8186 *str = ptr; 8187 return(NULL); 8188 } 8189 cur = *ptr; 8190 if (cur != ';') { 8191 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8192 xmlFree(name); 8193 *str = ptr; 8194 return(NULL); 8195 } 8196 ptr++; 8197 8198 /* 8199 * Increase the number of entity references parsed 8200 */ 8201 ctxt->nbentities++; 8202 8203 /* 8204 * Request the entity from SAX 8205 */ 8206 if ((ctxt->sax != NULL) && 8207 (ctxt->sax->getParameterEntity != NULL)) 8208 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8209 if (ctxt->instate == XML_PARSER_EOF) { 8210 xmlFree(name); 8211 *str = ptr; 8212 return(NULL); 8213 } 8214 if (entity == NULL) { 8215 /* 8216 * [ WFC: Entity Declared ] 8217 * In a document without any DTD, a document with only an 8218 * internal DTD subset which contains no parameter entity 8219 * references, or a document with "standalone='yes'", ... 8220 * ... The declaration of a parameter entity must precede 8221 * any reference to it... 
8222 */ 8223 if ((ctxt->standalone == 1) || 8224 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8225 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8226 "PEReference: %%%s; not found\n", name); 8227 } else { 8228 /* 8229 * [ VC: Entity Declared ] 8230 * In a document with an external subset or external 8231 * parameter entities with "standalone='no'", ... 8232 * ... The declaration of a parameter entity must 8233 * precede any reference to it... 8234 */ 8235 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8236 "PEReference: %%%s; not found\n", 8237 name, NULL); 8238 ctxt->valid = 0; 8239 } 8240 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8241 } else { 8242 /* 8243 * Internal checking in case the entity quest barfed 8244 */ 8245 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8246 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8247 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8248 "%%%s; is not a parameter entity\n", 8249 name, NULL); 8250 } 8251 } 8252 ctxt->hasPErefs = 1; 8253 xmlFree(name); 8254 *str = ptr; 8255 return(entity); 8256 } 8257 8258 /** 8259 * xmlParseDocTypeDecl: 8260 * @ctxt: an XML parser context 8261 * 8262 * parse a DOCTYPE declaration 8263 * 8264 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8265 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8266 * 8267 * [ VC: Root Element Type ] 8268 * The Name in the document type declaration must match the element 8269 * type of the root element. 8270 */ 8271 8272 void 8273 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8274 const xmlChar *name = NULL; 8275 xmlChar *ExternalID = NULL; 8276 xmlChar *URI = NULL; 8277 8278 /* 8279 * We know that '<!DOCTYPE' has been detected. 8280 */ 8281 SKIP(9); 8282 8283 SKIP_BLANKS; 8284 8285 /* 8286 * Parse the DOCTYPE name. 
8287 */ 8288 name = xmlParseName(ctxt); 8289 if (name == NULL) { 8290 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8291 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8292 } 8293 ctxt->intSubName = name; 8294 8295 SKIP_BLANKS; 8296 8297 /* 8298 * Check for SystemID and ExternalID 8299 */ 8300 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8301 8302 if ((URI != NULL) || (ExternalID != NULL)) { 8303 ctxt->hasExternalSubset = 1; 8304 } 8305 ctxt->extSubURI = URI; 8306 ctxt->extSubSystem = ExternalID; 8307 8308 SKIP_BLANKS; 8309 8310 /* 8311 * Create and update the internal subset. 8312 */ 8313 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8314 (!ctxt->disableSAX)) 8315 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8316 if (ctxt->instate == XML_PARSER_EOF) 8317 return; 8318 8319 /* 8320 * Is there any internal subset declarations ? 8321 * they are handled separately in xmlParseInternalSubset() 8322 */ 8323 if (RAW == '[') 8324 return; 8325 8326 /* 8327 * We should be at the end of the DOCTYPE declaration. 8328 */ 8329 if (RAW != '>') { 8330 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8331 } 8332 NEXT; 8333 } 8334 8335 /** 8336 * xmlParseInternalSubset: 8337 * @ctxt: an XML parser context 8338 * 8339 * parse the internal subset declaration 8340 * 8341 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8342 */ 8343 8344 static void 8345 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8346 /* 8347 * Is there any DTD definition ? 8348 */ 8349 if (RAW == '[') { 8350 int baseInputNr = ctxt->inputNr; 8351 ctxt->instate = XML_PARSER_DTD; 8352 NEXT; 8353 /* 8354 * Parse the succession of Markup declarations and 8355 * PEReferences. 
8356 * Subsequence (markupdecl | PEReference | S)* 8357 */ 8358 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && 8359 (ctxt->instate != XML_PARSER_EOF)) { 8360 const xmlChar *check = CUR_PTR; 8361 unsigned int cons = ctxt->input->consumed; 8362 8363 SKIP_BLANKS; 8364 xmlParseMarkupDecl(ctxt); 8365 xmlParsePEReference(ctxt); 8366 8367 /* 8368 * Conditional sections are allowed from external entities included 8369 * by PE References in the internal subset. 8370 */ 8371 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) && 8372 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 8373 xmlParseConditionalSections(ctxt); 8374 } 8375 8376 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8377 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8378 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8379 if (ctxt->inputNr > baseInputNr) 8380 xmlPopInput(ctxt); 8381 else 8382 break; 8383 } 8384 } 8385 if (RAW == ']') { 8386 NEXT; 8387 SKIP_BLANKS; 8388 } 8389 } 8390 8391 /* 8392 * We should be at the end of the DOCTYPE declaration. 8393 */ 8394 if (RAW != '>') { 8395 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8396 return; 8397 } 8398 NEXT; 8399 } 8400 8401 #ifdef LIBXML_SAX1_ENABLED 8402 /** 8403 * xmlParseAttribute: 8404 * @ctxt: an XML parser context 8405 * @value: a xmlChar ** used to store the value of the attribute 8406 * 8407 * parse an attribute 8408 * 8409 * [41] Attribute ::= Name Eq AttValue 8410 * 8411 * [ WFC: No External Entity References ] 8412 * Attribute values cannot contain direct or indirect entity references 8413 * to external entities. 8414 * 8415 * [ WFC: No < in Attribute Values ] 8416 * The replacement text of any entity referred to directly or indirectly in 8417 * an attribute value (other than "<") must not contain a <. 8418 * 8419 * [ VC: Attribute Value Type ] 8420 * The attribute must have been declared; the value must be of the type 8421 * declared for it. 8422 * 8423 * [25] Eq ::= S? '=' S? 
8424 * 8425 * With namespace: 8426 * 8427 * [NS 11] Attribute ::= QName Eq AttValue 8428 * 8429 * Also the case QName == xmlns:??? is handled independently as a namespace 8430 * definition. 8431 * 8432 * Returns the attribute name, and the value in *value. 8433 */ 8434 8435 const xmlChar * 8436 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8437 const xmlChar *name; 8438 xmlChar *val; 8439 8440 *value = NULL; 8441 GROW; 8442 name = xmlParseName(ctxt); 8443 if (name == NULL) { 8444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8445 "error parsing attribute name\n"); 8446 return(NULL); 8447 } 8448 8449 /* 8450 * read the value 8451 */ 8452 SKIP_BLANKS; 8453 if (RAW == '=') { 8454 NEXT; 8455 SKIP_BLANKS; 8456 val = xmlParseAttValue(ctxt); 8457 ctxt->instate = XML_PARSER_CONTENT; 8458 } else { 8459 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8460 "Specification mandates value for attribute %s\n", name); 8461 return(NULL); 8462 } 8463 8464 /* 8465 * Check that xml:lang conforms to the specification 8466 * No more registered as an error, just generate a warning now 8467 * since this was deprecated in XML second edition 8468 */ 8469 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8470 if (!xmlCheckLanguageID(val)) { 8471 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8472 "Malformed value for xml:lang : %s\n", 8473 val, NULL); 8474 } 8475 } 8476 8477 /* 8478 * Check that xml:space conforms to the specification 8479 */ 8480 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8481 if (xmlStrEqual(val, BAD_CAST "default")) 8482 *(ctxt->space) = 0; 8483 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8484 *(ctxt->space) = 1; 8485 else { 8486 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8487 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8488 val, NULL); 8489 } 8490 } 8491 8492 *value = val; 8493 return(name); 8494 } 8495 8496 /** 8497 * xmlParseStartTag: 8498 * @ctxt: an XML parser context 8499 * 8500 * parse a start 
of tag either for rule element or 8501 * EmptyElement. In both case we don't parse the tag closing chars. 8502 * 8503 * [40] STag ::= '<' Name (S Attribute)* S? '>' 8504 * 8505 * [ WFC: Unique Att Spec ] 8506 * No attribute name may appear more than once in the same start-tag or 8507 * empty-element tag. 8508 * 8509 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8510 * 8511 * [ WFC: Unique Att Spec ] 8512 * No attribute name may appear more than once in the same start-tag or 8513 * empty-element tag. 8514 * 8515 * With namespace: 8516 * 8517 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8518 * 8519 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8520 * 8521 * Returns the element name parsed 8522 */ 8523 8524 const xmlChar * 8525 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8526 const xmlChar *name; 8527 const xmlChar *attname; 8528 xmlChar *attvalue; 8529 const xmlChar **atts = ctxt->atts; 8530 int nbatts = 0; 8531 int maxatts = ctxt->maxatts; 8532 int i; 8533 8534 if (RAW != '<') return(NULL); 8535 NEXT1; 8536 8537 name = xmlParseName(ctxt); 8538 if (name == NULL) { 8539 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8540 "xmlParseStartTag: invalid element name\n"); 8541 return(NULL); 8542 } 8543 8544 /* 8545 * Now parse the attributes, it ends up with the ending 8546 * 8547 * (S Attribute)* S? 8548 */ 8549 SKIP_BLANKS; 8550 GROW; 8551 8552 while (((RAW != '>') && 8553 ((RAW != '/') || (NXT(1) != '>')) && 8554 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8555 const xmlChar *q = CUR_PTR; 8556 unsigned int cons = ctxt->input->consumed; 8557 8558 attname = xmlParseAttribute(ctxt, &attvalue); 8559 if ((attname != NULL) && (attvalue != NULL)) { 8560 /* 8561 * [ WFC: Unique Att Spec ] 8562 * No attribute name may appear more than once in the same 8563 * start-tag or empty-element tag. 
8564 */ 8565 for (i = 0; i < nbatts;i += 2) { 8566 if (xmlStrEqual(atts[i], attname)) { 8567 xmlErrAttributeDup(ctxt, NULL, attname); 8568 xmlFree(attvalue); 8569 goto failed; 8570 } 8571 } 8572 /* 8573 * Add the pair to atts 8574 */ 8575 if (atts == NULL) { 8576 maxatts = 22; /* allow for 10 attrs by default */ 8577 atts = (const xmlChar **) 8578 xmlMalloc(maxatts * sizeof(xmlChar *)); 8579 if (atts == NULL) { 8580 xmlErrMemory(ctxt, NULL); 8581 if (attvalue != NULL) 8582 xmlFree(attvalue); 8583 goto failed; 8584 } 8585 ctxt->atts = atts; 8586 ctxt->maxatts = maxatts; 8587 } else if (nbatts + 4 > maxatts) { 8588 const xmlChar **n; 8589 8590 maxatts *= 2; 8591 n = (const xmlChar **) xmlRealloc((void *) atts, 8592 maxatts * sizeof(const xmlChar *)); 8593 if (n == NULL) { 8594 xmlErrMemory(ctxt, NULL); 8595 if (attvalue != NULL) 8596 xmlFree(attvalue); 8597 goto failed; 8598 } 8599 atts = n; 8600 ctxt->atts = atts; 8601 ctxt->maxatts = maxatts; 8602 } 8603 atts[nbatts++] = attname; 8604 atts[nbatts++] = attvalue; 8605 atts[nbatts] = NULL; 8606 atts[nbatts + 1] = NULL; 8607 } else { 8608 if (attvalue != NULL) 8609 xmlFree(attvalue); 8610 } 8611 8612 failed: 8613 8614 GROW 8615 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8616 break; 8617 if (SKIP_BLANKS == 0) { 8618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8619 "attributes construct error\n"); 8620 } 8621 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8622 (attname == NULL) && (attvalue == NULL)) { 8623 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8624 "xmlParseStartTag: problem parsing attributes\n"); 8625 break; 8626 } 8627 SHRINK; 8628 GROW; 8629 } 8630 8631 /* 8632 * SAX: Start of Element ! 
8633 */ 8634 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8635 (!ctxt->disableSAX)) { 8636 if (nbatts > 0) 8637 ctxt->sax->startElement(ctxt->userData, name, atts); 8638 else 8639 ctxt->sax->startElement(ctxt->userData, name, NULL); 8640 } 8641 8642 if (atts != NULL) { 8643 /* Free only the content strings */ 8644 for (i = 1;i < nbatts;i+=2) 8645 if (atts[i] != NULL) 8646 xmlFree((xmlChar *) atts[i]); 8647 } 8648 return(name); 8649 } 8650 8651 /** 8652 * xmlParseEndTag1: 8653 * @ctxt: an XML parser context 8654 * @line: line of the start tag 8655 * @nsNr: number of namespaces on the start tag 8656 * 8657 * parse an end of tag 8658 * 8659 * [42] ETag ::= '</' Name S? '>' 8660 * 8661 * With namespace 8662 * 8663 * [NS 9] ETag ::= '</' QName S? '>' 8664 */ 8665 8666 static void 8667 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8668 const xmlChar *name; 8669 8670 GROW; 8671 if ((RAW != '<') || (NXT(1) != '/')) { 8672 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8673 "xmlParseEndTag: '</' not found\n"); 8674 return; 8675 } 8676 SKIP(2); 8677 8678 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8679 8680 /* 8681 * We should definitely be at the ending "S? '>'" part 8682 */ 8683 GROW; 8684 SKIP_BLANKS; 8685 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8686 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8687 } else 8688 NEXT1; 8689 8690 /* 8691 * [ WFC: Element Type Match ] 8692 * The Name in an element's end-tag must match the element type in the 8693 * start-tag. 
8694 * 8695 */ 8696 if (name != (xmlChar*)1) { 8697 if (name == NULL) name = BAD_CAST "unparsable"; 8698 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8699 "Opening and ending tag mismatch: %s line %d and %s\n", 8700 ctxt->name, line, name); 8701 } 8702 8703 /* 8704 * SAX: End of Tag 8705 */ 8706 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8707 (!ctxt->disableSAX)) 8708 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8709 8710 namePop(ctxt); 8711 spacePop(ctxt); 8712 return; 8713 } 8714 8715 /** 8716 * xmlParseEndTag: 8717 * @ctxt: an XML parser context 8718 * 8719 * parse an end of tag 8720 * 8721 * [42] ETag ::= '</' Name S? '>' 8722 * 8723 * With namespace 8724 * 8725 * [NS 9] ETag ::= '</' QName S? '>' 8726 */ 8727 8728 void 8729 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8730 xmlParseEndTag1(ctxt, 0); 8731 } 8732 #endif /* LIBXML_SAX1_ENABLED */ 8733 8734 /************************************************************************ 8735 * * 8736 * SAX 2 specific operations * 8737 * * 8738 ************************************************************************/ 8739 8740 /* 8741 * xmlGetNamespace: 8742 * @ctxt: an XML parser context 8743 * @prefix: the prefix to lookup 8744 * 8745 * Lookup the namespace name for the @prefix (which ca be NULL) 8746 * The prefix must come from the @ctxt->dict dictionary 8747 * 8748 * Returns the namespace name or NULL if not bound 8749 */ 8750 static const xmlChar * 8751 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8752 int i; 8753 8754 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8755 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8756 if (ctxt->nsTab[i] == prefix) { 8757 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8758 return(NULL); 8759 return(ctxt->nsTab[i + 1]); 8760 } 8761 return(NULL); 8762 } 8763 8764 /** 8765 * xmlParseQName: 8766 * @ctxt: an XML parser context 8767 * @prefix: pointer to store the prefix part 8768 * 8769 * parse an XML Namespace QName 8770 * 8771 * [6] 
QName ::= (Prefix ':')? LocalPart
 * [7]  Prefix  ::= NCName
 * [8]  LocalPart  ::= NCName
 *
 * Returns the Name parsed or NULL
 */

static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
    const xmlChar *first;
    const xmlChar *second;
    xmlChar *qname;

    GROW;

    first = xmlParseNCName(ctxt);
    if (first == NULL) {
        /*
         * No NCName here. A leading ':' may still start a plain Name,
         * which is returned unprefixed after a namespace error.
         */
        if (CUR == ':') {
            first = xmlParseName(ctxt);
            if (first != NULL) {
                xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                         "Failed to parse QName '%s'\n", first, NULL, NULL);
                *prefix = NULL;
                return(first);
            }
        }
        return(NULL);
    }

    /* No colon: a simple unprefixed name. */
    if (CUR != ':') {
        *prefix = NULL;
        return(first);
    }

    /* "first" is a prefix candidate; look for the local part. */
    NEXT;
    second = xmlParseNCName(ctxt);
    if (second == NULL) {
        xmlChar *token;
        const xmlChar *whole;

        if (ctxt->instate == XML_PARSER_EOF)
            return(NULL);
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:'\n", first, NULL, NULL);
        /*
         * Recover by gluing whatever Nmtoken follows (possibly nothing)
         * after "first:" and returning the result as an unprefixed name.
         */
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            qname = xmlBuildQName(BAD_CAST "", first, NULL, 0);
        } else {
            qname = xmlBuildQName(token, first, NULL, 0);
            xmlFree(token);
        }
        whole = xmlDictLookup(ctxt->dict, qname, -1);
        if (qname != NULL) xmlFree(qname);
        *prefix = NULL;
        return(whole);
    }

    if (CUR == ':') {
        const xmlChar *rest;

        /*
         * A second colon: keep "first" as the prefix and fold the
         * remainder (possibly empty) into the local part.
         */
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:%s:'\n", first, second, NULL);
        NEXT;
        rest = xmlParseName(ctxt);
        if (rest == NULL) {
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            rest = BAD_CAST "";
        }
        qname = xmlBuildQName(rest, second, NULL, 0);
        second = xmlDictLookup(ctxt->dict, qname, -1);
        if (qname != NULL) xmlFree(qname);
    }

    *prefix = first;
    return(second);
}

/**
 * xmlParseQNameAndCompare:
 * @ctxt:  an
XML parser context 8853 * @name: the localname 8854 * @prefix: the prefix, if any. 8855 * 8856 * parse an XML name and compares for match 8857 * (specialized for endtag parsing) 8858 * 8859 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8860 * and the name for mismatch 8861 */ 8862 8863 static const xmlChar * 8864 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8865 xmlChar const *prefix) { 8866 const xmlChar *cmp; 8867 const xmlChar *in; 8868 const xmlChar *ret; 8869 const xmlChar *prefix2; 8870 8871 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8872 8873 GROW; 8874 in = ctxt->input->cur; 8875 8876 cmp = prefix; 8877 while (*in != 0 && *in == *cmp) { 8878 ++in; 8879 ++cmp; 8880 } 8881 if ((*cmp == 0) && (*in == ':')) { 8882 in++; 8883 cmp = name; 8884 while (*in != 0 && *in == *cmp) { 8885 ++in; 8886 ++cmp; 8887 } 8888 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8889 /* success */ 8890 ctxt->input->col += in - ctxt->input->cur; 8891 ctxt->input->cur = in; 8892 return((const xmlChar*) 1); 8893 } 8894 } 8895 /* 8896 * all strings coms from the dictionary, equality can be done directly 8897 */ 8898 ret = xmlParseQName (ctxt, &prefix2); 8899 if ((ret == name) && (prefix == prefix2)) 8900 return((const xmlChar*) 1); 8901 return ret; 8902 } 8903 8904 /** 8905 * xmlParseAttValueInternal: 8906 * @ctxt: an XML parser context 8907 * @len: attribute len result 8908 * @alloc: whether the attribute was reallocated as a new string 8909 * @normalize: if 1 then further non-CDATA normalization must be done 8910 * 8911 * parse a value for an attribute. 8912 * NOTE: if no normalization is needed, the routine will return pointers 8913 * directly from the data buffer. 
8914 * 8915 * 3.3.3 Attribute-Value Normalization: 8916 * Before the value of an attribute is passed to the application or 8917 * checked for validity, the XML processor must normalize it as follows: 8918 * - a character reference is processed by appending the referenced 8919 * character to the attribute value 8920 * - an entity reference is processed by recursively processing the 8921 * replacement text of the entity 8922 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8923 * appending #x20 to the normalized value, except that only a single 8924 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8925 * parsed entity or the literal entity value of an internal parsed entity 8926 * - other characters are processed by appending them to the normalized value 8927 * If the declared value is not CDATA, then the XML processor must further 8928 * process the normalized attribute value by discarding any leading and 8929 * trailing space (#x20) characters, and by replacing sequences of space 8930 * (#x20) characters by a single space (#x20) character. 8931 * All attributes for which no declaration has been read should be treated 8932 * by a non-validating parser as if declared CDATA. 8933 * 8934 * Returns the AttValue parsed or NULL. The value has to be freed by the 8935 * caller if it was copied, this can be detected by val[*len] == 0. 
8936 */ 8937 8938 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \ 8939 const xmlChar *oldbase = ctxt->input->base;\ 8940 GROW;\ 8941 if (ctxt->instate == XML_PARSER_EOF)\ 8942 return(NULL);\ 8943 if (oldbase != ctxt->input->base) {\ 8944 ptrdiff_t delta = ctxt->input->base - oldbase;\ 8945 start = start + delta;\ 8946 in = in + delta;\ 8947 }\ 8948 end = ctxt->input->end; 8949 8950 static xmlChar * 8951 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8952 int normalize) 8953 { 8954 xmlChar limit = 0; 8955 const xmlChar *in = NULL, *start, *end, *last; 8956 xmlChar *ret = NULL; 8957 int line, col; 8958 8959 GROW; 8960 in = (xmlChar *) CUR_PTR; 8961 line = ctxt->input->line; 8962 col = ctxt->input->col; 8963 if (*in != '"' && *in != '\'') { 8964 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8965 return (NULL); 8966 } 8967 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8968 8969 /* 8970 * try to handle in this routine the most common case where no 8971 * allocation of a new string is required and where content is 8972 * pure ASCII. 
8973 */ 8974 limit = *in++; 8975 col++; 8976 end = ctxt->input->end; 8977 start = in; 8978 if (in >= end) { 8979 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 8980 } 8981 if (normalize) { 8982 /* 8983 * Skip any leading spaces 8984 */ 8985 while ((in < end) && (*in != limit) && 8986 ((*in == 0x20) || (*in == 0x9) || 8987 (*in == 0xA) || (*in == 0xD))) { 8988 if (*in == 0xA) { 8989 line++; col = 1; 8990 } else { 8991 col++; 8992 } 8993 in++; 8994 start = in; 8995 if (in >= end) { 8996 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 8997 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8998 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8999 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9000 "AttValue length too long\n"); 9001 return(NULL); 9002 } 9003 } 9004 } 9005 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9006 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9007 col++; 9008 if ((*in++ == 0x20) && (*in == 0x20)) break; 9009 if (in >= end) { 9010 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9011 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9012 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9013 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9014 "AttValue length too long\n"); 9015 return(NULL); 9016 } 9017 } 9018 } 9019 last = in; 9020 /* 9021 * skip the trailing blanks 9022 */ 9023 while ((last[-1] == 0x20) && (last > start)) last--; 9024 while ((in < end) && (*in != limit) && 9025 ((*in == 0x20) || (*in == 0x9) || 9026 (*in == 0xA) || (*in == 0xD))) { 9027 if (*in == 0xA) { 9028 line++, col = 1; 9029 } else { 9030 col++; 9031 } 9032 in++; 9033 if (in >= end) { 9034 const xmlChar *oldbase = ctxt->input->base; 9035 GROW; 9036 if (ctxt->instate == XML_PARSER_EOF) 9037 return(NULL); 9038 if (oldbase != ctxt->input->base) { 9039 ptrdiff_t delta = ctxt->input->base - oldbase; 9040 start = start + delta; 9041 in = in + delta; 9042 last = last + delta; 9043 } 9044 end = ctxt->input->end; 9045 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9046 
((ctxt->options & XML_PARSE_HUGE) == 0)) { 9047 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9048 "AttValue length too long\n"); 9049 return(NULL); 9050 } 9051 } 9052 } 9053 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9054 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9055 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9056 "AttValue length too long\n"); 9057 return(NULL); 9058 } 9059 if (*in != limit) goto need_complex; 9060 } else { 9061 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9062 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9063 in++; 9064 col++; 9065 if (in >= end) { 9066 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9067 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9068 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9069 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9070 "AttValue length too long\n"); 9071 return(NULL); 9072 } 9073 } 9074 } 9075 last = in; 9076 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9077 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9078 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9079 "AttValue length too long\n"); 9080 return(NULL); 9081 } 9082 if (*in != limit) goto need_complex; 9083 } 9084 in++; 9085 col++; 9086 if (len != NULL) { 9087 *len = last - start; 9088 ret = (xmlChar *) start; 9089 } else { 9090 if (alloc) *alloc = 1; 9091 ret = xmlStrndup(start, last - start); 9092 } 9093 CUR_PTR = in; 9094 ctxt->input->line = line; 9095 ctxt->input->col = col; 9096 if (alloc) *alloc = 0; 9097 return ret; 9098 need_complex: 9099 if (alloc) *alloc = 1; 9100 return xmlParseAttValueComplex(ctxt, len, normalize); 9101 } 9102 9103 /** 9104 * xmlParseAttribute2: 9105 * @ctxt: an XML parser context 9106 * @pref: the element prefix 9107 * @elem: the element name 9108 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9109 * @value: a xmlChar ** used to store the value of the attribute 9110 * @len: an int * to save the length of the attribute 9111 * @alloc: an int * to indicate if 
the attribute was allocated 9112 * 9113 * parse an attribute in the new SAX2 framework. 9114 * 9115 * Returns the attribute name, and the value in *value, . 9116 */ 9117 9118 static const xmlChar * 9119 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9120 const xmlChar * pref, const xmlChar * elem, 9121 const xmlChar ** prefix, xmlChar ** value, 9122 int *len, int *alloc) 9123 { 9124 const xmlChar *name; 9125 xmlChar *val, *internal_val = NULL; 9126 int normalize = 0; 9127 9128 *value = NULL; 9129 GROW; 9130 name = xmlParseQName(ctxt, prefix); 9131 if (name == NULL) { 9132 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9133 "error parsing attribute name\n"); 9134 return (NULL); 9135 } 9136 9137 /* 9138 * get the type if needed 9139 */ 9140 if (ctxt->attsSpecial != NULL) { 9141 int type; 9142 9143 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial, 9144 pref, elem, *prefix, name); 9145 if (type != 0) 9146 normalize = 1; 9147 } 9148 9149 /* 9150 * read the value 9151 */ 9152 SKIP_BLANKS; 9153 if (RAW == '=') { 9154 NEXT; 9155 SKIP_BLANKS; 9156 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9157 if (normalize) { 9158 /* 9159 * Sometimes a second normalisation pass for spaces is needed 9160 * but that only happens if charrefs or entities references 9161 * have been used in the attribute value, i.e. the attribute 9162 * value have been extracted in an allocated string already. 
9163 */ 9164 if (*alloc) { 9165 const xmlChar *val2; 9166 9167 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9168 if ((val2 != NULL) && (val2 != val)) { 9169 xmlFree(val); 9170 val = (xmlChar *) val2; 9171 } 9172 } 9173 } 9174 ctxt->instate = XML_PARSER_CONTENT; 9175 } else { 9176 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9177 "Specification mandates value for attribute %s\n", 9178 name); 9179 return (NULL); 9180 } 9181 9182 if (*prefix == ctxt->str_xml) { 9183 /* 9184 * Check that xml:lang conforms to the specification 9185 * No more registered as an error, just generate a warning now 9186 * since this was deprecated in XML second edition 9187 */ 9188 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9189 internal_val = xmlStrndup(val, *len); 9190 if (!xmlCheckLanguageID(internal_val)) { 9191 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9192 "Malformed value for xml:lang : %s\n", 9193 internal_val, NULL); 9194 } 9195 } 9196 9197 /* 9198 * Check that xml:space conforms to the specification 9199 */ 9200 if (xmlStrEqual(name, BAD_CAST "space")) { 9201 internal_val = xmlStrndup(val, *len); 9202 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9203 *(ctxt->space) = 0; 9204 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9205 *(ctxt->space) = 1; 9206 else { 9207 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9208 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9209 internal_val, NULL); 9210 } 9211 } 9212 if (internal_val) { 9213 xmlFree(internal_val); 9214 } 9215 } 9216 9217 *value = val; 9218 return (name); 9219 } 9220 /** 9221 * xmlParseStartTag2: 9222 * @ctxt: an XML parser context 9223 * 9224 * parse a start of tag either for rule element or 9225 * EmptyElement. In both case we don't parse the tag closing chars. 9226 * This routine is called when running SAX2 parsing 9227 * 9228 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9229 * 9230 * [ WFC: Unique Att Spec ] 9231 * No attribute name may appear more than once in the same start-tag or 9232 * empty-element tag. 9233 * 9234 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9235 * 9236 * [ WFC: Unique Att Spec ] 9237 * No attribute name may appear more than once in the same start-tag or 9238 * empty-element tag. 9239 * 9240 * With namespace: 9241 * 9242 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9243 * 9244 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9245 * 9246 * Returns the element name parsed 9247 */ 9248 9249 static const xmlChar * 9250 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9251 const xmlChar **URI, int *tlen) { 9252 const xmlChar *localname; 9253 const xmlChar *prefix; 9254 const xmlChar *attname; 9255 const xmlChar *aprefix; 9256 const xmlChar *nsname; 9257 xmlChar *attvalue; 9258 const xmlChar **atts = ctxt->atts; 9259 int maxatts = ctxt->maxatts; 9260 int nratts, nbatts, nbdef, inputid; 9261 int i, j, nbNs, attval; 9262 unsigned long cur; 9263 int nsNr = ctxt->nsNr; 9264 9265 if (RAW != '<') return(NULL); 9266 NEXT1; 9267 9268 /* 9269 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9270 * point since the attribute values may be stored as pointers to 9271 * the buffer and calling SHRINK would destroy them ! 9272 * The Shrinking is only possible once the full set of attribute 9273 * callbacks have been done. 9274 */ 9275 SHRINK; 9276 cur = ctxt->input->cur - ctxt->input->base; 9277 inputid = ctxt->input->id; 9278 nbatts = 0; 9279 nratts = 0; 9280 nbdef = 0; 9281 nbNs = 0; 9282 attval = 0; 9283 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9284 ctxt->nsNr = nsNr; 9285 9286 localname = xmlParseQName(ctxt, &prefix); 9287 if (localname == NULL) { 9288 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9289 "StartTag: invalid element name\n"); 9290 return(NULL); 9291 } 9292 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9293 9294 /* 9295 * Now parse the attributes, it ends up with the ending 9296 * 9297 * (S Attribute)* S? 9298 */ 9299 SKIP_BLANKS; 9300 GROW; 9301 9302 while (((RAW != '>') && 9303 ((RAW != '/') || (NXT(1) != '>')) && 9304 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9305 const xmlChar *q = CUR_PTR; 9306 unsigned int cons = ctxt->input->consumed; 9307 int len = -1, alloc = 0; 9308 9309 attname = xmlParseAttribute2(ctxt, prefix, localname, 9310 &aprefix, &attvalue, &len, &alloc); 9311 if ((attname == NULL) || (attvalue == NULL)) 9312 goto next_attr; 9313 if (len < 0) len = xmlStrlen(attvalue); 9314 9315 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9316 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9317 xmlURIPtr uri; 9318 9319 if (URL == NULL) { 9320 xmlErrMemory(ctxt, "dictionary allocation failure"); 9321 if ((attvalue != NULL) && (alloc != 0)) 9322 xmlFree(attvalue); 9323 localname = NULL; 9324 goto done; 9325 } 9326 if (*URL != 0) { 9327 uri = xmlParseURI((const char *) URL); 9328 if (uri == NULL) { 9329 xmlNsErr(ctxt, XML_WAR_NS_URI, 9330 "xmlns: '%s' is not a valid URI\n", 9331 URL, NULL, NULL); 9332 } else { 9333 if (uri->scheme == NULL) { 9334 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9335 "xmlns: URI %s is not absolute\n", 9336 URL, NULL, NULL); 9337 } 9338 xmlFreeURI(uri); 9339 } 9340 if (URL == ctxt->str_xml_ns) { 9341 if (attname != ctxt->str_xml) { 9342 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9343 "xml namespace URI cannot be the default namespace\n", 9344 NULL, NULL, NULL); 9345 } 9346 goto next_attr; 9347 } 9348 if ((len == 29) && 9349 (xmlStrEqual(URL, 9350 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9351 xmlNsErr(ctxt, 
XML_NS_ERR_XML_NAMESPACE, 9352 "reuse of the xmlns namespace name is forbidden\n", 9353 NULL, NULL, NULL); 9354 goto next_attr; 9355 } 9356 } 9357 /* 9358 * check that it's not a defined namespace 9359 */ 9360 for (j = 1;j <= nbNs;j++) 9361 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9362 break; 9363 if (j <= nbNs) 9364 xmlErrAttributeDup(ctxt, NULL, attname); 9365 else 9366 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9367 9368 } else if (aprefix == ctxt->str_xmlns) { 9369 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9370 xmlURIPtr uri; 9371 9372 if (attname == ctxt->str_xml) { 9373 if (URL != ctxt->str_xml_ns) { 9374 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9375 "xml namespace prefix mapped to wrong URI\n", 9376 NULL, NULL, NULL); 9377 } 9378 /* 9379 * Do not keep a namespace definition node 9380 */ 9381 goto next_attr; 9382 } 9383 if (URL == ctxt->str_xml_ns) { 9384 if (attname != ctxt->str_xml) { 9385 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9386 "xml namespace URI mapped to wrong prefix\n", 9387 NULL, NULL, NULL); 9388 } 9389 goto next_attr; 9390 } 9391 if (attname == ctxt->str_xmlns) { 9392 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9393 "redefinition of the xmlns prefix is forbidden\n", 9394 NULL, NULL, NULL); 9395 goto next_attr; 9396 } 9397 if ((len == 29) && 9398 (xmlStrEqual(URL, 9399 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9400 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9401 "reuse of the xmlns namespace name is forbidden\n", 9402 NULL, NULL, NULL); 9403 goto next_attr; 9404 } 9405 if ((URL == NULL) || (URL[0] == 0)) { 9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9407 "xmlns:%s: Empty XML namespace is not allowed\n", 9408 attname, NULL, NULL); 9409 goto next_attr; 9410 } else { 9411 uri = xmlParseURI((const char *) URL); 9412 if (uri == NULL) { 9413 xmlNsErr(ctxt, XML_WAR_NS_URI, 9414 "xmlns:%s: '%s' is not a valid URI\n", 9415 attname, URL, NULL); 9416 } else { 9417 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9418 xmlNsWarn(ctxt, 
XML_WAR_NS_URI_RELATIVE, 9419 "xmlns:%s: URI %s is not absolute\n", 9420 attname, URL, NULL); 9421 } 9422 xmlFreeURI(uri); 9423 } 9424 } 9425 9426 /* 9427 * check that it's not a defined namespace 9428 */ 9429 for (j = 1;j <= nbNs;j++) 9430 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9431 break; 9432 if (j <= nbNs) 9433 xmlErrAttributeDup(ctxt, aprefix, attname); 9434 else 9435 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9436 9437 } else { 9438 /* 9439 * Add the pair to atts 9440 */ 9441 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9442 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9443 goto next_attr; 9444 } 9445 maxatts = ctxt->maxatts; 9446 atts = ctxt->atts; 9447 } 9448 ctxt->attallocs[nratts++] = alloc; 9449 atts[nbatts++] = attname; 9450 atts[nbatts++] = aprefix; 9451 /* 9452 * The namespace URI field is used temporarily to point at the 9453 * base of the current input buffer for non-alloced attributes. 9454 * When the input buffer is reallocated, all the pointers become 9455 * invalid, but they can be reconstructed later. 
9456 */ 9457 if (alloc) 9458 atts[nbatts++] = NULL; 9459 else 9460 atts[nbatts++] = ctxt->input->base; 9461 atts[nbatts++] = attvalue; 9462 attvalue += len; 9463 atts[nbatts++] = attvalue; 9464 /* 9465 * tag if some deallocation is needed 9466 */ 9467 if (alloc != 0) attval = 1; 9468 attvalue = NULL; /* moved into atts */ 9469 } 9470 9471 next_attr: 9472 if ((attvalue != NULL) && (alloc != 0)) { 9473 xmlFree(attvalue); 9474 attvalue = NULL; 9475 } 9476 9477 GROW 9478 if (ctxt->instate == XML_PARSER_EOF) 9479 break; 9480 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9481 break; 9482 if (SKIP_BLANKS == 0) { 9483 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9484 "attributes construct error\n"); 9485 break; 9486 } 9487 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9488 (attname == NULL) && (attvalue == NULL)) { 9489 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9490 "xmlParseStartTag: problem parsing attributes\n"); 9491 break; 9492 } 9493 GROW; 9494 } 9495 9496 if (ctxt->input->id != inputid) { 9497 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9498 "Unexpected change of input\n"); 9499 localname = NULL; 9500 goto done; 9501 } 9502 9503 /* Reconstruct attribute value pointers. */ 9504 for (i = 0, j = 0; j < nratts; i += 5, j++) { 9505 if (atts[i+2] != NULL) { 9506 /* 9507 * Arithmetic on dangling pointers is technically undefined 9508 * behavior, but well... 
9509 */ 9510 ptrdiff_t offset = ctxt->input->base - atts[i+2]; 9511 atts[i+2] = NULL; /* Reset repurposed namespace URI */ 9512 atts[i+3] += offset; /* value */ 9513 atts[i+4] += offset; /* valuend */ 9514 } 9515 } 9516 9517 /* 9518 * The attributes defaulting 9519 */ 9520 if (ctxt->attsDefault != NULL) { 9521 xmlDefAttrsPtr defaults; 9522 9523 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9524 if (defaults != NULL) { 9525 for (i = 0;i < defaults->nbAttrs;i++) { 9526 attname = defaults->values[5 * i]; 9527 aprefix = defaults->values[5 * i + 1]; 9528 9529 /* 9530 * special work for namespaces defaulted defs 9531 */ 9532 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9533 /* 9534 * check that it's not a defined namespace 9535 */ 9536 for (j = 1;j <= nbNs;j++) 9537 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9538 break; 9539 if (j <= nbNs) continue; 9540 9541 nsname = xmlGetNamespace(ctxt, NULL); 9542 if (nsname != defaults->values[5 * i + 2]) { 9543 if (nsPush(ctxt, NULL, 9544 defaults->values[5 * i + 2]) > 0) 9545 nbNs++; 9546 } 9547 } else if (aprefix == ctxt->str_xmlns) { 9548 /* 9549 * check that it's not a defined namespace 9550 */ 9551 for (j = 1;j <= nbNs;j++) 9552 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9553 break; 9554 if (j <= nbNs) continue; 9555 9556 nsname = xmlGetNamespace(ctxt, attname); 9557 if (nsname != defaults->values[2]) { 9558 if (nsPush(ctxt, attname, 9559 defaults->values[5 * i + 2]) > 0) 9560 nbNs++; 9561 } 9562 } else { 9563 /* 9564 * check that it's not a defined attribute 9565 */ 9566 for (j = 0;j < nbatts;j+=5) { 9567 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9568 break; 9569 } 9570 if (j < nbatts) continue; 9571 9572 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9573 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9574 localname = NULL; 9575 goto done; 9576 } 9577 maxatts = ctxt->maxatts; 9578 atts = ctxt->atts; 9579 } 9580 atts[nbatts++] = attname; 9581 atts[nbatts++] = aprefix; 9582 if 
(aprefix == NULL) 9583 atts[nbatts++] = NULL; 9584 else 9585 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9586 atts[nbatts++] = defaults->values[5 * i + 2]; 9587 atts[nbatts++] = defaults->values[5 * i + 3]; 9588 if ((ctxt->standalone == 1) && 9589 (defaults->values[5 * i + 4] != NULL)) { 9590 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9591 "standalone: attribute %s on %s defaulted from external subset\n", 9592 attname, localname); 9593 } 9594 nbdef++; 9595 } 9596 } 9597 } 9598 } 9599 9600 /* 9601 * The attributes checkings 9602 */ 9603 for (i = 0; i < nbatts;i += 5) { 9604 /* 9605 * The default namespace does not apply to attribute names. 9606 */ 9607 if (atts[i + 1] != NULL) { 9608 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9609 if (nsname == NULL) { 9610 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9611 "Namespace prefix %s for %s on %s is not defined\n", 9612 atts[i + 1], atts[i], localname); 9613 } 9614 atts[i + 2] = nsname; 9615 } else 9616 nsname = NULL; 9617 /* 9618 * [ WFC: Unique Att Spec ] 9619 * No attribute name may appear more than once in the same 9620 * start-tag or empty-element tag. 9621 * As extended by the Namespace in XML REC. 9622 */ 9623 for (j = 0; j < i;j += 5) { 9624 if (atts[i] == atts[j]) { 9625 if (atts[i+1] == atts[j+1]) { 9626 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9627 break; 9628 } 9629 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9630 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9631 "Namespaced Attribute %s in '%s' redefined\n", 9632 atts[i], nsname, NULL); 9633 break; 9634 } 9635 } 9636 } 9637 } 9638 9639 nsname = xmlGetNamespace(ctxt, prefix); 9640 if ((prefix != NULL) && (nsname == NULL)) { 9641 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9642 "Namespace prefix %s on %s is not defined\n", 9643 prefix, localname, NULL); 9644 } 9645 *pref = prefix; 9646 *URI = nsname; 9647 9648 /* 9649 * SAX: Start of Element ! 
9650 */ 9651 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9652 (!ctxt->disableSAX)) { 9653 if (nbNs > 0) 9654 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9655 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9656 nbatts / 5, nbdef, atts); 9657 else 9658 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9659 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9660 } 9661 9662 done: 9663 /* 9664 * Free up attribute allocated strings if needed 9665 */ 9666 if (attval != 0) { 9667 for (i = 3,j = 0; j < nratts;i += 5,j++) 9668 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9669 xmlFree((xmlChar *) atts[i]); 9670 } 9671 9672 return(localname); 9673 } 9674 9675 /** 9676 * xmlParseEndTag2: 9677 * @ctxt: an XML parser context 9678 * @line: line of the start tag 9679 * @nsNr: number of namespaces on the start tag 9680 * 9681 * parse an end of tag 9682 * 9683 * [42] ETag ::= '</' Name S? '>' 9684 * 9685 * With namespace 9686 * 9687 * [NS 9] ETag ::= '</' QName S? '>' 9688 */ 9689 9690 static void 9691 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) { 9692 const xmlChar *name; 9693 9694 GROW; 9695 if ((RAW != '<') || (NXT(1) != '/')) { 9696 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9697 return; 9698 } 9699 SKIP(2); 9700 9701 if (tag->prefix == NULL) 9702 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9703 else 9704 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix); 9705 9706 /* 9707 * We should definitely be at the ending "S? '>'" part 9708 */ 9709 GROW; 9710 if (ctxt->instate == XML_PARSER_EOF) 9711 return; 9712 SKIP_BLANKS; 9713 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9714 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9715 } else 9716 NEXT1; 9717 9718 /* 9719 * [ WFC: Element Type Match ] 9720 * The Name in an element's end-tag must match the element type in the 9721 * start-tag. 
9722 * 9723 */ 9724 if (name != (xmlChar*)1) { 9725 if (name == NULL) name = BAD_CAST "unparsable"; 9726 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9727 "Opening and ending tag mismatch: %s line %d and %s\n", 9728 ctxt->name, tag->line, name); 9729 } 9730 9731 /* 9732 * SAX: End of Tag 9733 */ 9734 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9735 (!ctxt->disableSAX)) 9736 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix, 9737 tag->URI); 9738 9739 spacePop(ctxt); 9740 if (tag->nsNr != 0) 9741 nsPop(ctxt, tag->nsNr); 9742 } 9743 9744 /** 9745 * xmlParseCDSect: 9746 * @ctxt: an XML parser context 9747 * 9748 * Parse escaped pure raw content. 9749 * 9750 * [18] CDSect ::= CDStart CData CDEnd 9751 * 9752 * [19] CDStart ::= '<![CDATA[' 9753 * 9754 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9755 * 9756 * [21] CDEnd ::= ']]>' 9757 */ 9758 void 9759 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9760 xmlChar *buf = NULL; 9761 int len = 0; 9762 int size = XML_PARSER_BUFFER_SIZE; 9763 int r, rl; 9764 int s, sl; 9765 int cur, l; 9766 int count = 0; 9767 9768 /* Check 2.6.0 was NXT(0) not RAW */ 9769 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9770 SKIP(9); 9771 } else 9772 return; 9773 9774 ctxt->instate = XML_PARSER_CDATA_SECTION; 9775 r = CUR_CHAR(rl); 9776 if (!IS_CHAR(r)) { 9777 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9778 ctxt->instate = XML_PARSER_CONTENT; 9779 return; 9780 } 9781 NEXTL(rl); 9782 s = CUR_CHAR(sl); 9783 if (!IS_CHAR(s)) { 9784 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9785 ctxt->instate = XML_PARSER_CONTENT; 9786 return; 9787 } 9788 NEXTL(sl); 9789 cur = CUR_CHAR(l); 9790 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9791 if (buf == NULL) { 9792 xmlErrMemory(ctxt, NULL); 9793 return; 9794 } 9795 while (IS_CHAR(cur) && 9796 ((r != ']') || (s != ']') || (cur != '>'))) { 9797 if (len + 5 >= size) { 9798 xmlChar *tmp; 9799 9800 if ((size > 
XML_MAX_TEXT_LENGTH) && 9801 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9802 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9803 "CData section too big found", NULL); 9804 xmlFree (buf); 9805 return; 9806 } 9807 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9808 if (tmp == NULL) { 9809 xmlFree(buf); 9810 xmlErrMemory(ctxt, NULL); 9811 return; 9812 } 9813 buf = tmp; 9814 size *= 2; 9815 } 9816 COPY_BUF(rl,buf,len,r); 9817 r = s; 9818 rl = sl; 9819 s = cur; 9820 sl = l; 9821 count++; 9822 if (count > 50) { 9823 SHRINK; 9824 GROW; 9825 if (ctxt->instate == XML_PARSER_EOF) { 9826 xmlFree(buf); 9827 return; 9828 } 9829 count = 0; 9830 } 9831 NEXTL(l); 9832 cur = CUR_CHAR(l); 9833 } 9834 buf[len] = 0; 9835 ctxt->instate = XML_PARSER_CONTENT; 9836 if (cur != '>') { 9837 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9838 "CData section not finished\n%.50s\n", buf); 9839 xmlFree(buf); 9840 return; 9841 } 9842 NEXTL(l); 9843 9844 /* 9845 * OK the buffer is to be consumed as cdata. 9846 */ 9847 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9848 if (ctxt->sax->cdataBlock != NULL) 9849 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9850 else if (ctxt->sax->characters != NULL) 9851 ctxt->sax->characters(ctxt->userData, buf, len); 9852 } 9853 xmlFree(buf); 9854 } 9855 9856 /** 9857 * xmlParseContentInternal: 9858 * @ctxt: an XML parser context 9859 * 9860 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of 9861 * unexpected EOF to the caller. 9862 */ 9863 9864 static void 9865 xmlParseContentInternal(xmlParserCtxtPtr ctxt) { 9866 int nameNr = ctxt->nameNr; 9867 9868 GROW; 9869 while ((RAW != 0) && 9870 (ctxt->instate != XML_PARSER_EOF)) { 9871 const xmlChar *test = CUR_PTR; 9872 unsigned int cons = ctxt->input->consumed; 9873 const xmlChar *cur = ctxt->input->cur; 9874 9875 /* 9876 * First case : a Processing Instruction. 
9877 */ 9878 if ((*cur == '<') && (cur[1] == '?')) { 9879 xmlParsePI(ctxt); 9880 } 9881 9882 /* 9883 * Second case : a CDSection 9884 */ 9885 /* 2.6.0 test was *cur not RAW */ 9886 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9887 xmlParseCDSect(ctxt); 9888 } 9889 9890 /* 9891 * Third case : a comment 9892 */ 9893 else if ((*cur == '<') && (NXT(1) == '!') && 9894 (NXT(2) == '-') && (NXT(3) == '-')) { 9895 xmlParseComment(ctxt); 9896 ctxt->instate = XML_PARSER_CONTENT; 9897 } 9898 9899 /* 9900 * Fourth case : a sub-element. 9901 */ 9902 else if (*cur == '<') { 9903 if (NXT(1) == '/') { 9904 if (ctxt->nameNr <= nameNr) 9905 break; 9906 xmlParseElementEnd(ctxt); 9907 } else { 9908 xmlParseElementStart(ctxt); 9909 } 9910 } 9911 9912 /* 9913 * Fifth case : a reference. If if has not been resolved, 9914 * parsing returns it's Name, create the node 9915 */ 9916 9917 else if (*cur == '&') { 9918 xmlParseReference(ctxt); 9919 } 9920 9921 /* 9922 * Last case, text. Note that References are handled directly. 9923 */ 9924 else { 9925 xmlParseCharData(ctxt, 0); 9926 } 9927 9928 GROW; 9929 SHRINK; 9930 9931 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9932 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9933 "detected an error in element content\n"); 9934 xmlHaltParser(ctxt); 9935 break; 9936 } 9937 } 9938 } 9939 9940 /** 9941 * xmlParseContent: 9942 * @ctxt: an XML parser context 9943 * 9944 * Parse a content sequence. Stops at EOF or '</'. 
9945 * 9946 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9947 */ 9948 9949 void 9950 xmlParseContent(xmlParserCtxtPtr ctxt) { 9951 int nameNr = ctxt->nameNr; 9952 9953 xmlParseContentInternal(ctxt); 9954 9955 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) { 9956 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 9957 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 9958 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9959 "Premature end of data in tag %s line %d\n", 9960 name, line, NULL); 9961 } 9962 } 9963 9964 /** 9965 * xmlParseElement: 9966 * @ctxt: an XML parser context 9967 * 9968 * parse an XML element 9969 * 9970 * [39] element ::= EmptyElemTag | STag content ETag 9971 * 9972 * [ WFC: Element Type Match ] 9973 * The Name in an element's end-tag must match the element type in the 9974 * start-tag. 9975 * 9976 */ 9977 9978 void 9979 xmlParseElement(xmlParserCtxtPtr ctxt) { 9980 if (xmlParseElementStart(ctxt) != 0) 9981 return; 9982 9983 xmlParseContentInternal(ctxt); 9984 if (ctxt->instate == XML_PARSER_EOF) 9985 return; 9986 9987 if (CUR == 0) { 9988 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 9989 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 9990 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9991 "Premature end of data in tag %s line %d\n", 9992 name, line, NULL); 9993 return; 9994 } 9995 9996 xmlParseElementEnd(ctxt); 9997 } 9998 9999 /** 10000 * xmlParseElementStart: 10001 * @ctxt: an XML parser context 10002 * 10003 * Parse the start of an XML element. Returns -1 in case of error, 0 if an 10004 * opening tag was parsed, 1 if an empty element was parsed. 
10005 */ 10006 static int 10007 xmlParseElementStart(xmlParserCtxtPtr ctxt) { 10008 const xmlChar *name; 10009 const xmlChar *prefix = NULL; 10010 const xmlChar *URI = NULL; 10011 xmlParserNodeInfo node_info; 10012 int line, tlen = 0; 10013 xmlNodePtr ret; 10014 int nsNr = ctxt->nsNr; 10015 10016 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10017 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10018 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10019 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10020 xmlParserMaxDepth); 10021 xmlHaltParser(ctxt); 10022 return(-1); 10023 } 10024 10025 /* Capture start position */ 10026 if (ctxt->record_info) { 10027 node_info.begin_pos = ctxt->input->consumed + 10028 (CUR_PTR - ctxt->input->base); 10029 node_info.begin_line = ctxt->input->line; 10030 } 10031 10032 if (ctxt->spaceNr == 0) 10033 spacePush(ctxt, -1); 10034 else if (*ctxt->space == -2) 10035 spacePush(ctxt, -1); 10036 else 10037 spacePush(ctxt, *ctxt->space); 10038 10039 line = ctxt->input->line; 10040 #ifdef LIBXML_SAX1_ENABLED 10041 if (ctxt->sax2) 10042 #endif /* LIBXML_SAX1_ENABLED */ 10043 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10044 #ifdef LIBXML_SAX1_ENABLED 10045 else 10046 name = xmlParseStartTag(ctxt); 10047 #endif /* LIBXML_SAX1_ENABLED */ 10048 if (ctxt->instate == XML_PARSER_EOF) 10049 return(-1); 10050 if (name == NULL) { 10051 spacePop(ctxt); 10052 return(-1); 10053 } 10054 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); 10055 ret = ctxt->node; 10056 10057 #ifdef LIBXML_VALID_ENABLED 10058 /* 10059 * [ VC: Root Element Type ] 10060 * The Name in the document type declaration must match the element 10061 * type of the root element. 
10062 */ 10063 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10064 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10065 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10066 #endif /* LIBXML_VALID_ENABLED */ 10067 10068 /* 10069 * Check for an Empty Element. 10070 */ 10071 if ((RAW == '/') && (NXT(1) == '>')) { 10072 SKIP(2); 10073 if (ctxt->sax2) { 10074 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10075 (!ctxt->disableSAX)) 10076 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10077 #ifdef LIBXML_SAX1_ENABLED 10078 } else { 10079 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10080 (!ctxt->disableSAX)) 10081 ctxt->sax->endElement(ctxt->userData, name); 10082 #endif /* LIBXML_SAX1_ENABLED */ 10083 } 10084 namePop(ctxt); 10085 spacePop(ctxt); 10086 if (nsNr != ctxt->nsNr) 10087 nsPop(ctxt, ctxt->nsNr - nsNr); 10088 if ( ret != NULL && ctxt->record_info ) { 10089 node_info.end_pos = ctxt->input->consumed + 10090 (CUR_PTR - ctxt->input->base); 10091 node_info.end_line = ctxt->input->line; 10092 node_info.node = ret; 10093 xmlParserAddNodeInfo(ctxt, &node_info); 10094 } 10095 return(1); 10096 } 10097 if (RAW == '>') { 10098 NEXT1; 10099 } else { 10100 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10101 "Couldn't find end of Start Tag %s line %d\n", 10102 name, line, NULL); 10103 10104 /* 10105 * end of parsing of this node. 
10106 */ 10107 nodePop(ctxt); 10108 namePop(ctxt); 10109 spacePop(ctxt); 10110 if (nsNr != ctxt->nsNr) 10111 nsPop(ctxt, ctxt->nsNr - nsNr); 10112 10113 /* 10114 * Capture end position and add node 10115 */ 10116 if ( ret != NULL && ctxt->record_info ) { 10117 node_info.end_pos = ctxt->input->consumed + 10118 (CUR_PTR - ctxt->input->base); 10119 node_info.end_line = ctxt->input->line; 10120 node_info.node = ret; 10121 xmlParserAddNodeInfo(ctxt, &node_info); 10122 } 10123 return(-1); 10124 } 10125 10126 return(0); 10127 } 10128 10129 /** 10130 * xmlParseElementEnd: 10131 * @ctxt: an XML parser context 10132 * 10133 * Parse the end of an XML element. 10134 */ 10135 static void 10136 xmlParseElementEnd(xmlParserCtxtPtr ctxt) { 10137 xmlParserNodeInfo node_info; 10138 xmlNodePtr ret = ctxt->node; 10139 10140 if (ctxt->nameNr <= 0) 10141 return; 10142 10143 /* 10144 * parse the end of tag: '</' should be here. 10145 */ 10146 if (ctxt->sax2) { 10147 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); 10148 namePop(ctxt); 10149 } 10150 #ifdef LIBXML_SAX1_ENABLED 10151 else 10152 xmlParseEndTag1(ctxt, 0); 10153 #endif /* LIBXML_SAX1_ENABLED */ 10154 10155 /* 10156 * Capture end position and add node 10157 */ 10158 if ( ret != NULL && ctxt->record_info ) { 10159 node_info.end_pos = ctxt->input->consumed + 10160 (CUR_PTR - ctxt->input->base); 10161 node_info.end_line = ctxt->input->line; 10162 node_info.node = ret; 10163 xmlParserAddNodeInfo(ctxt, &node_info); 10164 } 10165 } 10166 10167 /** 10168 * xmlParseVersionNum: 10169 * @ctxt: an XML parser context 10170 * 10171 * parse the XML version value. 10172 * 10173 * [26] VersionNum ::= '1.' 
[0-9]+ 10174 * 10175 * In practice allow [0-9].[0-9]+ at that level 10176 * 10177 * Returns the string giving the XML version number, or NULL 10178 */ 10179 xmlChar * 10180 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10181 xmlChar *buf = NULL; 10182 int len = 0; 10183 int size = 10; 10184 xmlChar cur; 10185 10186 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10187 if (buf == NULL) { 10188 xmlErrMemory(ctxt, NULL); 10189 return(NULL); 10190 } 10191 cur = CUR; 10192 if (!((cur >= '0') && (cur <= '9'))) { 10193 xmlFree(buf); 10194 return(NULL); 10195 } 10196 buf[len++] = cur; 10197 NEXT; 10198 cur=CUR; 10199 if (cur != '.') { 10200 xmlFree(buf); 10201 return(NULL); 10202 } 10203 buf[len++] = cur; 10204 NEXT; 10205 cur=CUR; 10206 while ((cur >= '0') && (cur <= '9')) { 10207 if (len + 1 >= size) { 10208 xmlChar *tmp; 10209 10210 size *= 2; 10211 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10212 if (tmp == NULL) { 10213 xmlFree(buf); 10214 xmlErrMemory(ctxt, NULL); 10215 return(NULL); 10216 } 10217 buf = tmp; 10218 } 10219 buf[len++] = cur; 10220 NEXT; 10221 cur=CUR; 10222 } 10223 buf[len] = 0; 10224 return(buf); 10225 } 10226 10227 /** 10228 * xmlParseVersionInfo: 10229 * @ctxt: an XML parser context 10230 * 10231 * parse the XML version. 10232 * 10233 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10234 * 10235 * [25] Eq ::= S? '=' S? 10236 * 10237 * Returns the version string, e.g. 
"1.0" 10238 */ 10239 10240 xmlChar * 10241 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10242 xmlChar *version = NULL; 10243 10244 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10245 SKIP(7); 10246 SKIP_BLANKS; 10247 if (RAW != '=') { 10248 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10249 return(NULL); 10250 } 10251 NEXT; 10252 SKIP_BLANKS; 10253 if (RAW == '"') { 10254 NEXT; 10255 version = xmlParseVersionNum(ctxt); 10256 if (RAW != '"') { 10257 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10258 } else 10259 NEXT; 10260 } else if (RAW == '\''){ 10261 NEXT; 10262 version = xmlParseVersionNum(ctxt); 10263 if (RAW != '\'') { 10264 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10265 } else 10266 NEXT; 10267 } else { 10268 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10269 } 10270 } 10271 return(version); 10272 } 10273 10274 /** 10275 * xmlParseEncName: 10276 * @ctxt: an XML parser context 10277 * 10278 * parse the XML encoding name 10279 * 10280 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10281 * 10282 * Returns the encoding name value or NULL 10283 */ 10284 xmlChar * 10285 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10286 xmlChar *buf = NULL; 10287 int len = 0; 10288 int size = 10; 10289 xmlChar cur; 10290 10291 cur = CUR; 10292 if (((cur >= 'a') && (cur <= 'z')) || 10293 ((cur >= 'A') && (cur <= 'Z'))) { 10294 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10295 if (buf == NULL) { 10296 xmlErrMemory(ctxt, NULL); 10297 return(NULL); 10298 } 10299 10300 buf[len++] = cur; 10301 NEXT; 10302 cur = CUR; 10303 while (((cur >= 'a') && (cur <= 'z')) || 10304 ((cur >= 'A') && (cur <= 'Z')) || 10305 ((cur >= '0') && (cur <= '9')) || 10306 (cur == '.') || (cur == '_') || 10307 (cur == '-')) { 10308 if (len + 1 >= size) { 10309 xmlChar *tmp; 10310 10311 size *= 2; 10312 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10313 if (tmp == NULL) { 10314 xmlErrMemory(ctxt, NULL); 10315 xmlFree(buf); 10316 return(NULL); 
10317 } 10318 buf = tmp; 10319 } 10320 buf[len++] = cur; 10321 NEXT; 10322 cur = CUR; 10323 if (cur == 0) { 10324 SHRINK; 10325 GROW; 10326 cur = CUR; 10327 } 10328 } 10329 buf[len] = 0; 10330 } else { 10331 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10332 } 10333 return(buf); 10334 } 10335 10336 /** 10337 * xmlParseEncodingDecl: 10338 * @ctxt: an XML parser context 10339 * 10340 * parse the XML encoding declaration 10341 * 10342 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10343 * 10344 * this setups the conversion filters. 10345 * 10346 * Returns the encoding value or NULL 10347 */ 10348 10349 const xmlChar * 10350 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10351 xmlChar *encoding = NULL; 10352 10353 SKIP_BLANKS; 10354 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10355 SKIP(8); 10356 SKIP_BLANKS; 10357 if (RAW != '=') { 10358 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10359 return(NULL); 10360 } 10361 NEXT; 10362 SKIP_BLANKS; 10363 if (RAW == '"') { 10364 NEXT; 10365 encoding = xmlParseEncName(ctxt); 10366 if (RAW != '"') { 10367 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10368 xmlFree((xmlChar *) encoding); 10369 return(NULL); 10370 } else 10371 NEXT; 10372 } else if (RAW == '\''){ 10373 NEXT; 10374 encoding = xmlParseEncName(ctxt); 10375 if (RAW != '\'') { 10376 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10377 xmlFree((xmlChar *) encoding); 10378 return(NULL); 10379 } else 10380 NEXT; 10381 } else { 10382 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10383 } 10384 10385 /* 10386 * Non standard parsing, allowing the user to ignore encoding 10387 */ 10388 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10389 xmlFree((xmlChar *) encoding); 10390 return(NULL); 10391 } 10392 10393 /* 10394 * UTF-16 encoding switch has already taken place at this stage, 10395 * more over the little-endian/big-endian selection is already done 10396 */ 10397 if ((encoding != NULL) && 10398 
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10399 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10400 /* 10401 * If no encoding was passed to the parser, that we are 10402 * using UTF-16 and no decoder is present i.e. the 10403 * document is apparently UTF-8 compatible, then raise an 10404 * encoding mismatch fatal error 10405 */ 10406 if ((ctxt->encoding == NULL) && 10407 (ctxt->input->buf != NULL) && 10408 (ctxt->input->buf->encoder == NULL)) { 10409 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10410 "Document labelled UTF-16 but has UTF-8 content\n"); 10411 } 10412 if (ctxt->encoding != NULL) 10413 xmlFree((xmlChar *) ctxt->encoding); 10414 ctxt->encoding = encoding; 10415 } 10416 /* 10417 * UTF-8 encoding is handled natively 10418 */ 10419 else if ((encoding != NULL) && 10420 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10421 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10422 if (ctxt->encoding != NULL) 10423 xmlFree((xmlChar *) ctxt->encoding); 10424 ctxt->encoding = encoding; 10425 } 10426 else if (encoding != NULL) { 10427 xmlCharEncodingHandlerPtr handler; 10428 10429 if (ctxt->input->encoding != NULL) 10430 xmlFree((xmlChar *) ctxt->input->encoding); 10431 ctxt->input->encoding = encoding; 10432 10433 handler = xmlFindCharEncodingHandler((const char *) encoding); 10434 if (handler != NULL) { 10435 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10436 /* failed to convert */ 10437 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10438 return(NULL); 10439 } 10440 } else { 10441 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10442 "Unsupported encoding %s\n", encoding); 10443 return(NULL); 10444 } 10445 } 10446 } 10447 return(encoding); 10448 } 10449 10450 /** 10451 * xmlParseSDDecl: 10452 * @ctxt: an XML parser context 10453 * 10454 * parse the XML standalone declaration 10455 * 10456 * [32] SDDecl ::= S 'standalone' Eq 10457 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10458 * 10459 * [ VC: Standalone Document Declaration ] 10460 
* TODO The standalone document declaration must have the value "no" 10461 * if any external markup declarations contain declarations of: 10462 * - attributes with default values, if elements to which these 10463 * attributes apply appear in the document without specifications 10464 * of values for these attributes, or 10465 * - entities (other than amp, lt, gt, apos, quot), if references 10466 * to those entities appear in the document, or 10467 * - attributes with values subject to normalization, where the 10468 * attribute appears in the document with a value which will change 10469 * as a result of normalization, or 10470 * - element types with element content, if white space occurs directly 10471 * within any instance of those types. 10472 * 10473 * Returns: 10474 * 1 if standalone="yes" 10475 * 0 if standalone="no" 10476 * -2 if standalone attribute is missing or invalid 10477 * (A standalone value of -2 means that the XML declaration was found, 10478 * but no value was specified for the standalone attribute). 
10479 */ 10480 10481 int 10482 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10483 int standalone = -2; 10484 10485 SKIP_BLANKS; 10486 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10487 SKIP(10); 10488 SKIP_BLANKS; 10489 if (RAW != '=') { 10490 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10491 return(standalone); 10492 } 10493 NEXT; 10494 SKIP_BLANKS; 10495 if (RAW == '\''){ 10496 NEXT; 10497 if ((RAW == 'n') && (NXT(1) == 'o')) { 10498 standalone = 0; 10499 SKIP(2); 10500 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10501 (NXT(2) == 's')) { 10502 standalone = 1; 10503 SKIP(3); 10504 } else { 10505 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10506 } 10507 if (RAW != '\'') { 10508 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10509 } else 10510 NEXT; 10511 } else if (RAW == '"'){ 10512 NEXT; 10513 if ((RAW == 'n') && (NXT(1) == 'o')) { 10514 standalone = 0; 10515 SKIP(2); 10516 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10517 (NXT(2) == 's')) { 10518 standalone = 1; 10519 SKIP(3); 10520 } else { 10521 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10522 } 10523 if (RAW != '"') { 10524 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10525 } else 10526 NEXT; 10527 } else { 10528 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10529 } 10530 } 10531 return(standalone); 10532 } 10533 10534 /** 10535 * xmlParseXMLDecl: 10536 * @ctxt: an XML parser context 10537 * 10538 * parse an XML declaration header 10539 * 10540 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10541 */ 10542 10543 void 10544 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10545 xmlChar *version; 10546 10547 /* 10548 * This value for standalone indicates that the document has an 10549 * XML declaration but it does not have a standalone attribute. 10550 * It will be overwritten later if a standalone attribute is found. 10551 */ 10552 ctxt->input->standalone = -2; 10553 10554 /* 10555 * We know that '<?xml' is here. 
10556 */ 10557 SKIP(5); 10558 10559 if (!IS_BLANK_CH(RAW)) { 10560 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10561 "Blank needed after '<?xml'\n"); 10562 } 10563 SKIP_BLANKS; 10564 10565 /* 10566 * We must have the VersionInfo here. 10567 */ 10568 version = xmlParseVersionInfo(ctxt); 10569 if (version == NULL) { 10570 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10571 } else { 10572 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10573 /* 10574 * Changed here for XML-1.0 5th edition 10575 */ 10576 if (ctxt->options & XML_PARSE_OLD10) { 10577 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10578 "Unsupported version '%s'\n", 10579 version); 10580 } else { 10581 if ((version[0] == '1') && ((version[1] == '.'))) { 10582 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10583 "Unsupported version '%s'\n", 10584 version, NULL); 10585 } else { 10586 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10587 "Unsupported version '%s'\n", 10588 version); 10589 } 10590 } 10591 } 10592 if (ctxt->version != NULL) 10593 xmlFree((void *) ctxt->version); 10594 ctxt->version = version; 10595 } 10596 10597 /* 10598 * We may have the encoding declaration 10599 */ 10600 if (!IS_BLANK_CH(RAW)) { 10601 if ((RAW == '?') && (NXT(1) == '>')) { 10602 SKIP(2); 10603 return; 10604 } 10605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10606 } 10607 xmlParseEncodingDecl(ctxt); 10608 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10609 (ctxt->instate == XML_PARSER_EOF)) { 10610 /* 10611 * The XML REC instructs us to stop parsing right here 10612 */ 10613 return; 10614 } 10615 10616 /* 10617 * We may have the standalone status. 
10618 */ 10619 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10620 if ((RAW == '?') && (NXT(1) == '>')) { 10621 SKIP(2); 10622 return; 10623 } 10624 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10625 } 10626 10627 /* 10628 * We can grow the input buffer freely at that point 10629 */ 10630 GROW; 10631 10632 SKIP_BLANKS; 10633 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10634 10635 SKIP_BLANKS; 10636 if ((RAW == '?') && (NXT(1) == '>')) { 10637 SKIP(2); 10638 } else if (RAW == '>') { 10639 /* Deprecated old WD ... */ 10640 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10641 NEXT; 10642 } else { 10643 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10644 MOVETO_ENDTAG(CUR_PTR); 10645 NEXT; 10646 } 10647 } 10648 10649 /** 10650 * xmlParseMisc: 10651 * @ctxt: an XML parser context 10652 * 10653 * parse an XML Misc* optional field. 10654 * 10655 * [27] Misc ::= Comment | PI | S 10656 */ 10657 10658 void 10659 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10660 while ((ctxt->instate != XML_PARSER_EOF) && 10661 (((RAW == '<') && (NXT(1) == '?')) || 10662 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10663 IS_BLANK_CH(CUR))) { 10664 if ((RAW == '<') && (NXT(1) == '?')) { 10665 xmlParsePI(ctxt); 10666 } else if (IS_BLANK_CH(CUR)) { 10667 NEXT; 10668 } else 10669 xmlParseComment(ctxt); 10670 } 10671 } 10672 10673 /** 10674 * xmlParseDocument: 10675 * @ctxt: an XML parser context 10676 * 10677 * parse an XML document (and build a tree if using the standard SAX 10678 * interface). 10679 * 10680 * [1] document ::= prolog element Misc* 10681 * 10682 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10683 * 10684 * Returns 0, -1 in case of error. the parser context is augmented 10685 * as a result of the parsing. 
10686 */ 10687 10688 int 10689 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10690 xmlChar start[4]; 10691 xmlCharEncoding enc; 10692 10693 xmlInitParser(); 10694 10695 if ((ctxt == NULL) || (ctxt->input == NULL)) 10696 return(-1); 10697 10698 GROW; 10699 10700 /* 10701 * SAX: detecting the level. 10702 */ 10703 xmlDetectSAX2(ctxt); 10704 10705 /* 10706 * SAX: beginning of the document processing. 10707 */ 10708 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10709 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10710 if (ctxt->instate == XML_PARSER_EOF) 10711 return(-1); 10712 10713 if ((ctxt->encoding == NULL) && 10714 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10715 /* 10716 * Get the 4 first bytes and decode the charset 10717 * if enc != XML_CHAR_ENCODING_NONE 10718 * plug some encoding conversion routines. 10719 */ 10720 start[0] = RAW; 10721 start[1] = NXT(1); 10722 start[2] = NXT(2); 10723 start[3] = NXT(3); 10724 enc = xmlDetectCharEncoding(&start[0], 4); 10725 if (enc != XML_CHAR_ENCODING_NONE) { 10726 xmlSwitchEncoding(ctxt, enc); 10727 } 10728 } 10729 10730 10731 if (CUR == 0) { 10732 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10733 return(-1); 10734 } 10735 10736 /* 10737 * Check for the XMLDecl in the Prolog. 10738 * do not GROW here to avoid the detected encoder to decode more 10739 * than just the first line, unless the amount of data is really 10740 * too small to hold "<?xml version="1.0" encoding="foo" 10741 */ 10742 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10743 GROW; 10744 } 10745 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10746 10747 /* 10748 * Note that we will switch encoding on the fly. 
10749 */ 10750 xmlParseXMLDecl(ctxt); 10751 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10752 (ctxt->instate == XML_PARSER_EOF)) { 10753 /* 10754 * The XML REC instructs us to stop parsing right here 10755 */ 10756 return(-1); 10757 } 10758 ctxt->standalone = ctxt->input->standalone; 10759 SKIP_BLANKS; 10760 } else { 10761 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10762 } 10763 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10764 ctxt->sax->startDocument(ctxt->userData); 10765 if (ctxt->instate == XML_PARSER_EOF) 10766 return(-1); 10767 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10768 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10769 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10770 } 10771 10772 /* 10773 * The Misc part of the Prolog 10774 */ 10775 GROW; 10776 xmlParseMisc(ctxt); 10777 10778 /* 10779 * Then possibly doc type declaration(s) and more Misc 10780 * (doctypedecl Misc*)? 10781 */ 10782 GROW; 10783 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10784 10785 ctxt->inSubset = 1; 10786 xmlParseDocTypeDecl(ctxt); 10787 if (RAW == '[') { 10788 ctxt->instate = XML_PARSER_DTD; 10789 xmlParseInternalSubset(ctxt); 10790 if (ctxt->instate == XML_PARSER_EOF) 10791 return(-1); 10792 } 10793 10794 /* 10795 * Create and update the external subset. 
10796 */ 10797 ctxt->inSubset = 2; 10798 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10799 (!ctxt->disableSAX)) 10800 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10801 ctxt->extSubSystem, ctxt->extSubURI); 10802 if (ctxt->instate == XML_PARSER_EOF) 10803 return(-1); 10804 ctxt->inSubset = 0; 10805 10806 xmlCleanSpecialAttr(ctxt); 10807 10808 ctxt->instate = XML_PARSER_PROLOG; 10809 xmlParseMisc(ctxt); 10810 } 10811 10812 /* 10813 * Time to start parsing the tree itself 10814 */ 10815 GROW; 10816 if (RAW != '<') { 10817 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10818 "Start tag expected, '<' not found\n"); 10819 } else { 10820 ctxt->instate = XML_PARSER_CONTENT; 10821 xmlParseElement(ctxt); 10822 ctxt->instate = XML_PARSER_EPILOG; 10823 10824 10825 /* 10826 * The Misc part at the end 10827 */ 10828 xmlParseMisc(ctxt); 10829 10830 if (RAW != 0) { 10831 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10832 } 10833 ctxt->instate = XML_PARSER_EOF; 10834 } 10835 10836 /* 10837 * SAX: end of the document processing. 10838 */ 10839 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10840 ctxt->sax->endDocument(ctxt->userData); 10841 10842 /* 10843 * Remove locally kept entity definitions if the tree was not built 10844 */ 10845 if ((ctxt->myDoc != NULL) && 10846 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10847 xmlFreeDoc(ctxt->myDoc); 10848 ctxt->myDoc = NULL; 10849 } 10850 10851 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10852 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10853 if (ctxt->valid) 10854 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10855 if (ctxt->nsWellFormed) 10856 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10857 if (ctxt->options & XML_PARSE_OLD10) 10858 ctxt->myDoc->properties |= XML_DOC_OLD10; 10859 } 10860 if (! 
ctxt->wellFormed) { 10861 ctxt->valid = 0; 10862 return(-1); 10863 } 10864 return(0); 10865 } 10866 10867 /** 10868 * xmlParseExtParsedEnt: 10869 * @ctxt: an XML parser context 10870 * 10871 * parse a general parsed entity 10872 * An external general parsed entity is well-formed if it matches the 10873 * production labeled extParsedEnt. 10874 * 10875 * [78] extParsedEnt ::= TextDecl? content 10876 * 10877 * Returns 0, -1 in case of error. the parser context is augmented 10878 * as a result of the parsing. 10879 */ 10880 10881 int 10882 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10883 xmlChar start[4]; 10884 xmlCharEncoding enc; 10885 10886 if ((ctxt == NULL) || (ctxt->input == NULL)) 10887 return(-1); 10888 10889 xmlDefaultSAXHandlerInit(); 10890 10891 xmlDetectSAX2(ctxt); 10892 10893 GROW; 10894 10895 /* 10896 * SAX: beginning of the document processing. 10897 */ 10898 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10899 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10900 10901 /* 10902 * Get the 4 first bytes and decode the charset 10903 * if enc != XML_CHAR_ENCODING_NONE 10904 * plug some encoding conversion routines. 10905 */ 10906 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10907 start[0] = RAW; 10908 start[1] = NXT(1); 10909 start[2] = NXT(2); 10910 start[3] = NXT(3); 10911 enc = xmlDetectCharEncoding(start, 4); 10912 if (enc != XML_CHAR_ENCODING_NONE) { 10913 xmlSwitchEncoding(ctxt, enc); 10914 } 10915 } 10916 10917 10918 if (CUR == 0) { 10919 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10920 } 10921 10922 /* 10923 * Check for the XMLDecl in the Prolog. 10924 */ 10925 GROW; 10926 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10927 10928 /* 10929 * Note that we will switch encoding on the fly. 
10930 */ 10931 xmlParseXMLDecl(ctxt); 10932 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10933 /* 10934 * The XML REC instructs us to stop parsing right here 10935 */ 10936 return(-1); 10937 } 10938 SKIP_BLANKS; 10939 } else { 10940 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10941 } 10942 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10943 ctxt->sax->startDocument(ctxt->userData); 10944 if (ctxt->instate == XML_PARSER_EOF) 10945 return(-1); 10946 10947 /* 10948 * Doing validity checking on chunk doesn't make sense 10949 */ 10950 ctxt->instate = XML_PARSER_CONTENT; 10951 ctxt->validate = 0; 10952 ctxt->loadsubset = 0; 10953 ctxt->depth = 0; 10954 10955 xmlParseContent(ctxt); 10956 if (ctxt->instate == XML_PARSER_EOF) 10957 return(-1); 10958 10959 if ((RAW == '<') && (NXT(1) == '/')) { 10960 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10961 } else if (RAW != 0) { 10962 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10963 } 10964 10965 /* 10966 * SAX: end of the document processing. 10967 */ 10968 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10969 ctxt->sax->endDocument(ctxt->userData); 10970 10971 if (! ctxt->wellFormed) return(-1); 10972 return(0); 10973 } 10974 10975 #ifdef LIBXML_PUSH_ENABLED 10976 /************************************************************************ 10977 * * 10978 * Progressive parsing interfaces * 10979 * * 10980 ************************************************************************/ 10981 10982 /** 10983 * xmlParseLookupSequence: 10984 * @ctxt: an XML parser context 10985 * @first: the first char to lookup 10986 * @next: the next char to lookup or zero 10987 * @third: the next char to lookup or zero 10988 * 10989 * Try to find if a sequence (first, next, third) or just (first next) or 10990 * (first) is available in the input stream. 
10991 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10992 * to avoid rescanning sequences of bytes, it DOES change the state of the 10993 * parser, do not use liberally. 10994 * 10995 * Returns the index to the current parsing point if the full sequence 10996 * is available, -1 otherwise. 10997 */ 10998 static int 10999 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 11000 xmlChar next, xmlChar third) { 11001 int base, len; 11002 xmlParserInputPtr in; 11003 const xmlChar *buf; 11004 11005 in = ctxt->input; 11006 if (in == NULL) return(-1); 11007 base = in->cur - in->base; 11008 if (base < 0) return(-1); 11009 if (ctxt->checkIndex > base) 11010 base = ctxt->checkIndex; 11011 if (in->buf == NULL) { 11012 buf = in->base; 11013 len = in->length; 11014 } else { 11015 buf = xmlBufContent(in->buf->buffer); 11016 len = xmlBufUse(in->buf->buffer); 11017 } 11018 /* take into account the sequence length */ 11019 if (third) len -= 2; 11020 else if (next) len --; 11021 for (;base < len;base++) { 11022 if (buf[base] == first) { 11023 if (third != 0) { 11024 if ((buf[base + 1] != next) || 11025 (buf[base + 2] != third)) continue; 11026 } else if (next != 0) { 11027 if (buf[base + 1] != next) continue; 11028 } 11029 ctxt->checkIndex = 0; 11030 #ifdef DEBUG_PUSH 11031 if (next == 0) 11032 xmlGenericError(xmlGenericErrorContext, 11033 "PP: lookup '%c' found at %d\n", 11034 first, base); 11035 else if (third == 0) 11036 xmlGenericError(xmlGenericErrorContext, 11037 "PP: lookup '%c%c' found at %d\n", 11038 first, next, base); 11039 else 11040 xmlGenericError(xmlGenericErrorContext, 11041 "PP: lookup '%c%c%c' found at %d\n", 11042 first, next, third, base); 11043 #endif 11044 return(base - (in->cur - in->base)); 11045 } 11046 } 11047 ctxt->checkIndex = base; 11048 #ifdef DEBUG_PUSH 11049 if (next == 0) 11050 xmlGenericError(xmlGenericErrorContext, 11051 "PP: lookup '%c' failed\n", first); 11052 else if (third == 0) 11053 
xmlGenericError(xmlGenericErrorContext, 11054 "PP: lookup '%c%c' failed\n", first, next); 11055 else 11056 xmlGenericError(xmlGenericErrorContext, 11057 "PP: lookup '%c%c%c' failed\n", first, next, third); 11058 #endif 11059 return(-1); 11060 } 11061 11062 /** 11063 * xmlParseGetLasts: 11064 * @ctxt: an XML parser context 11065 * @lastlt: pointer to store the last '<' from the input 11066 * @lastgt: pointer to store the last '>' from the input 11067 * 11068 * Lookup the last < and > in the current chunk 11069 */ 11070 static void 11071 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 11072 const xmlChar **lastgt) { 11073 const xmlChar *tmp; 11074 11075 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11076 xmlGenericError(xmlGenericErrorContext, 11077 "Internal error: xmlParseGetLasts\n"); 11078 return; 11079 } 11080 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11081 tmp = ctxt->input->end; 11082 tmp--; 11083 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11084 if (tmp < ctxt->input->base) { 11085 *lastlt = NULL; 11086 *lastgt = NULL; 11087 } else { 11088 *lastlt = tmp; 11089 tmp++; 11090 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11091 if (*tmp == '\'') { 11092 tmp++; 11093 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11094 if (tmp < ctxt->input->end) tmp++; 11095 } else if (*tmp == '"') { 11096 tmp++; 11097 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11098 if (tmp < ctxt->input->end) tmp++; 11099 } else 11100 tmp++; 11101 } 11102 if (tmp < ctxt->input->end) 11103 *lastgt = tmp; 11104 else { 11105 tmp = *lastlt; 11106 tmp--; 11107 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11108 if (tmp >= ctxt->input->base) 11109 *lastgt = tmp; 11110 else 11111 *lastgt = NULL; 11112 } 11113 } 11114 } else { 11115 *lastlt = NULL; 11116 *lastgt = NULL; 11117 } 11118 } 11119 /** 11120 * xmlCheckCdataPush: 11121 * @cur: pointer to the block of characters 11122 * @len: length of the 
block in bytes 11123 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11124 * 11125 * Check that the block of characters is okay as SCdata content [20] 11126 * 11127 * Returns the number of bytes to pass if okay, a negative index where an 11128 * UTF-8 error occurred otherwise 11129 */ 11130 static int 11131 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11132 int ix; 11133 unsigned char c; 11134 int codepoint; 11135 11136 if ((utf == NULL) || (len <= 0)) 11137 return(0); 11138 11139 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11140 c = utf[ix]; 11141 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11142 if (c >= 0x20) 11143 ix++; 11144 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11145 ix++; 11146 else 11147 return(-ix); 11148 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11149 if (ix + 2 > len) return(complete ? -ix : ix); 11150 if ((utf[ix+1] & 0xc0 ) != 0x80) 11151 return(-ix); 11152 codepoint = (utf[ix] & 0x1f) << 6; 11153 codepoint |= utf[ix+1] & 0x3f; 11154 if (!xmlIsCharQ(codepoint)) 11155 return(-ix); 11156 ix += 2; 11157 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11158 if (ix + 3 > len) return(complete ? -ix : ix); 11159 if (((utf[ix+1] & 0xc0) != 0x80) || 11160 ((utf[ix+2] & 0xc0) != 0x80)) 11161 return(-ix); 11162 codepoint = (utf[ix] & 0xf) << 12; 11163 codepoint |= (utf[ix+1] & 0x3f) << 6; 11164 codepoint |= utf[ix+2] & 0x3f; 11165 if (!xmlIsCharQ(codepoint)) 11166 return(-ix); 11167 ix += 3; 11168 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11169 if (ix + 4 > len) return(complete ? 
-ix : ix); 11170 if (((utf[ix+1] & 0xc0) != 0x80) || 11171 ((utf[ix+2] & 0xc0) != 0x80) || 11172 ((utf[ix+3] & 0xc0) != 0x80)) 11173 return(-ix); 11174 codepoint = (utf[ix] & 0x7) << 18; 11175 codepoint |= (utf[ix+1] & 0x3f) << 12; 11176 codepoint |= (utf[ix+2] & 0x3f) << 6; 11177 codepoint |= utf[ix+3] & 0x3f; 11178 if (!xmlIsCharQ(codepoint)) 11179 return(-ix); 11180 ix += 4; 11181 } else /* unknown encoding */ 11182 return(-ix); 11183 } 11184 return(ix); 11185 } 11186 11187 /** 11188 * xmlParseTryOrFinish: 11189 * @ctxt: an XML parser context 11190 * @terminate: last chunk indicator 11191 * 11192 * Try to progress on parsing 11193 * 11194 * Returns zero if no parsing was possible 11195 */ 11196 static int 11197 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11198 int ret = 0; 11199 int avail, tlen; 11200 xmlChar cur, next; 11201 const xmlChar *lastlt, *lastgt; 11202 11203 if (ctxt->input == NULL) 11204 return(0); 11205 11206 #ifdef DEBUG_PUSH 11207 switch (ctxt->instate) { 11208 case XML_PARSER_EOF: 11209 xmlGenericError(xmlGenericErrorContext, 11210 "PP: try EOF\n"); break; 11211 case XML_PARSER_START: 11212 xmlGenericError(xmlGenericErrorContext, 11213 "PP: try START\n"); break; 11214 case XML_PARSER_MISC: 11215 xmlGenericError(xmlGenericErrorContext, 11216 "PP: try MISC\n");break; 11217 case XML_PARSER_COMMENT: 11218 xmlGenericError(xmlGenericErrorContext, 11219 "PP: try COMMENT\n");break; 11220 case XML_PARSER_PROLOG: 11221 xmlGenericError(xmlGenericErrorContext, 11222 "PP: try PROLOG\n");break; 11223 case XML_PARSER_START_TAG: 11224 xmlGenericError(xmlGenericErrorContext, 11225 "PP: try START_TAG\n");break; 11226 case XML_PARSER_CONTENT: 11227 xmlGenericError(xmlGenericErrorContext, 11228 "PP: try CONTENT\n");break; 11229 case XML_PARSER_CDATA_SECTION: 11230 xmlGenericError(xmlGenericErrorContext, 11231 "PP: try CDATA_SECTION\n");break; 11232 case XML_PARSER_END_TAG: 11233 xmlGenericError(xmlGenericErrorContext, 11234 "PP: try 
END_TAG\n");break; 11235 case XML_PARSER_ENTITY_DECL: 11236 xmlGenericError(xmlGenericErrorContext, 11237 "PP: try ENTITY_DECL\n");break; 11238 case XML_PARSER_ENTITY_VALUE: 11239 xmlGenericError(xmlGenericErrorContext, 11240 "PP: try ENTITY_VALUE\n");break; 11241 case XML_PARSER_ATTRIBUTE_VALUE: 11242 xmlGenericError(xmlGenericErrorContext, 11243 "PP: try ATTRIBUTE_VALUE\n");break; 11244 case XML_PARSER_DTD: 11245 xmlGenericError(xmlGenericErrorContext, 11246 "PP: try DTD\n");break; 11247 case XML_PARSER_EPILOG: 11248 xmlGenericError(xmlGenericErrorContext, 11249 "PP: try EPILOG\n");break; 11250 case XML_PARSER_PI: 11251 xmlGenericError(xmlGenericErrorContext, 11252 "PP: try PI\n");break; 11253 case XML_PARSER_IGNORE: 11254 xmlGenericError(xmlGenericErrorContext, 11255 "PP: try IGNORE\n");break; 11256 } 11257 #endif 11258 11259 if ((ctxt->input != NULL) && 11260 (ctxt->input->cur - ctxt->input->base > 4096)) { 11261 xmlSHRINK(ctxt); 11262 ctxt->checkIndex = 0; 11263 } 11264 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11265 11266 while (ctxt->instate != XML_PARSER_EOF) { 11267 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11268 return(0); 11269 11270 if (ctxt->input == NULL) break; 11271 if (ctxt->input->buf == NULL) 11272 avail = ctxt->input->length - 11273 (ctxt->input->cur - ctxt->input->base); 11274 else { 11275 /* 11276 * If we are operating on converted input, try to flush 11277 * remaining chars to avoid them stalling in the non-converted 11278 * buffer. But do not do this in document start where 11279 * encoding="..." may not have been read and we work on a 11280 * guessed encoding. 
11281 */ 11282 if ((ctxt->instate != XML_PARSER_START) && 11283 (ctxt->input->buf->raw != NULL) && 11284 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11285 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11286 ctxt->input); 11287 size_t current = ctxt->input->cur - ctxt->input->base; 11288 11289 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11290 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11291 base, current); 11292 } 11293 avail = xmlBufUse(ctxt->input->buf->buffer) - 11294 (ctxt->input->cur - ctxt->input->base); 11295 } 11296 if (avail < 1) 11297 goto done; 11298 switch (ctxt->instate) { 11299 case XML_PARSER_EOF: 11300 /* 11301 * Document parsing is done ! 11302 */ 11303 goto done; 11304 case XML_PARSER_START: 11305 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11306 xmlChar start[4]; 11307 xmlCharEncoding enc; 11308 11309 /* 11310 * Very first chars read from the document flow. 11311 */ 11312 if (avail < 4) 11313 goto done; 11314 11315 /* 11316 * Get the 4 first bytes and decode the charset 11317 * if enc != XML_CHAR_ENCODING_NONE 11318 * plug some encoding conversion routines, 11319 * else xmlSwitchEncoding will set to (default) 11320 * UTF8. 
11321 */ 11322 start[0] = RAW; 11323 start[1] = NXT(1); 11324 start[2] = NXT(2); 11325 start[3] = NXT(3); 11326 enc = xmlDetectCharEncoding(start, 4); 11327 xmlSwitchEncoding(ctxt, enc); 11328 break; 11329 } 11330 11331 if (avail < 2) 11332 goto done; 11333 cur = ctxt->input->cur[0]; 11334 next = ctxt->input->cur[1]; 11335 if (cur == 0) { 11336 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11337 ctxt->sax->setDocumentLocator(ctxt->userData, 11338 &xmlDefaultSAXLocator); 11339 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11340 xmlHaltParser(ctxt); 11341 #ifdef DEBUG_PUSH 11342 xmlGenericError(xmlGenericErrorContext, 11343 "PP: entering EOF\n"); 11344 #endif 11345 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11346 ctxt->sax->endDocument(ctxt->userData); 11347 goto done; 11348 } 11349 if ((cur == '<') && (next == '?')) { 11350 /* PI or XML decl */ 11351 if (avail < 5) return(ret); 11352 if ((!terminate) && 11353 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11354 return(ret); 11355 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11356 ctxt->sax->setDocumentLocator(ctxt->userData, 11357 &xmlDefaultSAXLocator); 11358 if ((ctxt->input->cur[2] == 'x') && 11359 (ctxt->input->cur[3] == 'm') && 11360 (ctxt->input->cur[4] == 'l') && 11361 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11362 ret += 5; 11363 #ifdef DEBUG_PUSH 11364 xmlGenericError(xmlGenericErrorContext, 11365 "PP: Parsing XML Decl\n"); 11366 #endif 11367 xmlParseXMLDecl(ctxt); 11368 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11369 /* 11370 * The XML REC instructs us to stop parsing right 11371 * here 11372 */ 11373 xmlHaltParser(ctxt); 11374 return(0); 11375 } 11376 ctxt->standalone = ctxt->input->standalone; 11377 if ((ctxt->encoding == NULL) && 11378 (ctxt->input->encoding != NULL)) 11379 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11380 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11381 (!ctxt->disableSAX)) 11382 ctxt->sax->startDocument(ctxt->userData); 11383 
ctxt->instate = XML_PARSER_MISC; 11384 #ifdef DEBUG_PUSH 11385 xmlGenericError(xmlGenericErrorContext, 11386 "PP: entering MISC\n"); 11387 #endif 11388 } else { 11389 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11390 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11391 (!ctxt->disableSAX)) 11392 ctxt->sax->startDocument(ctxt->userData); 11393 ctxt->instate = XML_PARSER_MISC; 11394 #ifdef DEBUG_PUSH 11395 xmlGenericError(xmlGenericErrorContext, 11396 "PP: entering MISC\n"); 11397 #endif 11398 } 11399 } else { 11400 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11401 ctxt->sax->setDocumentLocator(ctxt->userData, 11402 &xmlDefaultSAXLocator); 11403 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11404 if (ctxt->version == NULL) { 11405 xmlErrMemory(ctxt, NULL); 11406 break; 11407 } 11408 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11409 (!ctxt->disableSAX)) 11410 ctxt->sax->startDocument(ctxt->userData); 11411 ctxt->instate = XML_PARSER_MISC; 11412 #ifdef DEBUG_PUSH 11413 xmlGenericError(xmlGenericErrorContext, 11414 "PP: entering MISC\n"); 11415 #endif 11416 } 11417 break; 11418 case XML_PARSER_START_TAG: { 11419 const xmlChar *name; 11420 const xmlChar *prefix = NULL; 11421 const xmlChar *URI = NULL; 11422 int line = ctxt->input->line; 11423 int nsNr = ctxt->nsNr; 11424 11425 if ((avail < 2) && (ctxt->inputNr == 1)) 11426 goto done; 11427 cur = ctxt->input->cur[0]; 11428 if (cur != '<') { 11429 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11430 xmlHaltParser(ctxt); 11431 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11432 ctxt->sax->endDocument(ctxt->userData); 11433 goto done; 11434 } 11435 if (!terminate) { 11436 if (ctxt->progressive) { 11437 /* > can be found unescaped in attribute values */ 11438 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11439 goto done; 11440 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11441 goto done; 11442 } 11443 } 11444 if (ctxt->spaceNr == 0) 11445 spacePush(ctxt, -1); 11446 
else if (*ctxt->space == -2) 11447 spacePush(ctxt, -1); 11448 else 11449 spacePush(ctxt, *ctxt->space); 11450 #ifdef LIBXML_SAX1_ENABLED 11451 if (ctxt->sax2) 11452 #endif /* LIBXML_SAX1_ENABLED */ 11453 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11454 #ifdef LIBXML_SAX1_ENABLED 11455 else 11456 name = xmlParseStartTag(ctxt); 11457 #endif /* LIBXML_SAX1_ENABLED */ 11458 if (ctxt->instate == XML_PARSER_EOF) 11459 goto done; 11460 if (name == NULL) { 11461 spacePop(ctxt); 11462 xmlHaltParser(ctxt); 11463 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11464 ctxt->sax->endDocument(ctxt->userData); 11465 goto done; 11466 } 11467 #ifdef LIBXML_VALID_ENABLED 11468 /* 11469 * [ VC: Root Element Type ] 11470 * The Name in the document type declaration must match 11471 * the element type of the root element. 11472 */ 11473 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11474 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11475 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11476 #endif /* LIBXML_VALID_ENABLED */ 11477 11478 /* 11479 * Check for an Empty Element. 
11480 */ 11481 if ((RAW == '/') && (NXT(1) == '>')) { 11482 SKIP(2); 11483 11484 if (ctxt->sax2) { 11485 if ((ctxt->sax != NULL) && 11486 (ctxt->sax->endElementNs != NULL) && 11487 (!ctxt->disableSAX)) 11488 ctxt->sax->endElementNs(ctxt->userData, name, 11489 prefix, URI); 11490 if (ctxt->nsNr - nsNr > 0) 11491 nsPop(ctxt, ctxt->nsNr - nsNr); 11492 #ifdef LIBXML_SAX1_ENABLED 11493 } else { 11494 if ((ctxt->sax != NULL) && 11495 (ctxt->sax->endElement != NULL) && 11496 (!ctxt->disableSAX)) 11497 ctxt->sax->endElement(ctxt->userData, name); 11498 #endif /* LIBXML_SAX1_ENABLED */ 11499 } 11500 if (ctxt->instate == XML_PARSER_EOF) 11501 goto done; 11502 spacePop(ctxt); 11503 if (ctxt->nameNr == 0) { 11504 ctxt->instate = XML_PARSER_EPILOG; 11505 } else { 11506 ctxt->instate = XML_PARSER_CONTENT; 11507 } 11508 ctxt->progressive = 1; 11509 break; 11510 } 11511 if (RAW == '>') { 11512 NEXT; 11513 } else { 11514 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11515 "Couldn't find end of Start Tag %s\n", 11516 name); 11517 nodePop(ctxt); 11518 spacePop(ctxt); 11519 } 11520 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); 11521 11522 ctxt->instate = XML_PARSER_CONTENT; 11523 ctxt->progressive = 1; 11524 break; 11525 } 11526 case XML_PARSER_CONTENT: { 11527 const xmlChar *test; 11528 unsigned int cons; 11529 if ((avail < 2) && (ctxt->inputNr == 1)) 11530 goto done; 11531 cur = ctxt->input->cur[0]; 11532 next = ctxt->input->cur[1]; 11533 11534 test = CUR_PTR; 11535 cons = ctxt->input->consumed; 11536 if ((cur == '<') && (next == '/')) { 11537 ctxt->instate = XML_PARSER_END_TAG; 11538 break; 11539 } else if ((cur == '<') && (next == '?')) { 11540 if ((!terminate) && 11541 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11542 ctxt->progressive = XML_PARSER_PI; 11543 goto done; 11544 } 11545 xmlParsePI(ctxt); 11546 ctxt->instate = XML_PARSER_CONTENT; 11547 ctxt->progressive = 1; 11548 } else if ((cur == '<') && (next != '!')) { 11549 ctxt->instate = 
XML_PARSER_START_TAG; 11550 break; 11551 } else if ((cur == '<') && (next == '!') && 11552 (ctxt->input->cur[2] == '-') && 11553 (ctxt->input->cur[3] == '-')) { 11554 int term; 11555 11556 if (avail < 4) 11557 goto done; 11558 ctxt->input->cur += 4; 11559 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11560 ctxt->input->cur -= 4; 11561 if ((!terminate) && (term < 0)) { 11562 ctxt->progressive = XML_PARSER_COMMENT; 11563 goto done; 11564 } 11565 xmlParseComment(ctxt); 11566 ctxt->instate = XML_PARSER_CONTENT; 11567 ctxt->progressive = 1; 11568 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11569 (ctxt->input->cur[2] == '[') && 11570 (ctxt->input->cur[3] == 'C') && 11571 (ctxt->input->cur[4] == 'D') && 11572 (ctxt->input->cur[5] == 'A') && 11573 (ctxt->input->cur[6] == 'T') && 11574 (ctxt->input->cur[7] == 'A') && 11575 (ctxt->input->cur[8] == '[')) { 11576 SKIP(9); 11577 ctxt->instate = XML_PARSER_CDATA_SECTION; 11578 break; 11579 } else if ((cur == '<') && (next == '!') && 11580 (avail < 9)) { 11581 goto done; 11582 } else if (cur == '&') { 11583 if ((!terminate) && 11584 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11585 goto done; 11586 xmlParseReference(ctxt); 11587 } else { 11588 /* TODO Avoid the extra copy, handle directly !!! */ 11589 /* 11590 * Goal of the following test is: 11591 * - minimize calls to the SAX 'character' callback 11592 * when they are mergeable 11593 * - handle an problem for isBlank when we only parse 11594 * a sequence of blank chars and the next one is 11595 * not available to check against '<' presence. 11596 * - tries to homogenize the differences in SAX 11597 * callbacks between the push and pull versions 11598 * of the parser. 
11599 */ 11600 if ((ctxt->inputNr == 1) && 11601 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11602 if (!terminate) { 11603 if (ctxt->progressive) { 11604 if ((lastlt == NULL) || 11605 (ctxt->input->cur > lastlt)) 11606 goto done; 11607 } else if (xmlParseLookupSequence(ctxt, 11608 '<', 0, 0) < 0) { 11609 goto done; 11610 } 11611 } 11612 } 11613 ctxt->checkIndex = 0; 11614 xmlParseCharData(ctxt, 0); 11615 } 11616 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11617 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11618 "detected an error in element content\n"); 11619 xmlHaltParser(ctxt); 11620 break; 11621 } 11622 break; 11623 } 11624 case XML_PARSER_END_TAG: 11625 if (avail < 2) 11626 goto done; 11627 if (!terminate) { 11628 if (ctxt->progressive) { 11629 /* > can be found unescaped in attribute values */ 11630 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11631 goto done; 11632 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11633 goto done; 11634 } 11635 } 11636 if (ctxt->sax2) { 11637 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); 11638 nameNsPop(ctxt); 11639 } 11640 #ifdef LIBXML_SAX1_ENABLED 11641 else 11642 xmlParseEndTag1(ctxt, 0); 11643 #endif /* LIBXML_SAX1_ENABLED */ 11644 if (ctxt->instate == XML_PARSER_EOF) { 11645 /* Nothing */ 11646 } else if (ctxt->nameNr == 0) { 11647 ctxt->instate = XML_PARSER_EPILOG; 11648 } else { 11649 ctxt->instate = XML_PARSER_CONTENT; 11650 } 11651 break; 11652 case XML_PARSER_CDATA_SECTION: { 11653 /* 11654 * The Push mode need to have the SAX callback for 11655 * cdataBlock merge back contiguous callbacks. 
11656 */ 11657 int base; 11658 11659 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11660 if (base < 0) { 11661 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11662 int tmp; 11663 11664 tmp = xmlCheckCdataPush(ctxt->input->cur, 11665 XML_PARSER_BIG_BUFFER_SIZE, 0); 11666 if (tmp < 0) { 11667 tmp = -tmp; 11668 ctxt->input->cur += tmp; 11669 goto encoding_error; 11670 } 11671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11672 if (ctxt->sax->cdataBlock != NULL) 11673 ctxt->sax->cdataBlock(ctxt->userData, 11674 ctxt->input->cur, tmp); 11675 else if (ctxt->sax->characters != NULL) 11676 ctxt->sax->characters(ctxt->userData, 11677 ctxt->input->cur, tmp); 11678 } 11679 if (ctxt->instate == XML_PARSER_EOF) 11680 goto done; 11681 SKIPL(tmp); 11682 ctxt->checkIndex = 0; 11683 } 11684 goto done; 11685 } else { 11686 int tmp; 11687 11688 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11689 if ((tmp < 0) || (tmp != base)) { 11690 tmp = -tmp; 11691 ctxt->input->cur += tmp; 11692 goto encoding_error; 11693 } 11694 if ((ctxt->sax != NULL) && (base == 0) && 11695 (ctxt->sax->cdataBlock != NULL) && 11696 (!ctxt->disableSAX)) { 11697 /* 11698 * Special case to provide identical behaviour 11699 * between pull and push parsers on enpty CDATA 11700 * sections 11701 */ 11702 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11703 (!strncmp((const char *)&ctxt->input->cur[-9], 11704 "<![CDATA[", 9))) 11705 ctxt->sax->cdataBlock(ctxt->userData, 11706 BAD_CAST "", 0); 11707 } else if ((ctxt->sax != NULL) && (base > 0) && 11708 (!ctxt->disableSAX)) { 11709 if (ctxt->sax->cdataBlock != NULL) 11710 ctxt->sax->cdataBlock(ctxt->userData, 11711 ctxt->input->cur, base); 11712 else if (ctxt->sax->characters != NULL) 11713 ctxt->sax->characters(ctxt->userData, 11714 ctxt->input->cur, base); 11715 } 11716 if (ctxt->instate == XML_PARSER_EOF) 11717 goto done; 11718 SKIPL(base + 3); 11719 ctxt->checkIndex = 0; 11720 ctxt->instate = XML_PARSER_CONTENT; 11721 #ifdef DEBUG_PUSH 11722 
xmlGenericError(xmlGenericErrorContext, 11723 "PP: entering CONTENT\n"); 11724 #endif 11725 } 11726 break; 11727 } 11728 case XML_PARSER_MISC: 11729 SKIP_BLANKS; 11730 if (ctxt->input->buf == NULL) 11731 avail = ctxt->input->length - 11732 (ctxt->input->cur - ctxt->input->base); 11733 else 11734 avail = xmlBufUse(ctxt->input->buf->buffer) - 11735 (ctxt->input->cur - ctxt->input->base); 11736 if (avail < 2) 11737 goto done; 11738 cur = ctxt->input->cur[0]; 11739 next = ctxt->input->cur[1]; 11740 if ((cur == '<') && (next == '?')) { 11741 if ((!terminate) && 11742 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11743 ctxt->progressive = XML_PARSER_PI; 11744 goto done; 11745 } 11746 #ifdef DEBUG_PUSH 11747 xmlGenericError(xmlGenericErrorContext, 11748 "PP: Parsing PI\n"); 11749 #endif 11750 xmlParsePI(ctxt); 11751 if (ctxt->instate == XML_PARSER_EOF) 11752 goto done; 11753 ctxt->instate = XML_PARSER_MISC; 11754 ctxt->progressive = 1; 11755 ctxt->checkIndex = 0; 11756 } else if ((cur == '<') && (next == '!') && 11757 (ctxt->input->cur[2] == '-') && 11758 (ctxt->input->cur[3] == '-')) { 11759 if ((!terminate) && 11760 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11761 ctxt->progressive = XML_PARSER_COMMENT; 11762 goto done; 11763 } 11764 #ifdef DEBUG_PUSH 11765 xmlGenericError(xmlGenericErrorContext, 11766 "PP: Parsing Comment\n"); 11767 #endif 11768 xmlParseComment(ctxt); 11769 if (ctxt->instate == XML_PARSER_EOF) 11770 goto done; 11771 ctxt->instate = XML_PARSER_MISC; 11772 ctxt->progressive = 1; 11773 ctxt->checkIndex = 0; 11774 } else if ((cur == '<') && (next == '!') && 11775 (ctxt->input->cur[2] == 'D') && 11776 (ctxt->input->cur[3] == 'O') && 11777 (ctxt->input->cur[4] == 'C') && 11778 (ctxt->input->cur[5] == 'T') && 11779 (ctxt->input->cur[6] == 'Y') && 11780 (ctxt->input->cur[7] == 'P') && 11781 (ctxt->input->cur[8] == 'E')) { 11782 if ((!terminate) && 11783 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11784 ctxt->progressive = XML_PARSER_DTD; 
11785 goto done; 11786 } 11787 #ifdef DEBUG_PUSH 11788 xmlGenericError(xmlGenericErrorContext, 11789 "PP: Parsing internal subset\n"); 11790 #endif 11791 ctxt->inSubset = 1; 11792 ctxt->progressive = 0; 11793 ctxt->checkIndex = 0; 11794 xmlParseDocTypeDecl(ctxt); 11795 if (ctxt->instate == XML_PARSER_EOF) 11796 goto done; 11797 if (RAW == '[') { 11798 ctxt->instate = XML_PARSER_DTD; 11799 #ifdef DEBUG_PUSH 11800 xmlGenericError(xmlGenericErrorContext, 11801 "PP: entering DTD\n"); 11802 #endif 11803 } else { 11804 /* 11805 * Create and update the external subset. 11806 */ 11807 ctxt->inSubset = 2; 11808 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11809 (ctxt->sax->externalSubset != NULL)) 11810 ctxt->sax->externalSubset(ctxt->userData, 11811 ctxt->intSubName, ctxt->extSubSystem, 11812 ctxt->extSubURI); 11813 ctxt->inSubset = 0; 11814 xmlCleanSpecialAttr(ctxt); 11815 ctxt->instate = XML_PARSER_PROLOG; 11816 #ifdef DEBUG_PUSH 11817 xmlGenericError(xmlGenericErrorContext, 11818 "PP: entering PROLOG\n"); 11819 #endif 11820 } 11821 } else if ((cur == '<') && (next == '!') && 11822 (avail < 9)) { 11823 goto done; 11824 } else { 11825 ctxt->instate = XML_PARSER_START_TAG; 11826 ctxt->progressive = XML_PARSER_START_TAG; 11827 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11828 #ifdef DEBUG_PUSH 11829 xmlGenericError(xmlGenericErrorContext, 11830 "PP: entering START_TAG\n"); 11831 #endif 11832 } 11833 break; 11834 case XML_PARSER_PROLOG: 11835 SKIP_BLANKS; 11836 if (ctxt->input->buf == NULL) 11837 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11838 else 11839 avail = xmlBufUse(ctxt->input->buf->buffer) - 11840 (ctxt->input->cur - ctxt->input->base); 11841 if (avail < 2) 11842 goto done; 11843 cur = ctxt->input->cur[0]; 11844 next = ctxt->input->cur[1]; 11845 if ((cur == '<') && (next == '?')) { 11846 if ((!terminate) && 11847 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11848 ctxt->progressive = XML_PARSER_PI; 11849 goto done; 11850 } 11851 
#ifdef DEBUG_PUSH 11852 xmlGenericError(xmlGenericErrorContext, 11853 "PP: Parsing PI\n"); 11854 #endif 11855 xmlParsePI(ctxt); 11856 if (ctxt->instate == XML_PARSER_EOF) 11857 goto done; 11858 ctxt->instate = XML_PARSER_PROLOG; 11859 ctxt->progressive = 1; 11860 } else if ((cur == '<') && (next == '!') && 11861 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11862 if ((!terminate) && 11863 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11864 ctxt->progressive = XML_PARSER_COMMENT; 11865 goto done; 11866 } 11867 #ifdef DEBUG_PUSH 11868 xmlGenericError(xmlGenericErrorContext, 11869 "PP: Parsing Comment\n"); 11870 #endif 11871 xmlParseComment(ctxt); 11872 if (ctxt->instate == XML_PARSER_EOF) 11873 goto done; 11874 ctxt->instate = XML_PARSER_PROLOG; 11875 ctxt->progressive = 1; 11876 } else if ((cur == '<') && (next == '!') && 11877 (avail < 4)) { 11878 goto done; 11879 } else { 11880 ctxt->instate = XML_PARSER_START_TAG; 11881 if (ctxt->progressive == 0) 11882 ctxt->progressive = XML_PARSER_START_TAG; 11883 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11884 #ifdef DEBUG_PUSH 11885 xmlGenericError(xmlGenericErrorContext, 11886 "PP: entering START_TAG\n"); 11887 #endif 11888 } 11889 break; 11890 case XML_PARSER_EPILOG: 11891 SKIP_BLANKS; 11892 if (ctxt->input->buf == NULL) 11893 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11894 else 11895 avail = xmlBufUse(ctxt->input->buf->buffer) - 11896 (ctxt->input->cur - ctxt->input->base); 11897 if (avail < 2) 11898 goto done; 11899 cur = ctxt->input->cur[0]; 11900 next = ctxt->input->cur[1]; 11901 if ((cur == '<') && (next == '?')) { 11902 if ((!terminate) && 11903 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11904 ctxt->progressive = XML_PARSER_PI; 11905 goto done; 11906 } 11907 #ifdef DEBUG_PUSH 11908 xmlGenericError(xmlGenericErrorContext, 11909 "PP: Parsing PI\n"); 11910 #endif 11911 xmlParsePI(ctxt); 11912 if (ctxt->instate == XML_PARSER_EOF) 11913 goto done; 11914 
ctxt->instate = XML_PARSER_EPILOG; 11915 ctxt->progressive = 1; 11916 } else if ((cur == '<') && (next == '!') && 11917 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11918 if ((!terminate) && 11919 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11920 ctxt->progressive = XML_PARSER_COMMENT; 11921 goto done; 11922 } 11923 #ifdef DEBUG_PUSH 11924 xmlGenericError(xmlGenericErrorContext, 11925 "PP: Parsing Comment\n"); 11926 #endif 11927 xmlParseComment(ctxt); 11928 if (ctxt->instate == XML_PARSER_EOF) 11929 goto done; 11930 ctxt->instate = XML_PARSER_EPILOG; 11931 ctxt->progressive = 1; 11932 } else if ((cur == '<') && (next == '!') && 11933 (avail < 4)) { 11934 goto done; 11935 } else { 11936 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11937 xmlHaltParser(ctxt); 11938 #ifdef DEBUG_PUSH 11939 xmlGenericError(xmlGenericErrorContext, 11940 "PP: entering EOF\n"); 11941 #endif 11942 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11943 ctxt->sax->endDocument(ctxt->userData); 11944 goto done; 11945 } 11946 break; 11947 case XML_PARSER_DTD: { 11948 /* 11949 * Sorry but progressive parsing of the internal subset 11950 * is not expected to be supported. We first check that 11951 * the full content of the internal subset is available and 11952 * the parsing is launched only at that point. 11953 * Internal subset ends up with "']' S? '>'" in an unescaped 11954 * section and not in a ']]>' sequence which are conditional 11955 * sections (whoever argued to keep that crap in XML deserve 11956 * a place in hell !). 
11957 */ 11958 int base, i; 11959 xmlChar *buf; 11960 xmlChar quote = 0; 11961 size_t use; 11962 11963 base = ctxt->input->cur - ctxt->input->base; 11964 if (base < 0) return(0); 11965 if (ctxt->checkIndex > base) 11966 base = ctxt->checkIndex; 11967 buf = xmlBufContent(ctxt->input->buf->buffer); 11968 use = xmlBufUse(ctxt->input->buf->buffer); 11969 for (;(unsigned int) base < use; base++) { 11970 if (quote != 0) { 11971 if (buf[base] == quote) 11972 quote = 0; 11973 continue; 11974 } 11975 if ((quote == 0) && (buf[base] == '<')) { 11976 int found = 0; 11977 /* special handling of comments */ 11978 if (((unsigned int) base + 4 < use) && 11979 (buf[base + 1] == '!') && 11980 (buf[base + 2] == '-') && 11981 (buf[base + 3] == '-')) { 11982 for (;(unsigned int) base + 3 < use; base++) { 11983 if ((buf[base] == '-') && 11984 (buf[base + 1] == '-') && 11985 (buf[base + 2] == '>')) { 11986 found = 1; 11987 base += 2; 11988 break; 11989 } 11990 } 11991 if (!found) { 11992 #if 0 11993 fprintf(stderr, "unfinished comment\n"); 11994 #endif 11995 break; /* for */ 11996 } 11997 continue; 11998 } 11999 } 12000 if (buf[base] == '"') { 12001 quote = '"'; 12002 continue; 12003 } 12004 if (buf[base] == '\'') { 12005 quote = '\''; 12006 continue; 12007 } 12008 if (buf[base] == ']') { 12009 #if 0 12010 fprintf(stderr, "%c%c%c%c: ", buf[base], 12011 buf[base + 1], buf[base + 2], buf[base + 3]); 12012 #endif 12013 if ((unsigned int) base +1 >= use) 12014 break; 12015 if (buf[base + 1] == ']') { 12016 /* conditional crap, skip both ']' ! 
*/ 12017 base++; 12018 continue; 12019 } 12020 for (i = 1; (unsigned int) base + i < use; i++) { 12021 if (buf[base + i] == '>') { 12022 #if 0 12023 fprintf(stderr, "found\n"); 12024 #endif 12025 goto found_end_int_subset; 12026 } 12027 if (!IS_BLANK_CH(buf[base + i])) { 12028 #if 0 12029 fprintf(stderr, "not found\n"); 12030 #endif 12031 goto not_end_of_int_subset; 12032 } 12033 } 12034 #if 0 12035 fprintf(stderr, "end of stream\n"); 12036 #endif 12037 break; 12038 12039 } 12040 not_end_of_int_subset: 12041 continue; /* for */ 12042 } 12043 /* 12044 * We didn't found the end of the Internal subset 12045 */ 12046 if (quote == 0) 12047 ctxt->checkIndex = base; 12048 else 12049 ctxt->checkIndex = 0; 12050 #ifdef DEBUG_PUSH 12051 if (next == 0) 12052 xmlGenericError(xmlGenericErrorContext, 12053 "PP: lookup of int subset end filed\n"); 12054 #endif 12055 goto done; 12056 12057 found_end_int_subset: 12058 ctxt->checkIndex = 0; 12059 xmlParseInternalSubset(ctxt); 12060 if (ctxt->instate == XML_PARSER_EOF) 12061 goto done; 12062 ctxt->inSubset = 2; 12063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12064 (ctxt->sax->externalSubset != NULL)) 12065 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12066 ctxt->extSubSystem, ctxt->extSubURI); 12067 ctxt->inSubset = 0; 12068 xmlCleanSpecialAttr(ctxt); 12069 if (ctxt->instate == XML_PARSER_EOF) 12070 goto done; 12071 ctxt->instate = XML_PARSER_PROLOG; 12072 ctxt->checkIndex = 0; 12073 #ifdef DEBUG_PUSH 12074 xmlGenericError(xmlGenericErrorContext, 12075 "PP: entering PROLOG\n"); 12076 #endif 12077 break; 12078 } 12079 case XML_PARSER_COMMENT: 12080 xmlGenericError(xmlGenericErrorContext, 12081 "PP: internal error, state == COMMENT\n"); 12082 ctxt->instate = XML_PARSER_CONTENT; 12083 #ifdef DEBUG_PUSH 12084 xmlGenericError(xmlGenericErrorContext, 12085 "PP: entering CONTENT\n"); 12086 #endif 12087 break; 12088 case XML_PARSER_IGNORE: 12089 xmlGenericError(xmlGenericErrorContext, 12090 "PP: internal error, state 
== IGNORE"); 12091 ctxt->instate = XML_PARSER_DTD; 12092 #ifdef DEBUG_PUSH 12093 xmlGenericError(xmlGenericErrorContext, 12094 "PP: entering DTD\n"); 12095 #endif 12096 break; 12097 case XML_PARSER_PI: 12098 xmlGenericError(xmlGenericErrorContext, 12099 "PP: internal error, state == PI\n"); 12100 ctxt->instate = XML_PARSER_CONTENT; 12101 #ifdef DEBUG_PUSH 12102 xmlGenericError(xmlGenericErrorContext, 12103 "PP: entering CONTENT\n"); 12104 #endif 12105 break; 12106 case XML_PARSER_ENTITY_DECL: 12107 xmlGenericError(xmlGenericErrorContext, 12108 "PP: internal error, state == ENTITY_DECL\n"); 12109 ctxt->instate = XML_PARSER_DTD; 12110 #ifdef DEBUG_PUSH 12111 xmlGenericError(xmlGenericErrorContext, 12112 "PP: entering DTD\n"); 12113 #endif 12114 break; 12115 case XML_PARSER_ENTITY_VALUE: 12116 xmlGenericError(xmlGenericErrorContext, 12117 "PP: internal error, state == ENTITY_VALUE\n"); 12118 ctxt->instate = XML_PARSER_CONTENT; 12119 #ifdef DEBUG_PUSH 12120 xmlGenericError(xmlGenericErrorContext, 12121 "PP: entering DTD\n"); 12122 #endif 12123 break; 12124 case XML_PARSER_ATTRIBUTE_VALUE: 12125 xmlGenericError(xmlGenericErrorContext, 12126 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12127 ctxt->instate = XML_PARSER_START_TAG; 12128 #ifdef DEBUG_PUSH 12129 xmlGenericError(xmlGenericErrorContext, 12130 "PP: entering START_TAG\n"); 12131 #endif 12132 break; 12133 case XML_PARSER_SYSTEM_LITERAL: 12134 xmlGenericError(xmlGenericErrorContext, 12135 "PP: internal error, state == SYSTEM_LITERAL\n"); 12136 ctxt->instate = XML_PARSER_START_TAG; 12137 #ifdef DEBUG_PUSH 12138 xmlGenericError(xmlGenericErrorContext, 12139 "PP: entering START_TAG\n"); 12140 #endif 12141 break; 12142 case XML_PARSER_PUBLIC_LITERAL: 12143 xmlGenericError(xmlGenericErrorContext, 12144 "PP: internal error, state == PUBLIC_LITERAL\n"); 12145 ctxt->instate = XML_PARSER_START_TAG; 12146 #ifdef DEBUG_PUSH 12147 xmlGenericError(xmlGenericErrorContext, 12148 "PP: entering START_TAG\n"); 12149 #endif 
12150 break; 12151 } 12152 } 12153 done: 12154 #ifdef DEBUG_PUSH 12155 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12156 #endif 12157 return(ret); 12158 encoding_error: 12159 { 12160 char buffer[150]; 12161 12162 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12163 ctxt->input->cur[0], ctxt->input->cur[1], 12164 ctxt->input->cur[2], ctxt->input->cur[3]); 12165 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12166 "Input is not proper UTF-8, indicate encoding !\n%s", 12167 BAD_CAST buffer, NULL); 12168 } 12169 return(0); 12170 } 12171 12172 /** 12173 * xmlParseCheckTransition: 12174 * @ctxt: an XML parser context 12175 * @chunk: a char array 12176 * @size: the size in byte of the chunk 12177 * 12178 * Check depending on the current parser state if the chunk given must be 12179 * processed immediately or one need more data to advance on parsing. 12180 * 12181 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12182 */ 12183 static int 12184 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12185 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12186 return(-1); 12187 if (ctxt->instate == XML_PARSER_START_TAG) { 12188 if (memchr(chunk, '>', size) != NULL) 12189 return(1); 12190 return(0); 12191 } 12192 if (ctxt->progressive == XML_PARSER_COMMENT) { 12193 if (memchr(chunk, '>', size) != NULL) 12194 return(1); 12195 return(0); 12196 } 12197 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12198 if (memchr(chunk, '>', size) != NULL) 12199 return(1); 12200 return(0); 12201 } 12202 if (ctxt->progressive == XML_PARSER_PI) { 12203 if (memchr(chunk, '>', size) != NULL) 12204 return(1); 12205 return(0); 12206 } 12207 if (ctxt->instate == XML_PARSER_END_TAG) { 12208 if (memchr(chunk, '>', size) != NULL) 12209 return(1); 12210 return(0); 12211 } 12212 if ((ctxt->progressive == XML_PARSER_DTD) || 12213 (ctxt->instate == XML_PARSER_DTD)) { 12214 if (memchr(chunk, '>', size) != NULL) 12215 
return(1); 12216 return(0); 12217 } 12218 return(1); 12219 } 12220 12221 /** 12222 * xmlParseChunk: 12223 * @ctxt: an XML parser context 12224 * @chunk: an char array 12225 * @size: the size in byte of the chunk 12226 * @terminate: last chunk indicator 12227 * 12228 * Parse a Chunk of memory 12229 * 12230 * Returns zero if no error, the xmlParserErrors otherwise. 12231 */ 12232 int 12233 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12234 int terminate) { 12235 int end_in_lf = 0; 12236 int remain = 0; 12237 size_t old_avail = 0; 12238 size_t avail = 0; 12239 12240 if (ctxt == NULL) 12241 return(XML_ERR_INTERNAL_ERROR); 12242 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12243 return(ctxt->errNo); 12244 if (ctxt->instate == XML_PARSER_EOF) 12245 return(-1); 12246 if (ctxt->instate == XML_PARSER_START) 12247 xmlDetectSAX2(ctxt); 12248 if ((size > 0) && (chunk != NULL) && (!terminate) && 12249 (chunk[size - 1] == '\r')) { 12250 end_in_lf = 1; 12251 size--; 12252 } 12253 12254 xmldecl_done: 12255 12256 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12257 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12258 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12259 size_t cur = ctxt->input->cur - ctxt->input->base; 12260 int res; 12261 12262 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12263 /* 12264 * Specific handling if we autodetected an encoding, we should not 12265 * push more than the first line ... 
which depend on the encoding 12266 * And only push the rest once the final encoding was detected 12267 */ 12268 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12269 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12270 unsigned int len = 45; 12271 12272 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12273 BAD_CAST "UTF-16")) || 12274 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12275 BAD_CAST "UTF16"))) 12276 len = 90; 12277 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12278 BAD_CAST "UCS-4")) || 12279 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12280 BAD_CAST "UCS4"))) 12281 len = 180; 12282 12283 if (ctxt->input->buf->rawconsumed < len) 12284 len -= ctxt->input->buf->rawconsumed; 12285 12286 /* 12287 * Change size for reading the initial declaration only 12288 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12289 * will blindly copy extra bytes from memory. 12290 */ 12291 if ((unsigned int) size > len) { 12292 remain = size - len; 12293 size = len; 12294 } else { 12295 remain = 0; 12296 } 12297 } 12298 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12299 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12300 if (res < 0) { 12301 ctxt->errNo = XML_PARSER_EOF; 12302 xmlHaltParser(ctxt); 12303 return (XML_PARSER_EOF); 12304 } 12305 #ifdef DEBUG_PUSH 12306 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12307 #endif 12308 12309 } else if (ctxt->instate != XML_PARSER_EOF) { 12310 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12311 xmlParserInputBufferPtr in = ctxt->input->buf; 12312 if ((in->encoder != NULL) && (in->buffer != NULL) && 12313 (in->raw != NULL)) { 12314 int nbchars; 12315 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12316 size_t current = ctxt->input->cur - ctxt->input->base; 12317 12318 nbchars = xmlCharEncInput(in, terminate); 12319 
xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12320 if (nbchars < 0) { 12321 /* TODO 2.6.0 */ 12322 xmlGenericError(xmlGenericErrorContext, 12323 "xmlParseChunk: encoder error\n"); 12324 xmlHaltParser(ctxt); 12325 return(XML_ERR_INVALID_ENCODING); 12326 } 12327 } 12328 } 12329 } 12330 if (remain != 0) { 12331 xmlParseTryOrFinish(ctxt, 0); 12332 } else { 12333 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12334 avail = xmlBufUse(ctxt->input->buf->buffer); 12335 /* 12336 * Depending on the current state it may not be such 12337 * a good idea to try parsing if there is nothing in the chunk 12338 * which would be worth doing a parser state transition and we 12339 * need to wait for more data 12340 */ 12341 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12342 (old_avail == 0) || (avail == 0) || 12343 (xmlParseCheckTransition(ctxt, 12344 (const char *)&ctxt->input->base[old_avail], 12345 avail - old_avail))) 12346 xmlParseTryOrFinish(ctxt, terminate); 12347 } 12348 if (ctxt->instate == XML_PARSER_EOF) 12349 return(ctxt->errNo); 12350 12351 if ((ctxt->input != NULL) && 12352 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12353 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12354 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12355 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12356 xmlHaltParser(ctxt); 12357 } 12358 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12359 return(ctxt->errNo); 12360 12361 if (remain != 0) { 12362 chunk += size; 12363 size = remain; 12364 remain = 0; 12365 goto xmldecl_done; 12366 } 12367 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12368 (ctxt->input->buf != NULL)) { 12369 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12370 ctxt->input); 12371 size_t current = ctxt->input->cur - ctxt->input->base; 12372 12373 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12374 12375 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, 
ctxt->input, 12376 base, current); 12377 } 12378 if (terminate) { 12379 /* 12380 * Check for termination 12381 */ 12382 int cur_avail = 0; 12383 12384 if (ctxt->input != NULL) { 12385 if (ctxt->input->buf == NULL) 12386 cur_avail = ctxt->input->length - 12387 (ctxt->input->cur - ctxt->input->base); 12388 else 12389 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12390 (ctxt->input->cur - ctxt->input->base); 12391 } 12392 12393 if ((ctxt->instate != XML_PARSER_EOF) && 12394 (ctxt->instate != XML_PARSER_EPILOG)) { 12395 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12396 } 12397 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12398 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12399 } 12400 if (ctxt->instate != XML_PARSER_EOF) { 12401 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12402 ctxt->sax->endDocument(ctxt->userData); 12403 } 12404 ctxt->instate = XML_PARSER_EOF; 12405 } 12406 if (ctxt->wellFormed == 0) 12407 return((xmlParserErrors) ctxt->errNo); 12408 else 12409 return(0); 12410 } 12411 12412 /************************************************************************ 12413 * * 12414 * I/O front end functions to the parser * 12415 * * 12416 ************************************************************************/ 12417 12418 /** 12419 * xmlCreatePushParserCtxt: 12420 * @sax: a SAX handler 12421 * @user_data: The user data returned on SAX callbacks 12422 * @chunk: a pointer to an array of chars 12423 * @size: number of chars in the array 12424 * @filename: an optional file name or URI 12425 * 12426 * Create a parser context for using the XML parser in push mode. 12427 * If @buffer and @size are non-NULL, the data is used to detect 12428 * the encoding. The remaining characters will be parsed so they 12429 * don't need to be fed in again through xmlParseChunk. 12430 * To allow content encoding detection, @size should be >= 4 12431 * The value of @filename is used for fetching external entities 12432 * and error/warning reports. 
12433 * 12434 * Returns the new parser context or NULL 12435 */ 12436 12437 xmlParserCtxtPtr 12438 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12439 const char *chunk, int size, const char *filename) { 12440 xmlParserCtxtPtr ctxt; 12441 xmlParserInputPtr inputStream; 12442 xmlParserInputBufferPtr buf; 12443 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12444 12445 /* 12446 * plug some encoding conversion routines 12447 */ 12448 if ((chunk != NULL) && (size >= 4)) 12449 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12450 12451 buf = xmlAllocParserInputBuffer(enc); 12452 if (buf == NULL) return(NULL); 12453 12454 ctxt = xmlNewParserCtxt(); 12455 if (ctxt == NULL) { 12456 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12457 xmlFreeParserInputBuffer(buf); 12458 return(NULL); 12459 } 12460 ctxt->dictNames = 1; 12461 if (sax != NULL) { 12462 #ifdef LIBXML_SAX1_ENABLED 12463 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12464 #endif /* LIBXML_SAX1_ENABLED */ 12465 xmlFree(ctxt->sax); 12466 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12467 if (ctxt->sax == NULL) { 12468 xmlErrMemory(ctxt, NULL); 12469 xmlFreeParserInputBuffer(buf); 12470 xmlFreeParserCtxt(ctxt); 12471 return(NULL); 12472 } 12473 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12474 if (sax->initialized == XML_SAX2_MAGIC) 12475 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12476 else 12477 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12478 if (user_data != NULL) 12479 ctxt->userData = user_data; 12480 } 12481 if (filename == NULL) { 12482 ctxt->directory = NULL; 12483 } else { 12484 ctxt->directory = xmlParserGetDirectory(filename); 12485 } 12486 12487 inputStream = xmlNewInputStream(ctxt); 12488 if (inputStream == NULL) { 12489 xmlFreeParserCtxt(ctxt); 12490 xmlFreeParserInputBuffer(buf); 12491 return(NULL); 12492 } 12493 12494 if (filename == NULL) 12495 inputStream->filename = NULL; 12496 else { 12497 inputStream->filename = 
(char *) 12498 xmlCanonicPath((const xmlChar *) filename); 12499 if (inputStream->filename == NULL) { 12500 xmlFreeParserCtxt(ctxt); 12501 xmlFreeParserInputBuffer(buf); 12502 return(NULL); 12503 } 12504 } 12505 inputStream->buf = buf; 12506 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12507 inputPush(ctxt, inputStream); 12508 12509 /* 12510 * If the caller didn't provide an initial 'chunk' for determining 12511 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12512 * that it can be automatically determined later 12513 */ 12514 if ((size == 0) || (chunk == NULL)) { 12515 ctxt->charset = XML_CHAR_ENCODING_NONE; 12516 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12517 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12518 size_t cur = ctxt->input->cur - ctxt->input->base; 12519 12520 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12521 12522 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12523 #ifdef DEBUG_PUSH 12524 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12525 #endif 12526 } 12527 12528 if (enc != XML_CHAR_ENCODING_NONE) { 12529 xmlSwitchEncoding(ctxt, enc); 12530 } 12531 12532 return(ctxt); 12533 } 12534 #endif /* LIBXML_PUSH_ENABLED */ 12535 12536 /** 12537 * xmlHaltParser: 12538 * @ctxt: an XML parser context 12539 * 12540 * Blocks further parser processing don't override error 12541 * for internal use 12542 */ 12543 static void 12544 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12545 if (ctxt == NULL) 12546 return; 12547 ctxt->instate = XML_PARSER_EOF; 12548 ctxt->disableSAX = 1; 12549 while (ctxt->inputNr > 1) 12550 xmlFreeInputStream(inputPop(ctxt)); 12551 if (ctxt->input != NULL) { 12552 /* 12553 * in case there was a specific allocation deallocate before 12554 * overriding base 12555 */ 12556 if (ctxt->input->free != NULL) { 12557 ctxt->input->free((xmlChar *) ctxt->input->base); 12558 ctxt->input->free = NULL; 12559 } 12560 if 
(ctxt->input->buf != NULL) { 12561 xmlFreeParserInputBuffer(ctxt->input->buf); 12562 ctxt->input->buf = NULL; 12563 } 12564 ctxt->input->cur = BAD_CAST""; 12565 ctxt->input->length = 0; 12566 ctxt->input->base = ctxt->input->cur; 12567 ctxt->input->end = ctxt->input->cur; 12568 } 12569 } 12570 12571 /** 12572 * xmlStopParser: 12573 * @ctxt: an XML parser context 12574 * 12575 * Blocks further parser processing 12576 */ 12577 void 12578 xmlStopParser(xmlParserCtxtPtr ctxt) { 12579 if (ctxt == NULL) 12580 return; 12581 xmlHaltParser(ctxt); 12582 ctxt->errNo = XML_ERR_USER_STOP; 12583 } 12584 12585 /** 12586 * xmlCreateIOParserCtxt: 12587 * @sax: a SAX handler 12588 * @user_data: The user data returned on SAX callbacks 12589 * @ioread: an I/O read function 12590 * @ioclose: an I/O close function 12591 * @ioctx: an I/O handler 12592 * @enc: the charset encoding if known 12593 * 12594 * Create a parser context for using the XML parser with an existing 12595 * I/O stream 12596 * 12597 * Returns the new parser context or NULL 12598 */ 12599 xmlParserCtxtPtr 12600 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12601 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12602 void *ioctx, xmlCharEncoding enc) { 12603 xmlParserCtxtPtr ctxt; 12604 xmlParserInputPtr inputStream; 12605 xmlParserInputBufferPtr buf; 12606 12607 if (ioread == NULL) return(NULL); 12608 12609 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12610 if (buf == NULL) { 12611 if (ioclose != NULL) 12612 ioclose(ioctx); 12613 return (NULL); 12614 } 12615 12616 ctxt = xmlNewParserCtxt(); 12617 if (ctxt == NULL) { 12618 xmlFreeParserInputBuffer(buf); 12619 return(NULL); 12620 } 12621 if (sax != NULL) { 12622 #ifdef LIBXML_SAX1_ENABLED 12623 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12624 #endif /* LIBXML_SAX1_ENABLED */ 12625 xmlFree(ctxt->sax); 12626 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12627 if (ctxt->sax == NULL) { 12628 
xmlErrMemory(ctxt, NULL); 12629 xmlFreeParserCtxt(ctxt); 12630 return(NULL); 12631 } 12632 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12633 if (sax->initialized == XML_SAX2_MAGIC) 12634 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12635 else 12636 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12637 if (user_data != NULL) 12638 ctxt->userData = user_data; 12639 } 12640 12641 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12642 if (inputStream == NULL) { 12643 xmlFreeParserCtxt(ctxt); 12644 return(NULL); 12645 } 12646 inputPush(ctxt, inputStream); 12647 12648 return(ctxt); 12649 } 12650 12651 #ifdef LIBXML_VALID_ENABLED 12652 /************************************************************************ 12653 * * 12654 * Front ends when parsing a DTD * 12655 * * 12656 ************************************************************************/ 12657 12658 /** 12659 * xmlIOParseDTD: 12660 * @sax: the SAX handler block or NULL 12661 * @input: an Input Buffer 12662 * @enc: the charset encoding if known 12663 * 12664 * Load and parse a DTD 12665 * 12666 * Returns the resulting xmlDtdPtr or NULL in case of error. 12667 * @input will be freed by the function in any case. 
12668 */ 12669 12670 xmlDtdPtr 12671 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12672 xmlCharEncoding enc) { 12673 xmlDtdPtr ret = NULL; 12674 xmlParserCtxtPtr ctxt; 12675 xmlParserInputPtr pinput = NULL; 12676 xmlChar start[4]; 12677 12678 if (input == NULL) 12679 return(NULL); 12680 12681 ctxt = xmlNewParserCtxt(); 12682 if (ctxt == NULL) { 12683 xmlFreeParserInputBuffer(input); 12684 return(NULL); 12685 } 12686 12687 /* We are loading a DTD */ 12688 ctxt->options |= XML_PARSE_DTDLOAD; 12689 12690 /* 12691 * Set-up the SAX context 12692 */ 12693 if (sax != NULL) { 12694 if (ctxt->sax != NULL) 12695 xmlFree(ctxt->sax); 12696 ctxt->sax = sax; 12697 ctxt->userData = ctxt; 12698 } 12699 xmlDetectSAX2(ctxt); 12700 12701 /* 12702 * generate a parser input from the I/O handler 12703 */ 12704 12705 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12706 if (pinput == NULL) { 12707 if (sax != NULL) ctxt->sax = NULL; 12708 xmlFreeParserInputBuffer(input); 12709 xmlFreeParserCtxt(ctxt); 12710 return(NULL); 12711 } 12712 12713 /* 12714 * plug some encoding conversion routines here. 12715 */ 12716 if (xmlPushInput(ctxt, pinput) < 0) { 12717 if (sax != NULL) ctxt->sax = NULL; 12718 xmlFreeParserCtxt(ctxt); 12719 return(NULL); 12720 } 12721 if (enc != XML_CHAR_ENCODING_NONE) { 12722 xmlSwitchEncoding(ctxt, enc); 12723 } 12724 12725 pinput->filename = NULL; 12726 pinput->line = 1; 12727 pinput->col = 1; 12728 pinput->base = ctxt->input->cur; 12729 pinput->cur = ctxt->input->cur; 12730 pinput->free = NULL; 12731 12732 /* 12733 * let's parse that entity knowing it's an external subset. 
12734 */ 12735 ctxt->inSubset = 2; 12736 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12737 if (ctxt->myDoc == NULL) { 12738 xmlErrMemory(ctxt, "New Doc failed"); 12739 return(NULL); 12740 } 12741 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12742 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12743 BAD_CAST "none", BAD_CAST "none"); 12744 12745 if ((enc == XML_CHAR_ENCODING_NONE) && 12746 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12747 /* 12748 * Get the 4 first bytes and decode the charset 12749 * if enc != XML_CHAR_ENCODING_NONE 12750 * plug some encoding conversion routines. 12751 */ 12752 start[0] = RAW; 12753 start[1] = NXT(1); 12754 start[2] = NXT(2); 12755 start[3] = NXT(3); 12756 enc = xmlDetectCharEncoding(start, 4); 12757 if (enc != XML_CHAR_ENCODING_NONE) { 12758 xmlSwitchEncoding(ctxt, enc); 12759 } 12760 } 12761 12762 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12763 12764 if (ctxt->myDoc != NULL) { 12765 if (ctxt->wellFormed) { 12766 ret = ctxt->myDoc->extSubset; 12767 ctxt->myDoc->extSubset = NULL; 12768 if (ret != NULL) { 12769 xmlNodePtr tmp; 12770 12771 ret->doc = NULL; 12772 tmp = ret->children; 12773 while (tmp != NULL) { 12774 tmp->doc = NULL; 12775 tmp = tmp->next; 12776 } 12777 } 12778 } else { 12779 ret = NULL; 12780 } 12781 xmlFreeDoc(ctxt->myDoc); 12782 ctxt->myDoc = NULL; 12783 } 12784 if (sax != NULL) ctxt->sax = NULL; 12785 xmlFreeParserCtxt(ctxt); 12786 12787 return(ret); 12788 } 12789 12790 /** 12791 * xmlSAXParseDTD: 12792 * @sax: the SAX handler block 12793 * @ExternalID: a NAME* containing the External ID of the DTD 12794 * @SystemID: a NAME* containing the URL to the DTD 12795 * 12796 * Load and parse an external subset. 12797 * 12798 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12799 */ 12800 12801 xmlDtdPtr 12802 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12803 const xmlChar *SystemID) { 12804 xmlDtdPtr ret = NULL; 12805 xmlParserCtxtPtr ctxt; 12806 xmlParserInputPtr input = NULL; 12807 xmlCharEncoding enc; 12808 xmlChar* systemIdCanonic; 12809 12810 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12811 12812 ctxt = xmlNewParserCtxt(); 12813 if (ctxt == NULL) { 12814 return(NULL); 12815 } 12816 12817 /* We are loading a DTD */ 12818 ctxt->options |= XML_PARSE_DTDLOAD; 12819 12820 /* 12821 * Set-up the SAX context 12822 */ 12823 if (sax != NULL) { 12824 if (ctxt->sax != NULL) 12825 xmlFree(ctxt->sax); 12826 ctxt->sax = sax; 12827 ctxt->userData = ctxt; 12828 } 12829 12830 /* 12831 * Canonicalise the system ID 12832 */ 12833 systemIdCanonic = xmlCanonicPath(SystemID); 12834 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12835 xmlFreeParserCtxt(ctxt); 12836 return(NULL); 12837 } 12838 12839 /* 12840 * Ask the Entity resolver to load the damn thing 12841 */ 12842 12843 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12844 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12845 systemIdCanonic); 12846 if (input == NULL) { 12847 if (sax != NULL) ctxt->sax = NULL; 12848 xmlFreeParserCtxt(ctxt); 12849 if (systemIdCanonic != NULL) 12850 xmlFree(systemIdCanonic); 12851 return(NULL); 12852 } 12853 12854 /* 12855 * plug some encoding conversion routines here. 
12856 */ 12857 if (xmlPushInput(ctxt, input) < 0) { 12858 if (sax != NULL) ctxt->sax = NULL; 12859 xmlFreeParserCtxt(ctxt); 12860 if (systemIdCanonic != NULL) 12861 xmlFree(systemIdCanonic); 12862 return(NULL); 12863 } 12864 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12865 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12866 xmlSwitchEncoding(ctxt, enc); 12867 } 12868 12869 if (input->filename == NULL) 12870 input->filename = (char *) systemIdCanonic; 12871 else 12872 xmlFree(systemIdCanonic); 12873 input->line = 1; 12874 input->col = 1; 12875 input->base = ctxt->input->cur; 12876 input->cur = ctxt->input->cur; 12877 input->free = NULL; 12878 12879 /* 12880 * let's parse that entity knowing it's an external subset. 12881 */ 12882 ctxt->inSubset = 2; 12883 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12884 if (ctxt->myDoc == NULL) { 12885 xmlErrMemory(ctxt, "New Doc failed"); 12886 if (sax != NULL) ctxt->sax = NULL; 12887 xmlFreeParserCtxt(ctxt); 12888 return(NULL); 12889 } 12890 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12891 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12892 ExternalID, SystemID); 12893 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12894 12895 if (ctxt->myDoc != NULL) { 12896 if (ctxt->wellFormed) { 12897 ret = ctxt->myDoc->extSubset; 12898 ctxt->myDoc->extSubset = NULL; 12899 if (ret != NULL) { 12900 xmlNodePtr tmp; 12901 12902 ret->doc = NULL; 12903 tmp = ret->children; 12904 while (tmp != NULL) { 12905 tmp->doc = NULL; 12906 tmp = tmp->next; 12907 } 12908 } 12909 } else { 12910 ret = NULL; 12911 } 12912 xmlFreeDoc(ctxt->myDoc); 12913 ctxt->myDoc = NULL; 12914 } 12915 if (sax != NULL) ctxt->sax = NULL; 12916 xmlFreeParserCtxt(ctxt); 12917 12918 return(ret); 12919 } 12920 12921 12922 /** 12923 * xmlParseDTD: 12924 * @ExternalID: a NAME* containing the External ID of the DTD 12925 * @SystemID: a NAME* containing the URL to the DTD 12926 * 12927 * Load and parse an external subset. 
12928 * 12929 * Returns the resulting xmlDtdPtr or NULL in case of error. 12930 */ 12931 12932 xmlDtdPtr 12933 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12934 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12935 } 12936 #endif /* LIBXML_VALID_ENABLED */ 12937 12938 /************************************************************************ 12939 * * 12940 * Front ends when parsing an Entity * 12941 * * 12942 ************************************************************************/ 12943 12944 /** 12945 * xmlParseCtxtExternalEntity: 12946 * @ctx: the existing parsing context 12947 * @URL: the URL for the entity to load 12948 * @ID: the System ID for the entity to load 12949 * @lst: the return value for the set of parsed nodes 12950 * 12951 * Parse an external general entity within an existing parsing context 12952 * An external general parsed entity is well-formed if it matches the 12953 * production labeled extParsedEnt. 12954 * 12955 * [78] extParsedEnt ::= TextDecl? 
content 12956 * 12957 * Returns 0 if the entity is well formed, -1 in case of args problem and 12958 * the parser error code otherwise 12959 */ 12960 12961 int 12962 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12963 const xmlChar *ID, xmlNodePtr *lst) { 12964 void *userData; 12965 12966 if (ctx == NULL) return(-1); 12967 /* 12968 * If the user provided their own SAX callbacks, then reuse the 12969 * userData callback field, otherwise the expected setup in a 12970 * DOM builder is to have userData == ctxt 12971 */ 12972 if (ctx->userData == ctx) 12973 userData = NULL; 12974 else 12975 userData = ctx->userData; 12976 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax, 12977 userData, ctx->depth + 1, 12978 URL, ID, lst); 12979 } 12980 12981 /** 12982 * xmlParseExternalEntityPrivate: 12983 * @doc: the document the chunk pertains to 12984 * @oldctxt: the previous parser context if available 12985 * @sax: the SAX handler block (possibly NULL) 12986 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12987 * @depth: Used for loop detection, use 0 12988 * @URL: the URL for the entity to load 12989 * @ID: the System ID for the entity to load 12990 * @list: the return value for the set of parsed nodes 12991 * 12992 * Private version of xmlParseExternalEntity() 12993 * 12994 * Returns 0 if the entity is well formed, -1 in case of args problem and 12995 * the parser error code otherwise 12996 */ 12997 12998 static xmlParserErrors 12999 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13000 xmlSAXHandlerPtr sax, 13001 void *user_data, int depth, const xmlChar *URL, 13002 const xmlChar *ID, xmlNodePtr *list) { 13003 xmlParserCtxtPtr ctxt; 13004 xmlDocPtr newDoc; 13005 xmlNodePtr newRoot; 13006 xmlSAXHandlerPtr oldsax = NULL; 13007 xmlParserErrors ret = XML_ERR_OK; 13008 xmlChar start[4]; 13009 xmlCharEncoding enc; 13010 13011 if (((depth > 40) && 13012 ((oldctxt == NULL) || (oldctxt->options & 
XML_PARSE_HUGE) == 0)) || 13013 (depth > 1024)) { 13014 return(XML_ERR_ENTITY_LOOP); 13015 } 13016 13017 if (list != NULL) 13018 *list = NULL; 13019 if ((URL == NULL) && (ID == NULL)) 13020 return(XML_ERR_INTERNAL_ERROR); 13021 if (doc == NULL) 13022 return(XML_ERR_INTERNAL_ERROR); 13023 13024 13025 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13026 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13027 ctxt->userData = ctxt; 13028 if (sax != NULL) { 13029 oldsax = ctxt->sax; 13030 ctxt->sax = sax; 13031 if (user_data != NULL) 13032 ctxt->userData = user_data; 13033 } 13034 xmlDetectSAX2(ctxt); 13035 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13036 if (newDoc == NULL) { 13037 xmlFreeParserCtxt(ctxt); 13038 return(XML_ERR_INTERNAL_ERROR); 13039 } 13040 newDoc->properties = XML_DOC_INTERNAL; 13041 if (doc) { 13042 newDoc->intSubset = doc->intSubset; 13043 newDoc->extSubset = doc->extSubset; 13044 if (doc->dict) { 13045 newDoc->dict = doc->dict; 13046 xmlDictReference(newDoc->dict); 13047 } 13048 if (doc->URL != NULL) { 13049 newDoc->URL = xmlStrdup(doc->URL); 13050 } 13051 } 13052 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13053 if (newRoot == NULL) { 13054 if (sax != NULL) 13055 ctxt->sax = oldsax; 13056 xmlFreeParserCtxt(ctxt); 13057 newDoc->intSubset = NULL; 13058 newDoc->extSubset = NULL; 13059 xmlFreeDoc(newDoc); 13060 return(XML_ERR_INTERNAL_ERROR); 13061 } 13062 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13063 nodePush(ctxt, newDoc->children); 13064 if (doc == NULL) { 13065 ctxt->myDoc = newDoc; 13066 } else { 13067 ctxt->myDoc = doc; 13068 newRoot->doc = doc; 13069 } 13070 13071 /* 13072 * Get the 4 first bytes and decode the charset 13073 * if enc != XML_CHAR_ENCODING_NONE 13074 * plug some encoding conversion routines. 
13075 */ 13076 GROW; 13077 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13078 start[0] = RAW; 13079 start[1] = NXT(1); 13080 start[2] = NXT(2); 13081 start[3] = NXT(3); 13082 enc = xmlDetectCharEncoding(start, 4); 13083 if (enc != XML_CHAR_ENCODING_NONE) { 13084 xmlSwitchEncoding(ctxt, enc); 13085 } 13086 } 13087 13088 /* 13089 * Parse a possible text declaration first 13090 */ 13091 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13092 xmlParseTextDecl(ctxt); 13093 /* 13094 * An XML-1.0 document can't reference an entity not XML-1.0 13095 */ 13096 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) && 13097 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 13098 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 13099 "Version mismatch between document and entity\n"); 13100 } 13101 } 13102 13103 ctxt->instate = XML_PARSER_CONTENT; 13104 ctxt->depth = depth; 13105 if (oldctxt != NULL) { 13106 ctxt->_private = oldctxt->_private; 13107 ctxt->loadsubset = oldctxt->loadsubset; 13108 ctxt->validate = oldctxt->validate; 13109 ctxt->valid = oldctxt->valid; 13110 ctxt->replaceEntities = oldctxt->replaceEntities; 13111 if (oldctxt->validate) { 13112 ctxt->vctxt.error = oldctxt->vctxt.error; 13113 ctxt->vctxt.warning = oldctxt->vctxt.warning; 13114 ctxt->vctxt.userData = oldctxt->vctxt.userData; 13115 } 13116 ctxt->external = oldctxt->external; 13117 if (ctxt->dict) xmlDictFree(ctxt->dict); 13118 ctxt->dict = oldctxt->dict; 13119 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13120 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13121 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13122 ctxt->dictNames = oldctxt->dictNames; 13123 ctxt->attsDefault = oldctxt->attsDefault; 13124 ctxt->attsSpecial = oldctxt->attsSpecial; 13125 ctxt->linenumbers = oldctxt->linenumbers; 13126 ctxt->record_info = oldctxt->record_info; 13127 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13128 
ctxt->node_seq.length = oldctxt->node_seq.length; 13129 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13130 } else { 13131 /* 13132 * Doing validity checking on chunk without context 13133 * doesn't make sense 13134 */ 13135 ctxt->_private = NULL; 13136 ctxt->validate = 0; 13137 ctxt->external = 2; 13138 ctxt->loadsubset = 0; 13139 } 13140 13141 xmlParseContent(ctxt); 13142 13143 if ((RAW == '<') && (NXT(1) == '/')) { 13144 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13145 } else if (RAW != 0) { 13146 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13147 } 13148 if (ctxt->node != newDoc->children) { 13149 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13150 } 13151 13152 if (!ctxt->wellFormed) { 13153 if (ctxt->errNo == 0) 13154 ret = XML_ERR_INTERNAL_ERROR; 13155 else 13156 ret = (xmlParserErrors)ctxt->errNo; 13157 } else { 13158 if (list != NULL) { 13159 xmlNodePtr cur; 13160 13161 /* 13162 * Return the newly created nodeset after unlinking it from 13163 * they pseudo parent. 13164 */ 13165 cur = newDoc->children->children; 13166 *list = cur; 13167 while (cur != NULL) { 13168 cur->parent = NULL; 13169 cur = cur->next; 13170 } 13171 newDoc->children->children = NULL; 13172 } 13173 ret = XML_ERR_OK; 13174 } 13175 13176 /* 13177 * Record in the parent context the number of entities replacement 13178 * done when parsing that reference. 
13179 */ 13180 if (oldctxt != NULL) 13181 oldctxt->nbentities += ctxt->nbentities; 13182 13183 /* 13184 * Also record the size of the entity parsed 13185 */ 13186 if (ctxt->input != NULL && oldctxt != NULL) { 13187 oldctxt->sizeentities += ctxt->input->consumed; 13188 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13189 } 13190 /* 13191 * And record the last error if any 13192 */ 13193 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK)) 13194 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13195 13196 if (sax != NULL) 13197 ctxt->sax = oldsax; 13198 if (oldctxt != NULL) { 13199 ctxt->dict = NULL; 13200 ctxt->attsDefault = NULL; 13201 ctxt->attsSpecial = NULL; 13202 oldctxt->validate = ctxt->validate; 13203 oldctxt->valid = ctxt->valid; 13204 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13205 oldctxt->node_seq.length = ctxt->node_seq.length; 13206 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13207 } 13208 ctxt->node_seq.maximum = 0; 13209 ctxt->node_seq.length = 0; 13210 ctxt->node_seq.buffer = NULL; 13211 xmlFreeParserCtxt(ctxt); 13212 newDoc->intSubset = NULL; 13213 newDoc->extSubset = NULL; 13214 xmlFreeDoc(newDoc); 13215 13216 return(ret); 13217 } 13218 13219 #ifdef LIBXML_SAX1_ENABLED 13220 /** 13221 * xmlParseExternalEntity: 13222 * @doc: the document the chunk pertains to 13223 * @sax: the SAX handler block (possibly NULL) 13224 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13225 * @depth: Used for loop detection, use 0 13226 * @URL: the URL for the entity to load 13227 * @ID: the System ID for the entity to load 13228 * @lst: the return value for the set of parsed nodes 13229 * 13230 * Parse an external general entity 13231 * An external general parsed entity is well-formed if it matches the 13232 * production labeled extParsedEnt. 13233 * 13234 * [78] extParsedEnt ::= TextDecl? 
content
 *
 * Returns 0 if the entity is well formed, -1 in case of args problem and
 *    the parser error code otherwise
 */

/* Public front end: forwards to the private version with no parent context. */
int
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
          int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
                                       ID, lst));
}

/**
 * xmlParseBalancedChunkMemory:
 * @doc:  the document the chunk pertains to (must not be NULL)
 * @sax:  the SAX handler block (possibly NULL)
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
 * @depth:  Used for loop detection, use 0
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * This is xmlParseBalancedChunkMemoryRecover() called with its last
 * argument set to 0.
 *
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
 *    the parser error code otherwise
 */

int
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
                                                depth, string, lst, 0 );
}
#endif /* LIBXML_SAX1_ENABLED */

/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context (dereferenced unconditionally,
 *            so it must not be NULL)
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed by a temporary memory parser context which borrows
 * the dictionary, the SAX handler and the default/special attribute
 * tables of @oldctxt; they are detached again before that context is
 * freed. The nodes are built under a temporary "pseudoroot" node which
 * stands in for the children of the document while parsing.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise
 *
 * @lst is only filled in when the chunk parsed without error; this
 * function has no recover mode.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /* nesting guard: 40 levels by default, 1024 with XML_PARSE_HUGE */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
        ctxt->userData = user_data;
    else
        ctxt->userData = ctxt;
    /* replace the fresh dictionary by the parent's one (borrowed) */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->input_id = oldctxt->input_id + 1;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /* borrow the parent's SAX handler; oldsax is restored before freeing */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* no document yet: build a temporary one, freed at the end */
        newDoc = xmlNewDoc(BAD_CAST "1.0");
        if (newDoc == NULL) {
            ctxt->sax = oldsax;
            ctxt->dict = NULL;
            xmlFreeParserCtxt(ctxt);
            return(XML_ERR_INTERNAL_ERROR);
        }
        newDoc->properties = XML_DOC_INTERNAL;
        newDoc->dict = ctxt->dict;
        xmlDictReference(newDoc->dict);
        ctxt->myDoc = newDoc;
    } else {
        /* remember the document's child list, it is put back below */
        ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
        last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        ctxt->sax = oldsax;
        ctxt->dict = NULL;
        xmlFreeParserCtxt(ctxt);
        if (newDoc != NULL) {
            xmlFreeDoc(newDoc);
        }
        return(XML_ERR_INTERNAL_ERROR);
    }
    /* the pseudoroot temporarily becomes the only child of the document */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }
    ctxt->dictNames = oldctxt->dictNames;
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back at the pseudoroot */
    if (ctxt->node != ctxt->myDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = ctxt->myDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
            if ((oldctxt->validate) && (oldctxt->wellFormed) &&
                (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
                (cur->type == XML_ELEMENT_NODE)) {
                oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
                        oldctxt->myDoc, cur);
            }
#endif /* LIBXML_VALID_ENABLED */
            cur->parent = NULL;
            cur = cur->next;
        }
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /* drop the pseudoroot and restore the original child list */
        xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* detach everything borrowed from oldctxt before freeing ctxt */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
        xmlFreeDoc(newDoc);
    }

    return(ret);
}

/**
 * xmlParseInNodeContext:
 * @node:  the context node
 * @data:  the input string
 * @datalen:  the input string length in bytes
 * @options:  a combination of xmlParserOption
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document
 * within the context (DTD, namespaces, etc ...) of the given node.
 *
 * The allowed sequence for the data is a Well Balanced Chunk defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise
 */
xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
                      int options, xmlNodePtr *lst) {
#ifdef SAX2
    xmlParserCtxtPtr ctxt;
    xmlDocPtr doc = NULL;
    xmlNodePtr fake, cur;
    int nsnr = 0;

    xmlParserErrors ret = XML_ERR_OK;

    /*
     * check all input parameters, grab the document
     */
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
        return(XML_ERR_INTERNAL_ERROR);
    switch (node->type) {
        case XML_ELEMENT_NODE:
        case XML_ATTRIBUTE_NODE:
        case XML_TEXT_NODE:
        case XML_CDATA_SECTION_NODE:
        case XML_ENTITY_REF_NODE:
        case XML_PI_NODE:
        case XML_COMMENT_NODE:
        case
XML_DOCUMENT_NODE: 13514 case XML_HTML_DOCUMENT_NODE: 13515 break; 13516 default: 13517 return(XML_ERR_INTERNAL_ERROR); 13518 13519 } 13520 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13521 (node->type != XML_DOCUMENT_NODE) && 13522 (node->type != XML_HTML_DOCUMENT_NODE)) 13523 node = node->parent; 13524 if (node == NULL) 13525 return(XML_ERR_INTERNAL_ERROR); 13526 if (node->type == XML_ELEMENT_NODE) 13527 doc = node->doc; 13528 else 13529 doc = (xmlDocPtr) node; 13530 if (doc == NULL) 13531 return(XML_ERR_INTERNAL_ERROR); 13532 13533 /* 13534 * allocate a context and set-up everything not related to the 13535 * node position in the tree 13536 */ 13537 if (doc->type == XML_DOCUMENT_NODE) 13538 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13539 #ifdef LIBXML_HTML_ENABLED 13540 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13541 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13542 /* 13543 * When parsing in context, it makes no sense to add implied 13544 * elements like html/body/etc... 13545 */ 13546 options |= HTML_PARSE_NOIMPLIED; 13547 } 13548 #endif 13549 else 13550 return(XML_ERR_INTERNAL_ERROR); 13551 13552 if (ctxt == NULL) 13553 return(XML_ERR_NO_MEMORY); 13554 13555 /* 13556 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13557 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13558 * we must wait until the last moment to free the original one. 
13559 */ 13560 if (doc->dict != NULL) { 13561 if (ctxt->dict != NULL) 13562 xmlDictFree(ctxt->dict); 13563 ctxt->dict = doc->dict; 13564 } else 13565 options |= XML_PARSE_NODICT; 13566 13567 if (doc->encoding != NULL) { 13568 xmlCharEncodingHandlerPtr hdlr; 13569 13570 if (ctxt->encoding != NULL) 13571 xmlFree((xmlChar *) ctxt->encoding); 13572 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13573 13574 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13575 if (hdlr != NULL) { 13576 xmlSwitchToEncoding(ctxt, hdlr); 13577 } else { 13578 return(XML_ERR_UNSUPPORTED_ENCODING); 13579 } 13580 } 13581 13582 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13583 xmlDetectSAX2(ctxt); 13584 ctxt->myDoc = doc; 13585 /* parsing in context, i.e. as within existing content */ 13586 ctxt->input_id = 2; 13587 ctxt->instate = XML_PARSER_CONTENT; 13588 13589 fake = xmlNewComment(NULL); 13590 if (fake == NULL) { 13591 xmlFreeParserCtxt(ctxt); 13592 return(XML_ERR_NO_MEMORY); 13593 } 13594 xmlAddChild(node, fake); 13595 13596 if (node->type == XML_ELEMENT_NODE) { 13597 nodePush(ctxt, node); 13598 /* 13599 * initialize the SAX2 namespaces stack 13600 */ 13601 cur = node; 13602 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13603 xmlNsPtr ns = cur->nsDef; 13604 const xmlChar *iprefix, *ihref; 13605 13606 while (ns != NULL) { 13607 if (ctxt->dict) { 13608 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13609 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13610 } else { 13611 iprefix = ns->prefix; 13612 ihref = ns->href; 13613 } 13614 13615 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13616 nsPush(ctxt, iprefix, ihref); 13617 nsnr++; 13618 } 13619 ns = ns->next; 13620 } 13621 cur = cur->parent; 13622 } 13623 } 13624 13625 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13626 /* 13627 * ID/IDREF registration will be done in xmlValidateElement below 13628 */ 13629 ctxt->loadsubset |= XML_SKIP_IDS; 13630 } 13631 13632 #ifdef 
LIBXML_HTML_ENABLED 13633 if (doc->type == XML_HTML_DOCUMENT_NODE) 13634 __htmlParseContent(ctxt); 13635 else 13636 #endif 13637 xmlParseContent(ctxt); 13638 13639 nsPop(ctxt, nsnr); 13640 if ((RAW == '<') && (NXT(1) == '/')) { 13641 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13642 } else if (RAW != 0) { 13643 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13644 } 13645 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13646 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13647 ctxt->wellFormed = 0; 13648 } 13649 13650 if (!ctxt->wellFormed) { 13651 if (ctxt->errNo == 0) 13652 ret = XML_ERR_INTERNAL_ERROR; 13653 else 13654 ret = (xmlParserErrors)ctxt->errNo; 13655 } else { 13656 ret = XML_ERR_OK; 13657 } 13658 13659 /* 13660 * Return the newly created nodeset after unlinking it from 13661 * the pseudo sibling. 13662 */ 13663 13664 cur = fake->next; 13665 fake->next = NULL; 13666 node->last = fake; 13667 13668 if (cur != NULL) { 13669 cur->prev = NULL; 13670 } 13671 13672 *lst = cur; 13673 13674 while (cur != NULL) { 13675 cur->parent = NULL; 13676 cur = cur->next; 13677 } 13678 13679 xmlUnlinkNode(fake); 13680 xmlFreeNode(fake); 13681 13682 13683 if (ret != XML_ERR_OK) { 13684 xmlFreeNodeList(*lst); 13685 *lst = NULL; 13686 } 13687 13688 if (doc->dict != NULL) 13689 ctxt->dict = NULL; 13690 xmlFreeParserCtxt(ctxt); 13691 13692 return(ret); 13693 #else /* !SAX2 */ 13694 return(XML_ERR_INTERNAL_ERROR); 13695 #endif 13696 } 13697 13698 #ifdef LIBXML_SAX1_ENABLED 13699 /** 13700 * xmlParseBalancedChunkMemoryRecover: 13701 * @doc: the document the chunk pertains to (must not be NULL) 13702 * @sax: the SAX handler block (possibly NULL) 13703 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13704 * @depth: Used for loop detection, use 0 13705 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13706 * @lst: the return value for the set of parsed nodes 13707 * @recover: return nodes even if the data is broken (use 0) 
13708 * 13709 * 13710 * Parse a well-balanced chunk of an XML document 13711 * called by the parser 13712 * The allowed sequence for the Well Balanced Chunk is the one defined by 13713 * the content production in the XML grammar: 13714 * 13715 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13716 * 13717 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13718 * the parser error code otherwise 13719 * 13720 * In case recover is set to 1, the nodelist will not be empty even if 13721 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13722 * some extent. 13723 */ 13724 int 13725 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13726 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13727 int recover) { 13728 xmlParserCtxtPtr ctxt; 13729 xmlDocPtr newDoc; 13730 xmlSAXHandlerPtr oldsax = NULL; 13731 xmlNodePtr content, newRoot; 13732 int size; 13733 int ret = 0; 13734 13735 if (depth > 40) { 13736 return(XML_ERR_ENTITY_LOOP); 13737 } 13738 13739 13740 if (lst != NULL) 13741 *lst = NULL; 13742 if (string == NULL) 13743 return(-1); 13744 13745 size = xmlStrlen(string); 13746 13747 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13748 if (ctxt == NULL) return(-1); 13749 ctxt->userData = ctxt; 13750 if (sax != NULL) { 13751 oldsax = ctxt->sax; 13752 ctxt->sax = sax; 13753 if (user_data != NULL) 13754 ctxt->userData = user_data; 13755 } 13756 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13757 if (newDoc == NULL) { 13758 xmlFreeParserCtxt(ctxt); 13759 return(-1); 13760 } 13761 newDoc->properties = XML_DOC_INTERNAL; 13762 if ((doc != NULL) && (doc->dict != NULL)) { 13763 xmlDictFree(ctxt->dict); 13764 ctxt->dict = doc->dict; 13765 xmlDictReference(ctxt->dict); 13766 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13767 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13768 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, 
XML_XML_NAMESPACE, 36); 13769 ctxt->dictNames = 1; 13770 } else { 13771 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13772 } 13773 /* doc == NULL is only supported for historic reasons */ 13774 if (doc != NULL) { 13775 newDoc->intSubset = doc->intSubset; 13776 newDoc->extSubset = doc->extSubset; 13777 } 13778 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13779 if (newRoot == NULL) { 13780 if (sax != NULL) 13781 ctxt->sax = oldsax; 13782 xmlFreeParserCtxt(ctxt); 13783 newDoc->intSubset = NULL; 13784 newDoc->extSubset = NULL; 13785 xmlFreeDoc(newDoc); 13786 return(-1); 13787 } 13788 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13789 nodePush(ctxt, newRoot); 13790 /* doc == NULL is only supported for historic reasons */ 13791 if (doc == NULL) { 13792 ctxt->myDoc = newDoc; 13793 } else { 13794 ctxt->myDoc = newDoc; 13795 newDoc->children->doc = doc; 13796 /* Ensure that doc has XML spec namespace */ 13797 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13798 newDoc->oldNs = doc->oldNs; 13799 } 13800 ctxt->instate = XML_PARSER_CONTENT; 13801 ctxt->input_id = 2; 13802 ctxt->depth = depth; 13803 13804 /* 13805 * Doing validity checking on chunk doesn't make sense 13806 */ 13807 ctxt->validate = 0; 13808 ctxt->loadsubset = 0; 13809 xmlDetectSAX2(ctxt); 13810 13811 if ( doc != NULL ){ 13812 content = doc->children; 13813 doc->children = NULL; 13814 xmlParseContent(ctxt); 13815 doc->children = content; 13816 } 13817 else { 13818 xmlParseContent(ctxt); 13819 } 13820 if ((RAW == '<') && (NXT(1) == '/')) { 13821 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13822 } else if (RAW != 0) { 13823 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13824 } 13825 if (ctxt->node != newDoc->children) { 13826 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13827 } 13828 13829 if (!ctxt->wellFormed) { 13830 if (ctxt->errNo == 0) 13831 ret = 1; 13832 else 13833 ret = ctxt->errNo; 13834 } else { 13835 ret = 0; 13836 } 13837 13838 if ((lst != 
NULL) && ((ret == 0) || (recover == 1))) { 13839 xmlNodePtr cur; 13840 13841 /* 13842 * Return the newly created nodeset after unlinking it from 13843 * they pseudo parent. 13844 */ 13845 cur = newDoc->children->children; 13846 *lst = cur; 13847 while (cur != NULL) { 13848 xmlSetTreeDoc(cur, doc); 13849 cur->parent = NULL; 13850 cur = cur->next; 13851 } 13852 newDoc->children->children = NULL; 13853 } 13854 13855 if (sax != NULL) 13856 ctxt->sax = oldsax; 13857 xmlFreeParserCtxt(ctxt); 13858 newDoc->intSubset = NULL; 13859 newDoc->extSubset = NULL; 13860 /* This leaks the namespace list if doc == NULL */ 13861 newDoc->oldNs = NULL; 13862 xmlFreeDoc(newDoc); 13863 13864 return(ret); 13865 } 13866 13867 /** 13868 * xmlSAXParseEntity: 13869 * @sax: the SAX handler block 13870 * @filename: the filename 13871 * 13872 * parse an XML external entity out of context and build a tree. 13873 * It use the given SAX function block to handle the parsing callback. 13874 * If sax is NULL, fallback to the default DOM tree building routines. 13875 * 13876 * [78] extParsedEnt ::= TextDecl? 
content 13877 * 13878 * This correspond to a "Well Balanced" chunk 13879 * 13880 * Returns the resulting document tree 13881 */ 13882 13883 xmlDocPtr 13884 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13885 xmlDocPtr ret; 13886 xmlParserCtxtPtr ctxt; 13887 13888 ctxt = xmlCreateFileParserCtxt(filename); 13889 if (ctxt == NULL) { 13890 return(NULL); 13891 } 13892 if (sax != NULL) { 13893 if (ctxt->sax != NULL) 13894 xmlFree(ctxt->sax); 13895 ctxt->sax = sax; 13896 ctxt->userData = NULL; 13897 } 13898 13899 xmlParseExtParsedEnt(ctxt); 13900 13901 if (ctxt->wellFormed) 13902 ret = ctxt->myDoc; 13903 else { 13904 ret = NULL; 13905 xmlFreeDoc(ctxt->myDoc); 13906 ctxt->myDoc = NULL; 13907 } 13908 if (sax != NULL) 13909 ctxt->sax = NULL; 13910 xmlFreeParserCtxt(ctxt); 13911 13912 return(ret); 13913 } 13914 13915 /** 13916 * xmlParseEntity: 13917 * @filename: the filename 13918 * 13919 * parse an XML external entity out of context and build a tree. 13920 * 13921 * [78] extParsedEnt ::= TextDecl? content 13922 * 13923 * This correspond to a "Well Balanced" chunk 13924 * 13925 * Returns the resulting document tree 13926 */ 13927 13928 xmlDocPtr 13929 xmlParseEntity(const char *filename) { 13930 return(xmlSAXParseEntity(NULL, filename)); 13931 } 13932 #endif /* LIBXML_SAX1_ENABLED */ 13933 13934 /** 13935 * xmlCreateEntityParserCtxtInternal: 13936 * @URL: the entity URL 13937 * @ID: the entity PUBLIC ID 13938 * @base: a possible base for the target URI 13939 * @pctx: parser context used to set options on new context 13940 * 13941 * Create a parser context for an external entity 13942 * Automatic support for ZLIB/Compress compressed document is provided 13943 * by default if found at compile-time. 
13944 * 13945 * Returns the new parser context or NULL 13946 */ 13947 static xmlParserCtxtPtr 13948 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13949 const xmlChar *base, xmlParserCtxtPtr pctx) { 13950 xmlParserCtxtPtr ctxt; 13951 xmlParserInputPtr inputStream; 13952 char *directory = NULL; 13953 xmlChar *uri; 13954 13955 ctxt = xmlNewParserCtxt(); 13956 if (ctxt == NULL) { 13957 return(NULL); 13958 } 13959 13960 if (pctx != NULL) { 13961 ctxt->options = pctx->options; 13962 ctxt->_private = pctx->_private; 13963 /* 13964 * this is a subparser of pctx, so the input_id should be 13965 * incremented to distinguish from main entity 13966 */ 13967 ctxt->input_id = pctx->input_id + 1; 13968 } 13969 13970 /* Don't read from stdin. */ 13971 if (xmlStrcmp(URL, BAD_CAST "-") == 0) 13972 URL = BAD_CAST "./-"; 13973 13974 uri = xmlBuildURI(URL, base); 13975 13976 if (uri == NULL) { 13977 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13978 if (inputStream == NULL) { 13979 xmlFreeParserCtxt(ctxt); 13980 return(NULL); 13981 } 13982 13983 inputPush(ctxt, inputStream); 13984 13985 if ((ctxt->directory == NULL) && (directory == NULL)) 13986 directory = xmlParserGetDirectory((char *)URL); 13987 if ((ctxt->directory == NULL) && (directory != NULL)) 13988 ctxt->directory = directory; 13989 } else { 13990 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13991 if (inputStream == NULL) { 13992 xmlFree(uri); 13993 xmlFreeParserCtxt(ctxt); 13994 return(NULL); 13995 } 13996 13997 inputPush(ctxt, inputStream); 13998 13999 if ((ctxt->directory == NULL) && (directory == NULL)) 14000 directory = xmlParserGetDirectory((char *)uri); 14001 if ((ctxt->directory == NULL) && (directory != NULL)) 14002 ctxt->directory = directory; 14003 xmlFree(uri); 14004 } 14005 return(ctxt); 14006 } 14007 14008 /** 14009 * xmlCreateEntityParserCtxt: 14010 * @URL: the entity URL 14011 * @ID: the entity PUBLIC ID 14012 * @base: a possible base for 
the target URI 14013 * 14014 * Create a parser context for an external entity 14015 * Automatic support for ZLIB/Compress compressed document is provided 14016 * by default if found at compile-time. 14017 * 14018 * Returns the new parser context or NULL 14019 */ 14020 xmlParserCtxtPtr 14021 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14022 const xmlChar *base) { 14023 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14024 14025 } 14026 14027 /************************************************************************ 14028 * * 14029 * Front ends when parsing from a file * 14030 * * 14031 ************************************************************************/ 14032 14033 /** 14034 * xmlCreateURLParserCtxt: 14035 * @filename: the filename or URL 14036 * @options: a combination of xmlParserOption 14037 * 14038 * Create a parser context for a file or URL content. 14039 * Automatic support for ZLIB/Compress compressed document is provided 14040 * by default if found at compile-time and for file accesses 14041 * 14042 * Returns the new parser context or NULL 14043 */ 14044 xmlParserCtxtPtr 14045 xmlCreateURLParserCtxt(const char *filename, int options) 14046 { 14047 xmlParserCtxtPtr ctxt; 14048 xmlParserInputPtr inputStream; 14049 char *directory = NULL; 14050 14051 ctxt = xmlNewParserCtxt(); 14052 if (ctxt == NULL) { 14053 xmlErrMemory(NULL, "cannot allocate parser context"); 14054 return(NULL); 14055 } 14056 14057 if (options) 14058 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14059 ctxt->linenumbers = 1; 14060 14061 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14062 if (inputStream == NULL) { 14063 xmlFreeParserCtxt(ctxt); 14064 return(NULL); 14065 } 14066 14067 inputPush(ctxt, inputStream); 14068 if ((ctxt->directory == NULL) && (directory == NULL)) 14069 directory = xmlParserGetDirectory(filename); 14070 if ((ctxt->directory == NULL) && (directory != NULL)) 14071 ctxt->directory = directory; 14072 14073 
return(ctxt); 14074 } 14075 14076 /** 14077 * xmlCreateFileParserCtxt: 14078 * @filename: the filename 14079 * 14080 * Create a parser context for a file content. 14081 * Automatic support for ZLIB/Compress compressed document is provided 14082 * by default if found at compile-time. 14083 * 14084 * Returns the new parser context or NULL 14085 */ 14086 xmlParserCtxtPtr 14087 xmlCreateFileParserCtxt(const char *filename) 14088 { 14089 return(xmlCreateURLParserCtxt(filename, 0)); 14090 } 14091 14092 #ifdef LIBXML_SAX1_ENABLED 14093 /** 14094 * xmlSAXParseFileWithData: 14095 * @sax: the SAX handler block 14096 * @filename: the filename 14097 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14098 * documents 14099 * @data: the userdata 14100 * 14101 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14102 * compressed document is provided by default if found at compile-time. 14103 * It use the given SAX function block to handle the parsing callback. 14104 * If sax is NULL, fallback to the default DOM tree building routines. 
14105 * 14106 * User data (void *) is stored within the parser context in the 14107 * context's _private member, so it is available nearly everywhere in libxml 14108 * 14109 * Returns the resulting document tree 14110 */ 14111 14112 xmlDocPtr 14113 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14114 int recovery, void *data) { 14115 xmlDocPtr ret; 14116 xmlParserCtxtPtr ctxt; 14117 14118 xmlInitParser(); 14119 14120 ctxt = xmlCreateFileParserCtxt(filename); 14121 if (ctxt == NULL) { 14122 return(NULL); 14123 } 14124 if (sax != NULL) { 14125 if (ctxt->sax != NULL) 14126 xmlFree(ctxt->sax); 14127 ctxt->sax = sax; 14128 } 14129 xmlDetectSAX2(ctxt); 14130 if (data!=NULL) { 14131 ctxt->_private = data; 14132 } 14133 14134 if (ctxt->directory == NULL) 14135 ctxt->directory = xmlParserGetDirectory(filename); 14136 14137 ctxt->recovery = recovery; 14138 14139 xmlParseDocument(ctxt); 14140 14141 if ((ctxt->wellFormed) || recovery) { 14142 ret = ctxt->myDoc; 14143 if ((ret != NULL) && (ctxt->input->buf != NULL)) { 14144 if (ctxt->input->buf->compressed > 0) 14145 ret->compression = 9; 14146 else 14147 ret->compression = ctxt->input->buf->compressed; 14148 } 14149 } 14150 else { 14151 ret = NULL; 14152 xmlFreeDoc(ctxt->myDoc); 14153 ctxt->myDoc = NULL; 14154 } 14155 if (sax != NULL) 14156 ctxt->sax = NULL; 14157 xmlFreeParserCtxt(ctxt); 14158 14159 return(ret); 14160 } 14161 14162 /** 14163 * xmlSAXParseFile: 14164 * @sax: the SAX handler block 14165 * @filename: the filename 14166 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14167 * documents 14168 * 14169 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14170 * compressed document is provided by default if found at compile-time. 14171 * It use the given SAX function block to handle the parsing callback. 14172 * If sax is NULL, fallback to the default DOM tree building routines. 
14173 * 14174 * Returns the resulting document tree 14175 */ 14176 14177 xmlDocPtr 14178 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14179 int recovery) { 14180 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14181 } 14182 14183 /** 14184 * xmlRecoverDoc: 14185 * @cur: a pointer to an array of xmlChar 14186 * 14187 * parse an XML in-memory document and build a tree. 14188 * In the case the document is not Well Formed, a attempt to build a 14189 * tree is tried anyway 14190 * 14191 * Returns the resulting document tree or NULL in case of failure 14192 */ 14193 14194 xmlDocPtr 14195 xmlRecoverDoc(const xmlChar *cur) { 14196 return(xmlSAXParseDoc(NULL, cur, 1)); 14197 } 14198 14199 /** 14200 * xmlParseFile: 14201 * @filename: the filename 14202 * 14203 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14204 * compressed document is provided by default if found at compile-time. 14205 * 14206 * Returns the resulting document tree if the file was wellformed, 14207 * NULL otherwise. 14208 */ 14209 14210 xmlDocPtr 14211 xmlParseFile(const char *filename) { 14212 return(xmlSAXParseFile(NULL, filename, 0)); 14213 } 14214 14215 /** 14216 * xmlRecoverFile: 14217 * @filename: the filename 14218 * 14219 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14220 * compressed document is provided by default if found at compile-time. 
14221 * In the case the document is not Well Formed, it attempts to build 14222 * a tree anyway 14223 * 14224 * Returns the resulting document tree or NULL in case of failure 14225 */ 14226 14227 xmlDocPtr 14228 xmlRecoverFile(const char *filename) { 14229 return(xmlSAXParseFile(NULL, filename, 1)); 14230 } 14231 14232 14233 /** 14234 * xmlSetupParserForBuffer: 14235 * @ctxt: an XML parser context 14236 * @buffer: a xmlChar * buffer 14237 * @filename: a file name 14238 * 14239 * Setup the parser context to parse a new buffer; Clears any prior 14240 * contents from the parser context. The buffer parameter must not be 14241 * NULL, but the filename parameter can be 14242 */ 14243 void 14244 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14245 const char* filename) 14246 { 14247 xmlParserInputPtr input; 14248 14249 if ((ctxt == NULL) || (buffer == NULL)) 14250 return; 14251 14252 input = xmlNewInputStream(ctxt); 14253 if (input == NULL) { 14254 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14255 xmlClearParserCtxt(ctxt); 14256 return; 14257 } 14258 14259 xmlClearParserCtxt(ctxt); 14260 if (filename != NULL) 14261 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14262 input->base = buffer; 14263 input->cur = buffer; 14264 input->end = &buffer[xmlStrlen(buffer)]; 14265 inputPush(ctxt, input); 14266 } 14267 14268 /** 14269 * xmlSAXUserParseFile: 14270 * @sax: a SAX handler 14271 * @user_data: The user data returned on SAX callbacks 14272 * @filename: a file name 14273 * 14274 * parse an XML file and call the given SAX handler routines. 
14275 * Automatic support for ZLIB/Compress compressed document is provided 14276 * 14277 * Returns 0 in case of success or a error number otherwise 14278 */ 14279 int 14280 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14281 const char *filename) { 14282 int ret = 0; 14283 xmlParserCtxtPtr ctxt; 14284 14285 ctxt = xmlCreateFileParserCtxt(filename); 14286 if (ctxt == NULL) return -1; 14287 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14288 xmlFree(ctxt->sax); 14289 ctxt->sax = sax; 14290 xmlDetectSAX2(ctxt); 14291 14292 if (user_data != NULL) 14293 ctxt->userData = user_data; 14294 14295 xmlParseDocument(ctxt); 14296 14297 if (ctxt->wellFormed) 14298 ret = 0; 14299 else { 14300 if (ctxt->errNo != 0) 14301 ret = ctxt->errNo; 14302 else 14303 ret = -1; 14304 } 14305 if (sax != NULL) 14306 ctxt->sax = NULL; 14307 if (ctxt->myDoc != NULL) { 14308 xmlFreeDoc(ctxt->myDoc); 14309 ctxt->myDoc = NULL; 14310 } 14311 xmlFreeParserCtxt(ctxt); 14312 14313 return ret; 14314 } 14315 #endif /* LIBXML_SAX1_ENABLED */ 14316 14317 /************************************************************************ 14318 * * 14319 * Front ends when parsing from memory * 14320 * * 14321 ************************************************************************/ 14322 14323 /** 14324 * xmlCreateMemoryParserCtxt: 14325 * @buffer: a pointer to a char array 14326 * @size: the size of the array 14327 * 14328 * Create a parser context for an XML in-memory document. 
14329 * 14330 * Returns the new parser context or NULL 14331 */ 14332 xmlParserCtxtPtr 14333 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14334 xmlParserCtxtPtr ctxt; 14335 xmlParserInputPtr input; 14336 xmlParserInputBufferPtr buf; 14337 14338 if (buffer == NULL) 14339 return(NULL); 14340 if (size <= 0) 14341 return(NULL); 14342 14343 ctxt = xmlNewParserCtxt(); 14344 if (ctxt == NULL) 14345 return(NULL); 14346 14347 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14348 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14349 if (buf == NULL) { 14350 xmlFreeParserCtxt(ctxt); 14351 return(NULL); 14352 } 14353 14354 input = xmlNewInputStream(ctxt); 14355 if (input == NULL) { 14356 xmlFreeParserInputBuffer(buf); 14357 xmlFreeParserCtxt(ctxt); 14358 return(NULL); 14359 } 14360 14361 input->filename = NULL; 14362 input->buf = buf; 14363 xmlBufResetInput(input->buf->buffer, input); 14364 14365 inputPush(ctxt, input); 14366 return(ctxt); 14367 } 14368 14369 #ifdef LIBXML_SAX1_ENABLED 14370 /** 14371 * xmlSAXParseMemoryWithData: 14372 * @sax: the SAX handler block 14373 * @buffer: an pointer to a char array 14374 * @size: the size of the array 14375 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14376 * documents 14377 * @data: the userdata 14378 * 14379 * parse an XML in-memory block and use the given SAX function block 14380 * to handle the parsing callback. If sax is NULL, fallback to the default 14381 * DOM tree building routines. 
14382 * 14383 * User data (void *) is stored within the parser context in the 14384 * context's _private member, so it is available nearly everywhere in libxml 14385 * 14386 * Returns the resulting document tree 14387 */ 14388 14389 xmlDocPtr 14390 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14391 int size, int recovery, void *data) { 14392 xmlDocPtr ret; 14393 xmlParserCtxtPtr ctxt; 14394 14395 xmlInitParser(); 14396 14397 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14398 if (ctxt == NULL) return(NULL); 14399 if (sax != NULL) { 14400 if (ctxt->sax != NULL) 14401 xmlFree(ctxt->sax); 14402 ctxt->sax = sax; 14403 } 14404 xmlDetectSAX2(ctxt); 14405 if (data!=NULL) { 14406 ctxt->_private=data; 14407 } 14408 14409 ctxt->recovery = recovery; 14410 14411 xmlParseDocument(ctxt); 14412 14413 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14414 else { 14415 ret = NULL; 14416 xmlFreeDoc(ctxt->myDoc); 14417 ctxt->myDoc = NULL; 14418 } 14419 if (sax != NULL) 14420 ctxt->sax = NULL; 14421 xmlFreeParserCtxt(ctxt); 14422 14423 return(ret); 14424 } 14425 14426 /** 14427 * xmlSAXParseMemory: 14428 * @sax: the SAX handler block 14429 * @buffer: an pointer to a char array 14430 * @size: the size of the array 14431 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14432 * documents 14433 * 14434 * parse an XML in-memory block and use the given SAX function block 14435 * to handle the parsing callback. If sax is NULL, fallback to the default 14436 * DOM tree building routines. 14437 * 14438 * Returns the resulting document tree 14439 */ 14440 xmlDocPtr 14441 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14442 int size, int recovery) { 14443 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14444 } 14445 14446 /** 14447 * xmlParseMemory: 14448 * @buffer: an pointer to a char array 14449 * @size: the size of the array 14450 * 14451 * parse an XML in-memory block and build a tree. 
14452 * 14453 * Returns the resulting document tree 14454 */ 14455 14456 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14457 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14458 } 14459 14460 /** 14461 * xmlRecoverMemory: 14462 * @buffer: an pointer to a char array 14463 * @size: the size of the array 14464 * 14465 * parse an XML in-memory block and build a tree. 14466 * In the case the document is not Well Formed, an attempt to 14467 * build a tree is tried anyway 14468 * 14469 * Returns the resulting document tree or NULL in case of error 14470 */ 14471 14472 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14473 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14474 } 14475 14476 /** 14477 * xmlSAXUserParseMemory: 14478 * @sax: a SAX handler 14479 * @user_data: The user data returned on SAX callbacks 14480 * @buffer: an in-memory XML document input 14481 * @size: the length of the XML document in bytes 14482 * 14483 * A better SAX parsing routine. 14484 * parse an XML in-memory buffer and call the given SAX handler routines. 
14485 * 14486 * Returns 0 in case of success or a error number otherwise 14487 */ 14488 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14489 const char *buffer, int size) { 14490 int ret = 0; 14491 xmlParserCtxtPtr ctxt; 14492 14493 xmlInitParser(); 14494 14495 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14496 if (ctxt == NULL) return -1; 14497 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14498 xmlFree(ctxt->sax); 14499 ctxt->sax = sax; 14500 xmlDetectSAX2(ctxt); 14501 14502 if (user_data != NULL) 14503 ctxt->userData = user_data; 14504 14505 xmlParseDocument(ctxt); 14506 14507 if (ctxt->wellFormed) 14508 ret = 0; 14509 else { 14510 if (ctxt->errNo != 0) 14511 ret = ctxt->errNo; 14512 else 14513 ret = -1; 14514 } 14515 if (sax != NULL) 14516 ctxt->sax = NULL; 14517 if (ctxt->myDoc != NULL) { 14518 xmlFreeDoc(ctxt->myDoc); 14519 ctxt->myDoc = NULL; 14520 } 14521 xmlFreeParserCtxt(ctxt); 14522 14523 return ret; 14524 } 14525 #endif /* LIBXML_SAX1_ENABLED */ 14526 14527 /** 14528 * xmlCreateDocParserCtxt: 14529 * @cur: a pointer to an array of xmlChar 14530 * 14531 * Creates a parser context for an XML in-memory document. 14532 * 14533 * Returns the new parser context or NULL 14534 */ 14535 xmlParserCtxtPtr 14536 xmlCreateDocParserCtxt(const xmlChar *cur) { 14537 int len; 14538 14539 if (cur == NULL) 14540 return(NULL); 14541 len = xmlStrlen(cur); 14542 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14543 } 14544 14545 #ifdef LIBXML_SAX1_ENABLED 14546 /** 14547 * xmlSAXParseDoc: 14548 * @sax: the SAX handler block 14549 * @cur: a pointer to an array of xmlChar 14550 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14551 * documents 14552 * 14553 * parse an XML in-memory document and build a tree. 14554 * It use the given SAX function block to handle the parsing callback. 14555 * If sax is NULL, fallback to the default DOM tree building routines. 
14556 * 14557 * Returns the resulting document tree 14558 */ 14559 14560 xmlDocPtr 14561 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14562 xmlDocPtr ret; 14563 xmlParserCtxtPtr ctxt; 14564 xmlSAXHandlerPtr oldsax = NULL; 14565 14566 if (cur == NULL) return(NULL); 14567 14568 14569 ctxt = xmlCreateDocParserCtxt(cur); 14570 if (ctxt == NULL) return(NULL); 14571 if (sax != NULL) { 14572 oldsax = ctxt->sax; 14573 ctxt->sax = sax; 14574 ctxt->userData = NULL; 14575 } 14576 xmlDetectSAX2(ctxt); 14577 14578 xmlParseDocument(ctxt); 14579 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14580 else { 14581 ret = NULL; 14582 xmlFreeDoc(ctxt->myDoc); 14583 ctxt->myDoc = NULL; 14584 } 14585 if (sax != NULL) 14586 ctxt->sax = oldsax; 14587 xmlFreeParserCtxt(ctxt); 14588 14589 return(ret); 14590 } 14591 14592 /** 14593 * xmlParseDoc: 14594 * @cur: a pointer to an array of xmlChar 14595 * 14596 * parse an XML in-memory document and build a tree. 14597 * 14598 * Returns the resulting document tree 14599 */ 14600 14601 xmlDocPtr 14602 xmlParseDoc(const xmlChar *cur) { 14603 return(xmlSAXParseDoc(NULL, cur, 0)); 14604 } 14605 #endif /* LIBXML_SAX1_ENABLED */ 14606 14607 #ifdef LIBXML_LEGACY_ENABLED 14608 /************************************************************************ 14609 * * 14610 * Specific function to keep track of entities references * 14611 * and used by the XSLT debugger * 14612 * * 14613 ************************************************************************/ 14614 14615 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14616 14617 /** 14618 * xmlAddEntityReference: 14619 * @ent : A valid entity 14620 * @firstNode : A valid first node for children of entity 14621 * @lastNode : A valid last node of children entity 14622 * 14623 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14624 */ 14625 static void 14626 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14627 xmlNodePtr lastNode) 
14628 { 14629 if (xmlEntityRefFunc != NULL) { 14630 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14631 } 14632 } 14633 14634 14635 /** 14636 * xmlSetEntityReferenceFunc: 14637 * @func: A valid function 14638 * 14639 * Set the function to call call back when a xml reference has been made 14640 */ 14641 void 14642 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14643 { 14644 xmlEntityRefFunc = func; 14645 } 14646 #endif /* LIBXML_LEGACY_ENABLED */ 14647 14648 /************************************************************************ 14649 * * 14650 * Miscellaneous * 14651 * * 14652 ************************************************************************/ 14653 14654 #ifdef LIBXML_XPATH_ENABLED 14655 #include <libxml/xpath.h> 14656 #endif 14657 14658 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14659 static int xmlParserInitialized = 0; 14660 14661 /** 14662 * xmlInitParser: 14663 * 14664 * Initialization function for the XML parser. 14665 * This is not reentrant. Call once before processing in case of 14666 * use in multithreaded programs. 
14667 */ 14668 14669 void 14670 xmlInitParser(void) { 14671 if (xmlParserInitialized != 0) 14672 return; 14673 14674 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL)) 14675 atexit(xmlCleanupParser); 14676 #endif 14677 14678 #ifdef LIBXML_THREAD_ENABLED 14679 __xmlGlobalInitMutexLock(); 14680 if (xmlParserInitialized == 0) { 14681 #endif 14682 xmlInitThreads(); 14683 xmlInitGlobals(); 14684 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14685 (xmlGenericError == NULL)) 14686 initGenericErrorDefaultFunc(NULL); 14687 xmlInitMemory(); 14688 xmlInitializeDict(); 14689 xmlInitCharEncodingHandlers(); 14690 xmlDefaultSAXHandlerInit(); 14691 xmlRegisterDefaultInputCallbacks(); 14692 #ifdef LIBXML_OUTPUT_ENABLED 14693 xmlRegisterDefaultOutputCallbacks(); 14694 #endif /* LIBXML_OUTPUT_ENABLED */ 14695 #ifdef LIBXML_HTML_ENABLED 14696 htmlInitAutoClose(); 14697 htmlDefaultSAXHandlerInit(); 14698 #endif 14699 #ifdef LIBXML_XPATH_ENABLED 14700 xmlXPathInit(); 14701 #endif 14702 xmlParserInitialized = 1; 14703 #ifdef LIBXML_THREAD_ENABLED 14704 } 14705 __xmlGlobalInitMutexUnlock(); 14706 #endif 14707 } 14708 14709 /** 14710 * xmlCleanupParser: 14711 * 14712 * This function name is somewhat misleading. It does not clean up 14713 * parser state, it cleans up memory allocated by the library itself. 14714 * It is a cleanup function for the XML library. It tries to reclaim all 14715 * related global memory allocated for the library processing. 14716 * It doesn't deallocate any document related memory. One should 14717 * call xmlCleanupParser() only when the process has finished using 14718 * the library and all XML/HTML documents built with it. 14719 * See also xmlInitParser() which has the opposite function of preparing 14720 * the library for operations. 
14721 * 14722 * WARNING: if your application is multithreaded or has plugin support 14723 * calling this may crash the application if another thread or 14724 * a plugin is still using libxml2. It's sometimes very hard to 14725 * guess if libxml2 is in use in the application, some libraries 14726 * or plugins may use it without notice. In case of doubt abstain 14727 * from calling this function or do it just before calling exit() 14728 * to avoid leak reports from valgrind ! 14729 */ 14730 14731 void 14732 xmlCleanupParser(void) { 14733 if (!xmlParserInitialized) 14734 return; 14735 14736 xmlCleanupCharEncodingHandlers(); 14737 #ifdef LIBXML_CATALOG_ENABLED 14738 xmlCatalogCleanup(); 14739 #endif 14740 xmlDictCleanup(); 14741 xmlCleanupInputCallbacks(); 14742 #ifdef LIBXML_OUTPUT_ENABLED 14743 xmlCleanupOutputCallbacks(); 14744 #endif 14745 #ifdef LIBXML_SCHEMAS_ENABLED 14746 xmlSchemaCleanupTypes(); 14747 xmlRelaxNGCleanupTypes(); 14748 #endif 14749 xmlResetLastError(); 14750 xmlCleanupGlobals(); 14751 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14752 xmlCleanupMemory(); 14753 xmlParserInitialized = 0; 14754 } 14755 14756 #if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \ 14757 !defined(_WIN32) 14758 static void 14759 ATTRIBUTE_DESTRUCTOR 14760 xmlDestructor(void) { 14761 /* 14762 * Calling custom deallocation functions in a destructor can cause 14763 * problems, for example with Nokogiri. 
14764 */ 14765 if (xmlFree == free) 14766 xmlCleanupParser(); 14767 } 14768 #endif 14769 14770 /************************************************************************ 14771 * * 14772 * New set (2.6.0) of simpler and more flexible APIs * 14773 * * 14774 ************************************************************************/ 14775 14776 /** 14777 * DICT_FREE: 14778 * @str: a string 14779 * 14780 * Free a string if it is not owned by the "dict" dictionary in the 14781 * current scope 14782 */ 14783 #define DICT_FREE(str) \ 14784 if ((str) && ((!dict) || \ 14785 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14786 xmlFree((char *)(str)); 14787 14788 /** 14789 * xmlCtxtReset: 14790 * @ctxt: an XML parser context 14791 * 14792 * Reset a parser context 14793 */ 14794 void 14795 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14796 { 14797 xmlParserInputPtr input; 14798 xmlDictPtr dict; 14799 14800 if (ctxt == NULL) 14801 return; 14802 14803 dict = ctxt->dict; 14804 14805 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14806 xmlFreeInputStream(input); 14807 } 14808 ctxt->inputNr = 0; 14809 ctxt->input = NULL; 14810 14811 ctxt->spaceNr = 0; 14812 if (ctxt->spaceTab != NULL) { 14813 ctxt->spaceTab[0] = -1; 14814 ctxt->space = &ctxt->spaceTab[0]; 14815 } else { 14816 ctxt->space = NULL; 14817 } 14818 14819 14820 ctxt->nodeNr = 0; 14821 ctxt->node = NULL; 14822 14823 ctxt->nameNr = 0; 14824 ctxt->name = NULL; 14825 14826 DICT_FREE(ctxt->version); 14827 ctxt->version = NULL; 14828 DICT_FREE(ctxt->encoding); 14829 ctxt->encoding = NULL; 14830 DICT_FREE(ctxt->directory); 14831 ctxt->directory = NULL; 14832 DICT_FREE(ctxt->extSubURI); 14833 ctxt->extSubURI = NULL; 14834 DICT_FREE(ctxt->extSubSystem); 14835 ctxt->extSubSystem = NULL; 14836 if (ctxt->myDoc != NULL) 14837 xmlFreeDoc(ctxt->myDoc); 14838 ctxt->myDoc = NULL; 14839 14840 ctxt->standalone = -1; 14841 ctxt->hasExternalSubset = 0; 14842 ctxt->hasPErefs = 0; 14843 ctxt->html = 0; 14844 ctxt->external = 0; 14845 
ctxt->instate = XML_PARSER_START; 14846 ctxt->token = 0; 14847 14848 ctxt->wellFormed = 1; 14849 ctxt->nsWellFormed = 1; 14850 ctxt->disableSAX = 0; 14851 ctxt->valid = 1; 14852 #if 0 14853 ctxt->vctxt.userData = ctxt; 14854 ctxt->vctxt.error = xmlParserValidityError; 14855 ctxt->vctxt.warning = xmlParserValidityWarning; 14856 #endif 14857 ctxt->record_info = 0; 14858 ctxt->checkIndex = 0; 14859 ctxt->inSubset = 0; 14860 ctxt->errNo = XML_ERR_OK; 14861 ctxt->depth = 0; 14862 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14863 ctxt->catalogs = NULL; 14864 ctxt->nbentities = 0; 14865 ctxt->sizeentities = 0; 14866 ctxt->sizeentcopy = 0; 14867 xmlInitNodeInfoSeq(&ctxt->node_seq); 14868 14869 if (ctxt->attsDefault != NULL) { 14870 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 14871 ctxt->attsDefault = NULL; 14872 } 14873 if (ctxt->attsSpecial != NULL) { 14874 xmlHashFree(ctxt->attsSpecial, NULL); 14875 ctxt->attsSpecial = NULL; 14876 } 14877 14878 #ifdef LIBXML_CATALOG_ENABLED 14879 if (ctxt->catalogs != NULL) 14880 xmlCatalogFreeLocal(ctxt->catalogs); 14881 #endif 14882 if (ctxt->lastError.code != XML_ERR_OK) 14883 xmlResetError(&ctxt->lastError); 14884 } 14885 14886 /** 14887 * xmlCtxtResetPush: 14888 * @ctxt: an XML parser context 14889 * @chunk: a pointer to an array of chars 14890 * @size: number of chars in the array 14891 * @filename: an optional file name or URI 14892 * @encoding: the document encoding, or NULL 14893 * 14894 * Reset a push parser context 14895 * 14896 * Returns 0 in case of success and 1 in case of error 14897 */ 14898 int 14899 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14900 int size, const char *filename, const char *encoding) 14901 { 14902 xmlParserInputPtr inputStream; 14903 xmlParserInputBufferPtr buf; 14904 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14905 14906 if (ctxt == NULL) 14907 return(1); 14908 14909 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14910 enc = xmlDetectCharEncoding((const xmlChar 
*) chunk, size); 14911 14912 buf = xmlAllocParserInputBuffer(enc); 14913 if (buf == NULL) 14914 return(1); 14915 14916 if (ctxt == NULL) { 14917 xmlFreeParserInputBuffer(buf); 14918 return(1); 14919 } 14920 14921 xmlCtxtReset(ctxt); 14922 14923 if (filename == NULL) { 14924 ctxt->directory = NULL; 14925 } else { 14926 ctxt->directory = xmlParserGetDirectory(filename); 14927 } 14928 14929 inputStream = xmlNewInputStream(ctxt); 14930 if (inputStream == NULL) { 14931 xmlFreeParserInputBuffer(buf); 14932 return(1); 14933 } 14934 14935 if (filename == NULL) 14936 inputStream->filename = NULL; 14937 else 14938 inputStream->filename = (char *) 14939 xmlCanonicPath((const xmlChar *) filename); 14940 inputStream->buf = buf; 14941 xmlBufResetInput(buf->buffer, inputStream); 14942 14943 inputPush(ctxt, inputStream); 14944 14945 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14946 (ctxt->input->buf != NULL)) { 14947 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14948 size_t cur = ctxt->input->cur - ctxt->input->base; 14949 14950 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14951 14952 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14953 #ifdef DEBUG_PUSH 14954 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14955 #endif 14956 } 14957 14958 if (encoding != NULL) { 14959 xmlCharEncodingHandlerPtr hdlr; 14960 14961 if (ctxt->encoding != NULL) 14962 xmlFree((xmlChar *) ctxt->encoding); 14963 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14964 14965 hdlr = xmlFindCharEncodingHandler(encoding); 14966 if (hdlr != NULL) { 14967 xmlSwitchToEncoding(ctxt, hdlr); 14968 } else { 14969 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14970 "Unsupported encoding %s\n", BAD_CAST encoding); 14971 } 14972 } else if (enc != XML_CHAR_ENCODING_NONE) { 14973 xmlSwitchEncoding(ctxt, enc); 14974 } 14975 14976 return(0); 14977 } 14978 14979 14980 /** 14981 * xmlCtxtUseOptionsInternal: 
14982 * @ctxt: an XML parser context 14983 * @options: a combination of xmlParserOption 14984 * @encoding: the user provided encoding to use 14985 * 14986 * Applies the options to the parser context 14987 * 14988 * Returns 0 in case of success, the set of unknown or unimplemented options 14989 * in case of error. 14990 */ 14991 static int 14992 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 14993 { 14994 if (ctxt == NULL) 14995 return(-1); 14996 if (encoding != NULL) { 14997 if (ctxt->encoding != NULL) 14998 xmlFree((xmlChar *) ctxt->encoding); 14999 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15000 } 15001 if (options & XML_PARSE_RECOVER) { 15002 ctxt->recovery = 1; 15003 options -= XML_PARSE_RECOVER; 15004 ctxt->options |= XML_PARSE_RECOVER; 15005 } else 15006 ctxt->recovery = 0; 15007 if (options & XML_PARSE_DTDLOAD) { 15008 ctxt->loadsubset = XML_DETECT_IDS; 15009 options -= XML_PARSE_DTDLOAD; 15010 ctxt->options |= XML_PARSE_DTDLOAD; 15011 } else 15012 ctxt->loadsubset = 0; 15013 if (options & XML_PARSE_DTDATTR) { 15014 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15015 options -= XML_PARSE_DTDATTR; 15016 ctxt->options |= XML_PARSE_DTDATTR; 15017 } 15018 if (options & XML_PARSE_NOENT) { 15019 ctxt->replaceEntities = 1; 15020 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15021 options -= XML_PARSE_NOENT; 15022 ctxt->options |= XML_PARSE_NOENT; 15023 } else 15024 ctxt->replaceEntities = 0; 15025 if (options & XML_PARSE_PEDANTIC) { 15026 ctxt->pedantic = 1; 15027 options -= XML_PARSE_PEDANTIC; 15028 ctxt->options |= XML_PARSE_PEDANTIC; 15029 } else 15030 ctxt->pedantic = 0; 15031 if (options & XML_PARSE_NOBLANKS) { 15032 ctxt->keepBlanks = 0; 15033 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15034 options -= XML_PARSE_NOBLANKS; 15035 ctxt->options |= XML_PARSE_NOBLANKS; 15036 } else 15037 ctxt->keepBlanks = 1; 15038 if (options & XML_PARSE_DTDVALID) { 15039 ctxt->validate = 1; 15040 if (options & 
XML_PARSE_NOWARNING) 15041 ctxt->vctxt.warning = NULL; 15042 if (options & XML_PARSE_NOERROR) 15043 ctxt->vctxt.error = NULL; 15044 options -= XML_PARSE_DTDVALID; 15045 ctxt->options |= XML_PARSE_DTDVALID; 15046 } else 15047 ctxt->validate = 0; 15048 if (options & XML_PARSE_NOWARNING) { 15049 ctxt->sax->warning = NULL; 15050 options -= XML_PARSE_NOWARNING; 15051 } 15052 if (options & XML_PARSE_NOERROR) { 15053 ctxt->sax->error = NULL; 15054 ctxt->sax->fatalError = NULL; 15055 options -= XML_PARSE_NOERROR; 15056 } 15057 #ifdef LIBXML_SAX1_ENABLED 15058 if (options & XML_PARSE_SAX1) { 15059 ctxt->sax->startElement = xmlSAX2StartElement; 15060 ctxt->sax->endElement = xmlSAX2EndElement; 15061 ctxt->sax->startElementNs = NULL; 15062 ctxt->sax->endElementNs = NULL; 15063 ctxt->sax->initialized = 1; 15064 options -= XML_PARSE_SAX1; 15065 ctxt->options |= XML_PARSE_SAX1; 15066 } 15067 #endif /* LIBXML_SAX1_ENABLED */ 15068 if (options & XML_PARSE_NODICT) { 15069 ctxt->dictNames = 0; 15070 options -= XML_PARSE_NODICT; 15071 ctxt->options |= XML_PARSE_NODICT; 15072 } else { 15073 ctxt->dictNames = 1; 15074 } 15075 if (options & XML_PARSE_NOCDATA) { 15076 ctxt->sax->cdataBlock = NULL; 15077 options -= XML_PARSE_NOCDATA; 15078 ctxt->options |= XML_PARSE_NOCDATA; 15079 } 15080 if (options & XML_PARSE_NSCLEAN) { 15081 ctxt->options |= XML_PARSE_NSCLEAN; 15082 options -= XML_PARSE_NSCLEAN; 15083 } 15084 if (options & XML_PARSE_NONET) { 15085 ctxt->options |= XML_PARSE_NONET; 15086 options -= XML_PARSE_NONET; 15087 } 15088 if (options & XML_PARSE_COMPACT) { 15089 ctxt->options |= XML_PARSE_COMPACT; 15090 options -= XML_PARSE_COMPACT; 15091 } 15092 if (options & XML_PARSE_OLD10) { 15093 ctxt->options |= XML_PARSE_OLD10; 15094 options -= XML_PARSE_OLD10; 15095 } 15096 if (options & XML_PARSE_NOBASEFIX) { 15097 ctxt->options |= XML_PARSE_NOBASEFIX; 15098 options -= XML_PARSE_NOBASEFIX; 15099 } 15100 if (options & XML_PARSE_HUGE) { 15101 ctxt->options |= XML_PARSE_HUGE; 15102 options 
-= XML_PARSE_HUGE; 15103 if (ctxt->dict != NULL) 15104 xmlDictSetLimit(ctxt->dict, 0); 15105 } 15106 if (options & XML_PARSE_OLDSAX) { 15107 ctxt->options |= XML_PARSE_OLDSAX; 15108 options -= XML_PARSE_OLDSAX; 15109 } 15110 if (options & XML_PARSE_IGNORE_ENC) { 15111 ctxt->options |= XML_PARSE_IGNORE_ENC; 15112 options -= XML_PARSE_IGNORE_ENC; 15113 } 15114 if (options & XML_PARSE_BIG_LINES) { 15115 ctxt->options |= XML_PARSE_BIG_LINES; 15116 options -= XML_PARSE_BIG_LINES; 15117 } 15118 ctxt->linenumbers = 1; 15119 return (options); 15120 } 15121 15122 /** 15123 * xmlCtxtUseOptions: 15124 * @ctxt: an XML parser context 15125 * @options: a combination of xmlParserOption 15126 * 15127 * Applies the options to the parser context 15128 * 15129 * Returns 0 in case of success, the set of unknown or unimplemented options 15130 * in case of error. 15131 */ 15132 int 15133 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15134 { 15135 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15136 } 15137 15138 /** 15139 * xmlDoRead: 15140 * @ctxt: an XML parser context 15141 * @URL: the base URL to use for the document 15142 * @encoding: the document encoding, or NULL 15143 * @options: a combination of xmlParserOption 15144 * @reuse: keep the context for reuse 15145 * 15146 * Common front-end for the xmlRead functions 15147 * 15148 * Returns the resulting document tree or NULL 15149 */ 15150 static xmlDocPtr 15151 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15152 int options, int reuse) 15153 { 15154 xmlDocPtr ret; 15155 15156 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15157 if (encoding != NULL) { 15158 xmlCharEncodingHandlerPtr hdlr; 15159 15160 hdlr = xmlFindCharEncodingHandler(encoding); 15161 if (hdlr != NULL) 15162 xmlSwitchToEncoding(ctxt, hdlr); 15163 } 15164 if ((URL != NULL) && (ctxt->input != NULL) && 15165 (ctxt->input->filename == NULL)) 15166 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15167 
xmlParseDocument(ctxt); 15168 if ((ctxt->wellFormed) || ctxt->recovery) 15169 ret = ctxt->myDoc; 15170 else { 15171 ret = NULL; 15172 if (ctxt->myDoc != NULL) { 15173 xmlFreeDoc(ctxt->myDoc); 15174 } 15175 } 15176 ctxt->myDoc = NULL; 15177 if (!reuse) { 15178 xmlFreeParserCtxt(ctxt); 15179 } 15180 15181 return (ret); 15182 } 15183 15184 /** 15185 * xmlReadDoc: 15186 * @cur: a pointer to a zero terminated string 15187 * @URL: the base URL to use for the document 15188 * @encoding: the document encoding, or NULL 15189 * @options: a combination of xmlParserOption 15190 * 15191 * parse an XML in-memory document and build a tree. 15192 * 15193 * Returns the resulting document tree 15194 */ 15195 xmlDocPtr 15196 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15197 { 15198 xmlParserCtxtPtr ctxt; 15199 15200 if (cur == NULL) 15201 return (NULL); 15202 xmlInitParser(); 15203 15204 ctxt = xmlCreateDocParserCtxt(cur); 15205 if (ctxt == NULL) 15206 return (NULL); 15207 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15208 } 15209 15210 /** 15211 * xmlReadFile: 15212 * @filename: a file or URL 15213 * @encoding: the document encoding, or NULL 15214 * @options: a combination of xmlParserOption 15215 * 15216 * parse an XML file from the filesystem or the network. 
15217 * 15218 * Returns the resulting document tree 15219 */ 15220 xmlDocPtr 15221 xmlReadFile(const char *filename, const char *encoding, int options) 15222 { 15223 xmlParserCtxtPtr ctxt; 15224 15225 xmlInitParser(); 15226 ctxt = xmlCreateURLParserCtxt(filename, options); 15227 if (ctxt == NULL) 15228 return (NULL); 15229 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15230 } 15231 15232 /** 15233 * xmlReadMemory: 15234 * @buffer: a pointer to a char array 15235 * @size: the size of the array 15236 * @URL: the base URL to use for the document 15237 * @encoding: the document encoding, or NULL 15238 * @options: a combination of xmlParserOption 15239 * 15240 * parse an XML in-memory document and build a tree. 15241 * 15242 * Returns the resulting document tree 15243 */ 15244 xmlDocPtr 15245 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15246 { 15247 xmlParserCtxtPtr ctxt; 15248 15249 xmlInitParser(); 15250 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15251 if (ctxt == NULL) 15252 return (NULL); 15253 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15254 } 15255 15256 /** 15257 * xmlReadFd: 15258 * @fd: an open file descriptor 15259 * @URL: the base URL to use for the document 15260 * @encoding: the document encoding, or NULL 15261 * @options: a combination of xmlParserOption 15262 * 15263 * parse an XML from a file descriptor and build a tree. 15264 * NOTE that the file descriptor will not be closed when the 15265 * reader is closed or reset. 
15266 * 15267 * Returns the resulting document tree 15268 */ 15269 xmlDocPtr 15270 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15271 { 15272 xmlParserCtxtPtr ctxt; 15273 xmlParserInputBufferPtr input; 15274 xmlParserInputPtr stream; 15275 15276 if (fd < 0) 15277 return (NULL); 15278 xmlInitParser(); 15279 15280 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15281 if (input == NULL) 15282 return (NULL); 15283 input->closecallback = NULL; 15284 ctxt = xmlNewParserCtxt(); 15285 if (ctxt == NULL) { 15286 xmlFreeParserInputBuffer(input); 15287 return (NULL); 15288 } 15289 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15290 if (stream == NULL) { 15291 xmlFreeParserInputBuffer(input); 15292 xmlFreeParserCtxt(ctxt); 15293 return (NULL); 15294 } 15295 inputPush(ctxt, stream); 15296 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15297 } 15298 15299 /** 15300 * xmlReadIO: 15301 * @ioread: an I/O read function 15302 * @ioclose: an I/O close function 15303 * @ioctx: an I/O handler 15304 * @URL: the base URL to use for the document 15305 * @encoding: the document encoding, or NULL 15306 * @options: a combination of xmlParserOption 15307 * 15308 * parse an XML document from I/O functions and source and build a tree. 
15309 * 15310 * Returns the resulting document tree 15311 */ 15312 xmlDocPtr 15313 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15314 void *ioctx, const char *URL, const char *encoding, int options) 15315 { 15316 xmlParserCtxtPtr ctxt; 15317 xmlParserInputBufferPtr input; 15318 xmlParserInputPtr stream; 15319 15320 if (ioread == NULL) 15321 return (NULL); 15322 xmlInitParser(); 15323 15324 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15325 XML_CHAR_ENCODING_NONE); 15326 if (input == NULL) { 15327 if (ioclose != NULL) 15328 ioclose(ioctx); 15329 return (NULL); 15330 } 15331 ctxt = xmlNewParserCtxt(); 15332 if (ctxt == NULL) { 15333 xmlFreeParserInputBuffer(input); 15334 return (NULL); 15335 } 15336 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15337 if (stream == NULL) { 15338 xmlFreeParserInputBuffer(input); 15339 xmlFreeParserCtxt(ctxt); 15340 return (NULL); 15341 } 15342 inputPush(ctxt, stream); 15343 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15344 } 15345 15346 /** 15347 * xmlCtxtReadDoc: 15348 * @ctxt: an XML parser context 15349 * @cur: a pointer to a zero terminated string 15350 * @URL: the base URL to use for the document 15351 * @encoding: the document encoding, or NULL 15352 * @options: a combination of xmlParserOption 15353 * 15354 * parse an XML in-memory document and build a tree. 
15355 * This reuses the existing @ctxt parser context 15356 * 15357 * Returns the resulting document tree 15358 */ 15359 xmlDocPtr 15360 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15361 const char *URL, const char *encoding, int options) 15362 { 15363 xmlParserInputPtr stream; 15364 15365 if (cur == NULL) 15366 return (NULL); 15367 if (ctxt == NULL) 15368 return (NULL); 15369 xmlInitParser(); 15370 15371 xmlCtxtReset(ctxt); 15372 15373 stream = xmlNewStringInputStream(ctxt, cur); 15374 if (stream == NULL) { 15375 return (NULL); 15376 } 15377 inputPush(ctxt, stream); 15378 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15379 } 15380 15381 /** 15382 * xmlCtxtReadFile: 15383 * @ctxt: an XML parser context 15384 * @filename: a file or URL 15385 * @encoding: the document encoding, or NULL 15386 * @options: a combination of xmlParserOption 15387 * 15388 * parse an XML file from the filesystem or the network. 15389 * This reuses the existing @ctxt parser context 15390 * 15391 * Returns the resulting document tree 15392 */ 15393 xmlDocPtr 15394 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15395 const char *encoding, int options) 15396 { 15397 xmlParserInputPtr stream; 15398 15399 if (filename == NULL) 15400 return (NULL); 15401 if (ctxt == NULL) 15402 return (NULL); 15403 xmlInitParser(); 15404 15405 xmlCtxtReset(ctxt); 15406 15407 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15408 if (stream == NULL) { 15409 return (NULL); 15410 } 15411 inputPush(ctxt, stream); 15412 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15413 } 15414 15415 /** 15416 * xmlCtxtReadMemory: 15417 * @ctxt: an XML parser context 15418 * @buffer: a pointer to a char array 15419 * @size: the size of the array 15420 * @URL: the base URL to use for the document 15421 * @encoding: the document encoding, or NULL 15422 * @options: a combination of xmlParserOption 15423 * 15424 * parse an XML in-memory document and build a tree. 
15425 * This reuses the existing @ctxt parser context 15426 * 15427 * Returns the resulting document tree 15428 */ 15429 xmlDocPtr 15430 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15431 const char *URL, const char *encoding, int options) 15432 { 15433 xmlParserInputBufferPtr input; 15434 xmlParserInputPtr stream; 15435 15436 if (ctxt == NULL) 15437 return (NULL); 15438 if (buffer == NULL) 15439 return (NULL); 15440 xmlInitParser(); 15441 15442 xmlCtxtReset(ctxt); 15443 15444 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15445 if (input == NULL) { 15446 return(NULL); 15447 } 15448 15449 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15450 if (stream == NULL) { 15451 xmlFreeParserInputBuffer(input); 15452 return(NULL); 15453 } 15454 15455 inputPush(ctxt, stream); 15456 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15457 } 15458 15459 /** 15460 * xmlCtxtReadFd: 15461 * @ctxt: an XML parser context 15462 * @fd: an open file descriptor 15463 * @URL: the base URL to use for the document 15464 * @encoding: the document encoding, or NULL 15465 * @options: a combination of xmlParserOption 15466 * 15467 * parse an XML from a file descriptor and build a tree. 15468 * This reuses the existing @ctxt parser context 15469 * NOTE that the file descriptor will not be closed when the 15470 * reader is closed or reset. 
15471 * 15472 * Returns the resulting document tree 15473 */ 15474 xmlDocPtr 15475 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15476 const char *URL, const char *encoding, int options) 15477 { 15478 xmlParserInputBufferPtr input; 15479 xmlParserInputPtr stream; 15480 15481 if (fd < 0) 15482 return (NULL); 15483 if (ctxt == NULL) 15484 return (NULL); 15485 xmlInitParser(); 15486 15487 xmlCtxtReset(ctxt); 15488 15489 15490 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15491 if (input == NULL) 15492 return (NULL); 15493 input->closecallback = NULL; 15494 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15495 if (stream == NULL) { 15496 xmlFreeParserInputBuffer(input); 15497 return (NULL); 15498 } 15499 inputPush(ctxt, stream); 15500 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15501 } 15502 15503 /** 15504 * xmlCtxtReadIO: 15505 * @ctxt: an XML parser context 15506 * @ioread: an I/O read function 15507 * @ioclose: an I/O close function 15508 * @ioctx: an I/O handler 15509 * @URL: the base URL to use for the document 15510 * @encoding: the document encoding, or NULL 15511 * @options: a combination of xmlParserOption 15512 * 15513 * parse an XML document from I/O functions and source and build a tree. 
15514 * This reuses the existing @ctxt parser context 15515 * 15516 * Returns the resulting document tree 15517 */ 15518 xmlDocPtr 15519 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15520 xmlInputCloseCallback ioclose, void *ioctx, 15521 const char *URL, 15522 const char *encoding, int options) 15523 { 15524 xmlParserInputBufferPtr input; 15525 xmlParserInputPtr stream; 15526 15527 if (ioread == NULL) 15528 return (NULL); 15529 if (ctxt == NULL) 15530 return (NULL); 15531 xmlInitParser(); 15532 15533 xmlCtxtReset(ctxt); 15534 15535 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15536 XML_CHAR_ENCODING_NONE); 15537 if (input == NULL) { 15538 if (ioclose != NULL) 15539 ioclose(ioctx); 15540 return (NULL); 15541 } 15542 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15543 if (stream == NULL) { 15544 xmlFreeParserInputBuffer(input); 15545 return (NULL); 15546 } 15547 inputPush(ctxt, stream); 15548 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15549 } 15550 15551 #define bottom_parser 15552 #include "elfgcchack.h" 15553