/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel@veillard.com 31 */ 32 33 /* To avoid EBCDIC trouble when parsing on zOS */ 34 #if defined(__MVS__) 35 #pragma convert("ISO8859-1") 36 #endif 37 38 #define IN_LIBXML 39 #include "libxml.h" 40 41 #if defined(_WIN32) && !defined (__CYGWIN__) 42 #define XML_DIR_SEP '\\' 43 #else 44 #define XML_DIR_SEP '/' 45 #endif 46 47 #include <stdlib.h> 48 #include <limits.h> 49 #include <string.h> 50 #include <stdarg.h> 51 #include <stddef.h> 52 #include <libxml/xmlmemory.h> 53 #include <libxml/threads.h> 54 #include <libxml/globals.h> 55 #include <libxml/tree.h> 56 #include <libxml/parser.h> 57 #include <libxml/parserInternals.h> 58 #include <libxml/valid.h> 59 #include <libxml/entities.h> 60 #include <libxml/xmlerror.h> 61 #include <libxml/encoding.h> 62 #include <libxml/xmlIO.h> 63 #include <libxml/uri.h> 64 #ifdef LIBXML_CATALOG_ENABLED 65 #include <libxml/catalog.h> 66 #endif 67 #ifdef LIBXML_SCHEMAS_ENABLED 68 #include <libxml/xmlschemastypes.h> 69 #include <libxml/relaxng.h> 70 #endif 71 #ifdef HAVE_CTYPE_H 72 #include <ctype.h> 73 #endif 74 #ifdef HAVE_STDLIB_H 75 #include <stdlib.h> 76 #endif 77 #ifdef HAVE_SYS_STAT_H 78 #include <sys/stat.h> 79 #endif 80 #ifdef HAVE_FCNTL_H 81 #include <fcntl.h> 82 #endif 83 #ifdef HAVE_UNISTD_H 84 #include <unistd.h> 85 #endif 86 87 #include "buf.h" 88 #include "enc.h" 89 90 static void 91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92 93 static xmlParserCtxtPtr 94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95 const xmlChar *base, xmlParserCtxtPtr pctx); 96 97 static void xmlHaltParser(xmlParserCtxtPtr ctxt); 98 99 static int 100 xmlParseElementStart(xmlParserCtxtPtr ctxt); 101 102 static void 103 xmlParseElementEnd(xmlParserCtxtPtr ctxt); 104 105 /************************************************************************ 106 * * 107 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 108 * * 109 ************************************************************************/ 110 111 #define XML_PARSER_BIG_ENTITY 1000 112 #define XML_PARSER_LOT_ENTITY 5000 113 114 /* 115 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 116 * replacement over the size in byte of the input indicates that you have 117 * and exponential behaviour. A value of 10 correspond to at least 3 entity 118 * replacement per byte of input. 119 */ 120 #define XML_PARSER_NON_LINEAR 10 121 122 /* 123 * xmlParserEntityCheck 124 * 125 * Function to check non-linear entity expansion behaviour 126 * This is here to detect and stop exponential linear entity expansion 127 * This is not a limitation of the parser but a safety 128 * boundary feature. It can be disabled with the XML_PARSE_HUGE 129 * parser option. 130 */ 131 static int 132 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 133 xmlEntityPtr ent, size_t replacement) 134 { 135 size_t consumed = 0; 136 137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 138 return (0); 139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 140 return (1); 141 142 /* 143 * This may look absurd but is needed to detect 144 * entities problems 145 */ 146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 147 (ent->content != NULL) && (ent->checked == 0) && 148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { 149 unsigned long oldnbent = ctxt->nbentities, diff; 150 xmlChar *rep; 151 152 ent->checked = 1; 153 154 ++ctxt->depth; 155 rep = xmlStringDecodeEntities(ctxt, ent->content, 156 XML_SUBSTITUTE_REF, 0, 0, 0); 157 --ctxt->depth; 158 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) { 159 ent->content[0] = 0; 160 } 161 162 diff = ctxt->nbentities - oldnbent + 1; 163 if (diff > INT_MAX / 2) 164 diff = INT_MAX / 2; 165 ent->checked = diff * 2; 166 if (rep != NULL) { 167 if (xmlStrchr(rep, '<')) 168 ent->checked |= 1; 169 xmlFree(rep); 170 rep = NULL; 171 } 172 } 173 if 
(replacement != 0) { 174 if (replacement < XML_MAX_TEXT_LENGTH) 175 return(0); 176 177 /* 178 * If the volume of entity copy reaches 10 times the 179 * amount of parsed data and over the large text threshold 180 * then that's very likely to be an abuse. 181 */ 182 if (ctxt->input != NULL) { 183 consumed = ctxt->input->consumed + 184 (ctxt->input->cur - ctxt->input->base); 185 } 186 consumed += ctxt->sizeentities; 187 188 if (replacement < XML_PARSER_NON_LINEAR * consumed) 189 return(0); 190 } else if (size != 0) { 191 /* 192 * Do the check based on the replacement size of the entity 193 */ 194 if (size < XML_PARSER_BIG_ENTITY) 195 return(0); 196 197 /* 198 * A limit on the amount of text data reasonably used 199 */ 200 if (ctxt->input != NULL) { 201 consumed = ctxt->input->consumed + 202 (ctxt->input->cur - ctxt->input->base); 203 } 204 consumed += ctxt->sizeentities; 205 206 if ((size < XML_PARSER_NON_LINEAR * consumed) && 207 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 208 return (0); 209 } else if (ent != NULL) { 210 /* 211 * use the number of parsed entities in the replacement 212 */ 213 size = ent->checked / 2; 214 215 /* 216 * The amount of data parsed counting entities size only once 217 */ 218 if (ctxt->input != NULL) { 219 consumed = ctxt->input->consumed + 220 (ctxt->input->cur - ctxt->input->base); 221 } 222 consumed += ctxt->sizeentities; 223 224 /* 225 * Check the density of entities for the amount of data 226 * knowing an entity reference will take at least 3 bytes 227 */ 228 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 229 return (0); 230 } else { 231 /* 232 * strange we got no data for checking 233 */ 234 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && 235 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || 236 (ctxt->nbentities <= 10000)) 237 return (0); 238 } 239 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 240 return (1); 241 } 242 243 /** 244 * xmlParserMaxDepth: 245 * 246 * arbitrary depth limit for the XML 
documents that we allow to 247 * process. This is not a limitation of the parser but a safety 248 * boundary feature. It can be disabled with the XML_PARSE_HUGE 249 * parser option. 250 */ 251 unsigned int xmlParserMaxDepth = 256; 252 253 254 255 #define SAX2 1 256 #define XML_PARSER_BIG_BUFFER_SIZE 300 257 #define XML_PARSER_BUFFER_SIZE 100 258 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 259 260 /** 261 * XML_PARSER_CHUNK_SIZE 262 * 263 * When calling GROW that's the minimal amount of data 264 * the parser expected to have received. It is not a hard 265 * limit but an optimization when reading strings like Names 266 * It is not strictly needed as long as inputs available characters 267 * are followed by 0, which should be provided by the I/O level 268 */ 269 #define XML_PARSER_CHUNK_SIZE 100 270 271 /* 272 * List of XML prefixed PI allowed by W3C specs 273 */ 274 275 static const char *xmlW3CPIs[] = { 276 "xml-stylesheet", 277 "xml-model", 278 NULL 279 }; 280 281 282 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 283 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 284 const xmlChar **str); 285 286 static xmlParserErrors 287 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 288 xmlSAXHandlerPtr sax, 289 void *user_data, int depth, const xmlChar *URL, 290 const xmlChar *ID, xmlNodePtr *list); 291 292 static int 293 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 294 const char *encoding); 295 #ifdef LIBXML_LEGACY_ENABLED 296 static void 297 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 298 xmlNodePtr lastNode); 299 #endif /* LIBXML_LEGACY_ENABLED */ 300 301 static xmlParserErrors 302 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 303 const xmlChar *string, void *user_data, xmlNodePtr *lst); 304 305 static int 306 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 307 308 
/************************************************************************ 309 * * 310 * Some factorized error routines * 311 * * 312 ************************************************************************/ 313 314 /** 315 * xmlErrAttributeDup: 316 * @ctxt: an XML parser context 317 * @prefix: the attribute prefix 318 * @localname: the attribute localname 319 * 320 * Handle a redefinition of attribute error 321 */ 322 static void 323 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 324 const xmlChar * localname) 325 { 326 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 327 (ctxt->instate == XML_PARSER_EOF)) 328 return; 329 if (ctxt != NULL) 330 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 331 332 if (prefix == NULL) 333 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 334 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 335 (const char *) localname, NULL, NULL, 0, 0, 336 "Attribute %s redefined\n", localname); 337 else 338 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 339 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 340 (const char *) prefix, (const char *) localname, 341 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 342 localname); 343 if (ctxt != NULL) { 344 ctxt->wellFormed = 0; 345 if (ctxt->recovery == 0) 346 ctxt->disableSAX = 1; 347 } 348 } 349 350 /** 351 * xmlFatalErr: 352 * @ctxt: an XML parser context 353 * @error: the error number 354 * @extra: extra information string 355 * 356 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 357 */ 358 static void 359 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 360 { 361 const char *errmsg; 362 363 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 364 (ctxt->instate == XML_PARSER_EOF)) 365 return; 366 switch (error) { 367 case XML_ERR_INVALID_HEX_CHARREF: 368 errmsg = "CharRef: invalid hexadecimal value"; 369 break; 370 case XML_ERR_INVALID_DEC_CHARREF: 371 errmsg = "CharRef: invalid decimal value"; 372 break; 373 case XML_ERR_INVALID_CHARREF: 374 errmsg = "CharRef: invalid value"; 375 break; 376 case XML_ERR_INTERNAL_ERROR: 377 errmsg = "internal error"; 378 break; 379 case XML_ERR_PEREF_AT_EOF: 380 errmsg = "PEReference at end of document"; 381 break; 382 case XML_ERR_PEREF_IN_PROLOG: 383 errmsg = "PEReference in prolog"; 384 break; 385 case XML_ERR_PEREF_IN_EPILOG: 386 errmsg = "PEReference in epilog"; 387 break; 388 case XML_ERR_PEREF_NO_NAME: 389 errmsg = "PEReference: no name"; 390 break; 391 case XML_ERR_PEREF_SEMICOL_MISSING: 392 errmsg = "PEReference: expecting ';'"; 393 break; 394 case XML_ERR_ENTITY_LOOP: 395 errmsg = "Detected an entity reference loop"; 396 break; 397 case XML_ERR_ENTITY_NOT_STARTED: 398 errmsg = "EntityValue: \" or ' expected"; 399 break; 400 case XML_ERR_ENTITY_PE_INTERNAL: 401 errmsg = "PEReferences forbidden in internal subset"; 402 break; 403 case XML_ERR_ENTITY_NOT_FINISHED: 404 errmsg = "EntityValue: \" or ' expected"; 405 break; 406 case XML_ERR_ATTRIBUTE_NOT_STARTED: 407 errmsg = "AttValue: \" or ' expected"; 408 break; 409 case XML_ERR_LT_IN_ATTRIBUTE: 410 errmsg = "Unescaped '<' not allowed in attributes values"; 411 break; 412 case XML_ERR_LITERAL_NOT_STARTED: 413 errmsg = "SystemLiteral \" or ' expected"; 414 break; 415 case XML_ERR_LITERAL_NOT_FINISHED: 416 errmsg = "Unfinished System or Public ID \" or ' expected"; 417 break; 418 case XML_ERR_MISPLACED_CDATA_END: 419 errmsg = "Sequence ']]>' not allowed in content"; 420 break; 421 case 
XML_ERR_URI_REQUIRED: 422 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 423 break; 424 case XML_ERR_PUBID_REQUIRED: 425 errmsg = "PUBLIC, the Public Identifier is missing"; 426 break; 427 case XML_ERR_HYPHEN_IN_COMMENT: 428 errmsg = "Comment must not contain '--' (double-hyphen)"; 429 break; 430 case XML_ERR_PI_NOT_STARTED: 431 errmsg = "xmlParsePI : no target name"; 432 break; 433 case XML_ERR_RESERVED_XML_NAME: 434 errmsg = "Invalid PI name"; 435 break; 436 case XML_ERR_NOTATION_NOT_STARTED: 437 errmsg = "NOTATION: Name expected here"; 438 break; 439 case XML_ERR_NOTATION_NOT_FINISHED: 440 errmsg = "'>' required to close NOTATION declaration"; 441 break; 442 case XML_ERR_VALUE_REQUIRED: 443 errmsg = "Entity value required"; 444 break; 445 case XML_ERR_URI_FRAGMENT: 446 errmsg = "Fragment not allowed"; 447 break; 448 case XML_ERR_ATTLIST_NOT_STARTED: 449 errmsg = "'(' required to start ATTLIST enumeration"; 450 break; 451 case XML_ERR_NMTOKEN_REQUIRED: 452 errmsg = "NmToken expected in ATTLIST enumeration"; 453 break; 454 case XML_ERR_ATTLIST_NOT_FINISHED: 455 errmsg = "')' required to finish ATTLIST enumeration"; 456 break; 457 case XML_ERR_MIXED_NOT_STARTED: 458 errmsg = "MixedContentDecl : '|' or ')*' expected"; 459 break; 460 case XML_ERR_PCDATA_REQUIRED: 461 errmsg = "MixedContentDecl : '#PCDATA' expected"; 462 break; 463 case XML_ERR_ELEMCONTENT_NOT_STARTED: 464 errmsg = "ContentDecl : Name or '(' expected"; 465 break; 466 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 467 errmsg = "ContentDecl : ',' '|' or ')' expected"; 468 break; 469 case XML_ERR_PEREF_IN_INT_SUBSET: 470 errmsg = 471 "PEReference: forbidden within markup decl in internal subset"; 472 break; 473 case XML_ERR_GT_REQUIRED: 474 errmsg = "expected '>'"; 475 break; 476 case XML_ERR_CONDSEC_INVALID: 477 errmsg = "XML conditional section '[' expected"; 478 break; 479 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 480 errmsg = "Content error in the external subset"; 481 break; 482 case 
XML_ERR_CONDSEC_INVALID_KEYWORD: 483 errmsg = 484 "conditional section INCLUDE or IGNORE keyword expected"; 485 break; 486 case XML_ERR_CONDSEC_NOT_FINISHED: 487 errmsg = "XML conditional section not closed"; 488 break; 489 case XML_ERR_XMLDECL_NOT_STARTED: 490 errmsg = "Text declaration '<?xml' required"; 491 break; 492 case XML_ERR_XMLDECL_NOT_FINISHED: 493 errmsg = "parsing XML declaration: '?>' expected"; 494 break; 495 case XML_ERR_EXT_ENTITY_STANDALONE: 496 errmsg = "external parsed entities cannot be standalone"; 497 break; 498 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 499 errmsg = "EntityRef: expecting ';'"; 500 break; 501 case XML_ERR_DOCTYPE_NOT_FINISHED: 502 errmsg = "DOCTYPE improperly terminated"; 503 break; 504 case XML_ERR_LTSLASH_REQUIRED: 505 errmsg = "EndTag: '</' not found"; 506 break; 507 case XML_ERR_EQUAL_REQUIRED: 508 errmsg = "expected '='"; 509 break; 510 case XML_ERR_STRING_NOT_CLOSED: 511 errmsg = "String not closed expecting \" or '"; 512 break; 513 case XML_ERR_STRING_NOT_STARTED: 514 errmsg = "String not started expecting ' or \""; 515 break; 516 case XML_ERR_ENCODING_NAME: 517 errmsg = "Invalid XML encoding name"; 518 break; 519 case XML_ERR_STANDALONE_VALUE: 520 errmsg = "standalone accepts only 'yes' or 'no'"; 521 break; 522 case XML_ERR_DOCUMENT_EMPTY: 523 errmsg = "Document is empty"; 524 break; 525 case XML_ERR_DOCUMENT_END: 526 errmsg = "Extra content at the end of the document"; 527 break; 528 case XML_ERR_NOT_WELL_BALANCED: 529 errmsg = "chunk is not well balanced"; 530 break; 531 case XML_ERR_EXTRA_CONTENT: 532 errmsg = "extra content at the end of well balanced chunk"; 533 break; 534 case XML_ERR_VERSION_MISSING: 535 errmsg = "Malformed declaration expecting version"; 536 break; 537 case XML_ERR_NAME_TOO_LONG: 538 errmsg = "Name too long use XML_PARSE_HUGE option"; 539 break; 540 #if 0 541 case: 542 errmsg = ""; 543 break; 544 #endif 545 default: 546 errmsg = "Unregistered error message"; 547 } 548 if (ctxt != NULL) 549 
ctxt->errNo = error; 550 if (info == NULL) { 551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 552 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 553 errmsg); 554 } else { 555 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 556 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 557 errmsg, info); 558 } 559 if (ctxt != NULL) { 560 ctxt->wellFormed = 0; 561 if (ctxt->recovery == 0) 562 ctxt->disableSAX = 1; 563 } 564 } 565 566 /** 567 * xmlFatalErrMsg: 568 * @ctxt: an XML parser context 569 * @error: the error number 570 * @msg: the error message 571 * 572 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 573 */ 574 static void LIBXML_ATTR_FORMAT(3,0) 575 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 576 const char *msg) 577 { 578 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 579 (ctxt->instate == XML_PARSER_EOF)) 580 return; 581 if (ctxt != NULL) 582 ctxt->errNo = error; 583 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 584 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 585 if (ctxt != NULL) { 586 ctxt->wellFormed = 0; 587 if (ctxt->recovery == 0) 588 ctxt->disableSAX = 1; 589 } 590 } 591 592 /** 593 * xmlWarningMsg: 594 * @ctxt: an XML parser context 595 * @error: the error number 596 * @msg: the error message 597 * @str1: extra data 598 * @str2: extra data 599 * 600 * Handle a warning. 601 */ 602 static void LIBXML_ATTR_FORMAT(3,0) 603 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 604 const char *msg, const xmlChar *str1, const xmlChar *str2) 605 { 606 xmlStructuredErrorFunc schannel = NULL; 607 608 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 609 (ctxt->instate == XML_PARSER_EOF)) 610 return; 611 if ((ctxt != NULL) && (ctxt->sax != NULL) && 612 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 613 schannel = ctxt->sax->serror; 614 if (ctxt != NULL) { 615 __xmlRaiseError(schannel, 616 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 617 ctxt->userData, 618 ctxt, NULL, XML_FROM_PARSER, error, 619 XML_ERR_WARNING, NULL, 0, 620 (const char *) str1, (const char *) str2, NULL, 0, 0, 621 msg, (const char *) str1, (const char *) str2); 622 } else { 623 __xmlRaiseError(schannel, NULL, NULL, 624 ctxt, NULL, XML_FROM_PARSER, error, 625 XML_ERR_WARNING, NULL, 0, 626 (const char *) str1, (const char *) str2, NULL, 0, 0, 627 msg, (const char *) str1, (const char *) str2); 628 } 629 } 630 631 /** 632 * xmlValidityError: 633 * @ctxt: an XML parser context 634 * @error: the error number 635 * @msg: the error message 636 * @str1: extra data 637 * 638 * Handle a validity error. 639 */ 640 static void LIBXML_ATTR_FORMAT(3,0) 641 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 642 const char *msg, const xmlChar *str1, const xmlChar *str2) 643 { 644 xmlStructuredErrorFunc schannel = NULL; 645 646 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 647 (ctxt->instate == XML_PARSER_EOF)) 648 return; 649 if (ctxt != NULL) { 650 ctxt->errNo = error; 651 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 652 schannel = ctxt->sax->serror; 653 } 654 if (ctxt != NULL) { 655 __xmlRaiseError(schannel, 656 ctxt->vctxt.error, ctxt->vctxt.userData, 657 ctxt, NULL, XML_FROM_DTD, error, 658 XML_ERR_ERROR, NULL, 0, (const char *) str1, 659 (const char *) str2, NULL, 0, 0, 660 msg, (const char *) str1, (const char *) str2); 661 ctxt->valid = 0; 662 } else { 663 __xmlRaiseError(schannel, NULL, NULL, 664 ctxt, NULL, XML_FROM_DTD, error, 665 XML_ERR_ERROR, NULL, 0, (const char *) str1, 666 (const char *) str2, NULL, 0, 0, 667 msg, (const char *) str1, (const char *) str2); 668 } 669 } 670 671 /** 672 * xmlFatalErrMsgInt: 673 * @ctxt: an XML parser context 674 * @error: the error number 675 * @msg: the error message 676 * @val: an integer value 677 * 678 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 679 */ 680 static void LIBXML_ATTR_FORMAT(3,0) 681 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 682 const char *msg, int val) 683 { 684 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 685 (ctxt->instate == XML_PARSER_EOF)) 686 return; 687 if (ctxt != NULL) 688 ctxt->errNo = error; 689 __xmlRaiseError(NULL, NULL, NULL, 690 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 691 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 692 if (ctxt != NULL) { 693 ctxt->wellFormed = 0; 694 if (ctxt->recovery == 0) 695 ctxt->disableSAX = 1; 696 } 697 } 698 699 /** 700 * xmlFatalErrMsgStrIntStr: 701 * @ctxt: an XML parser context 702 * @error: the error number 703 * @msg: the error message 704 * @str1: an string info 705 * @val: an integer value 706 * @str2: an string info 707 * 708 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 709 */ 710 static void LIBXML_ATTR_FORMAT(3,0) 711 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 712 const char *msg, const xmlChar *str1, int val, 713 const xmlChar *str2) 714 { 715 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 716 (ctxt->instate == XML_PARSER_EOF)) 717 return; 718 if (ctxt != NULL) 719 ctxt->errNo = error; 720 __xmlRaiseError(NULL, NULL, NULL, 721 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 722 NULL, 0, (const char *) str1, (const char *) str2, 723 NULL, val, 0, msg, str1, val, str2); 724 if (ctxt != NULL) { 725 ctxt->wellFormed = 0; 726 if (ctxt->recovery == 0) 727 ctxt->disableSAX = 1; 728 } 729 } 730 731 /** 732 * xmlFatalErrMsgStr: 733 * @ctxt: an XML parser context 734 * @error: the error number 735 * @msg: the error message 736 * @val: a string value 737 * 738 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 739 */ 740 static void LIBXML_ATTR_FORMAT(3,0) 741 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 742 const char *msg, const xmlChar * val) 743 { 744 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 745 (ctxt->instate == XML_PARSER_EOF)) 746 return; 747 if (ctxt != NULL) 748 ctxt->errNo = error; 749 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 750 XML_FROM_PARSER, error, XML_ERR_FATAL, 751 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 752 val); 753 if (ctxt != NULL) { 754 ctxt->wellFormed = 0; 755 if (ctxt->recovery == 0) 756 ctxt->disableSAX = 1; 757 } 758 } 759 760 /** 761 * xmlErrMsgStr: 762 * @ctxt: an XML parser context 763 * @error: the error number 764 * @msg: the error message 765 * @val: a string value 766 * 767 * Handle a non fatal parser error 768 */ 769 static void LIBXML_ATTR_FORMAT(3,0) 770 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 771 const char *msg, const xmlChar * val) 772 { 773 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 774 (ctxt->instate == XML_PARSER_EOF)) 775 return; 776 if (ctxt != NULL) 777 ctxt->errNo = error; 778 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 779 XML_FROM_PARSER, error, XML_ERR_ERROR, 780 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 781 val); 782 } 783 784 /** 785 * xmlNsErr: 786 * @ctxt: an XML parser context 787 * @error: the error number 788 * @msg: the message 789 * @info1: extra information string 790 * @info2: extra information string 791 * 792 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 793 */ 794 static void LIBXML_ATTR_FORMAT(3,0) 795 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 796 const char *msg, 797 const xmlChar * info1, const xmlChar * info2, 798 const xmlChar * info3) 799 { 800 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 801 (ctxt->instate == XML_PARSER_EOF)) 802 return; 803 if (ctxt != NULL) 804 ctxt->errNo = error; 805 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 806 XML_ERR_ERROR, NULL, 0, (const char *) info1, 807 (const char *) info2, (const char *) info3, 0, 0, msg, 808 info1, info2, info3); 809 if (ctxt != NULL) 810 ctxt->nsWellFormed = 0; 811 } 812 813 /** 814 * xmlNsWarn 815 * @ctxt: an XML parser context 816 * @error: the error number 817 * @msg: the message 818 * @info1: extra information string 819 * @info2: extra information string 820 * 821 * Handle a namespace warning error 822 */ 823 static void LIBXML_ATTR_FORMAT(3,0) 824 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 825 const char *msg, 826 const xmlChar * info1, const xmlChar * info2, 827 const xmlChar * info3) 828 { 829 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 830 (ctxt->instate == XML_PARSER_EOF)) 831 return; 832 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 833 XML_ERR_WARNING, NULL, 0, (const char *) info1, 834 (const char *) info2, (const char *) info3, 0, 0, msg, 835 info1, info2, info3); 836 } 837 838 /************************************************************************ 839 * * 840 * Library wide options * 841 * * 842 ************************************************************************/ 843 844 /** 845 * xmlHasFeature: 846 * @feature: the feature to be examined 847 * 848 * Examines if the library has been compiled with a given feature. 849 * 850 * Returns a non-zero value if the feature exist, otherwise zero. 851 * Returns zero (0) if the feature does not exist or an unknown 852 * unknown feature is requested, non-zero otherwise. 
853 */ 854 int 855 xmlHasFeature(xmlFeature feature) 856 { 857 switch (feature) { 858 case XML_WITH_THREAD: 859 #ifdef LIBXML_THREAD_ENABLED 860 return(1); 861 #else 862 return(0); 863 #endif 864 case XML_WITH_TREE: 865 #ifdef LIBXML_TREE_ENABLED 866 return(1); 867 #else 868 return(0); 869 #endif 870 case XML_WITH_OUTPUT: 871 #ifdef LIBXML_OUTPUT_ENABLED 872 return(1); 873 #else 874 return(0); 875 #endif 876 case XML_WITH_PUSH: 877 #ifdef LIBXML_PUSH_ENABLED 878 return(1); 879 #else 880 return(0); 881 #endif 882 case XML_WITH_READER: 883 #ifdef LIBXML_READER_ENABLED 884 return(1); 885 #else 886 return(0); 887 #endif 888 case XML_WITH_PATTERN: 889 #ifdef LIBXML_PATTERN_ENABLED 890 return(1); 891 #else 892 return(0); 893 #endif 894 case XML_WITH_WRITER: 895 #ifdef LIBXML_WRITER_ENABLED 896 return(1); 897 #else 898 return(0); 899 #endif 900 case XML_WITH_SAX1: 901 #ifdef LIBXML_SAX1_ENABLED 902 return(1); 903 #else 904 return(0); 905 #endif 906 case XML_WITH_FTP: 907 #ifdef LIBXML_FTP_ENABLED 908 return(1); 909 #else 910 return(0); 911 #endif 912 case XML_WITH_HTTP: 913 #ifdef LIBXML_HTTP_ENABLED 914 return(1); 915 #else 916 return(0); 917 #endif 918 case XML_WITH_VALID: 919 #ifdef LIBXML_VALID_ENABLED 920 return(1); 921 #else 922 return(0); 923 #endif 924 case XML_WITH_HTML: 925 #ifdef LIBXML_HTML_ENABLED 926 return(1); 927 #else 928 return(0); 929 #endif 930 case XML_WITH_LEGACY: 931 #ifdef LIBXML_LEGACY_ENABLED 932 return(1); 933 #else 934 return(0); 935 #endif 936 case XML_WITH_C14N: 937 #ifdef LIBXML_C14N_ENABLED 938 return(1); 939 #else 940 return(0); 941 #endif 942 case XML_WITH_CATALOG: 943 #ifdef LIBXML_CATALOG_ENABLED 944 return(1); 945 #else 946 return(0); 947 #endif 948 case XML_WITH_XPATH: 949 #ifdef LIBXML_XPATH_ENABLED 950 return(1); 951 #else 952 return(0); 953 #endif 954 case XML_WITH_XPTR: 955 #ifdef LIBXML_XPTR_ENABLED 956 return(1); 957 #else 958 return(0); 959 #endif 960 case XML_WITH_XINCLUDE: 961 #ifdef LIBXML_XINCLUDE_ENABLED 962 return(1); 963 
#else 964 return(0); 965 #endif 966 case XML_WITH_ICONV: 967 #ifdef LIBXML_ICONV_ENABLED 968 return(1); 969 #else 970 return(0); 971 #endif 972 case XML_WITH_ISO8859X: 973 #ifdef LIBXML_ISO8859X_ENABLED 974 return(1); 975 #else 976 return(0); 977 #endif 978 case XML_WITH_UNICODE: 979 #ifdef LIBXML_UNICODE_ENABLED 980 return(1); 981 #else 982 return(0); 983 #endif 984 case XML_WITH_REGEXP: 985 #ifdef LIBXML_REGEXP_ENABLED 986 return(1); 987 #else 988 return(0); 989 #endif 990 case XML_WITH_AUTOMATA: 991 #ifdef LIBXML_AUTOMATA_ENABLED 992 return(1); 993 #else 994 return(0); 995 #endif 996 case XML_WITH_EXPR: 997 #ifdef LIBXML_EXPR_ENABLED 998 return(1); 999 #else 1000 return(0); 1001 #endif 1002 case XML_WITH_SCHEMAS: 1003 #ifdef LIBXML_SCHEMAS_ENABLED 1004 return(1); 1005 #else 1006 return(0); 1007 #endif 1008 case XML_WITH_SCHEMATRON: 1009 #ifdef LIBXML_SCHEMATRON_ENABLED 1010 return(1); 1011 #else 1012 return(0); 1013 #endif 1014 case XML_WITH_MODULES: 1015 #ifdef LIBXML_MODULES_ENABLED 1016 return(1); 1017 #else 1018 return(0); 1019 #endif 1020 case XML_WITH_DEBUG: 1021 #ifdef LIBXML_DEBUG_ENABLED 1022 return(1); 1023 #else 1024 return(0); 1025 #endif 1026 case XML_WITH_DEBUG_MEM: 1027 #ifdef DEBUG_MEMORY_LOCATION 1028 return(1); 1029 #else 1030 return(0); 1031 #endif 1032 case XML_WITH_DEBUG_RUN: 1033 #ifdef LIBXML_DEBUG_RUNTIME 1034 return(1); 1035 #else 1036 return(0); 1037 #endif 1038 case XML_WITH_ZLIB: 1039 #ifdef LIBXML_ZLIB_ENABLED 1040 return(1); 1041 #else 1042 return(0); 1043 #endif 1044 case XML_WITH_LZMA: 1045 #ifdef LIBXML_LZMA_ENABLED 1046 return(1); 1047 #else 1048 return(0); 1049 #endif 1050 case XML_WITH_ICU: 1051 #ifdef LIBXML_ICU_ENABLED 1052 return(1); 1053 #else 1054 return(0); 1055 #endif 1056 default: 1057 break; 1058 } 1059 return(0); 1060 } 1061 1062 /************************************************************************ 1063 * * 1064 * SAX2 defaulted attributes handling * 1065 * * 1066 
************************************************************************/ 1067 1068 /** 1069 * xmlDetectSAX2: 1070 * @ctxt: an XML parser context 1071 * 1072 * Do the SAX2 detection and specific initialization 1073 */ 1074 static void 1075 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1076 if (ctxt == NULL) return; 1077 #ifdef LIBXML_SAX1_ENABLED 1078 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1079 ((ctxt->sax->startElementNs != NULL) || 1080 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1081 #else 1082 ctxt->sax2 = 1; 1083 #endif /* LIBXML_SAX1_ENABLED */ 1084 1085 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1086 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1087 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1088 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1089 (ctxt->str_xml_ns == NULL)) { 1090 xmlErrMemory(ctxt, NULL); 1091 } 1092 } 1093 1094 typedef struct _xmlDefAttrs xmlDefAttrs; 1095 typedef xmlDefAttrs *xmlDefAttrsPtr; 1096 struct _xmlDefAttrs { 1097 int nbAttrs; /* number of defaulted attributes on that element */ 1098 int maxAttrs; /* the size of the array */ 1099 #if __STDC_VERSION__ >= 199901L 1100 /* Using a C99 flexible array member avoids UBSan errors. */ 1101 const xmlChar *values[]; /* array of localname/prefix/values/external */ 1102 #else 1103 const xmlChar *values[5]; 1104 #endif 1105 }; 1106 1107 /** 1108 * xmlAttrNormalizeSpace: 1109 * @src: the source string 1110 * @dst: the target string 1111 * 1112 * Normalize the space in non CDATA attribute values: 1113 * If the attribute type is not CDATA, then the XML processor MUST further 1114 * process the normalized attribute value by discarding any leading and 1115 * trailing space (#x20) characters, and by replacing sequences of space 1116 * (#x20) characters by a single space (#x20) character. 
1117 * Note that the size of dst need to be at least src, and if one doesn't need 1118 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1119 * passing src as dst is just fine. 1120 * 1121 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1122 * is needed. 1123 */ 1124 static xmlChar * 1125 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1126 { 1127 if ((src == NULL) || (dst == NULL)) 1128 return(NULL); 1129 1130 while (*src == 0x20) src++; 1131 while (*src != 0) { 1132 if (*src == 0x20) { 1133 while (*src == 0x20) src++; 1134 if (*src != 0) 1135 *dst++ = 0x20; 1136 } else { 1137 *dst++ = *src++; 1138 } 1139 } 1140 *dst = 0; 1141 if (dst == src) 1142 return(NULL); 1143 return(dst); 1144 } 1145 1146 /** 1147 * xmlAttrNormalizeSpace2: 1148 * @src: the source string 1149 * 1150 * Normalize the space in non CDATA attribute values, a slightly more complex 1151 * front end to avoid allocation problems when running on attribute values 1152 * coming from the input. 1153 * 1154 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1155 * is needed. 
1156 */ 1157 static const xmlChar * 1158 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1159 { 1160 int i; 1161 int remove_head = 0; 1162 int need_realloc = 0; 1163 const xmlChar *cur; 1164 1165 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1166 return(NULL); 1167 i = *len; 1168 if (i <= 0) 1169 return(NULL); 1170 1171 cur = src; 1172 while (*cur == 0x20) { 1173 cur++; 1174 remove_head++; 1175 } 1176 while (*cur != 0) { 1177 if (*cur == 0x20) { 1178 cur++; 1179 if ((*cur == 0x20) || (*cur == 0)) { 1180 need_realloc = 1; 1181 break; 1182 } 1183 } else 1184 cur++; 1185 } 1186 if (need_realloc) { 1187 xmlChar *ret; 1188 1189 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1190 if (ret == NULL) { 1191 xmlErrMemory(ctxt, NULL); 1192 return(NULL); 1193 } 1194 xmlAttrNormalizeSpace(ret, ret); 1195 *len = (int) strlen((const char *)ret); 1196 return(ret); 1197 } else if (remove_head) { 1198 *len -= remove_head; 1199 memmove(src, src + remove_head, 1 + *len); 1200 return(src); 1201 } 1202 return(NULL); 1203 } 1204 1205 /** 1206 * xmlAddDefAttrs: 1207 * @ctxt: an XML parser context 1208 * @fullname: the element fullname 1209 * @fullattr: the attribute fullname 1210 * @value: the attribute value 1211 * 1212 * Add a defaulted attribute for an element 1213 */ 1214 static void 1215 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1216 const xmlChar *fullname, 1217 const xmlChar *fullattr, 1218 const xmlChar *value) { 1219 xmlDefAttrsPtr defaults; 1220 int len; 1221 const xmlChar *name; 1222 const xmlChar *prefix; 1223 1224 /* 1225 * Allows to detect attribute redefinitions 1226 */ 1227 if (ctxt->attsSpecial != NULL) { 1228 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1229 return; 1230 } 1231 1232 if (ctxt->attsDefault == NULL) { 1233 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1234 if (ctxt->attsDefault == NULL) 1235 goto mem_error; 1236 } 1237 1238 /* 1239 * split the element name into prefix:localname , the string 
found 1240 * are within the DTD and then not associated to namespace names. 1241 */ 1242 name = xmlSplitQName3(fullname, &len); 1243 if (name == NULL) { 1244 name = xmlDictLookup(ctxt->dict, fullname, -1); 1245 prefix = NULL; 1246 } else { 1247 name = xmlDictLookup(ctxt->dict, name, -1); 1248 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1249 } 1250 1251 /* 1252 * make sure there is some storage 1253 */ 1254 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1255 if (defaults == NULL) { 1256 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1257 (4 * 5) * sizeof(const xmlChar *)); 1258 if (defaults == NULL) 1259 goto mem_error; 1260 defaults->nbAttrs = 0; 1261 defaults->maxAttrs = 4; 1262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1263 defaults, NULL) < 0) { 1264 xmlFree(defaults); 1265 goto mem_error; 1266 } 1267 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1268 xmlDefAttrsPtr temp; 1269 1270 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1271 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1272 if (temp == NULL) 1273 goto mem_error; 1274 defaults = temp; 1275 defaults->maxAttrs *= 2; 1276 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1277 defaults, NULL) < 0) { 1278 xmlFree(defaults); 1279 goto mem_error; 1280 } 1281 } 1282 1283 /* 1284 * Split the element name into prefix:localname , the string found 1285 * are within the DTD and hen not associated to namespace names. 
1286 */ 1287 name = xmlSplitQName3(fullattr, &len); 1288 if (name == NULL) { 1289 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1290 prefix = NULL; 1291 } else { 1292 name = xmlDictLookup(ctxt->dict, name, -1); 1293 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1294 } 1295 1296 defaults->values[5 * defaults->nbAttrs] = name; 1297 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1298 /* intern the string and precompute the end */ 1299 len = xmlStrlen(value); 1300 value = xmlDictLookup(ctxt->dict, value, len); 1301 defaults->values[5 * defaults->nbAttrs + 2] = value; 1302 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1303 if (ctxt->external) 1304 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1305 else 1306 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1307 defaults->nbAttrs++; 1308 1309 return; 1310 1311 mem_error: 1312 xmlErrMemory(ctxt, NULL); 1313 return; 1314 } 1315 1316 /** 1317 * xmlAddSpecialAttr: 1318 * @ctxt: an XML parser context 1319 * @fullname: the element fullname 1320 * @fullattr: the attribute fullname 1321 * @type: the attribute type 1322 * 1323 * Register this attribute type 1324 */ 1325 static void 1326 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1327 const xmlChar *fullname, 1328 const xmlChar *fullattr, 1329 int type) 1330 { 1331 if (ctxt->attsSpecial == NULL) { 1332 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1333 if (ctxt->attsSpecial == NULL) 1334 goto mem_error; 1335 } 1336 1337 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1338 return; 1339 1340 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1341 (void *) (ptrdiff_t) type); 1342 return; 1343 1344 mem_error: 1345 xmlErrMemory(ctxt, NULL); 1346 return; 1347 } 1348 1349 /** 1350 * xmlCleanSpecialAttrCallback: 1351 * 1352 * Removes CDATA attributes from the special attribute table 1353 */ 1354 static void 1355 xmlCleanSpecialAttrCallback(void *payload, void *data, 1356 const xmlChar *fullname, const xmlChar 
*fullattr, 1357 const xmlChar *unused ATTRIBUTE_UNUSED) { 1358 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1359 1360 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { 1361 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1362 } 1363 } 1364 1365 /** 1366 * xmlCleanSpecialAttr: 1367 * @ctxt: an XML parser context 1368 * 1369 * Trim the list of attributes defined to remove all those of type 1370 * CDATA as they are not special. This call should be done when finishing 1371 * to parse the DTD and before starting to parse the document root. 1372 */ 1373 static void 1374 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1375 { 1376 if (ctxt->attsSpecial == NULL) 1377 return; 1378 1379 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1380 1381 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1382 xmlHashFree(ctxt->attsSpecial, NULL); 1383 ctxt->attsSpecial = NULL; 1384 } 1385 return; 1386 } 1387 1388 /** 1389 * xmlCheckLanguageID: 1390 * @lang: pointer to the string value 1391 * 1392 * Checks that the value conforms to the LanguageID production: 1393 * 1394 * NOTE: this is somewhat deprecated, those productions were removed from 1395 * the XML Second edition. 
1396 * 1397 * [33] LanguageID ::= Langcode ('-' Subcode)* 1398 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1399 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1400 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1401 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1402 * [38] Subcode ::= ([a-z] | [A-Z])+ 1403 * 1404 * The current REC reference the successors of RFC 1766, currently 5646 1405 * 1406 * http://www.rfc-editor.org/rfc/rfc5646.txt 1407 * langtag = language 1408 * ["-" script] 1409 * ["-" region] 1410 * *("-" variant) 1411 * *("-" extension) 1412 * ["-" privateuse] 1413 * language = 2*3ALPHA ; shortest ISO 639 code 1414 * ["-" extlang] ; sometimes followed by 1415 * ; extended language subtags 1416 * / 4ALPHA ; or reserved for future use 1417 * / 5*8ALPHA ; or registered language subtag 1418 * 1419 * extlang = 3ALPHA ; selected ISO 639 codes 1420 * *2("-" 3ALPHA) ; permanently reserved 1421 * 1422 * script = 4ALPHA ; ISO 15924 code 1423 * 1424 * region = 2ALPHA ; ISO 3166-1 code 1425 * / 3DIGIT ; UN M.49 code 1426 * 1427 * variant = 5*8alphanum ; registered variants 1428 * / (DIGIT 3alphanum) 1429 * 1430 * extension = singleton 1*("-" (2*8alphanum)) 1431 * 1432 * ; Single alphanumerics 1433 * ; "x" reserved for private use 1434 * singleton = DIGIT ; 0 - 9 1435 * / %x41-57 ; A - W 1436 * / %x59-5A ; Y - Z 1437 * / %x61-77 ; a - w 1438 * / %x79-7A ; y - z 1439 * 1440 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1441 * The parser below doesn't try to cope with extension or privateuse 1442 * that could be added but that's not interoperable anyway 1443 * 1444 * Returns 1 if correct 0 otherwise 1445 **/ 1446 int 1447 xmlCheckLanguageID(const xmlChar * lang) 1448 { 1449 const xmlChar *cur = lang, *nxt; 1450 1451 if (cur == NULL) 1452 return (0); 1453 if (((cur[0] == 'i') && (cur[1] == '-')) || 1454 ((cur[0] == 'I') && (cur[1] == '-')) || 1455 ((cur[0] == 'x') && (cur[1] == '-')) || 1456 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1457 /* 1458 * Still allow IANA code and user code which were coming 1459 * from the previous version of the XML-1.0 specification 1460 * it's deprecated but we should not fail 1461 */ 1462 cur += 2; 1463 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1464 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1465 cur++; 1466 return(cur[0] == 0); 1467 } 1468 nxt = cur; 1469 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1470 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1471 nxt++; 1472 if (nxt - cur >= 4) { 1473 /* 1474 * Reserved 1475 */ 1476 if ((nxt - cur > 8) || (nxt[0] != 0)) 1477 return(0); 1478 return(1); 1479 } 1480 if (nxt - cur < 2) 1481 return(0); 1482 /* we got an ISO 639 code */ 1483 if (nxt[0] == 0) 1484 return(1); 1485 if (nxt[0] != '-') 1486 return(0); 1487 1488 nxt++; 1489 cur = nxt; 1490 /* now we can have extlang or script or region or variant */ 1491 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1492 goto region_m49; 1493 1494 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1495 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1496 nxt++; 1497 if (nxt - cur == 4) 1498 goto script; 1499 if (nxt - cur == 2) 1500 goto region; 1501 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1502 goto variant; 1503 if (nxt - cur != 3) 1504 return(0); 1505 /* we parsed an extlang */ 1506 if (nxt[0] == 0) 1507 return(1); 1508 if (nxt[0] != '-') 1509 return(0); 1510 1511 nxt++; 1512 cur = nxt; 1513 /* now we can have script or region or variant */ 1514 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1515 goto region_m49; 1516 1517 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1518 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1519 nxt++; 1520 if (nxt - cur == 2) 1521 goto region; 1522 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1523 goto variant; 1524 if (nxt - cur != 4) 1525 return(0); 1526 /* we parsed a script */ 1527 script: 1528 if (nxt[0] == 0) 1529 return(1); 1530 if (nxt[0] != '-') 1531 return(0); 1532 1533 nxt++; 1534 cur = nxt; 1535 /* now we can have region or variant */ 1536 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1537 goto region_m49; 1538 1539 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1540 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1541 nxt++; 1542 1543 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1544 goto variant; 1545 if (nxt - cur != 2) 1546 return(0); 1547 /* we parsed a region */ 1548 region: 1549 if (nxt[0] == 0) 1550 return(1); 1551 if (nxt[0] != '-') 1552 return(0); 1553 1554 nxt++; 1555 cur = nxt; 1556 /* now we can just have a variant */ 1557 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1558 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1559 nxt++; 1560 1561 if ((nxt - cur < 5) || (nxt - cur > 8)) 1562 return(0); 1563 1564 /* we parsed a variant */ 1565 variant: 1566 if (nxt[0] == 0) 1567 return(1); 1568 if (nxt[0] != '-') 1569 return(0); 1570 /* extensions and private use subtags not checked */ 1571 return (1); 1572 1573 region_m49: 1574 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1575 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1576 nxt += 3; 1577 goto region; 1578 } 1579 return(0); 1580 } 1581 1582 /************************************************************************ 1583 * * 1584 * Parser stacks related functions and macros * 1585 * * 1586 ************************************************************************/ 1587 1588 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1589 const xmlChar ** str); 1590 1591 #ifdef SAX2 1592 /** 1593 * nsPush: 1594 * @ctxt: an XML parser context 1595 * @prefix: the namespace prefix or NULL 1596 * @URL: the namespace name 1597 * 1598 * Pushes a new parser namespace on top of the ns stack 1599 * 1600 * Returns -1 in case of error, -2 if the namespace should be discarded 1601 * and the index in the stack otherwise. 
1602 */ 1603 static int 1604 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1605 { 1606 if (ctxt->options & XML_PARSE_NSCLEAN) { 1607 int i; 1608 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1609 if (ctxt->nsTab[i] == prefix) { 1610 /* in scope */ 1611 if (ctxt->nsTab[i + 1] == URL) 1612 return(-2); 1613 /* out of scope keep it */ 1614 break; 1615 } 1616 } 1617 } 1618 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1619 ctxt->nsMax = 10; 1620 ctxt->nsNr = 0; 1621 ctxt->nsTab = (const xmlChar **) 1622 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1623 if (ctxt->nsTab == NULL) { 1624 xmlErrMemory(ctxt, NULL); 1625 ctxt->nsMax = 0; 1626 return (-1); 1627 } 1628 } else if (ctxt->nsNr >= ctxt->nsMax) { 1629 const xmlChar ** tmp; 1630 ctxt->nsMax *= 2; 1631 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1632 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1633 if (tmp == NULL) { 1634 xmlErrMemory(ctxt, NULL); 1635 ctxt->nsMax /= 2; 1636 return (-1); 1637 } 1638 ctxt->nsTab = tmp; 1639 } 1640 ctxt->nsTab[ctxt->nsNr++] = prefix; 1641 ctxt->nsTab[ctxt->nsNr++] = URL; 1642 return (ctxt->nsNr); 1643 } 1644 /** 1645 * nsPop: 1646 * @ctxt: an XML parser context 1647 * @nr: the number to pop 1648 * 1649 * Pops the top @nr parser prefix/namespace from the ns stack 1650 * 1651 * Returns the number of namespaces removed 1652 */ 1653 static int 1654 nsPop(xmlParserCtxtPtr ctxt, int nr) 1655 { 1656 int i; 1657 1658 if (ctxt->nsTab == NULL) return(0); 1659 if (ctxt->nsNr < nr) { 1660 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1661 nr = ctxt->nsNr; 1662 } 1663 if (ctxt->nsNr <= 0) 1664 return (0); 1665 1666 for (i = 0;i < nr;i++) { 1667 ctxt->nsNr--; 1668 ctxt->nsTab[ctxt->nsNr] = NULL; 1669 } 1670 return(nr); 1671 } 1672 #endif 1673 1674 static int 1675 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1676 const xmlChar **atts; 1677 int *attallocs; 1678 int maxatts; 1679 1680 if (ctxt->atts == NULL) { 1681 maxatts = 55; /* allow for 
10 attrs by default */ 1682 atts = (const xmlChar **) 1683 xmlMalloc(maxatts * sizeof(xmlChar *)); 1684 if (atts == NULL) goto mem_error; 1685 ctxt->atts = atts; 1686 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1687 if (attallocs == NULL) goto mem_error; 1688 ctxt->attallocs = attallocs; 1689 ctxt->maxatts = maxatts; 1690 } else if (nr + 5 > ctxt->maxatts) { 1691 maxatts = (nr + 5) * 2; 1692 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1693 maxatts * sizeof(const xmlChar *)); 1694 if (atts == NULL) goto mem_error; 1695 ctxt->atts = atts; 1696 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1697 (maxatts / 5) * sizeof(int)); 1698 if (attallocs == NULL) goto mem_error; 1699 ctxt->attallocs = attallocs; 1700 ctxt->maxatts = maxatts; 1701 } 1702 return(ctxt->maxatts); 1703 mem_error: 1704 xmlErrMemory(ctxt, NULL); 1705 return(-1); 1706 } 1707 1708 /** 1709 * inputPush: 1710 * @ctxt: an XML parser context 1711 * @value: the parser input 1712 * 1713 * Pushes a new parser input on top of the input stack 1714 * 1715 * Returns -1 in case of error, the index in the stack otherwise 1716 */ 1717 int 1718 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1719 { 1720 if ((ctxt == NULL) || (value == NULL)) 1721 return(-1); 1722 if (ctxt->inputNr >= ctxt->inputMax) { 1723 ctxt->inputMax *= 2; 1724 ctxt->inputTab = 1725 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1726 ctxt->inputMax * 1727 sizeof(ctxt->inputTab[0])); 1728 if (ctxt->inputTab == NULL) { 1729 xmlErrMemory(ctxt, NULL); 1730 xmlFreeInputStream(value); 1731 ctxt->inputMax /= 2; 1732 value = NULL; 1733 return (-1); 1734 } 1735 } 1736 ctxt->inputTab[ctxt->inputNr] = value; 1737 ctxt->input = value; 1738 return (ctxt->inputNr++); 1739 } 1740 /** 1741 * inputPop: 1742 * @ctxt: an XML parser context 1743 * 1744 * Pops the top parser input from the input stack 1745 * 1746 * Returns the input just removed 1747 */ 1748 xmlParserInputPtr 1749 inputPop(xmlParserCtxtPtr ctxt) 1750 
{ 1751 xmlParserInputPtr ret; 1752 1753 if (ctxt == NULL) 1754 return(NULL); 1755 if (ctxt->inputNr <= 0) 1756 return (NULL); 1757 ctxt->inputNr--; 1758 if (ctxt->inputNr > 0) 1759 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1760 else 1761 ctxt->input = NULL; 1762 ret = ctxt->inputTab[ctxt->inputNr]; 1763 ctxt->inputTab[ctxt->inputNr] = NULL; 1764 return (ret); 1765 } 1766 /** 1767 * nodePush: 1768 * @ctxt: an XML parser context 1769 * @value: the element node 1770 * 1771 * Pushes a new element node on top of the node stack 1772 * 1773 * Returns -1 in case of error, the index in the stack otherwise 1774 */ 1775 int 1776 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1777 { 1778 if (ctxt == NULL) return(0); 1779 if (ctxt->nodeNr >= ctxt->nodeMax) { 1780 xmlNodePtr *tmp; 1781 1782 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1783 ctxt->nodeMax * 2 * 1784 sizeof(ctxt->nodeTab[0])); 1785 if (tmp == NULL) { 1786 xmlErrMemory(ctxt, NULL); 1787 return (-1); 1788 } 1789 ctxt->nodeTab = tmp; 1790 ctxt->nodeMax *= 2; 1791 } 1792 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1793 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1794 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1795 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1796 xmlParserMaxDepth); 1797 xmlHaltParser(ctxt); 1798 return(-1); 1799 } 1800 ctxt->nodeTab[ctxt->nodeNr] = value; 1801 ctxt->node = value; 1802 return (ctxt->nodeNr++); 1803 } 1804 1805 /** 1806 * nodePop: 1807 * @ctxt: an XML parser context 1808 * 1809 * Pops the top element node from the node stack 1810 * 1811 * Returns the node just removed 1812 */ 1813 xmlNodePtr 1814 nodePop(xmlParserCtxtPtr ctxt) 1815 { 1816 xmlNodePtr ret; 1817 1818 if (ctxt == NULL) return(NULL); 1819 if (ctxt->nodeNr <= 0) 1820 return (NULL); 1821 ctxt->nodeNr--; 1822 if (ctxt->nodeNr > 0) 1823 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1824 else 1825 ctxt->node = NULL; 1826 ret = ctxt->nodeTab[ctxt->nodeNr]; 1827 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1828 return (ret); 1829 } 1830 1831 /** 1832 * nameNsPush: 1833 * @ctxt: an XML parser context 1834 * @value: the element name 1835 * @prefix: the element prefix 1836 * @URI: the element namespace name 1837 * 1838 * Pushes a new element name/prefix/URL on top of the name stack 1839 * 1840 * Returns -1 in case of error, the index in the stack otherwise 1841 */ 1842 static int 1843 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1844 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1845 { 1846 if (ctxt->nameNr >= ctxt->nameMax) { 1847 const xmlChar * *tmp; 1848 void **tmp2; 1849 ctxt->nameMax *= 2; 1850 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1851 ctxt->nameMax * 1852 sizeof(ctxt->nameTab[0])); 1853 if (tmp == NULL) { 1854 ctxt->nameMax /= 2; 1855 goto mem_error; 1856 } 1857 ctxt->nameTab = tmp; 1858 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1859 ctxt->nameMax * 3 * 1860 sizeof(ctxt->pushTab[0])); 1861 if (tmp2 == NULL) { 1862 ctxt->nameMax /= 2; 1863 goto mem_error; 1864 } 1865 ctxt->pushTab = tmp2; 1866 } else if (ctxt->pushTab == NULL) { 1867 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 1868 sizeof(ctxt->pushTab[0])); 1869 if (ctxt->pushTab == NULL) 1870 goto mem_error; 1871 } 1872 ctxt->nameTab[ctxt->nameNr] = value; 1873 ctxt->name = value; 1874 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1875 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1876 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr; 1877 return (ctxt->nameNr++); 1878 mem_error: 1879 xmlErrMemory(ctxt, NULL); 1880 return (-1); 1881 } 1882 #ifdef LIBXML_PUSH_ENABLED 1883 /** 1884 * nameNsPop: 1885 * @ctxt: an XML parser context 1886 * 1887 * Pops the top element/prefix/URI name from the name stack 1888 * 1889 * Returns the name just removed 1890 */ 1891 static const xmlChar * 1892 nameNsPop(xmlParserCtxtPtr ctxt) 1893 { 1894 const xmlChar *ret; 1895 1896 if (ctxt->nameNr <= 0) 1897 
return (NULL); 1898 ctxt->nameNr--; 1899 if (ctxt->nameNr > 0) 1900 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1901 else 1902 ctxt->name = NULL; 1903 ret = ctxt->nameTab[ctxt->nameNr]; 1904 ctxt->nameTab[ctxt->nameNr] = NULL; 1905 return (ret); 1906 } 1907 #endif /* LIBXML_PUSH_ENABLED */ 1908 1909 /** 1910 * namePush: 1911 * @ctxt: an XML parser context 1912 * @value: the element name 1913 * 1914 * Pushes a new element name on top of the name stack 1915 * 1916 * Returns -1 in case of error, the index in the stack otherwise 1917 */ 1918 int 1919 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1920 { 1921 if (ctxt == NULL) return (-1); 1922 1923 if (ctxt->nameNr >= ctxt->nameMax) { 1924 const xmlChar * *tmp; 1925 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1926 ctxt->nameMax * 2 * 1927 sizeof(ctxt->nameTab[0])); 1928 if (tmp == NULL) { 1929 goto mem_error; 1930 } 1931 ctxt->nameTab = tmp; 1932 ctxt->nameMax *= 2; 1933 } 1934 ctxt->nameTab[ctxt->nameNr] = value; 1935 ctxt->name = value; 1936 return (ctxt->nameNr++); 1937 mem_error: 1938 xmlErrMemory(ctxt, NULL); 1939 return (-1); 1940 } 1941 /** 1942 * namePop: 1943 * @ctxt: an XML parser context 1944 * 1945 * Pops the top element name from the name stack 1946 * 1947 * Returns the name just removed 1948 */ 1949 const xmlChar * 1950 namePop(xmlParserCtxtPtr ctxt) 1951 { 1952 const xmlChar *ret; 1953 1954 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1955 return (NULL); 1956 ctxt->nameNr--; 1957 if (ctxt->nameNr > 0) 1958 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1959 else 1960 ctxt->name = NULL; 1961 ret = ctxt->nameTab[ctxt->nameNr]; 1962 ctxt->nameTab[ctxt->nameNr] = NULL; 1963 return (ret); 1964 } 1965 1966 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1967 if (ctxt->spaceNr >= ctxt->spaceMax) { 1968 int *tmp; 1969 1970 ctxt->spaceMax *= 2; 1971 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1972 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1973 if (tmp == NULL) { 1974 
xmlErrMemory(ctxt, NULL); 1975 ctxt->spaceMax /=2; 1976 return(-1); 1977 } 1978 ctxt->spaceTab = tmp; 1979 } 1980 ctxt->spaceTab[ctxt->spaceNr] = val; 1981 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1982 return(ctxt->spaceNr++); 1983 } 1984 1985 static int spacePop(xmlParserCtxtPtr ctxt) { 1986 int ret; 1987 if (ctxt->spaceNr <= 0) return(0); 1988 ctxt->spaceNr--; 1989 if (ctxt->spaceNr > 0) 1990 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1991 else 1992 ctxt->space = &ctxt->spaceTab[0]; 1993 ret = ctxt->spaceTab[ctxt->spaceNr]; 1994 ctxt->spaceTab[ctxt->spaceNr] = -1; 1995 return(ret); 1996 } 1997 1998 /* 1999 * Macros for accessing the content. Those should be used only by the parser, 2000 * and not exported. 2001 * 2002 * Dirty macros, i.e. one often need to make assumption on the context to 2003 * use them 2004 * 2005 * CUR_PTR return the current pointer to the xmlChar to be parsed. 2006 * To be used with extreme caution since operations consuming 2007 * characters may move the input buffer to a different location ! 2008 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 2009 * This should be used internally by the parser 2010 * only to compare to ASCII values otherwise it would break when 2011 * running with UTF-8 encoding. 2012 * RAW same as CUR but in the input buffer, bypass any token 2013 * extraction that may have been done 2014 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 2015 * to compare on ASCII based substring. 2016 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 2017 * strings without newlines within the parser. 2018 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2019 * defined char within the parser. 2020 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2021 * 2022 * NEXT Skip to the next character, this does the proper decoding 2023 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 
2024 * NEXTL(l) Skip the current unicode character of l xmlChars long. 2025 * CUR_CHAR(l) returns the current unicode character (int), set l 2026 * to the number of xmlChars used for the encoding [0-5]. 2027 * CUR_SCHAR same but operate on a string instead of the context 2028 * COPY_BUF copy the current unicode char to the target buffer, increment 2029 * the index 2030 * GROW, SHRINK handling of input buffers 2031 */ 2032 2033 #define RAW (*ctxt->input->cur) 2034 #define CUR (*ctxt->input->cur) 2035 #define NXT(val) ctxt->input->cur[(val)] 2036 #define CUR_PTR ctxt->input->cur 2037 #define BASE_PTR ctxt->input->base 2038 2039 #define CMP4( s, c1, c2, c3, c4 ) \ 2040 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2041 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2042 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 2043 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2044 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2045 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2046 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2047 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2048 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2049 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2050 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2051 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2052 ((unsigned char *) s)[ 8 ] == c9 ) 2053 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2054 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2055 ((unsigned char *) s)[ 9 ] == c10 ) 2056 2057 #define SKIP(val) do { \ 2058 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2059 if (*ctxt->input->cur == 0) \ 2060 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2061 } while (0) 2062 2063 #define SKIPL(val) do { \ 2064 int skipl; \ 2065 for(skipl=0; skipl<val; skipl++) { \ 2066 if 
(*(ctxt->input->cur) == '\n') { \ 2067 ctxt->input->line++; ctxt->input->col = 1; \ 2068 } else ctxt->input->col++; \ 2069 ctxt->nbChars++; \ 2070 ctxt->input->cur++; \ 2071 } \ 2072 if (*ctxt->input->cur == 0) \ 2073 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2074 } while (0) 2075 2076 #define SHRINK if ((ctxt->progressive == 0) && \ 2077 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2078 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2079 xmlSHRINK (ctxt); 2080 2081 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2082 xmlParserInputShrink(ctxt->input); 2083 if (*ctxt->input->cur == 0) 2084 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2085 } 2086 2087 #define GROW if ((ctxt->progressive == 0) && \ 2088 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2089 xmlGROW (ctxt); 2090 2091 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2092 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur; 2093 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base; 2094 2095 if (((curEnd > XML_MAX_LOOKUP_LIMIT) || 2096 (curBase > XML_MAX_LOOKUP_LIMIT)) && 2097 ((ctxt->input->buf) && 2098 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) && 2099 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2100 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2101 xmlHaltParser(ctxt); 2102 return; 2103 } 2104 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2105 if ((ctxt->input->cur > ctxt->input->end) || 2106 (ctxt->input->cur < ctxt->input->base)) { 2107 xmlHaltParser(ctxt); 2108 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); 2109 return; 2110 } 2111 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0)) 2112 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2113 } 2114 2115 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2116 2117 #define NEXT xmlNextChar(ctxt) 2118 2119 #define NEXT1 { \ 2120 ctxt->input->col++; \ 2121 ctxt->input->cur++; \ 2122 ctxt->nbChars++; \ 2123 if (*ctxt->input->cur == 0) \ 2124 
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2125 } 2126 2127 #define NEXTL(l) do { \ 2128 if (*(ctxt->input->cur) == '\n') { \ 2129 ctxt->input->line++; ctxt->input->col = 1; \ 2130 } else ctxt->input->col++; \ 2131 ctxt->input->cur += l; \ 2132 } while (0) 2133 2134 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2135 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2136 2137 #define COPY_BUF(l,b,i,v) \ 2138 if (l == 1) b[i++] = (xmlChar) v; \ 2139 else i += xmlCopyCharMultiByte(&b[i],v) 2140 2141 /** 2142 * xmlSkipBlankChars: 2143 * @ctxt: the XML parser context 2144 * 2145 * skip all blanks character found at that point in the input streams. 2146 * It pops up finished entities in the process if allowable at that point. 2147 * 2148 * Returns the number of space chars skipped 2149 */ 2150 2151 int 2152 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2153 int res = 0; 2154 2155 /* 2156 * It's Okay to use CUR/NEXT here since all the blanks are on 2157 * the ASCII range. 2158 */ 2159 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2160 const xmlChar *cur; 2161 /* 2162 * if we are in the document content, go really fast 2163 */ 2164 cur = ctxt->input->cur; 2165 while (IS_BLANK_CH(*cur)) { 2166 if (*cur == '\n') { 2167 ctxt->input->line++; ctxt->input->col = 1; 2168 } else { 2169 ctxt->input->col++; 2170 } 2171 cur++; 2172 res++; 2173 if (*cur == 0) { 2174 ctxt->input->cur = cur; 2175 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2176 cur = ctxt->input->cur; 2177 } 2178 } 2179 ctxt->input->cur = cur; 2180 } else { 2181 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1)); 2182 2183 while (1) { 2184 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */ 2185 NEXT; 2186 } else if (CUR == '%') { 2187 /* 2188 * Need to handle support of entities branching here 2189 */ 2190 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0)) 2191 break; 2192 xmlParsePEReference(ctxt); 2193 } else if (CUR == 0) { 2194 if (ctxt->inputNr <= 1) 2195 
break; 2196 xmlPopInput(ctxt); 2197 } else { 2198 break; 2199 } 2200 2201 /* 2202 * Also increase the counter when entering or exiting a PERef. 2203 * The spec says: "When a parameter-entity reference is recognized 2204 * in the DTD and included, its replacement text MUST be enlarged 2205 * by the attachment of one leading and one following space (#x20) 2206 * character." 2207 */ 2208 res++; 2209 } 2210 } 2211 return(res); 2212 } 2213 2214 /************************************************************************ 2215 * * 2216 * Commodity functions to handle entities * 2217 * * 2218 ************************************************************************/ 2219 2220 /** 2221 * xmlPopInput: 2222 * @ctxt: an XML parser context 2223 * 2224 * xmlPopInput: the current input pointed by ctxt->input came to an end 2225 * pop it and return the next char. 2226 * 2227 * Returns the current xmlChar in the parser context 2228 */ 2229 xmlChar 2230 xmlPopInput(xmlParserCtxtPtr ctxt) { 2231 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2232 if (xmlParserDebugEntities) 2233 xmlGenericError(xmlGenericErrorContext, 2234 "Popping input %d\n", ctxt->inputNr); 2235 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) && 2236 (ctxt->instate != XML_PARSER_EOF)) 2237 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2238 "Unfinished entity outside the DTD"); 2239 xmlFreeInputStream(inputPop(ctxt)); 2240 if (*ctxt->input->cur == 0) 2241 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2242 return(CUR); 2243 } 2244 2245 /** 2246 * xmlPushInput: 2247 * @ctxt: an XML parser context 2248 * @input: an XML parser input fragment (entity, XML fragment ...). 2249 * 2250 * xmlPushInput: switch to a new input stream which is stacked on top 2251 * of the previous one(s). 
2252 * Returns -1 in case of error or the index in the input stack 2253 */ 2254 int 2255 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2256 int ret; 2257 if (input == NULL) return(-1); 2258 2259 if (xmlParserDebugEntities) { 2260 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2261 xmlGenericError(xmlGenericErrorContext, 2262 "%s(%d): ", ctxt->input->filename, 2263 ctxt->input->line); 2264 xmlGenericError(xmlGenericErrorContext, 2265 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2266 } 2267 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2268 (ctxt->inputNr > 1024)) { 2269 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2270 while (ctxt->inputNr > 1) 2271 xmlFreeInputStream(inputPop(ctxt)); 2272 return(-1); 2273 } 2274 ret = inputPush(ctxt, input); 2275 if (ctxt->instate == XML_PARSER_EOF) 2276 return(-1); 2277 GROW; 2278 return(ret); 2279 } 2280 2281 /** 2282 * xmlParseCharRef: 2283 * @ctxt: an XML parser context 2284 * 2285 * parse Reference declarations 2286 * 2287 * [66] CharRef ::= '&#' [0-9]+ ';' | 2288 * '&#x' [0-9a-fA-F]+ ';' 2289 * 2290 * [ WFC: Legal Character ] 2291 * Characters referred to using character references must match the 2292 * production for Char. 
2293 * 2294 * Returns the value parsed (as an int), 0 in case of error 2295 */ 2296 int 2297 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2298 int val = 0; 2299 int count = 0; 2300 2301 /* 2302 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2303 */ 2304 if ((RAW == '&') && (NXT(1) == '#') && 2305 (NXT(2) == 'x')) { 2306 SKIP(3); 2307 GROW; 2308 while (RAW != ';') { /* loop blocked by count */ 2309 if (count++ > 20) { 2310 count = 0; 2311 GROW; 2312 if (ctxt->instate == XML_PARSER_EOF) 2313 return(0); 2314 } 2315 if ((RAW >= '0') && (RAW <= '9')) 2316 val = val * 16 + (CUR - '0'); 2317 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2318 val = val * 16 + (CUR - 'a') + 10; 2319 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2320 val = val * 16 + (CUR - 'A') + 10; 2321 else { 2322 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2323 val = 0; 2324 break; 2325 } 2326 if (val > 0x110000) 2327 val = 0x110000; 2328 2329 NEXT; 2330 count++; 2331 } 2332 if (RAW == ';') { 2333 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2334 ctxt->input->col++; 2335 ctxt->nbChars ++; 2336 ctxt->input->cur++; 2337 } 2338 } else if ((RAW == '&') && (NXT(1) == '#')) { 2339 SKIP(2); 2340 GROW; 2341 while (RAW != ';') { /* loop blocked by count */ 2342 if (count++ > 20) { 2343 count = 0; 2344 GROW; 2345 if (ctxt->instate == XML_PARSER_EOF) 2346 return(0); 2347 } 2348 if ((RAW >= '0') && (RAW <= '9')) 2349 val = val * 10 + (CUR - '0'); 2350 else { 2351 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2352 val = 0; 2353 break; 2354 } 2355 if (val > 0x110000) 2356 val = 0x110000; 2357 2358 NEXT; 2359 count++; 2360 } 2361 if (RAW == ';') { 2362 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2363 ctxt->input->col++; 2364 ctxt->nbChars ++; 2365 ctxt->input->cur++; 2366 } 2367 } else { 2368 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2369 } 2370 2371 /* 2372 * [ WFC: Legal Character ] 2373 * Characters referred to 
using character references must match the 2374 * production for Char. 2375 */ 2376 if (val >= 0x110000) { 2377 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2378 "xmlParseCharRef: character reference out of bounds\n", 2379 val); 2380 } else if (IS_CHAR(val)) { 2381 return(val); 2382 } else { 2383 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2384 "xmlParseCharRef: invalid xmlChar value %d\n", 2385 val); 2386 } 2387 return(0); 2388 } 2389 2390 /** 2391 * xmlParseStringCharRef: 2392 * @ctxt: an XML parser context 2393 * @str: a pointer to an index in the string 2394 * 2395 * parse Reference declarations, variant parsing from a string rather 2396 * than an an input flow. 2397 * 2398 * [66] CharRef ::= '&#' [0-9]+ ';' | 2399 * '&#x' [0-9a-fA-F]+ ';' 2400 * 2401 * [ WFC: Legal Character ] 2402 * Characters referred to using character references must match the 2403 * production for Char. 2404 * 2405 * Returns the value parsed (as an int), 0 in case of error, str will be 2406 * updated to the current value of the index 2407 */ 2408 static int 2409 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2410 const xmlChar *ptr; 2411 xmlChar cur; 2412 int val = 0; 2413 2414 if ((str == NULL) || (*str == NULL)) return(0); 2415 ptr = *str; 2416 cur = *ptr; 2417 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2418 ptr += 3; 2419 cur = *ptr; 2420 while (cur != ';') { /* Non input consuming loop */ 2421 if ((cur >= '0') && (cur <= '9')) 2422 val = val * 16 + (cur - '0'); 2423 else if ((cur >= 'a') && (cur <= 'f')) 2424 val = val * 16 + (cur - 'a') + 10; 2425 else if ((cur >= 'A') && (cur <= 'F')) 2426 val = val * 16 + (cur - 'A') + 10; 2427 else { 2428 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2429 val = 0; 2430 break; 2431 } 2432 if (val > 0x110000) 2433 val = 0x110000; 2434 2435 ptr++; 2436 cur = *ptr; 2437 } 2438 if (cur == ';') 2439 ptr++; 2440 } else if ((cur == '&') && (ptr[1] == '#')){ 2441 ptr += 2; 2442 cur = *ptr; 2443 while (cur != 
';') { /* Non input consuming loops */ 2444 if ((cur >= '0') && (cur <= '9')) 2445 val = val * 10 + (cur - '0'); 2446 else { 2447 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2448 val = 0; 2449 break; 2450 } 2451 if (val > 0x110000) 2452 val = 0x110000; 2453 2454 ptr++; 2455 cur = *ptr; 2456 } 2457 if (cur == ';') 2458 ptr++; 2459 } else { 2460 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2461 return(0); 2462 } 2463 *str = ptr; 2464 2465 /* 2466 * [ WFC: Legal Character ] 2467 * Characters referred to using character references must match the 2468 * production for Char. 2469 */ 2470 if (val >= 0x110000) { 2471 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2472 "xmlParseStringCharRef: character reference out of bounds\n", 2473 val); 2474 } else if (IS_CHAR(val)) { 2475 return(val); 2476 } else { 2477 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2478 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2479 val); 2480 } 2481 return(0); 2482 } 2483 2484 /** 2485 * xmlParserHandlePEReference: 2486 * @ctxt: the parser context 2487 * 2488 * [69] PEReference ::= '%' Name ';' 2489 * 2490 * [ WFC: No Recursion ] 2491 * A parsed entity must not contain a recursive 2492 * reference to itself, either directly or indirectly. 2493 * 2494 * [ WFC: Entity Declared ] 2495 * In a document without any DTD, a document with only an internal DTD 2496 * subset which contains no parameter entity references, or a document 2497 * with "standalone='yes'", ... ... The declaration of a parameter 2498 * entity must precede any reference to it... 2499 * 2500 * [ VC: Entity Declared ] 2501 * In a document with an external subset or external parameter entities 2502 * with "standalone='no'", ... ... The declaration of a parameter entity 2503 * must precede any reference to it... 2504 * 2505 * [ WFC: In DTD ] 2506 * Parameter-entity references may only appear in the DTD. 2507 * NOTE: misleading but this is handled. 
2508 * 2509 * A PEReference may have been detected in the current input stream 2510 * the handling is done accordingly to 2511 * http://www.w3.org/TR/REC-xml#entproc 2512 * i.e. 2513 * - Included in literal in entity values 2514 * - Included as Parameter Entity reference within DTDs 2515 */ 2516 void 2517 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2518 switch(ctxt->instate) { 2519 case XML_PARSER_CDATA_SECTION: 2520 return; 2521 case XML_PARSER_COMMENT: 2522 return; 2523 case XML_PARSER_START_TAG: 2524 return; 2525 case XML_PARSER_END_TAG: 2526 return; 2527 case XML_PARSER_EOF: 2528 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2529 return; 2530 case XML_PARSER_PROLOG: 2531 case XML_PARSER_START: 2532 case XML_PARSER_MISC: 2533 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2534 return; 2535 case XML_PARSER_ENTITY_DECL: 2536 case XML_PARSER_CONTENT: 2537 case XML_PARSER_ATTRIBUTE_VALUE: 2538 case XML_PARSER_PI: 2539 case XML_PARSER_SYSTEM_LITERAL: 2540 case XML_PARSER_PUBLIC_LITERAL: 2541 /* we just ignore it there */ 2542 return; 2543 case XML_PARSER_EPILOG: 2544 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2545 return; 2546 case XML_PARSER_ENTITY_VALUE: 2547 /* 2548 * NOTE: in the case of entity values, we don't do the 2549 * substitution here since we need the literal 2550 * entity value to be able to save the internal 2551 * subset of the document. 2552 * This will be handled by xmlStringDecodeEntities 2553 */ 2554 return; 2555 case XML_PARSER_DTD: 2556 /* 2557 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2558 * In the internal DTD subset, parameter-entity references 2559 * can occur only where markup declarations can occur, not 2560 * within markup declarations. 
2561 * In that case this is handled in xmlParseMarkupDecl 2562 */ 2563 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2564 return; 2565 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2566 return; 2567 break; 2568 case XML_PARSER_IGNORE: 2569 return; 2570 } 2571 2572 xmlParsePEReference(ctxt); 2573 } 2574 2575 /* 2576 * Macro used to grow the current buffer. 2577 * buffer##_size is expected to be a size_t 2578 * mem_error: is expected to handle memory allocation failures 2579 */ 2580 #define growBuffer(buffer, n) { \ 2581 xmlChar *tmp; \ 2582 size_t new_size = buffer##_size * 2 + n; \ 2583 if (new_size < buffer##_size) goto mem_error; \ 2584 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2585 if (tmp == NULL) goto mem_error; \ 2586 buffer = tmp; \ 2587 buffer##_size = new_size; \ 2588 } 2589 2590 /** 2591 * xmlStringLenDecodeEntities: 2592 * @ctxt: the parser context 2593 * @str: the input string 2594 * @len: the string length 2595 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2596 * @end: an end marker xmlChar, 0 if none 2597 * @end2: an end marker xmlChar, 0 if none 2598 * @end3: an end marker xmlChar, 0 if none 2599 * 2600 * Takes a entity string content and process to do the adequate substitutions. 2601 * 2602 * [67] Reference ::= EntityRef | CharRef 2603 * 2604 * [69] PEReference ::= '%' Name ';' 2605 * 2606 * Returns A newly allocated string with the substitution done. The caller 2607 * must deallocate it ! 
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /* Recursion guard: 40 levels by default, 1024 with XML_PARSE_HUGE. */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: always substituted, whatever @what is */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            COPY_BUF(0,buffer,nbchars,val);
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: copy the first char of its content */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* expand the entity content recursively, then append it */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL)
                    goto int_error;

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* entity without content: emit the reference "&name;" */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                            "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEReferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                  "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }
                /*
                 * If the content is still NULL here, the call below
                 * returns NULL (it rejects a NULL string) and the whole
                 * decoding is aborted through int_error.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL)
                    goto int_error;
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* ordinary character: copy it through unchanged */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

/* mem_error reports the allocation failure, then shares the cleanup. */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 * Convenience wrapper around xmlStringLenDecodeEntities() which uses
 * xmlStrlen(@str) as the length.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !
 */
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
                        xmlChar end, xmlChar  end2, xmlChar end3) {
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
           end, end2, end3));
}

/************************************************************************
 *									*
 *		Commodity functions, cleanup needed ?			*
 *									*
 ************************************************************************/

/**
 * areBlanks:
 * @ctxt:  an XML parser context
 * @str:  a xmlChar *
 * @len:  the size of @str
 * @blank_chars: we know the chars are blanks
 *
 * Is this a sequence of blank chars that one can ignore ?
 *
 * Returns 1 if ignorable 0 otherwise.
 */

static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                     int blank_chars) {
    int i, ret;
    xmlNodePtr lastChild;

    /*
     * Don't spend time trying to differentiate them, the same callback is
     * used !
     */
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
        return(0);

    /*
     * Check for xml:space value.
     */
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
        (*(ctxt->space) == -2))
        return(0);

    /*
     * Check that the string is made of blanks
     */
    if (blank_chars == 0) {
        for (i = 0;i < len;i++)
            if (!(IS_BLANK_CH(str[i]))) return(0);
    }

    /*
     * Look if the element is mixed content in the DTD if available
     */
    if (ctxt->node == NULL) return(0);
    if (ctxt->myDoc != NULL) {
        ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
        if (ret == 0) return(1);
        if (ret == 1) return(0);
    }

    /*
     * Otherwise, heuristic :-\
     * Based on the character following the blanks in the input and on
     * the children already attached to the current node.
     */
    if ((RAW != '<') && (RAW != 0xD)) return(0);
    if ((ctxt->node->children == NULL) &&
        (RAW == '<') && (NXT(1) == '/')) return(0);

    lastChild = xmlGetLastChild(ctxt->node);
    if (lastChild == NULL) {
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
            (ctxt->node->content != NULL)) return(0);
    } else if (xmlNodeIsText(lastChild))
        return(0);
    else if ((ctxt->node->children != NULL) &&
             (xmlNodeIsText(ctxt->node->children)))
        return(0);
    return(1);
}

/************************************************************************
 *									*
 *		Extra stuff for namespace support			*
 *	Relates to http://www.w3.org/TR/WD-xml-names			*
 *									*
 ************************************************************************/

/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  an XML parser context
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any.
2904 */ 2905 2906 xmlChar * 2907 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2908 xmlChar buf[XML_MAX_NAMELEN + 5]; 2909 xmlChar *buffer = NULL; 2910 int len = 0; 2911 int max = XML_MAX_NAMELEN; 2912 xmlChar *ret = NULL; 2913 const xmlChar *cur = name; 2914 int c; 2915 2916 if (prefix == NULL) return(NULL); 2917 *prefix = NULL; 2918 2919 if (cur == NULL) return(NULL); 2920 2921 #ifndef XML_XML_NAMESPACE 2922 /* xml: prefix is not really a namespace */ 2923 if ((cur[0] == 'x') && (cur[1] == 'm') && 2924 (cur[2] == 'l') && (cur[3] == ':')) 2925 return(xmlStrdup(name)); 2926 #endif 2927 2928 /* nasty but well=formed */ 2929 if (cur[0] == ':') 2930 return(xmlStrdup(name)); 2931 2932 c = *cur++; 2933 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2934 buf[len++] = c; 2935 c = *cur++; 2936 } 2937 if (len >= max) { 2938 /* 2939 * Okay someone managed to make a huge name, so he's ready to pay 2940 * for the processing speed. 2941 */ 2942 max = len * 2; 2943 2944 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2945 if (buffer == NULL) { 2946 xmlErrMemory(ctxt, NULL); 2947 return(NULL); 2948 } 2949 memcpy(buffer, buf, len); 2950 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2951 if (len + 10 > max) { 2952 xmlChar *tmp; 2953 2954 max *= 2; 2955 tmp = (xmlChar *) xmlRealloc(buffer, 2956 max * sizeof(xmlChar)); 2957 if (tmp == NULL) { 2958 xmlFree(buffer); 2959 xmlErrMemory(ctxt, NULL); 2960 return(NULL); 2961 } 2962 buffer = tmp; 2963 } 2964 buffer[len++] = c; 2965 c = *cur++; 2966 } 2967 buffer[len] = 0; 2968 } 2969 2970 if ((c == ':') && (*cur == 0)) { 2971 if (buffer != NULL) 2972 xmlFree(buffer); 2973 *prefix = NULL; 2974 return(xmlStrdup(name)); 2975 } 2976 2977 if (buffer == NULL) 2978 ret = xmlStrndup(buf, len); 2979 else { 2980 ret = buffer; 2981 buffer = NULL; 2982 max = XML_MAX_NAMELEN; 2983 } 2984 2985 2986 if (c == ':') { 2987 c = *cur; 2988 *prefix = ret; 2989 if (c == 0) { 2990 
return(xmlStrndup(BAD_CAST "", 0)); 2991 } 2992 len = 0; 2993 2994 /* 2995 * Check that the first character is proper to start 2996 * a new name 2997 */ 2998 if (!(((c >= 0x61) && (c <= 0x7A)) || 2999 ((c >= 0x41) && (c <= 0x5A)) || 3000 (c == '_') || (c == ':'))) { 3001 int l; 3002 int first = CUR_SCHAR(cur, l); 3003 3004 if (!IS_LETTER(first) && (first != '_')) { 3005 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3006 "Name %s is not XML Namespace compliant\n", 3007 name); 3008 } 3009 } 3010 cur++; 3011 3012 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3013 buf[len++] = c; 3014 c = *cur++; 3015 } 3016 if (len >= max) { 3017 /* 3018 * Okay someone managed to make a huge name, so he's ready to pay 3019 * for the processing speed. 3020 */ 3021 max = len * 2; 3022 3023 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3024 if (buffer == NULL) { 3025 xmlErrMemory(ctxt, NULL); 3026 return(NULL); 3027 } 3028 memcpy(buffer, buf, len); 3029 while (c != 0) { /* tested bigname2.xml */ 3030 if (len + 10 > max) { 3031 xmlChar *tmp; 3032 3033 max *= 2; 3034 tmp = (xmlChar *) xmlRealloc(buffer, 3035 max * sizeof(xmlChar)); 3036 if (tmp == NULL) { 3037 xmlErrMemory(ctxt, NULL); 3038 xmlFree(buffer); 3039 return(NULL); 3040 } 3041 buffer = tmp; 3042 } 3043 buffer[len++] = c; 3044 c = *cur++; 3045 } 3046 buffer[len] = 0; 3047 } 3048 3049 if (buffer == NULL) 3050 ret = xmlStrndup(buf, len); 3051 else { 3052 ret = buffer; 3053 } 3054 } 3055 3056 return(ret); 3057 } 3058 3059 /************************************************************************ 3060 * * 3061 * The parser itself * 3062 * Relates to http://www.w3.org/TR/REC-xml * 3063 * * 3064 ************************************************************************/ 3065 3066 /************************************************************************ 3067 * * 3068 * Routines to parse Name, NCName and NmToken * 3069 * * 3070 ************************************************************************/ 3071 #ifdef 
DEBUG 3072 static unsigned long nbParseName = 0; 3073 static unsigned long nbParseNmToken = 0; 3074 static unsigned long nbParseNCName = 0; 3075 static unsigned long nbParseNCNameComplex = 0; 3076 static unsigned long nbParseNameComplex = 0; 3077 static unsigned long nbParseStringName = 0; 3078 #endif 3079 3080 /* 3081 * The two following functions are related to the change of accepted 3082 * characters for Name and NmToken in the Revision 5 of XML-1.0 3083 * They correspond to the modified production [4] and the new production [4a] 3084 * changes in that revision. Also note that the macros used for the 3085 * productions Letter, Digit, CombiningChar and Extender are not needed 3086 * anymore. 3087 * We still keep compatibility to pre-revision5 parsing semantic if the 3088 * new XML_PARSE_OLD10 option is given to the parser. 3089 */ 3090 static int 3091 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3092 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3093 /* 3094 * Use the new checks of production [4] [4a] amd [5] of the 3095 * Update 5 of XML-1.0 3096 */ 3097 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3098 (((c >= 'a') && (c <= 'z')) || 3099 ((c >= 'A') && (c <= 'Z')) || 3100 (c == '_') || (c == ':') || 3101 ((c >= 0xC0) && (c <= 0xD6)) || 3102 ((c >= 0xD8) && (c <= 0xF6)) || 3103 ((c >= 0xF8) && (c <= 0x2FF)) || 3104 ((c >= 0x370) && (c <= 0x37D)) || 3105 ((c >= 0x37F) && (c <= 0x1FFF)) || 3106 ((c >= 0x200C) && (c <= 0x200D)) || 3107 ((c >= 0x2070) && (c <= 0x218F)) || 3108 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3109 ((c >= 0x3001) && (c <= 0xD7FF)) || 3110 ((c >= 0xF900) && (c <= 0xFDCF)) || 3111 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3112 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3113 return(1); 3114 } else { 3115 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3116 return(1); 3117 } 3118 return(0); 3119 } 3120 3121 static int 3122 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3123 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3124 /* 3125 * Use 
the new checks of production [4] [4a] amd [5] of the 3126 * Update 5 of XML-1.0 3127 */ 3128 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3129 (((c >= 'a') && (c <= 'z')) || 3130 ((c >= 'A') && (c <= 'Z')) || 3131 ((c >= '0') && (c <= '9')) || /* !start */ 3132 (c == '_') || (c == ':') || 3133 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3134 ((c >= 0xC0) && (c <= 0xD6)) || 3135 ((c >= 0xD8) && (c <= 0xF6)) || 3136 ((c >= 0xF8) && (c <= 0x2FF)) || 3137 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3138 ((c >= 0x370) && (c <= 0x37D)) || 3139 ((c >= 0x37F) && (c <= 0x1FFF)) || 3140 ((c >= 0x200C) && (c <= 0x200D)) || 3141 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3142 ((c >= 0x2070) && (c <= 0x218F)) || 3143 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3144 ((c >= 0x3001) && (c <= 0xD7FF)) || 3145 ((c >= 0xF900) && (c <= 0xFDCF)) || 3146 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3147 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3148 return(1); 3149 } else { 3150 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3151 (c == '.') || (c == '-') || 3152 (c == '_') || (c == ':') || 3153 (IS_COMBINING(c)) || 3154 (IS_EXTENDER(c))) 3155 return(1); 3156 } 3157 return(0); 3158 } 3159 3160 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3161 int *len, int *alloc, int normalize); 3162 3163 static const xmlChar * 3164 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3165 int len = 0, l; 3166 int c; 3167 int count = 0; 3168 3169 #ifdef DEBUG 3170 nbParseNameComplex++; 3171 #endif 3172 3173 /* 3174 * Handler for more complex cases 3175 */ 3176 GROW; 3177 if (ctxt->instate == XML_PARSER_EOF) 3178 return(NULL); 3179 c = CUR_CHAR(l); 3180 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3181 /* 3182 * Use the new checks of production [4] [4a] amd [5] of the 3183 * Update 5 of XML-1.0 3184 */ 3185 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3186 (!(((c >= 'a') && (c <= 'z')) || 3187 ((c >= 'A') && (c <= 'Z')) || 3188 (c == '_') || (c == ':') || 3189 
((c >= 0xC0) && (c <= 0xD6)) || 3190 ((c >= 0xD8) && (c <= 0xF6)) || 3191 ((c >= 0xF8) && (c <= 0x2FF)) || 3192 ((c >= 0x370) && (c <= 0x37D)) || 3193 ((c >= 0x37F) && (c <= 0x1FFF)) || 3194 ((c >= 0x200C) && (c <= 0x200D)) || 3195 ((c >= 0x2070) && (c <= 0x218F)) || 3196 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3197 ((c >= 0x3001) && (c <= 0xD7FF)) || 3198 ((c >= 0xF900) && (c <= 0xFDCF)) || 3199 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3200 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3201 return(NULL); 3202 } 3203 len += l; 3204 NEXTL(l); 3205 c = CUR_CHAR(l); 3206 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3207 (((c >= 'a') && (c <= 'z')) || 3208 ((c >= 'A') && (c <= 'Z')) || 3209 ((c >= '0') && (c <= '9')) || /* !start */ 3210 (c == '_') || (c == ':') || 3211 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3212 ((c >= 0xC0) && (c <= 0xD6)) || 3213 ((c >= 0xD8) && (c <= 0xF6)) || 3214 ((c >= 0xF8) && (c <= 0x2FF)) || 3215 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3216 ((c >= 0x370) && (c <= 0x37D)) || 3217 ((c >= 0x37F) && (c <= 0x1FFF)) || 3218 ((c >= 0x200C) && (c <= 0x200D)) || 3219 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3220 ((c >= 0x2070) && (c <= 0x218F)) || 3221 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3222 ((c >= 0x3001) && (c <= 0xD7FF)) || 3223 ((c >= 0xF900) && (c <= 0xFDCF)) || 3224 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3225 ((c >= 0x10000) && (c <= 0xEFFFF)) 3226 )) { 3227 if (count++ > XML_PARSER_CHUNK_SIZE) { 3228 count = 0; 3229 GROW; 3230 if (ctxt->instate == XML_PARSER_EOF) 3231 return(NULL); 3232 } 3233 len += l; 3234 NEXTL(l); 3235 c = CUR_CHAR(l); 3236 } 3237 } else { 3238 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3239 (!IS_LETTER(c) && (c != '_') && 3240 (c != ':'))) { 3241 return(NULL); 3242 } 3243 len += l; 3244 NEXTL(l); 3245 c = CUR_CHAR(l); 3246 3247 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3248 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3249 (c == '.') || (c == 
'-') || 3250 (c == '_') || (c == ':') || 3251 (IS_COMBINING(c)) || 3252 (IS_EXTENDER(c)))) { 3253 if (count++ > XML_PARSER_CHUNK_SIZE) { 3254 count = 0; 3255 GROW; 3256 if (ctxt->instate == XML_PARSER_EOF) 3257 return(NULL); 3258 } 3259 len += l; 3260 NEXTL(l); 3261 c = CUR_CHAR(l); 3262 } 3263 } 3264 if ((len > XML_MAX_NAME_LENGTH) && 3265 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3266 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3267 return(NULL); 3268 } 3269 if (ctxt->input->cur - ctxt->input->base < len) { 3270 /* 3271 * There were a couple of bugs where PERefs lead to to a change 3272 * of the buffer. Check the buffer size to avoid passing an invalid 3273 * pointer to xmlDictLookup. 3274 */ 3275 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 3276 "unexpected change of input buffer"); 3277 return (NULL); 3278 } 3279 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3280 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3281 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3282 } 3283 3284 /** 3285 * xmlParseName: 3286 * @ctxt: an XML parser context 3287 * 3288 * parse an XML name. 3289 * 3290 * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | 3291 * CombiningChar | Extender 3292 * 3293 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3294 * 3295 * [6] Names ::= Name (#x20 Name)* 3296 * 3297 * Returns the Name parsed or NULL 3298 */ 3299 3300 const xmlChar * 3301 xmlParseName(xmlParserCtxtPtr ctxt) { 3302 const xmlChar *in; 3303 const xmlChar *ret; 3304 int count = 0; 3305 3306 GROW; 3307 3308 #ifdef DEBUG 3309 nbParseName++; 3310 #endif 3311 3312 /* 3313 * Accelerator for simple ASCII names 3314 */ 3315 in = ctxt->input->cur; 3316 if (((*in >= 0x61) && (*in <= 0x7A)) || 3317 ((*in >= 0x41) && (*in <= 0x5A)) || 3318 (*in == '_') || (*in == ':')) { 3319 in++; 3320 while (((*in >= 0x61) && (*in <= 0x7A)) || 3321 ((*in >= 0x41) && (*in <= 0x5A)) || 3322 ((*in >= 0x30) && (*in <= 0x39)) || 3323 (*in == '_') || (*in == '-') || 3324 (*in == ':') || (*in == '.')) 3325 in++; 3326 if ((*in > 0) && (*in < 0x80)) { 3327 count = in - ctxt->input->cur; 3328 if ((count > XML_MAX_NAME_LENGTH) && 3329 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3330 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3331 return(NULL); 3332 } 3333 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3334 ctxt->input->cur = in; 3335 ctxt->nbChars += count; 3336 ctxt->input->col += count; 3337 if (ret == NULL) 3338 xmlErrMemory(ctxt, NULL); 3339 return(ret); 3340 } 3341 } 3342 /* accelerator for special cases */ 3343 return(xmlParseNameComplex(ctxt)); 3344 } 3345 3346 static const xmlChar * 3347 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3348 int len = 0, l; 3349 int c; 3350 int count = 0; 3351 size_t startPosition = 0; 3352 3353 #ifdef DEBUG 3354 nbParseNCNameComplex++; 3355 #endif 3356 3357 /* 3358 * Handler for more complex cases 3359 */ 3360 GROW; 3361 startPosition = CUR_PTR - BASE_PTR; 3362 c = CUR_CHAR(l); 3363 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3364 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3365 return(NULL); 3366 } 3367 3368 while ((c != ' ') && (c != '>') && (c != '/') 
&& /* test bigname.xml */ 3369 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3370 if (count++ > XML_PARSER_CHUNK_SIZE) { 3371 if ((len > XML_MAX_NAME_LENGTH) && 3372 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3373 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3374 return(NULL); 3375 } 3376 count = 0; 3377 GROW; 3378 if (ctxt->instate == XML_PARSER_EOF) 3379 return(NULL); 3380 } 3381 len += l; 3382 NEXTL(l); 3383 c = CUR_CHAR(l); 3384 if (c == 0) { 3385 count = 0; 3386 /* 3387 * when shrinking to extend the buffer we really need to preserve 3388 * the part of the name we already parsed. Hence rolling back 3389 * by current length. 3390 */ 3391 ctxt->input->cur -= l; 3392 GROW; 3393 if (ctxt->instate == XML_PARSER_EOF) 3394 return(NULL); 3395 ctxt->input->cur += l; 3396 c = CUR_CHAR(l); 3397 } 3398 } 3399 if ((len > XML_MAX_NAME_LENGTH) && 3400 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3401 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3402 return(NULL); 3403 } 3404 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); 3405 } 3406 3407 /** 3408 * xmlParseNCName: 3409 * @ctxt: an XML parser context 3410 * @len: length of the string parsed 3411 * 3412 * parse an XML name. 3413 * 3414 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3415 * CombiningChar | Extender 3416 * 3417 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3418 * 3419 * Returns the Name parsed or NULL 3420 */ 3421 3422 static const xmlChar * 3423 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3424 const xmlChar *in, *e; 3425 const xmlChar *ret; 3426 int count = 0; 3427 3428 #ifdef DEBUG 3429 nbParseNCName++; 3430 #endif 3431 3432 /* 3433 * Accelerator for simple ASCII names 3434 */ 3435 in = ctxt->input->cur; 3436 e = ctxt->input->end; 3437 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3438 ((*in >= 0x41) && (*in <= 0x5A)) || 3439 (*in == '_')) && (in < e)) { 3440 in++; 3441 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3442 ((*in >= 0x41) && (*in <= 0x5A)) || 3443 ((*in >= 0x30) && (*in <= 0x39)) || 3444 (*in == '_') || (*in == '-') || 3445 (*in == '.')) && (in < e)) 3446 in++; 3447 if (in >= e) 3448 goto complex; 3449 if ((*in > 0) && (*in < 0x80)) { 3450 count = in - ctxt->input->cur; 3451 if ((count > XML_MAX_NAME_LENGTH) && 3452 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3453 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3454 return(NULL); 3455 } 3456 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3457 ctxt->input->cur = in; 3458 ctxt->nbChars += count; 3459 ctxt->input->col += count; 3460 if (ret == NULL) { 3461 xmlErrMemory(ctxt, NULL); 3462 } 3463 return(ret); 3464 } 3465 } 3466 complex: 3467 return(xmlParseNCNameComplex(ctxt)); 3468 } 3469 3470 /** 3471 * xmlParseNameAndCompare: 3472 * @ctxt: an XML parser context 3473 * 3474 * parse an XML name and compares for match 3475 * (specialized for endtag parsing) 3476 * 3477 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3478 * and the name for mismatch 3479 */ 3480 3481 static const xmlChar * 3482 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3483 register const xmlChar *cmp = other; 3484 register const xmlChar *in; 3485 const xmlChar *ret; 3486 3487 GROW; 3488 if (ctxt->instate == XML_PARSER_EOF) 3489 return(NULL); 
3490 3491 in = ctxt->input->cur; 3492 while (*in != 0 && *in == *cmp) { 3493 ++in; 3494 ++cmp; 3495 ctxt->input->col++; 3496 } 3497 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3498 /* success */ 3499 ctxt->input->cur = in; 3500 return (const xmlChar*) 1; 3501 } 3502 /* failure (or end of input buffer), check with full function */ 3503 ret = xmlParseName (ctxt); 3504 /* strings coming from the dictionary direct compare possible */ 3505 if (ret == other) { 3506 return (const xmlChar*) 1; 3507 } 3508 return ret; 3509 } 3510 3511 /** 3512 * xmlParseStringName: 3513 * @ctxt: an XML parser context 3514 * @str: a pointer to the string pointer (IN/OUT) 3515 * 3516 * parse an XML name. 3517 * 3518 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3519 * CombiningChar | Extender 3520 * 3521 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3522 * 3523 * [6] Names ::= Name (#x20 Name)* 3524 * 3525 * Returns the Name parsed or NULL. The @str pointer 3526 * is updated to the current location in the string. 3527 */ 3528 3529 static xmlChar * 3530 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3531 xmlChar buf[XML_MAX_NAMELEN + 5]; 3532 const xmlChar *cur = *str; 3533 int len = 0, l; 3534 int c; 3535 3536 #ifdef DEBUG 3537 nbParseStringName++; 3538 #endif 3539 3540 c = CUR_SCHAR(cur, l); 3541 if (!xmlIsNameStartChar(ctxt, c)) { 3542 return(NULL); 3543 } 3544 3545 COPY_BUF(l,buf,len,c); 3546 cur += l; 3547 c = CUR_SCHAR(cur, l); 3548 while (xmlIsNameChar(ctxt, c)) { 3549 COPY_BUF(l,buf,len,c); 3550 cur += l; 3551 c = CUR_SCHAR(cur, l); 3552 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3553 /* 3554 * Okay someone managed to make a huge name, so he's ready to pay 3555 * for the processing speed. 
3556 */ 3557 xmlChar *buffer; 3558 int max = len * 2; 3559 3560 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3561 if (buffer == NULL) { 3562 xmlErrMemory(ctxt, NULL); 3563 return(NULL); 3564 } 3565 memcpy(buffer, buf, len); 3566 while (xmlIsNameChar(ctxt, c)) { 3567 if (len + 10 > max) { 3568 xmlChar *tmp; 3569 3570 if ((len > XML_MAX_NAME_LENGTH) && 3571 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3572 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3573 xmlFree(buffer); 3574 return(NULL); 3575 } 3576 max *= 2; 3577 tmp = (xmlChar *) xmlRealloc(buffer, 3578 max * sizeof(xmlChar)); 3579 if (tmp == NULL) { 3580 xmlErrMemory(ctxt, NULL); 3581 xmlFree(buffer); 3582 return(NULL); 3583 } 3584 buffer = tmp; 3585 } 3586 COPY_BUF(l,buffer,len,c); 3587 cur += l; 3588 c = CUR_SCHAR(cur, l); 3589 } 3590 buffer[len] = 0; 3591 *str = cur; 3592 return(buffer); 3593 } 3594 } 3595 if ((len > XML_MAX_NAME_LENGTH) && 3596 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3597 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3598 return(NULL); 3599 } 3600 *str = cur; 3601 return(xmlStrndup(buf, len)); 3602 } 3603 3604 /** 3605 * xmlParseNmtoken: 3606 * @ctxt: an XML parser context 3607 * 3608 * parse an XML Nmtoken. 
*
 * [7] Nmtoken ::= (NameChar)+
 *
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
 *
 * Short tokens are collected in a stack buffer and duplicated with
 * xmlStrndup(); once XML_MAX_NAMELEN bytes have been collected the
 * function switches to a heap buffer that is returned directly. In both
 * cases the caller owns the returned string.
 *
 * Returns the Nmtoken parsed or NULL
 */

xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    int len = 0, l;
    int c;
    int count = 0;

#ifdef DEBUG
    nbParseNmToken++;
#endif

    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return(NULL);
    c = CUR_CHAR(l);

    while (xmlIsNameChar(ctxt, c)) {
        if (count++ > XML_PARSER_CHUNK_SIZE) {
            count = 0;
            GROW;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        c = CUR_CHAR(l);
        /* a zero character: try to get more input before giving up */
        if (c == 0) {
            count = 0;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            c = CUR_CHAR(l);
        }
        if (len >= XML_MAX_NAMELEN) {
            /*
             * Okay someone managed to make a huge token, so he's ready to pay
             * for the processing speed.
             */
            xmlChar *buffer;
            int max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (xmlIsNameChar(ctxt, c)) {
                if (count++ > XML_PARSER_CHUNK_SIZE) {
                    count = 0;
                    GROW;
                    if (ctxt->instate == XML_PARSER_EOF) {
                        xmlFree(buffer);
                        return(NULL);
                    }
                }
                /* keep at least 10 bytes of headroom for the next char */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    /* the limit is tested on the capacity, when growing */
                    if ((max > XML_MAX_NAME_LENGTH) &&
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
                        xmlFree(buffer);
                        return(NULL);
                    }
                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                 max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                COPY_BUF(l,buffer,len,c);
                NEXTL(l);
                c = CUR_CHAR(l);
            }
            buffer[len] = 0;
            return(buffer);
        }
    }
    /* [7] requires at least one NameChar */
    if (len == 0)
        return(NULL);
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
        return(NULL);
    }
    return(xmlStrndup(buf, len));
}

/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *                     "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The function works in three steps: the quoted literal is copied into a
 * buffer, the buffer is checked for '%' and '&' that do not start a
 * well-formed reference, then parameter entity references are substituted
 * with xmlStringDecodeEntities().
 *
 * @orig is only written on success; it then receives ownership of the
 * raw (unsubstituted) buffer. On every error path the buffer is freed and
 * @orig is left untouched.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* the opening quote decides which quote terminates the literal */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember the input we started in, see the loop condition below */
    input = ctxt->input;
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
            (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
        /* keep 5 bytes of headroom: one encoded char plus terminator */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                goto error;
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);

        GROW;
        c = CUR_CHAR(l);
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;
    if (c != stop) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        goto error;
    }
    /* skip the closing quote */
    NEXT;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;
            int nameOk = 0;

            cur++;
            /* only the syntax matters here, the name itself is dropped */
            name = xmlParseStringName(ctxt, &cur);
            if (name != NULL) {
                nameOk = 1;
                xmlFree(name);
            }
            if ((nameOk == 0) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
                goto error;
            }
            /*
             * A '%' reference is rejected when inSubset is 1 and only
             * one input is on the stack (inputNr == 1).
             */
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
                goto error;
            }
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     *
     * NOTE: 4.4.7 Bypassed
     * When a general entity reference appears in the EntityValue in
     * an entity declaration, it is bypassed and left as is.
     * so XML_SUBSTITUTE_REF is not set here.
     */
    ++ctxt->depth;
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                  0, 0, 0);
    --ctxt->depth;
    if (orig != NULL) {
        /* hand the raw buffer over to the caller instead of freeing it */
        *orig = buf;
        buf = NULL;
    }

error:
    /* shared exit: ret is still NULL on every error path */
    if (buf != NULL)
        xmlFree(buf);
    return(ret);
}

/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length in bytes of the returned value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * Every whitespace character is stored as #x20. With @normalize set,
 * leading spaces are dropped, runs of spaces are collapsed to one and
 * trailing spaces are removed.
 *
 * NOTE(review): growBuffer() is a macro defined elsewhere in this file;
 * presumably it enlarges buf/buf_size and jumps to mem_error on failure -
 * confirm against its definition.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* the opening quote is also the terminating one */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            /* NOTE(review): this path also reports a memory error */
            goto mem_error;
        }
        if (c == 0) break;
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    /* predefined entity: a single character of content */
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        /* keep '&' escaped when not replacing entities */
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    /* substitution requested: inline the decoded content */
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        --ctxt->depth;
                        if (rep != NULL) {
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                /* whitespace in the replacement -> #x20 */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    /* no substitution: copy the reference itself */
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL) && (ent->checked == 0)) {
                        unsigned long oldnbent = ctxt->nbentities, diff;

                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);
                        --ctxt->depth;

                        /*
                         * ent->checked holds twice the number of entities
                         * counted while decoding (plus one, capped), with
                         * the low bit set below when the replacement
                         * contains '<'.
                         */
                        diff = ctxt->nbentities - oldnbent + 1;
                        if (diff > INT_MAX / 2)
                            diff = INT_MAX / 2;
                        ent->checked = diff * 2;
                        if (rep != NULL) {
                            if (xmlStrchr(rep, '<'))
                                ent->checked |= 1;
                            xmlFree(rep);
                            rep = NULL;
                        } else {
                            /* decoding failed: empty the entity content */
                            ent->content[0] = 0;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /* when normalizing, leading blanks are skipped entirely */
                if ((len != 0) || (!normalize)) {
                    /* when normalizing, only the first blank of a run */
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* strip the trailing blanks left by normalization */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /*
     * Diagnose why the loop stopped. Note that these errors are only
     * reported; the value collected so far is still returned.
     */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    /* rep is non-NULL here only if a copy loop was interrupted */
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}

/**
 * xmlParseAttValue:
 * @ctxt:  an XML parser context
 *
 * parse a value for an attribute
 * Note: the parser won't do substitution of entities here, this
 * will be handled later in xmlStringGetNodeList
 *
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
 *                   "'" ([^<&'] | Reference)* "'"
 *
 * 3.3.3 Attribute-Value Normalization:
 * Before the value of an attribute is passed to the application or
 * checked for validity, the XML processor must normalize it as follows:
 * - a character reference is processed by appending the referenced
 *   character to the attribute value
 * - an entity reference is processed by recursively processing the
 *   replacement text of the entity
 * - a whitespace character
(#x20, #xD, #xA, #x9) is processed by 4125 * appending #x20 to the normalized value, except that only a single 4126 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4127 * parsed entity or the literal entity value of an internal parsed entity 4128 * - other characters are processed by appending them to the normalized value 4129 * If the declared value is not CDATA, then the XML processor must further 4130 * process the normalized attribute value by discarding any leading and 4131 * trailing space (#x20) characters, and by replacing sequences of space 4132 * (#x20) characters by a single space (#x20) character. 4133 * All attributes for which no declaration has been read should be treated 4134 * by a non-validating parser as if declared CDATA. 4135 * 4136 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4137 */ 4138 4139 4140 xmlChar * 4141 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4142 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4143 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4144 } 4145 4146 /** 4147 * xmlParseSystemLiteral: 4148 * @ctxt: an XML parser context 4149 * 4150 * parse an XML Literal 4151 * 4152 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4153 * 4154 * Returns the SystemLiteral parsed or NULL 4155 */ 4156 4157 xmlChar * 4158 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4159 xmlChar *buf = NULL; 4160 int len = 0; 4161 int size = XML_PARSER_BUFFER_SIZE; 4162 int cur, l; 4163 xmlChar stop; 4164 int state = ctxt->instate; 4165 int count = 0; 4166 4167 SHRINK; 4168 if (RAW == '"') { 4169 NEXT; 4170 stop = '"'; 4171 } else if (RAW == '\'') { 4172 NEXT; 4173 stop = '\''; 4174 } else { 4175 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4176 return(NULL); 4177 } 4178 4179 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4180 if (buf == NULL) { 4181 xmlErrMemory(ctxt, NULL); 4182 return(NULL); 4183 } 4184 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 
4185 cur = CUR_CHAR(l); 4186 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4187 if (len + 5 >= size) { 4188 xmlChar *tmp; 4189 4190 if ((size > XML_MAX_NAME_LENGTH) && 4191 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4192 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4193 xmlFree(buf); 4194 ctxt->instate = (xmlParserInputState) state; 4195 return(NULL); 4196 } 4197 size *= 2; 4198 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4199 if (tmp == NULL) { 4200 xmlFree(buf); 4201 xmlErrMemory(ctxt, NULL); 4202 ctxt->instate = (xmlParserInputState) state; 4203 return(NULL); 4204 } 4205 buf = tmp; 4206 } 4207 count++; 4208 if (count > 50) { 4209 GROW; 4210 count = 0; 4211 if (ctxt->instate == XML_PARSER_EOF) { 4212 xmlFree(buf); 4213 return(NULL); 4214 } 4215 } 4216 COPY_BUF(l,buf,len,cur); 4217 NEXTL(l); 4218 cur = CUR_CHAR(l); 4219 if (cur == 0) { 4220 GROW; 4221 SHRINK; 4222 cur = CUR_CHAR(l); 4223 } 4224 } 4225 buf[len] = 0; 4226 ctxt->instate = (xmlParserInputState) state; 4227 if (!IS_CHAR(cur)) { 4228 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4229 } else { 4230 NEXT; 4231 } 4232 return(buf); 4233 } 4234 4235 /** 4236 * xmlParsePubidLiteral: 4237 * @ctxt: an XML parser context 4238 * 4239 * parse an XML public literal 4240 * 4241 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4242 * 4243 * Returns the PubidLiteral parsed or NULL. 
4244 */ 4245 4246 xmlChar * 4247 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4248 xmlChar *buf = NULL; 4249 int len = 0; 4250 int size = XML_PARSER_BUFFER_SIZE; 4251 xmlChar cur; 4252 xmlChar stop; 4253 int count = 0; 4254 xmlParserInputState oldstate = ctxt->instate; 4255 4256 SHRINK; 4257 if (RAW == '"') { 4258 NEXT; 4259 stop = '"'; 4260 } else if (RAW == '\'') { 4261 NEXT; 4262 stop = '\''; 4263 } else { 4264 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4265 return(NULL); 4266 } 4267 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4268 if (buf == NULL) { 4269 xmlErrMemory(ctxt, NULL); 4270 return(NULL); 4271 } 4272 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4273 cur = CUR; 4274 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4275 if (len + 1 >= size) { 4276 xmlChar *tmp; 4277 4278 if ((size > XML_MAX_NAME_LENGTH) && 4279 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4280 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4281 xmlFree(buf); 4282 return(NULL); 4283 } 4284 size *= 2; 4285 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4286 if (tmp == NULL) { 4287 xmlErrMemory(ctxt, NULL); 4288 xmlFree(buf); 4289 return(NULL); 4290 } 4291 buf = tmp; 4292 } 4293 buf[len++] = cur; 4294 count++; 4295 if (count > 50) { 4296 GROW; 4297 count = 0; 4298 if (ctxt->instate == XML_PARSER_EOF) { 4299 xmlFree(buf); 4300 return(NULL); 4301 } 4302 } 4303 NEXT; 4304 cur = CUR; 4305 if (cur == 0) { 4306 GROW; 4307 SHRINK; 4308 cur = CUR; 4309 } 4310 } 4311 buf[len] = 0; 4312 if (cur != stop) { 4313 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4314 } else { 4315 NEXT; 4316 } 4317 ctxt->instate = oldstate; 4318 return(buf); 4319 } 4320 4321 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4322 4323 /* 4324 * used for the test in the inner loop of the char data testing 4325 */ 4326 static const unsigned char test_char_data[256] = { 4327 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4328 0x00, 0x09, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4329 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4331 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4332 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4333 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4334 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4335 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4336 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4337 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4338 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4339 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4340 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4341 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4342 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4343 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4344 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4345 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4346 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4347 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4348 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4349 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4350 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4351 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4352 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4353 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4354 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4355 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4356 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4357 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4358 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4359 }; 4360 4361 /** 4362 * xmlParseCharData: 4363 * @ctxt: an XML parser context 4364 * @cdata: int indicating whether we are within a CDATA section 4365 * 4366 * parse a CharData section. 4367 * if we are within a CDATA section ']]>' marks an end of section. 
*
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an accelerated path scans the input in place and
 * hands segments of it directly to the SAX callbacks without copying.
 * The scan gives up on the first byte it cannot handle (see the
 * terminating condition of the do/while loop) and falls back to
 * xmlParseCharDataComplex(), which is also used directly when @cdata is
 * non-zero.
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position saved so it can be restored before the complex fallback */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* first skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds before the next markup:
                 * deliver them, letting areBlanks() choose between the
                 * ignorableWhitespace and characters callbacks when the
                 * two differ.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* skip every byte flagged as plain data in test_char_data */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in character data */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in + 1;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what was scanned since the current input position */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /* segment starts with a blank: it may be ignorable */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* delivered: move the saved position forward */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: the input position is moved onto the
                     * LF, so the CR is not part of the next segment.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: step back and let the checks below see it */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Fall back to the complex parser from the last delivered position:
     * line/col are reset to the values saved at entry or after the last
     * callback, since the scan above may have advanced them further.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * 
xmlParseCharDataComplex: 4521 * @ctxt: an XML parser context 4522 * @cdata: int indicating whether we are within a CDATA section 4523 * 4524 * parse a CharData section.this is the fallback function 4525 * of xmlParseCharData() when the parsing requires handling 4526 * of non-ASCII characters. 4527 */ 4528 static void 4529 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4530 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4531 int nbchar = 0; 4532 int cur, l; 4533 int count = 0; 4534 4535 SHRINK; 4536 GROW; 4537 cur = CUR_CHAR(l); 4538 while ((cur != '<') && /* checked */ 4539 (cur != '&') && 4540 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4541 if ((cur == ']') && (NXT(1) == ']') && 4542 (NXT(2) == '>')) { 4543 if (cdata) break; 4544 else { 4545 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4546 } 4547 } 4548 COPY_BUF(l,buf,nbchar,cur); 4549 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4550 buf[nbchar] = 0; 4551 4552 /* 4553 * OK the segment is to be consumed as chars. 4554 */ 4555 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4556 if (areBlanks(ctxt, buf, nbchar, 0)) { 4557 if (ctxt->sax->ignorableWhitespace != NULL) 4558 ctxt->sax->ignorableWhitespace(ctxt->userData, 4559 buf, nbchar); 4560 } else { 4561 if (ctxt->sax->characters != NULL) 4562 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4563 if ((ctxt->sax->characters != 4564 ctxt->sax->ignorableWhitespace) && 4565 (*ctxt->space == -1)) 4566 *ctxt->space = -2; 4567 } 4568 } 4569 nbchar = 0; 4570 /* something really bad happened in the SAX callback */ 4571 if (ctxt->instate != XML_PARSER_CONTENT) 4572 return; 4573 } 4574 count++; 4575 if (count > 50) { 4576 GROW; 4577 count = 0; 4578 if (ctxt->instate == XML_PARSER_EOF) 4579 return; 4580 } 4581 NEXTL(l); 4582 cur = CUR_CHAR(l); 4583 } 4584 if (nbchar != 0) { 4585 buf[nbchar] = 0; 4586 /* 4587 * OK the segment is to be consumed as chars. 
4588 */ 4589 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4590 if (areBlanks(ctxt, buf, nbchar, 0)) { 4591 if (ctxt->sax->ignorableWhitespace != NULL) 4592 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4593 } else { 4594 if (ctxt->sax->characters != NULL) 4595 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4596 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4597 (*ctxt->space == -1)) 4598 *ctxt->space = -2; 4599 } 4600 } 4601 } 4602 if ((cur != 0) && (!IS_CHAR(cur))) { 4603 /* Generate the error and skip the offending character */ 4604 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4605 "PCDATA invalid Char value %d\n", 4606 cur); 4607 NEXTL(l); 4608 } 4609 } 4610 4611 /** 4612 * xmlParseExternalID: 4613 * @ctxt: an XML parser context 4614 * @publicID: a xmlChar** receiving PubidLiteral 4615 * @strict: indicate whether we should restrict parsing to only 4616 * production [75], see NOTE below 4617 * 4618 * Parse an External ID or a Public ID 4619 * 4620 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4621 * 'PUBLIC' S PubidLiteral S SystemLiteral 4622 * 4623 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4624 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4625 * 4626 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4627 * 4628 * Returns the function returns SystemLiteral and in the second 4629 * case publicID receives PubidLiteral, is strict is off 4630 * it is possible to return NULL and have publicID set. 
4631 */ 4632 4633 xmlChar * 4634 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4635 xmlChar *URI = NULL; 4636 4637 SHRINK; 4638 4639 *publicID = NULL; 4640 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4641 SKIP(6); 4642 if (SKIP_BLANKS == 0) { 4643 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4644 "Space required after 'SYSTEM'\n"); 4645 } 4646 URI = xmlParseSystemLiteral(ctxt); 4647 if (URI == NULL) { 4648 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4649 } 4650 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4651 SKIP(6); 4652 if (SKIP_BLANKS == 0) { 4653 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4654 "Space required after 'PUBLIC'\n"); 4655 } 4656 *publicID = xmlParsePubidLiteral(ctxt); 4657 if (*publicID == NULL) { 4658 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4659 } 4660 if (strict) { 4661 /* 4662 * We don't handle [83] so "S SystemLiteral" is required. 4663 */ 4664 if (SKIP_BLANKS == 0) { 4665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4666 "Space required after the Public Identifier\n"); 4667 } 4668 } else { 4669 /* 4670 * We handle [83] so we return immediately, if 4671 * "S SystemLiteral" is not detected. We skip blanks if no 4672 * system literal was found, but this is harmless since we must 4673 * be at the end of a NotationDecl. 4674 */ 4675 if (SKIP_BLANKS == 0) return(NULL); 4676 if ((CUR != '\'') && (CUR != '"')) return(NULL); 4677 } 4678 URI = xmlParseSystemLiteral(ctxt); 4679 if (URI == NULL) { 4680 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4681 } 4682 } 4683 return(URI); 4684 } 4685 4686 /** 4687 * xmlParseCommentComplex: 4688 * @ctxt: an XML parser context 4689 * @buf: the already parsed part of the buffer 4690 * @len: number of bytes in the buffer 4691 * @size: allocated size of the buffer 4692 * 4693 * Skip an XML (SGML) comment <!-- .... --> 4694 * The spec says that "For compatibility, the string "--" (double-hyphen) 4695 * must not occur within comments. 
" 4696 * This is the slow routine in case the accelerator for ascii didn't work 4697 * 4698 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4699 */ 4700 static void 4701 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4702 size_t len, size_t size) { 4703 int q, ql; 4704 int r, rl; 4705 int cur, l; 4706 size_t count = 0; 4707 int inputid; 4708 4709 inputid = ctxt->input->id; 4710 4711 if (buf == NULL) { 4712 len = 0; 4713 size = XML_PARSER_BUFFER_SIZE; 4714 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4715 if (buf == NULL) { 4716 xmlErrMemory(ctxt, NULL); 4717 return; 4718 } 4719 } 4720 GROW; /* Assure there's enough input data */ 4721 q = CUR_CHAR(ql); 4722 if (q == 0) 4723 goto not_terminated; 4724 if (!IS_CHAR(q)) { 4725 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4726 "xmlParseComment: invalid xmlChar value %d\n", 4727 q); 4728 xmlFree (buf); 4729 return; 4730 } 4731 NEXTL(ql); 4732 r = CUR_CHAR(rl); 4733 if (r == 0) 4734 goto not_terminated; 4735 if (!IS_CHAR(r)) { 4736 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4737 "xmlParseComment: invalid xmlChar value %d\n", 4738 q); 4739 xmlFree (buf); 4740 return; 4741 } 4742 NEXTL(rl); 4743 cur = CUR_CHAR(l); 4744 if (cur == 0) 4745 goto not_terminated; 4746 while (IS_CHAR(cur) && /* checked */ 4747 ((cur != '>') || 4748 (r != '-') || (q != '-'))) { 4749 if ((r == '-') && (q == '-')) { 4750 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4751 } 4752 if ((len > XML_MAX_TEXT_LENGTH) && 4753 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4754 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4755 "Comment too big found", NULL); 4756 xmlFree (buf); 4757 return; 4758 } 4759 if (len + 5 >= size) { 4760 xmlChar *new_buf; 4761 size_t new_size; 4762 4763 new_size = size * 2; 4764 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4765 if (new_buf == NULL) { 4766 xmlFree (buf); 4767 xmlErrMemory(ctxt, NULL); 4768 return; 4769 } 4770 buf = new_buf; 4771 size = new_size; 4772 } 
4773 COPY_BUF(ql,buf,len,q); 4774 q = r; 4775 ql = rl; 4776 r = cur; 4777 rl = l; 4778 4779 count++; 4780 if (count > 50) { 4781 GROW; 4782 count = 0; 4783 if (ctxt->instate == XML_PARSER_EOF) { 4784 xmlFree(buf); 4785 return; 4786 } 4787 } 4788 NEXTL(l); 4789 cur = CUR_CHAR(l); 4790 if (cur == 0) { 4791 SHRINK; 4792 GROW; 4793 cur = CUR_CHAR(l); 4794 } 4795 } 4796 buf[len] = 0; 4797 if (cur == 0) { 4798 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4799 "Comment not terminated \n<!--%.50s\n", buf); 4800 } else if (!IS_CHAR(cur)) { 4801 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4802 "xmlParseComment: invalid xmlChar value %d\n", 4803 cur); 4804 } else { 4805 if (inputid != ctxt->input->id) { 4806 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4807 "Comment doesn't start and stop in the same" 4808 " entity\n"); 4809 } 4810 NEXT; 4811 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4812 (!ctxt->disableSAX)) 4813 ctxt->sax->comment(ctxt->userData, buf); 4814 } 4815 xmlFree(buf); 4816 return; 4817 not_terminated: 4818 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4819 "Comment not terminated\n", NULL); 4820 xmlFree(buf); 4821 return; 4822 } 4823 4824 /** 4825 * xmlParseComment: 4826 * @ctxt: an XML parser context 4827 * 4828 * Skip an XML (SGML) comment <!-- .... --> 4829 * The spec says that "For compatibility, the string "--" (double-hyphen) 4830 * must not occur within comments. " 4831 * 4832 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4833 */ 4834 void 4835 xmlParseComment(xmlParserCtxtPtr ctxt) { 4836 xmlChar *buf = NULL; 4837 size_t size = XML_PARSER_BUFFER_SIZE; 4838 size_t len = 0; 4839 xmlParserInputState state; 4840 const xmlChar *in; 4841 size_t nbchar = 0; 4842 int ccol; 4843 int inputid; 4844 4845 /* 4846 * Check that there is a comment right here. 
4847 */ 4848 if ((RAW != '<') || (NXT(1) != '!') || 4849 (NXT(2) != '-') || (NXT(3) != '-')) return; 4850 state = ctxt->instate; 4851 ctxt->instate = XML_PARSER_COMMENT; 4852 inputid = ctxt->input->id; 4853 SKIP(4); 4854 SHRINK; 4855 GROW; 4856 4857 /* 4858 * Accelerated common case where input don't need to be 4859 * modified before passing it to the handler. 4860 */ 4861 in = ctxt->input->cur; 4862 do { 4863 if (*in == 0xA) { 4864 do { 4865 ctxt->input->line++; ctxt->input->col = 1; 4866 in++; 4867 } while (*in == 0xA); 4868 } 4869 get_more: 4870 ccol = ctxt->input->col; 4871 while (((*in > '-') && (*in <= 0x7F)) || 4872 ((*in >= 0x20) && (*in < '-')) || 4873 (*in == 0x09)) { 4874 in++; 4875 ccol++; 4876 } 4877 ctxt->input->col = ccol; 4878 if (*in == 0xA) { 4879 do { 4880 ctxt->input->line++; ctxt->input->col = 1; 4881 in++; 4882 } while (*in == 0xA); 4883 goto get_more; 4884 } 4885 nbchar = in - ctxt->input->cur; 4886 /* 4887 * save current set of data 4888 */ 4889 if (nbchar > 0) { 4890 if ((ctxt->sax != NULL) && 4891 (ctxt->sax->comment != NULL)) { 4892 if (buf == NULL) { 4893 if ((*in == '-') && (in[1] == '-')) 4894 size = nbchar + 1; 4895 else 4896 size = XML_PARSER_BUFFER_SIZE + nbchar; 4897 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4898 if (buf == NULL) { 4899 xmlErrMemory(ctxt, NULL); 4900 ctxt->instate = state; 4901 return; 4902 } 4903 len = 0; 4904 } else if (len + nbchar + 1 >= size) { 4905 xmlChar *new_buf; 4906 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4907 new_buf = (xmlChar *) xmlRealloc(buf, 4908 size * sizeof(xmlChar)); 4909 if (new_buf == NULL) { 4910 xmlFree (buf); 4911 xmlErrMemory(ctxt, NULL); 4912 ctxt->instate = state; 4913 return; 4914 } 4915 buf = new_buf; 4916 } 4917 memcpy(&buf[len], ctxt->input->cur, nbchar); 4918 len += nbchar; 4919 buf[len] = 0; 4920 } 4921 } 4922 if ((len > XML_MAX_TEXT_LENGTH) && 4923 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4924 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4925 
"Comment too big found", NULL); 4926 xmlFree (buf); 4927 return; 4928 } 4929 ctxt->input->cur = in; 4930 if (*in == 0xA) { 4931 in++; 4932 ctxt->input->line++; ctxt->input->col = 1; 4933 } 4934 if (*in == 0xD) { 4935 in++; 4936 if (*in == 0xA) { 4937 ctxt->input->cur = in; 4938 in++; 4939 ctxt->input->line++; ctxt->input->col = 1; 4940 continue; /* while */ 4941 } 4942 in--; 4943 } 4944 SHRINK; 4945 GROW; 4946 if (ctxt->instate == XML_PARSER_EOF) { 4947 xmlFree(buf); 4948 return; 4949 } 4950 in = ctxt->input->cur; 4951 if (*in == '-') { 4952 if (in[1] == '-') { 4953 if (in[2] == '>') { 4954 if (ctxt->input->id != inputid) { 4955 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4956 "comment doesn't start and stop in the" 4957 " same entity\n"); 4958 } 4959 SKIP(3); 4960 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4961 (!ctxt->disableSAX)) { 4962 if (buf != NULL) 4963 ctxt->sax->comment(ctxt->userData, buf); 4964 else 4965 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4966 } 4967 if (buf != NULL) 4968 xmlFree(buf); 4969 if (ctxt->instate != XML_PARSER_EOF) 4970 ctxt->instate = state; 4971 return; 4972 } 4973 if (buf != NULL) { 4974 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4975 "Double hyphen within comment: " 4976 "<!--%.50s\n", 4977 buf); 4978 } else 4979 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4980 "Double hyphen within comment\n", NULL); 4981 if (ctxt->instate == XML_PARSER_EOF) { 4982 xmlFree(buf); 4983 return; 4984 } 4985 in++; 4986 ctxt->input->col++; 4987 } 4988 in++; 4989 ctxt->input->col++; 4990 goto get_more; 4991 } 4992 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4993 xmlParseCommentComplex(ctxt, buf, len, size); 4994 ctxt->instate = state; 4995 return; 4996 } 4997 4998 4999 /** 5000 * xmlParsePITarget: 5001 * @ctxt: an XML parser context 5002 * 5003 * parse the name of a PI 5004 * 5005 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5006 * 5007 * Returns the PITarget name or NULL 5008 
*/ 5009 5010 const xmlChar * 5011 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5012 const xmlChar *name; 5013 5014 name = xmlParseName(ctxt); 5015 if ((name != NULL) && 5016 ((name[0] == 'x') || (name[0] == 'X')) && 5017 ((name[1] == 'm') || (name[1] == 'M')) && 5018 ((name[2] == 'l') || (name[2] == 'L'))) { 5019 int i; 5020 if ((name[0] == 'x') && (name[1] == 'm') && 5021 (name[2] == 'l') && (name[3] == 0)) { 5022 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5023 "XML declaration allowed only at the start of the document\n"); 5024 return(name); 5025 } else if (name[3] == 0) { 5026 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5027 return(name); 5028 } 5029 for (i = 0;;i++) { 5030 if (xmlW3CPIs[i] == NULL) break; 5031 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5032 return(name); 5033 } 5034 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5035 "xmlParsePITarget: invalid name prefix 'xml'\n", 5036 NULL, NULL); 5037 } 5038 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5039 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5040 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5041 } 5042 return(name); 5043 } 5044 5045 #ifdef LIBXML_CATALOG_ENABLED 5046 /** 5047 * xmlParseCatalogPI: 5048 * @ctxt: an XML parser context 5049 * @catalog: the PI value string 5050 * 5051 * parse an XML Catalog Processing Instruction. 
5052 * 5053 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5054 * 5055 * Occurs only if allowed by the user and if happening in the Misc 5056 * part of the document before any doctype informations 5057 * This will add the given catalog to the parsing context in order 5058 * to be used if there is a resolution need further down in the document 5059 */ 5060 5061 static void 5062 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5063 xmlChar *URL = NULL; 5064 const xmlChar *tmp, *base; 5065 xmlChar marker; 5066 5067 tmp = catalog; 5068 while (IS_BLANK_CH(*tmp)) tmp++; 5069 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5070 goto error; 5071 tmp += 7; 5072 while (IS_BLANK_CH(*tmp)) tmp++; 5073 if (*tmp != '=') { 5074 return; 5075 } 5076 tmp++; 5077 while (IS_BLANK_CH(*tmp)) tmp++; 5078 marker = *tmp; 5079 if ((marker != '\'') && (marker != '"')) 5080 goto error; 5081 tmp++; 5082 base = tmp; 5083 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5084 if (*tmp == 0) 5085 goto error; 5086 URL = xmlStrndup(base, tmp - base); 5087 tmp++; 5088 while (IS_BLANK_CH(*tmp)) tmp++; 5089 if (*tmp != 0) 5090 goto error; 5091 5092 if (URL != NULL) { 5093 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5094 xmlFree(URL); 5095 } 5096 return; 5097 5098 error: 5099 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5100 "Catalog PI syntax error: %s\n", 5101 catalog, NULL); 5102 if (URL != NULL) 5103 xmlFree(URL); 5104 } 5105 #endif 5106 5107 /** 5108 * xmlParsePI: 5109 * @ctxt: an XML parser context 5110 * 5111 * parse an XML Processing Instruction. 5112 * 5113 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5114 * 5115 * The processing is transferred to SAX once parsed. 
5116 */ 5117 5118 void 5119 xmlParsePI(xmlParserCtxtPtr ctxt) { 5120 xmlChar *buf = NULL; 5121 size_t len = 0; 5122 size_t size = XML_PARSER_BUFFER_SIZE; 5123 int cur, l; 5124 const xmlChar *target; 5125 xmlParserInputState state; 5126 int count = 0; 5127 5128 if ((RAW == '<') && (NXT(1) == '?')) { 5129 int inputid = ctxt->input->id; 5130 state = ctxt->instate; 5131 ctxt->instate = XML_PARSER_PI; 5132 /* 5133 * this is a Processing Instruction. 5134 */ 5135 SKIP(2); 5136 SHRINK; 5137 5138 /* 5139 * Parse the target name and check for special support like 5140 * namespace. 5141 */ 5142 target = xmlParsePITarget(ctxt); 5143 if (target != NULL) { 5144 if ((RAW == '?') && (NXT(1) == '>')) { 5145 if (inputid != ctxt->input->id) { 5146 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5147 "PI declaration doesn't start and stop in" 5148 " the same entity\n"); 5149 } 5150 SKIP(2); 5151 5152 /* 5153 * SAX: PI detected. 5154 */ 5155 if ((ctxt->sax) && (!ctxt->disableSAX) && 5156 (ctxt->sax->processingInstruction != NULL)) 5157 ctxt->sax->processingInstruction(ctxt->userData, 5158 target, NULL); 5159 if (ctxt->instate != XML_PARSER_EOF) 5160 ctxt->instate = state; 5161 return; 5162 } 5163 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5164 if (buf == NULL) { 5165 xmlErrMemory(ctxt, NULL); 5166 ctxt->instate = state; 5167 return; 5168 } 5169 if (SKIP_BLANKS == 0) { 5170 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5171 "ParsePI: PI %s space expected\n", target); 5172 } 5173 cur = CUR_CHAR(l); 5174 while (IS_CHAR(cur) && /* checked */ 5175 ((cur != '?') || (NXT(1) != '>'))) { 5176 if (len + 5 >= size) { 5177 xmlChar *tmp; 5178 size_t new_size = size * 2; 5179 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5180 if (tmp == NULL) { 5181 xmlErrMemory(ctxt, NULL); 5182 xmlFree(buf); 5183 ctxt->instate = state; 5184 return; 5185 } 5186 buf = tmp; 5187 size = new_size; 5188 } 5189 count++; 5190 if (count > 50) { 5191 GROW; 5192 if (ctxt->instate == XML_PARSER_EOF) { 5193 
xmlFree(buf); 5194 return; 5195 } 5196 count = 0; 5197 if ((len > XML_MAX_TEXT_LENGTH) && 5198 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5199 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5200 "PI %s too big found", target); 5201 xmlFree(buf); 5202 ctxt->instate = state; 5203 return; 5204 } 5205 } 5206 COPY_BUF(l,buf,len,cur); 5207 NEXTL(l); 5208 cur = CUR_CHAR(l); 5209 if (cur == 0) { 5210 SHRINK; 5211 GROW; 5212 cur = CUR_CHAR(l); 5213 } 5214 } 5215 if ((len > XML_MAX_TEXT_LENGTH) && 5216 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5217 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5218 "PI %s too big found", target); 5219 xmlFree(buf); 5220 ctxt->instate = state; 5221 return; 5222 } 5223 buf[len] = 0; 5224 if (cur != '?') { 5225 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5226 "ParsePI: PI %s never end ...\n", target); 5227 } else { 5228 if (inputid != ctxt->input->id) { 5229 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5230 "PI declaration doesn't start and stop in" 5231 " the same entity\n"); 5232 } 5233 SKIP(2); 5234 5235 #ifdef LIBXML_CATALOG_ENABLED 5236 if (((state == XML_PARSER_MISC) || 5237 (state == XML_PARSER_START)) && 5238 (xmlStrEqual(target, XML_CATALOG_PI))) { 5239 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5240 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5241 (allow == XML_CATA_ALLOW_ALL)) 5242 xmlParseCatalogPI(ctxt, buf); 5243 } 5244 #endif 5245 5246 5247 /* 5248 * SAX: PI detected. 
5249 */ 5250 if ((ctxt->sax) && (!ctxt->disableSAX) && 5251 (ctxt->sax->processingInstruction != NULL)) 5252 ctxt->sax->processingInstruction(ctxt->userData, 5253 target, buf); 5254 } 5255 xmlFree(buf); 5256 } else { 5257 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5258 } 5259 if (ctxt->instate != XML_PARSER_EOF) 5260 ctxt->instate = state; 5261 } 5262 } 5263 5264 /** 5265 * xmlParseNotationDecl: 5266 * @ctxt: an XML parser context 5267 * 5268 * parse a notation declaration 5269 * 5270 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5271 * 5272 * Hence there is actually 3 choices: 5273 * 'PUBLIC' S PubidLiteral 5274 * 'PUBLIC' S PubidLiteral S SystemLiteral 5275 * and 'SYSTEM' S SystemLiteral 5276 * 5277 * See the NOTE on xmlParseExternalID(). 5278 */ 5279 5280 void 5281 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5282 const xmlChar *name; 5283 xmlChar *Pubid; 5284 xmlChar *Systemid; 5285 5286 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5287 int inputid = ctxt->input->id; 5288 SHRINK; 5289 SKIP(10); 5290 if (SKIP_BLANKS == 0) { 5291 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5292 "Space required after '<!NOTATION'\n"); 5293 return; 5294 } 5295 5296 name = xmlParseName(ctxt); 5297 if (name == NULL) { 5298 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5299 return; 5300 } 5301 if (xmlStrchr(name, ':') != NULL) { 5302 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5303 "colons are forbidden from notation names '%s'\n", 5304 name, NULL, NULL); 5305 } 5306 if (SKIP_BLANKS == 0) { 5307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5308 "Space required after the NOTATION name'\n"); 5309 return; 5310 } 5311 5312 /* 5313 * Parse the IDs. 
5314 */ 5315 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5316 SKIP_BLANKS; 5317 5318 if (RAW == '>') { 5319 if (inputid != ctxt->input->id) { 5320 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5321 "Notation declaration doesn't start and stop" 5322 " in the same entity\n"); 5323 } 5324 NEXT; 5325 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5326 (ctxt->sax->notationDecl != NULL)) 5327 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5328 } else { 5329 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5330 } 5331 if (Systemid != NULL) xmlFree(Systemid); 5332 if (Pubid != NULL) xmlFree(Pubid); 5333 } 5334 } 5335 5336 /** 5337 * xmlParseEntityDecl: 5338 * @ctxt: an XML parser context 5339 * 5340 * parse <!ENTITY declarations 5341 * 5342 * [70] EntityDecl ::= GEDecl | PEDecl 5343 * 5344 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5345 * 5346 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5347 * 5348 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5349 * 5350 * [74] PEDef ::= EntityValue | ExternalID 5351 * 5352 * [76] NDataDecl ::= S 'NDATA' S Name 5353 * 5354 * [ VC: Notation Declared ] 5355 * The Name must match the declared name of a notation. 
5356 */ 5357 5358 void 5359 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5360 const xmlChar *name = NULL; 5361 xmlChar *value = NULL; 5362 xmlChar *URI = NULL, *literal = NULL; 5363 const xmlChar *ndata = NULL; 5364 int isParameter = 0; 5365 xmlChar *orig = NULL; 5366 5367 /* GROW; done in the caller */ 5368 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5369 int inputid = ctxt->input->id; 5370 SHRINK; 5371 SKIP(8); 5372 if (SKIP_BLANKS == 0) { 5373 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5374 "Space required after '<!ENTITY'\n"); 5375 } 5376 5377 if (RAW == '%') { 5378 NEXT; 5379 if (SKIP_BLANKS == 0) { 5380 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5381 "Space required after '%%'\n"); 5382 } 5383 isParameter = 1; 5384 } 5385 5386 name = xmlParseName(ctxt); 5387 if (name == NULL) { 5388 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5389 "xmlParseEntityDecl: no name\n"); 5390 return; 5391 } 5392 if (xmlStrchr(name, ':') != NULL) { 5393 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5394 "colons are forbidden from entities names '%s'\n", 5395 name, NULL, NULL); 5396 } 5397 if (SKIP_BLANKS == 0) { 5398 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5399 "Space required after the entity name\n"); 5400 } 5401 5402 ctxt->instate = XML_PARSER_ENTITY_DECL; 5403 /* 5404 * handle the various case of definitions... 
5405 */ 5406 if (isParameter) { 5407 if ((RAW == '"') || (RAW == '\'')) { 5408 value = xmlParseEntityValue(ctxt, &orig); 5409 if (value) { 5410 if ((ctxt->sax != NULL) && 5411 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5412 ctxt->sax->entityDecl(ctxt->userData, name, 5413 XML_INTERNAL_PARAMETER_ENTITY, 5414 NULL, NULL, value); 5415 } 5416 } else { 5417 URI = xmlParseExternalID(ctxt, &literal, 1); 5418 if ((URI == NULL) && (literal == NULL)) { 5419 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5420 } 5421 if (URI) { 5422 xmlURIPtr uri; 5423 5424 uri = xmlParseURI((const char *) URI); 5425 if (uri == NULL) { 5426 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5427 "Invalid URI: %s\n", URI); 5428 /* 5429 * This really ought to be a well formedness error 5430 * but the XML Core WG decided otherwise c.f. issue 5431 * E26 of the XML erratas. 5432 */ 5433 } else { 5434 if (uri->fragment != NULL) { 5435 /* 5436 * Okay this is foolish to block those but not 5437 * invalid URIs. 5438 */ 5439 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5440 } else { 5441 if ((ctxt->sax != NULL) && 5442 (!ctxt->disableSAX) && 5443 (ctxt->sax->entityDecl != NULL)) 5444 ctxt->sax->entityDecl(ctxt->userData, name, 5445 XML_EXTERNAL_PARAMETER_ENTITY, 5446 literal, URI, NULL); 5447 } 5448 xmlFreeURI(uri); 5449 } 5450 } 5451 } 5452 } else { 5453 if ((RAW == '"') || (RAW == '\'')) { 5454 value = xmlParseEntityValue(ctxt, &orig); 5455 if ((ctxt->sax != NULL) && 5456 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5457 ctxt->sax->entityDecl(ctxt->userData, name, 5458 XML_INTERNAL_GENERAL_ENTITY, 5459 NULL, NULL, value); 5460 /* 5461 * For expat compatibility in SAX mode. 
5462 */ 5463 if ((ctxt->myDoc == NULL) || 5464 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5465 if (ctxt->myDoc == NULL) { 5466 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5467 if (ctxt->myDoc == NULL) { 5468 xmlErrMemory(ctxt, "New Doc failed"); 5469 return; 5470 } 5471 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5472 } 5473 if (ctxt->myDoc->intSubset == NULL) 5474 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5475 BAD_CAST "fake", NULL, NULL); 5476 5477 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5478 NULL, NULL, value); 5479 } 5480 } else { 5481 URI = xmlParseExternalID(ctxt, &literal, 1); 5482 if ((URI == NULL) && (literal == NULL)) { 5483 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5484 } 5485 if (URI) { 5486 xmlURIPtr uri; 5487 5488 uri = xmlParseURI((const char *)URI); 5489 if (uri == NULL) { 5490 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5491 "Invalid URI: %s\n", URI); 5492 /* 5493 * This really ought to be a well formedness error 5494 * but the XML Core WG decided otherwise c.f. issue 5495 * E26 of the XML erratas. 5496 */ 5497 } else { 5498 if (uri->fragment != NULL) { 5499 /* 5500 * Okay this is foolish to block those but not 5501 * invalid URIs. 
5502 */ 5503 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5504 } 5505 xmlFreeURI(uri); 5506 } 5507 } 5508 if ((RAW != '>') && (SKIP_BLANKS == 0)) { 5509 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5510 "Space required before 'NDATA'\n"); 5511 } 5512 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5513 SKIP(5); 5514 if (SKIP_BLANKS == 0) { 5515 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5516 "Space required after 'NDATA'\n"); 5517 } 5518 ndata = xmlParseName(ctxt); 5519 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5520 (ctxt->sax->unparsedEntityDecl != NULL)) 5521 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5522 literal, URI, ndata); 5523 } else { 5524 if ((ctxt->sax != NULL) && 5525 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5526 ctxt->sax->entityDecl(ctxt->userData, name, 5527 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5528 literal, URI, NULL); 5529 /* 5530 * For expat compatibility in SAX mode. 5531 * assuming the entity replacement was asked for 5532 */ 5533 if ((ctxt->replaceEntities != 0) && 5534 ((ctxt->myDoc == NULL) || 5535 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5536 if (ctxt->myDoc == NULL) { 5537 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5538 if (ctxt->myDoc == NULL) { 5539 xmlErrMemory(ctxt, "New Doc failed"); 5540 return; 5541 } 5542 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5543 } 5544 5545 if (ctxt->myDoc->intSubset == NULL) 5546 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5547 BAD_CAST "fake", NULL, NULL); 5548 xmlSAX2EntityDecl(ctxt, name, 5549 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5550 literal, URI, NULL); 5551 } 5552 } 5553 } 5554 } 5555 if (ctxt->instate == XML_PARSER_EOF) 5556 goto done; 5557 SKIP_BLANKS; 5558 if (RAW != '>') { 5559 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5560 "xmlParseEntityDecl: entity %s not terminated\n", name); 5561 xmlHaltParser(ctxt); 5562 } else { 5563 if (inputid != ctxt->input->id) { 5564 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5565 "Entity declaration 
doesn't start and stop in" 5566 " the same entity\n"); 5567 } 5568 NEXT; 5569 } 5570 if (orig != NULL) { 5571 /* 5572 * Ugly mechanism to save the raw entity value. 5573 */ 5574 xmlEntityPtr cur = NULL; 5575 5576 if (isParameter) { 5577 if ((ctxt->sax != NULL) && 5578 (ctxt->sax->getParameterEntity != NULL)) 5579 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5580 } else { 5581 if ((ctxt->sax != NULL) && 5582 (ctxt->sax->getEntity != NULL)) 5583 cur = ctxt->sax->getEntity(ctxt->userData, name); 5584 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5585 cur = xmlSAX2GetEntity(ctxt, name); 5586 } 5587 } 5588 if ((cur != NULL) && (cur->orig == NULL)) { 5589 cur->orig = orig; 5590 orig = NULL; 5591 } 5592 } 5593 5594 done: 5595 if (value != NULL) xmlFree(value); 5596 if (URI != NULL) xmlFree(URI); 5597 if (literal != NULL) xmlFree(literal); 5598 if (orig != NULL) xmlFree(orig); 5599 } 5600 } 5601 5602 /** 5603 * xmlParseDefaultDecl: 5604 * @ctxt: an XML parser context 5605 * @value: Receive a possible fixed default value for the attribute 5606 * 5607 * Parse an attribute default declaration 5608 * 5609 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5610 * 5611 * [ VC: Required Attribute ] 5612 * if the default declaration is the keyword #REQUIRED, then the 5613 * attribute must be specified for all elements of the type in the 5614 * attribute-list declaration. 5615 * 5616 * [ VC: Attribute Default Legal ] 5617 * The declared default value must meet the lexical constraints of 5618 * the declared attribute type c.f. xmlValidateAttributeDecl() 5619 * 5620 * [ VC: Fixed Attribute Default ] 5621 * if an attribute has a default value declared with the #FIXED 5622 * keyword, instances of that attribute must match the default value. 5623 * 5624 * [ WFC: No < in Attribute Values ] 5625 * handled in xmlParseAttValue() 5626 * 5627 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5628 * or XML_ATTRIBUTE_FIXED. 
5629 */ 5630 5631 int 5632 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5633 int val; 5634 xmlChar *ret; 5635 5636 *value = NULL; 5637 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5638 SKIP(9); 5639 return(XML_ATTRIBUTE_REQUIRED); 5640 } 5641 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5642 SKIP(8); 5643 return(XML_ATTRIBUTE_IMPLIED); 5644 } 5645 val = XML_ATTRIBUTE_NONE; 5646 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5647 SKIP(6); 5648 val = XML_ATTRIBUTE_FIXED; 5649 if (SKIP_BLANKS == 0) { 5650 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5651 "Space required after '#FIXED'\n"); 5652 } 5653 } 5654 ret = xmlParseAttValue(ctxt); 5655 ctxt->instate = XML_PARSER_DTD; 5656 if (ret == NULL) { 5657 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5658 "Attribute default value declaration error\n"); 5659 } else 5660 *value = ret; 5661 return(val); 5662 } 5663 5664 /** 5665 * xmlParseNotationType: 5666 * @ctxt: an XML parser context 5667 * 5668 * parse an Notation attribute type. 5669 * 5670 * Note: the leading 'NOTATION' S part has already being parsed... 5671 * 5672 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5673 * 5674 * [ VC: Notation Attributes ] 5675 * Values of this type must match one of the notation names included 5676 * in the declaration; all notation names in the declaration must be declared. 
5677 * 5678 * Returns: the notation attribute tree built while parsing 5679 */ 5680 5681 xmlEnumerationPtr 5682 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5683 const xmlChar *name; 5684 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5685 5686 if (RAW != '(') { 5687 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5688 return(NULL); 5689 } 5690 SHRINK; 5691 do { 5692 NEXT; 5693 SKIP_BLANKS; 5694 name = xmlParseName(ctxt); 5695 if (name == NULL) { 5696 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5697 "Name expected in NOTATION declaration\n"); 5698 xmlFreeEnumeration(ret); 5699 return(NULL); 5700 } 5701 tmp = ret; 5702 while (tmp != NULL) { 5703 if (xmlStrEqual(name, tmp->name)) { 5704 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5705 "standalone: attribute notation value token %s duplicated\n", 5706 name, NULL); 5707 if (!xmlDictOwns(ctxt->dict, name)) 5708 xmlFree((xmlChar *) name); 5709 break; 5710 } 5711 tmp = tmp->next; 5712 } 5713 if (tmp == NULL) { 5714 cur = xmlCreateEnumeration(name); 5715 if (cur == NULL) { 5716 xmlFreeEnumeration(ret); 5717 return(NULL); 5718 } 5719 if (last == NULL) ret = last = cur; 5720 else { 5721 last->next = cur; 5722 last = cur; 5723 } 5724 } 5725 SKIP_BLANKS; 5726 } while (RAW == '|'); 5727 if (RAW != ')') { 5728 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5729 xmlFreeEnumeration(ret); 5730 return(NULL); 5731 } 5732 NEXT; 5733 return(ret); 5734 } 5735 5736 /** 5737 * xmlParseEnumerationType: 5738 * @ctxt: an XML parser context 5739 * 5740 * parse an Enumeration attribute type. 5741 * 5742 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5743 * 5744 * [ VC: Enumeration ] 5745 * Values of this type must match one of the Nmtoken tokens in 5746 * the declaration 5747 * 5748 * Returns: the enumeration attribute tree built while parsing 5749 */ 5750 5751 xmlEnumerationPtr 5752 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5753 xmlChar *name; 5754 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5755 5756 if (RAW != '(') { 5757 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5758 return(NULL); 5759 } 5760 SHRINK; 5761 do { 5762 NEXT; 5763 SKIP_BLANKS; 5764 name = xmlParseNmtoken(ctxt); 5765 if (name == NULL) { 5766 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5767 return(ret); 5768 } 5769 tmp = ret; 5770 while (tmp != NULL) { 5771 if (xmlStrEqual(name, tmp->name)) { 5772 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5773 "standalone: attribute enumeration value token %s duplicated\n", 5774 name, NULL); 5775 if (!xmlDictOwns(ctxt->dict, name)) 5776 xmlFree(name); 5777 break; 5778 } 5779 tmp = tmp->next; 5780 } 5781 if (tmp == NULL) { 5782 cur = xmlCreateEnumeration(name); 5783 if (!xmlDictOwns(ctxt->dict, name)) 5784 xmlFree(name); 5785 if (cur == NULL) { 5786 xmlFreeEnumeration(ret); 5787 return(NULL); 5788 } 5789 if (last == NULL) ret = last = cur; 5790 else { 5791 last->next = cur; 5792 last = cur; 5793 } 5794 } 5795 SKIP_BLANKS; 5796 } while (RAW == '|'); 5797 if (RAW != ')') { 5798 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5799 return(ret); 5800 } 5801 NEXT; 5802 return(ret); 5803 } 5804 5805 /** 5806 * xmlParseEnumeratedType: 5807 * @ctxt: an XML parser context 5808 * @tree: the enumeration tree built while parsing 5809 * 5810 * parse an Enumerated attribute type. 5811 * 5812 * [57] EnumeratedType ::= NotationType | Enumeration 5813 * 5814 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5815 * 5816 * 5817 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5818 */ 5819 5820 int 5821 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5822 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5823 SKIP(8); 5824 if (SKIP_BLANKS == 0) { 5825 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5826 "Space required after 'NOTATION'\n"); 5827 return(0); 5828 } 5829 *tree = xmlParseNotationType(ctxt); 5830 if (*tree == NULL) return(0); 5831 return(XML_ATTRIBUTE_NOTATION); 5832 } 5833 *tree = xmlParseEnumerationType(ctxt); 5834 if (*tree == NULL) return(0); 5835 return(XML_ATTRIBUTE_ENUMERATION); 5836 } 5837 5838 /** 5839 * xmlParseAttributeType: 5840 * @ctxt: an XML parser context 5841 * @tree: the enumeration tree built while parsing 5842 * 5843 * parse the Attribute list def for an element 5844 * 5845 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5846 * 5847 * [55] StringType ::= 'CDATA' 5848 * 5849 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5850 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5851 * 5852 * Validity constraints for attribute values syntax are checked in 5853 * xmlValidateAttributeValue() 5854 * 5855 * [ VC: ID ] 5856 * Values of type ID must match the Name production. A name must not 5857 * appear more than once in an XML document as a value of this type; 5858 * i.e., ID values must uniquely identify the elements which bear them. 5859 * 5860 * [ VC: One ID per Element Type ] 5861 * No element type may have more than one ID attribute specified. 5862 * 5863 * [ VC: ID Attribute Default ] 5864 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5865 * 5866 * [ VC: IDREF ] 5867 * Values of type IDREF must match the Name production, and values 5868 * of type IDREFS must match Names; each IDREF Name must match the value 5869 * of an ID attribute on some element in the XML document; i.e. IDREF 5870 * values must match the value of some ID attribute. 
5871 * 5872 * [ VC: Entity Name ] 5873 * Values of type ENTITY must match the Name production, values 5874 * of type ENTITIES must match Names; each Entity Name must match the 5875 * name of an unparsed entity declared in the DTD. 5876 * 5877 * [ VC: Name Token ] 5878 * Values of type NMTOKEN must match the Nmtoken production; values 5879 * of type NMTOKENS must match Nmtokens. 5880 * 5881 * Returns the attribute type 5882 */ 5883 int 5884 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5885 SHRINK; 5886 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5887 SKIP(5); 5888 return(XML_ATTRIBUTE_CDATA); 5889 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5890 SKIP(6); 5891 return(XML_ATTRIBUTE_IDREFS); 5892 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5893 SKIP(5); 5894 return(XML_ATTRIBUTE_IDREF); 5895 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5896 SKIP(2); 5897 return(XML_ATTRIBUTE_ID); 5898 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5899 SKIP(6); 5900 return(XML_ATTRIBUTE_ENTITY); 5901 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5902 SKIP(8); 5903 return(XML_ATTRIBUTE_ENTITIES); 5904 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5905 SKIP(8); 5906 return(XML_ATTRIBUTE_NMTOKENS); 5907 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5908 SKIP(7); 5909 return(XML_ATTRIBUTE_NMTOKEN); 5910 } 5911 return(xmlParseEnumeratedType(ctxt, tree)); 5912 } 5913 5914 /** 5915 * xmlParseAttributeListDecl: 5916 * @ctxt: an XML parser context 5917 * 5918 * : parse the Attribute list def for an element 5919 * 5920 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5921 * 5922 * [53] AttDef ::= S Name S AttType S DefaultDecl 5923 * 5924 */ 5925 void 5926 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5927 const xmlChar *elemName; 5928 const xmlChar *attrName; 5929 xmlEnumerationPtr tree; 5930 5931 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5932 int inputid = ctxt->input->id; 5933 5934 SKIP(9); 5935 if (SKIP_BLANKS == 0) { 5936 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5937 "Space required after '<!ATTLIST'\n"); 5938 } 5939 elemName = xmlParseName(ctxt); 5940 if (elemName == NULL) { 5941 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5942 "ATTLIST: no name for Element\n"); 5943 return; 5944 } 5945 SKIP_BLANKS; 5946 GROW; 5947 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 5948 int type; 5949 int def; 5950 xmlChar *defaultValue = NULL; 5951 5952 GROW; 5953 tree = NULL; 5954 attrName = xmlParseName(ctxt); 5955 if (attrName == NULL) { 5956 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5957 "ATTLIST: no name for Attribute\n"); 5958 break; 5959 } 5960 GROW; 5961 if (SKIP_BLANKS == 0) { 5962 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5963 "Space required after the attribute name\n"); 5964 break; 5965 } 5966 5967 type = xmlParseAttributeType(ctxt, &tree); 5968 if (type <= 0) { 5969 break; 5970 } 5971 5972 GROW; 5973 if (SKIP_BLANKS == 0) { 5974 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5975 "Space required after the attribute type\n"); 5976 if (tree != NULL) 5977 xmlFreeEnumeration(tree); 5978 break; 5979 } 5980 5981 def = xmlParseDefaultDecl(ctxt, &defaultValue); 5982 if (def <= 0) { 5983 if (defaultValue != NULL) 5984 xmlFree(defaultValue); 5985 if (tree != NULL) 5986 xmlFreeEnumeration(tree); 5987 break; 5988 } 5989 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 5990 xmlAttrNormalizeSpace(defaultValue, defaultValue); 5991 5992 GROW; 5993 if (RAW != '>') { 5994 if (SKIP_BLANKS == 0) { 5995 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5996 "Space required after the 
attribute default value\n"); 5997 if (defaultValue != NULL) 5998 xmlFree(defaultValue); 5999 if (tree != NULL) 6000 xmlFreeEnumeration(tree); 6001 break; 6002 } 6003 } 6004 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6005 (ctxt->sax->attributeDecl != NULL)) 6006 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6007 type, def, defaultValue, tree); 6008 else if (tree != NULL) 6009 xmlFreeEnumeration(tree); 6010 6011 if ((ctxt->sax2) && (defaultValue != NULL) && 6012 (def != XML_ATTRIBUTE_IMPLIED) && 6013 (def != XML_ATTRIBUTE_REQUIRED)) { 6014 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6015 } 6016 if (ctxt->sax2) { 6017 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6018 } 6019 if (defaultValue != NULL) 6020 xmlFree(defaultValue); 6021 GROW; 6022 } 6023 if (RAW == '>') { 6024 if (inputid != ctxt->input->id) { 6025 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6026 "Attribute list declaration doesn't start and" 6027 " stop in the same entity\n"); 6028 } 6029 NEXT; 6030 } 6031 } 6032 } 6033 6034 /** 6035 * xmlParseElementMixedContentDecl: 6036 * @ctxt: an XML parser context 6037 * @inputchk: the input used for the current entity, needed for boundary checks 6038 * 6039 * parse the declaration for a Mixed Element content 6040 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6041 * 6042 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6043 * '(' S? '#PCDATA' S? ')' 6044 * 6045 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6046 * 6047 * [ VC: No Duplicate Types ] 6048 * The same name must not appear more than once in a single 6049 * mixed-content declaration. 
6050 * 6051 * returns: the list of the xmlElementContentPtr describing the element choices 6052 */ 6053 xmlElementContentPtr 6054 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6055 xmlElementContentPtr ret = NULL, cur = NULL, n; 6056 const xmlChar *elem = NULL; 6057 6058 GROW; 6059 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6060 SKIP(7); 6061 SKIP_BLANKS; 6062 SHRINK; 6063 if (RAW == ')') { 6064 if (ctxt->input->id != inputchk) { 6065 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6066 "Element content declaration doesn't start and" 6067 " stop in the same entity\n"); 6068 } 6069 NEXT; 6070 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6071 if (ret == NULL) 6072 return(NULL); 6073 if (RAW == '*') { 6074 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6075 NEXT; 6076 } 6077 return(ret); 6078 } 6079 if ((RAW == '(') || (RAW == '|')) { 6080 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6081 if (ret == NULL) return(NULL); 6082 } 6083 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6084 NEXT; 6085 if (elem == NULL) { 6086 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6087 if (ret == NULL) return(NULL); 6088 ret->c1 = cur; 6089 if (cur != NULL) 6090 cur->parent = ret; 6091 cur = ret; 6092 } else { 6093 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6094 if (n == NULL) return(NULL); 6095 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6096 if (n->c1 != NULL) 6097 n->c1->parent = n; 6098 cur->c2 = n; 6099 if (n != NULL) 6100 n->parent = cur; 6101 cur = n; 6102 } 6103 SKIP_BLANKS; 6104 elem = xmlParseName(ctxt); 6105 if (elem == NULL) { 6106 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6107 "xmlParseElementMixedContentDecl : Name expected\n"); 6108 xmlFreeDocElementContent(ctxt->myDoc, ret); 6109 return(NULL); 6110 } 6111 SKIP_BLANKS; 6112 GROW; 6113 } 6114 if ((RAW == ')') && 
(NXT(1) == '*')) { 6115 if (elem != NULL) { 6116 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6117 XML_ELEMENT_CONTENT_ELEMENT); 6118 if (cur->c2 != NULL) 6119 cur->c2->parent = cur; 6120 } 6121 if (ret != NULL) 6122 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6123 if (ctxt->input->id != inputchk) { 6124 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6125 "Element content declaration doesn't start and" 6126 " stop in the same entity\n"); 6127 } 6128 SKIP(2); 6129 } else { 6130 xmlFreeDocElementContent(ctxt->myDoc, ret); 6131 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6132 return(NULL); 6133 } 6134 6135 } else { 6136 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6137 } 6138 return(ret); 6139 } 6140 6141 /** 6142 * xmlParseElementChildrenContentDeclPriv: 6143 * @ctxt: an XML parser context 6144 * @inputchk: the input used for the current entity, needed for boundary checks 6145 * @depth: the level of recursion 6146 * 6147 * parse the declaration for a Mixed Element content 6148 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6149 * 6150 * 6151 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6152 * 6153 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6154 * 6155 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6156 * 6157 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6158 * 6159 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6160 * TODO Parameter-entity replacement text must be properly nested 6161 * with parenthesized groups. That is to say, if either of the 6162 * opening or closing parentheses in a choice, seq, or Mixed 6163 * construct is contained in the replacement text for a parameter 6164 * entity, both must be contained in the same replacement text. 
For 6165 * interoperability, if a parameter-entity reference appears in a 6166 * choice, seq, or Mixed construct, its replacement text should not 6167 * be empty, and neither the first nor last non-blank character of 6168 * the replacement text should be a connector (| or ,). 6169 * 6170 * Returns the tree of xmlElementContentPtr describing the element 6171 * hierarchy. 6172 */ 6173 static xmlElementContentPtr 6174 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6175 int depth) { 6176 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6177 const xmlChar *elem; 6178 xmlChar type = 0; 6179 6180 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6181 (depth > 2048)) { 6182 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6183 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6184 depth); 6185 return(NULL); 6186 } 6187 SKIP_BLANKS; 6188 GROW; 6189 if (RAW == '(') { 6190 int inputid = ctxt->input->id; 6191 6192 /* Recurse on first child */ 6193 NEXT; 6194 SKIP_BLANKS; 6195 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6196 depth + 1); 6197 SKIP_BLANKS; 6198 GROW; 6199 } else { 6200 elem = xmlParseName(ctxt); 6201 if (elem == NULL) { 6202 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6203 return(NULL); 6204 } 6205 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6206 if (cur == NULL) { 6207 xmlErrMemory(ctxt, NULL); 6208 return(NULL); 6209 } 6210 GROW; 6211 if (RAW == '?') { 6212 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6213 NEXT; 6214 } else if (RAW == '*') { 6215 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6216 NEXT; 6217 } else if (RAW == '+') { 6218 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 6219 NEXT; 6220 } else { 6221 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6222 } 6223 GROW; 6224 } 6225 SKIP_BLANKS; 6226 SHRINK; 6227 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6228 /* 6229 * Each loop we parse one 
separator and one element. 6230 */ 6231 if (RAW == ',') { 6232 if (type == 0) type = CUR; 6233 6234 /* 6235 * Detect "Name | Name , Name" error 6236 */ 6237 else if (type != CUR) { 6238 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6239 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6240 type); 6241 if ((last != NULL) && (last != ret)) 6242 xmlFreeDocElementContent(ctxt->myDoc, last); 6243 if (ret != NULL) 6244 xmlFreeDocElementContent(ctxt->myDoc, ret); 6245 return(NULL); 6246 } 6247 NEXT; 6248 6249 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6250 if (op == NULL) { 6251 if ((last != NULL) && (last != ret)) 6252 xmlFreeDocElementContent(ctxt->myDoc, last); 6253 xmlFreeDocElementContent(ctxt->myDoc, ret); 6254 return(NULL); 6255 } 6256 if (last == NULL) { 6257 op->c1 = ret; 6258 if (ret != NULL) 6259 ret->parent = op; 6260 ret = cur = op; 6261 } else { 6262 cur->c2 = op; 6263 if (op != NULL) 6264 op->parent = cur; 6265 op->c1 = last; 6266 if (last != NULL) 6267 last->parent = op; 6268 cur =op; 6269 last = NULL; 6270 } 6271 } else if (RAW == '|') { 6272 if (type == 0) type = CUR; 6273 6274 /* 6275 * Detect "Name , Name | Name" error 6276 */ 6277 else if (type != CUR) { 6278 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6279 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6280 type); 6281 if ((last != NULL) && (last != ret)) 6282 xmlFreeDocElementContent(ctxt->myDoc, last); 6283 if (ret != NULL) 6284 xmlFreeDocElementContent(ctxt->myDoc, ret); 6285 return(NULL); 6286 } 6287 NEXT; 6288 6289 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6290 if (op == NULL) { 6291 if ((last != NULL) && (last != ret)) 6292 xmlFreeDocElementContent(ctxt->myDoc, last); 6293 if (ret != NULL) 6294 xmlFreeDocElementContent(ctxt->myDoc, ret); 6295 return(NULL); 6296 } 6297 if (last == NULL) { 6298 op->c1 = ret; 6299 if (ret != NULL) 6300 ret->parent = op; 6301 ret = cur = op; 6302 } else { 6303 cur->c2 = op; 
6304 if (op != NULL) 6305 op->parent = cur; 6306 op->c1 = last; 6307 if (last != NULL) 6308 last->parent = op; 6309 cur =op; 6310 last = NULL; 6311 } 6312 } else { 6313 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6314 if ((last != NULL) && (last != ret)) 6315 xmlFreeDocElementContent(ctxt->myDoc, last); 6316 if (ret != NULL) 6317 xmlFreeDocElementContent(ctxt->myDoc, ret); 6318 return(NULL); 6319 } 6320 GROW; 6321 SKIP_BLANKS; 6322 GROW; 6323 if (RAW == '(') { 6324 int inputid = ctxt->input->id; 6325 /* Recurse on second child */ 6326 NEXT; 6327 SKIP_BLANKS; 6328 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6329 depth + 1); 6330 SKIP_BLANKS; 6331 } else { 6332 elem = xmlParseName(ctxt); 6333 if (elem == NULL) { 6334 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6335 if (ret != NULL) 6336 xmlFreeDocElementContent(ctxt->myDoc, ret); 6337 return(NULL); 6338 } 6339 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6340 if (last == NULL) { 6341 if (ret != NULL) 6342 xmlFreeDocElementContent(ctxt->myDoc, ret); 6343 return(NULL); 6344 } 6345 if (RAW == '?') { 6346 last->ocur = XML_ELEMENT_CONTENT_OPT; 6347 NEXT; 6348 } else if (RAW == '*') { 6349 last->ocur = XML_ELEMENT_CONTENT_MULT; 6350 NEXT; 6351 } else if (RAW == '+') { 6352 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6353 NEXT; 6354 } else { 6355 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6356 } 6357 } 6358 SKIP_BLANKS; 6359 GROW; 6360 } 6361 if ((cur != NULL) && (last != NULL)) { 6362 cur->c2 = last; 6363 if (last != NULL) 6364 last->parent = cur; 6365 } 6366 if (ctxt->input->id != inputchk) { 6367 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6368 "Element content declaration doesn't start and stop in" 6369 " the same entity\n"); 6370 } 6371 NEXT; 6372 if (RAW == '?') { 6373 if (ret != NULL) { 6374 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6375 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6376 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6377 else 6378 
ret->ocur = XML_ELEMENT_CONTENT_OPT; 6379 } 6380 NEXT; 6381 } else if (RAW == '*') { 6382 if (ret != NULL) { 6383 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6384 cur = ret; 6385 /* 6386 * Some normalization: 6387 * (a | b* | c?)* == (a | b | c)* 6388 */ 6389 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6390 if ((cur->c1 != NULL) && 6391 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6392 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6393 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6394 if ((cur->c2 != NULL) && 6395 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6396 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6397 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6398 cur = cur->c2; 6399 } 6400 } 6401 NEXT; 6402 } else if (RAW == '+') { 6403 if (ret != NULL) { 6404 int found = 0; 6405 6406 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6407 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6408 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6409 else 6410 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6411 /* 6412 * Some normalization: 6413 * (a | b*)+ == (a | b)* 6414 * (a | b?)+ == (a | b)* 6415 */ 6416 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6417 if ((cur->c1 != NULL) && 6418 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6419 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6420 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6421 found = 1; 6422 } 6423 if ((cur->c2 != NULL) && 6424 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6425 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6426 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6427 found = 1; 6428 } 6429 cur = cur->c2; 6430 } 6431 if (found) 6432 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6433 } 6434 NEXT; 6435 } 6436 return(ret); 6437 } 6438 6439 /** 6440 * xmlParseElementChildrenContentDecl: 6441 * @ctxt: an XML parser context 6442 * @inputchk: the input used for the current entity, needed for boundary checks 6443 * 6444 * parse the declaration for a Mixed Element content 6445 * The leading '(' and spaces have been 
skipped in xmlParseElementContentDecl 6446 * 6447 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6448 * 6449 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6450 * 6451 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6452 * 6453 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6454 * 6455 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6456 * TODO Parameter-entity replacement text must be properly nested 6457 * with parenthesized groups. That is to say, if either of the 6458 * opening or closing parentheses in a choice, seq, or Mixed 6459 * construct is contained in the replacement text for a parameter 6460 * entity, both must be contained in the same replacement text. For 6461 * interoperability, if a parameter-entity reference appears in a 6462 * choice, seq, or Mixed construct, its replacement text should not 6463 * be empty, and neither the first nor last non-blank character of 6464 * the replacement text should be a connector (| or ,). 6465 * 6466 * Returns the tree of xmlElementContentPtr describing the element 6467 * hierarchy. 6468 */ 6469 xmlElementContentPtr 6470 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6471 /* stub left for API/ABI compat */ 6472 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6473 } 6474 6475 /** 6476 * xmlParseElementContentDecl: 6477 * @ctxt: an XML parser context 6478 * @name: the name of the element being defined. 
6479 * @result: the Element Content pointer will be stored here if any 6480 * 6481 * parse the declaration for an Element content either Mixed or Children, 6482 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6483 * 6484 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6485 * 6486 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6487 */ 6488 6489 int 6490 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6491 xmlElementContentPtr *result) { 6492 6493 xmlElementContentPtr tree = NULL; 6494 int inputid = ctxt->input->id; 6495 int res; 6496 6497 *result = NULL; 6498 6499 if (RAW != '(') { 6500 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6501 "xmlParseElementContentDecl : %s '(' expected\n", name); 6502 return(-1); 6503 } 6504 NEXT; 6505 GROW; 6506 if (ctxt->instate == XML_PARSER_EOF) 6507 return(-1); 6508 SKIP_BLANKS; 6509 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6510 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6511 res = XML_ELEMENT_TYPE_MIXED; 6512 } else { 6513 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6514 res = XML_ELEMENT_TYPE_ELEMENT; 6515 } 6516 SKIP_BLANKS; 6517 *result = tree; 6518 return(res); 6519 } 6520 6521 /** 6522 * xmlParseElementDecl: 6523 * @ctxt: an XML parser context 6524 * 6525 * parse an Element declaration. 6526 * 6527 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6528 * 6529 * [ VC: Unique Element Type Declaration ] 6530 * No element type may be declared more than once 6531 * 6532 * Returns the type of the element, or -1 in case of error 6533 */ 6534 int 6535 xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6536 const xmlChar *name; 6537 int ret = -1; 6538 xmlElementContentPtr content = NULL; 6539 6540 /* GROW; done in the caller */ 6541 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6542 int inputid = ctxt->input->id; 6543 6544 SKIP(9); 6545 if (SKIP_BLANKS == 0) { 6546 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6547 "Space required after 'ELEMENT'\n"); 6548 return(-1); 6549 } 6550 name = xmlParseName(ctxt); 6551 if (name == NULL) { 6552 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6553 "xmlParseElementDecl: no name for Element\n"); 6554 return(-1); 6555 } 6556 if (SKIP_BLANKS == 0) { 6557 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6558 "Space required after the element name\n"); 6559 } 6560 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6561 SKIP(5); 6562 /* 6563 * Element must always be empty. 6564 */ 6565 ret = XML_ELEMENT_TYPE_EMPTY; 6566 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6567 (NXT(2) == 'Y')) { 6568 SKIP(3); 6569 /* 6570 * Element is a generic container. 6571 */ 6572 ret = XML_ELEMENT_TYPE_ANY; 6573 } else if (RAW == '(') { 6574 ret = xmlParseElementContentDecl(ctxt, name, &content); 6575 } else { 6576 /* 6577 * [ WFC: PEs in Internal Subset ] error handling. 
6578 */ 6579 if ((RAW == '%') && (ctxt->external == 0) && 6580 (ctxt->inputNr == 1)) { 6581 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6582 "PEReference: forbidden within markup decl in internal subset\n"); 6583 } else { 6584 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6585 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6586 } 6587 return(-1); 6588 } 6589 6590 SKIP_BLANKS; 6591 6592 if (RAW != '>') { 6593 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6594 if (content != NULL) { 6595 xmlFreeDocElementContent(ctxt->myDoc, content); 6596 } 6597 } else { 6598 if (inputid != ctxt->input->id) { 6599 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6600 "Element declaration doesn't start and stop in" 6601 " the same entity\n"); 6602 } 6603 6604 NEXT; 6605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6606 (ctxt->sax->elementDecl != NULL)) { 6607 if (content != NULL) 6608 content->parent = NULL; 6609 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6610 content); 6611 if ((content != NULL) && (content->parent == NULL)) { 6612 /* 6613 * this is a trick: if xmlAddElementDecl is called, 6614 * instead of copying the full tree it is plugged directly 6615 * if called from the parser. Avoid duplicating the 6616 * interfaces or change the API/ABI 6617 */ 6618 xmlFreeDocElementContent(ctxt->myDoc, content); 6619 } 6620 } else if (content != NULL) { 6621 xmlFreeDocElementContent(ctxt->myDoc, content); 6622 } 6623 } 6624 } 6625 return(ret); 6626 } 6627 6628 /** 6629 * xmlParseConditionalSections 6630 * @ctxt: an XML parser context 6631 * 6632 * [61] conditionalSect ::= includeSect | ignoreSect 6633 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6634 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6635 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6636 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6637 */ 6638 6639 static void 6640 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6641 int *inputIds = NULL; 6642 size_t inputIdsSize = 0; 6643 size_t depth = 0; 6644 6645 while (ctxt->instate != XML_PARSER_EOF) { 6646 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6647 int id = ctxt->input->id; 6648 6649 SKIP(3); 6650 SKIP_BLANKS; 6651 6652 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6653 SKIP(7); 6654 SKIP_BLANKS; 6655 if (RAW != '[') { 6656 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6657 xmlHaltParser(ctxt); 6658 goto error; 6659 } 6660 if (ctxt->input->id != id) { 6661 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6662 "All markup of the conditional section is" 6663 " not in the same entity\n"); 6664 } 6665 NEXT; 6666 6667 if (inputIdsSize <= depth) { 6668 int *tmp; 6669 6670 inputIdsSize = (inputIdsSize == 0 ? 
4 : inputIdsSize * 2); 6671 tmp = (int *) xmlRealloc(inputIds, 6672 inputIdsSize * sizeof(int)); 6673 if (tmp == NULL) { 6674 xmlErrMemory(ctxt, NULL); 6675 goto error; 6676 } 6677 inputIds = tmp; 6678 } 6679 inputIds[depth] = id; 6680 depth++; 6681 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6682 int state; 6683 xmlParserInputState instate; 6684 size_t ignoreDepth = 0; 6685 6686 SKIP(6); 6687 SKIP_BLANKS; 6688 if (RAW != '[') { 6689 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6690 xmlHaltParser(ctxt); 6691 goto error; 6692 } 6693 if (ctxt->input->id != id) { 6694 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6695 "All markup of the conditional section is" 6696 " not in the same entity\n"); 6697 } 6698 NEXT; 6699 6700 /* 6701 * Parse up to the end of the conditional section but disable 6702 * SAX event generating DTD building in the meantime 6703 */ 6704 state = ctxt->disableSAX; 6705 instate = ctxt->instate; 6706 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6707 ctxt->instate = XML_PARSER_IGNORE; 6708 6709 while (RAW != 0) { 6710 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6711 SKIP(3); 6712 ignoreDepth++; 6713 /* Check for integer overflow */ 6714 if (ignoreDepth == 0) { 6715 xmlErrMemory(ctxt, NULL); 6716 goto error; 6717 } 6718 } else if ((RAW == ']') && (NXT(1) == ']') && 6719 (NXT(2) == '>')) { 6720 if (ignoreDepth == 0) 6721 break; 6722 SKIP(3); 6723 ignoreDepth--; 6724 } else { 6725 NEXT; 6726 } 6727 } 6728 6729 ctxt->disableSAX = state; 6730 ctxt->instate = instate; 6731 6732 if (RAW == 0) { 6733 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6734 goto error; 6735 } 6736 if (ctxt->input->id != id) { 6737 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6738 "All markup of the conditional section is" 6739 " not in the same entity\n"); 6740 } 6741 SKIP(3); 6742 } else { 6743 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6744 xmlHaltParser(ctxt); 6745 goto error; 6746 } 6747 } else if ((depth > 0) && 6748 
(RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6749 depth--; 6750 if (ctxt->input->id != inputIds[depth]) { 6751 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6752 "All markup of the conditional section is not" 6753 " in the same entity\n"); 6754 } 6755 SKIP(3); 6756 } else { 6757 const xmlChar *check = CUR_PTR; 6758 unsigned int cons = ctxt->input->consumed; 6759 6760 xmlParseMarkupDecl(ctxt); 6761 6762 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6763 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6764 xmlHaltParser(ctxt); 6765 goto error; 6766 } 6767 } 6768 6769 if (depth == 0) 6770 break; 6771 6772 SKIP_BLANKS; 6773 GROW; 6774 } 6775 6776 error: 6777 xmlFree(inputIds); 6778 } 6779 6780 /** 6781 * xmlParseMarkupDecl: 6782 * @ctxt: an XML parser context 6783 * 6784 * parse Markup declarations 6785 * 6786 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6787 * NotationDecl | PI | Comment 6788 * 6789 * [ VC: Proper Declaration/PE Nesting ] 6790 * Parameter-entity replacement text must be properly nested with 6791 * markup declarations. That is to say, if either the first character 6792 * or the last character of a markup declaration (markupdecl above) is 6793 * contained in the replacement text for a parameter-entity reference, 6794 * both must be contained in the same replacement text. 6795 * 6796 * [ WFC: PEs in Internal Subset ] 6797 * In the internal DTD subset, parameter-entity references can occur 6798 * only where markup declarations can occur, not within markup declarations. 6799 * (This does not apply to references that occur in external parameter 6800 * entities or to the external subset.) 
6801 */ 6802 void 6803 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6804 GROW; 6805 if (CUR == '<') { 6806 if (NXT(1) == '!') { 6807 switch (NXT(2)) { 6808 case 'E': 6809 if (NXT(3) == 'L') 6810 xmlParseElementDecl(ctxt); 6811 else if (NXT(3) == 'N') 6812 xmlParseEntityDecl(ctxt); 6813 break; 6814 case 'A': 6815 xmlParseAttributeListDecl(ctxt); 6816 break; 6817 case 'N': 6818 xmlParseNotationDecl(ctxt); 6819 break; 6820 case '-': 6821 xmlParseComment(ctxt); 6822 break; 6823 default: 6824 /* there is an error but it will be detected later */ 6825 break; 6826 } 6827 } else if (NXT(1) == '?') { 6828 xmlParsePI(ctxt); 6829 } 6830 } 6831 6832 /* 6833 * detect requirement to exit there and act accordingly 6834 * and avoid having instate overridden later on 6835 */ 6836 if (ctxt->instate == XML_PARSER_EOF) 6837 return; 6838 6839 ctxt->instate = XML_PARSER_DTD; 6840 } 6841 6842 /** 6843 * xmlParseTextDecl: 6844 * @ctxt: an XML parser context 6845 * 6846 * parse an XML declaration header for external entities 6847 * 6848 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6849 */ 6850 6851 void 6852 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6853 xmlChar *version; 6854 const xmlChar *encoding; 6855 6856 /* 6857 * We know that '<?xml' is here. 6858 */ 6859 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6860 SKIP(5); 6861 } else { 6862 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6863 return; 6864 } 6865 6866 if (SKIP_BLANKS == 0) { 6867 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6868 "Space needed after '<?xml'\n"); 6869 } 6870 6871 /* 6872 * We may have the VersionInfo here. 
6873 */ 6874 version = xmlParseVersionInfo(ctxt); 6875 if (version == NULL) 6876 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6877 else { 6878 if (SKIP_BLANKS == 0) { 6879 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6880 "Space needed here\n"); 6881 } 6882 } 6883 ctxt->input->version = version; 6884 6885 /* 6886 * We must have the encoding declaration 6887 */ 6888 encoding = xmlParseEncodingDecl(ctxt); 6889 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6890 /* 6891 * The XML REC instructs us to stop parsing right here 6892 */ 6893 return; 6894 } 6895 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6896 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6897 "Missing encoding in text declaration\n"); 6898 } 6899 6900 SKIP_BLANKS; 6901 if ((RAW == '?') && (NXT(1) == '>')) { 6902 SKIP(2); 6903 } else if (RAW == '>') { 6904 /* Deprecated old WD ... */ 6905 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6906 NEXT; 6907 } else { 6908 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6909 MOVETO_ENDTAG(CUR_PTR); 6910 NEXT; 6911 } 6912 } 6913 6914 /** 6915 * xmlParseExternalSubset: 6916 * @ctxt: an XML parser context 6917 * @ExternalID: the external identifier 6918 * @SystemID: the system identifier (or URL) 6919 * 6920 * parse Markup declarations from an external subset 6921 * 6922 * [30] extSubset ::= textDecl? 
extSubsetDecl 6923 * 6924 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6925 */ 6926 void 6927 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6928 const xmlChar *SystemID) { 6929 xmlDetectSAX2(ctxt); 6930 GROW; 6931 6932 if ((ctxt->encoding == NULL) && 6933 (ctxt->input->end - ctxt->input->cur >= 4)) { 6934 xmlChar start[4]; 6935 xmlCharEncoding enc; 6936 6937 start[0] = RAW; 6938 start[1] = NXT(1); 6939 start[2] = NXT(2); 6940 start[3] = NXT(3); 6941 enc = xmlDetectCharEncoding(start, 4); 6942 if (enc != XML_CHAR_ENCODING_NONE) 6943 xmlSwitchEncoding(ctxt, enc); 6944 } 6945 6946 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6947 xmlParseTextDecl(ctxt); 6948 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6949 /* 6950 * The XML REC instructs us to stop parsing right here 6951 */ 6952 xmlHaltParser(ctxt); 6953 return; 6954 } 6955 } 6956 if (ctxt->myDoc == NULL) { 6957 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6958 if (ctxt->myDoc == NULL) { 6959 xmlErrMemory(ctxt, "New Doc failed"); 6960 return; 6961 } 6962 ctxt->myDoc->properties = XML_DOC_INTERNAL; 6963 } 6964 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6965 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6966 6967 ctxt->instate = XML_PARSER_DTD; 6968 ctxt->external = 1; 6969 SKIP_BLANKS; 6970 while (((RAW == '<') && (NXT(1) == '?')) || 6971 ((RAW == '<') && (NXT(1) == '!')) || 6972 (RAW == '%')) { 6973 const xmlChar *check = CUR_PTR; 6974 unsigned int cons = ctxt->input->consumed; 6975 6976 GROW; 6977 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6978 xmlParseConditionalSections(ctxt); 6979 } else 6980 xmlParseMarkupDecl(ctxt); 6981 SKIP_BLANKS; 6982 6983 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6984 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6985 break; 6986 } 6987 } 6988 6989 if (RAW != 0) { 6990 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6991 } 6992 6993 } 6994 
6995 /** 6996 * xmlParseReference: 6997 * @ctxt: an XML parser context 6998 * 6999 * parse and handle entity references in content, depending on the SAX 7000 * interface, this may end-up in a call to character() if this is a 7001 * CharRef, a predefined entity, if there is no reference() callback. 7002 * or if the parser was asked to switch to that mode. 7003 * 7004 * [67] Reference ::= EntityRef | CharRef 7005 */ 7006 void 7007 xmlParseReference(xmlParserCtxtPtr ctxt) { 7008 xmlEntityPtr ent; 7009 xmlChar *val; 7010 int was_checked; 7011 xmlNodePtr list = NULL; 7012 xmlParserErrors ret = XML_ERR_OK; 7013 7014 7015 if (RAW != '&') 7016 return; 7017 7018 /* 7019 * Simple case of a CharRef 7020 */ 7021 if (NXT(1) == '#') { 7022 int i = 0; 7023 xmlChar out[16]; 7024 int hex = NXT(2); 7025 int value = xmlParseCharRef(ctxt); 7026 7027 if (value == 0) 7028 return; 7029 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7030 /* 7031 * So we are using non-UTF-8 buffers 7032 * Check that the char fit on 8bits, if not 7033 * generate a CharRef. 
7034 */ 7035 if (value <= 0xFF) { 7036 out[0] = value; 7037 out[1] = 0; 7038 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7039 (!ctxt->disableSAX)) 7040 ctxt->sax->characters(ctxt->userData, out, 1); 7041 } else { 7042 if ((hex == 'x') || (hex == 'X')) 7043 snprintf((char *)out, sizeof(out), "#x%X", value); 7044 else 7045 snprintf((char *)out, sizeof(out), "#%d", value); 7046 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7047 (!ctxt->disableSAX)) 7048 ctxt->sax->reference(ctxt->userData, out); 7049 } 7050 } else { 7051 /* 7052 * Just encode the value in UTF-8 7053 */ 7054 COPY_BUF(0 ,out, i, value); 7055 out[i] = 0; 7056 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7057 (!ctxt->disableSAX)) 7058 ctxt->sax->characters(ctxt->userData, out, i); 7059 } 7060 return; 7061 } 7062 7063 /* 7064 * We are seeing an entity reference 7065 */ 7066 ent = xmlParseEntityRef(ctxt); 7067 if (ent == NULL) return; 7068 if (!ctxt->wellFormed) 7069 return; 7070 was_checked = ent->checked; 7071 7072 /* special case of predefined entities */ 7073 if ((ent->name == NULL) || 7074 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7075 val = ent->content; 7076 if (val == NULL) return; 7077 /* 7078 * inline the entity. 7079 */ 7080 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7081 (!ctxt->disableSAX)) 7082 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7083 return; 7084 } 7085 7086 /* 7087 * The first reference to the entity trigger a parsing phase 7088 * where the ent->children is filled with the result from 7089 * the parsing. 7090 * Note: external parsed entities will not be loaded, it is not 7091 * required for a non-validating parser, unless the parsing option 7092 * of validating, or substituting entities were given. Doing so is 7093 * far more secure as the parser will only process data coming from 7094 * the document entity by default. 
7095 */ 7096 if (((ent->checked == 0) || 7097 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) && 7098 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7099 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7100 unsigned long oldnbent = ctxt->nbentities, diff; 7101 7102 /* 7103 * This is a bit hackish but this seems the best 7104 * way to make sure both SAX and DOM entity support 7105 * behaves okay. 7106 */ 7107 void *user_data; 7108 if (ctxt->userData == ctxt) 7109 user_data = NULL; 7110 else 7111 user_data = ctxt->userData; 7112 7113 /* 7114 * Check that this entity is well formed 7115 * 4.3.2: An internal general parsed entity is well-formed 7116 * if its replacement text matches the production labeled 7117 * content. 7118 */ 7119 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7120 ctxt->depth++; 7121 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7122 user_data, &list); 7123 ctxt->depth--; 7124 7125 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7126 ctxt->depth++; 7127 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7128 user_data, ctxt->depth, ent->URI, 7129 ent->ExternalID, &list); 7130 ctxt->depth--; 7131 } else { 7132 ret = XML_ERR_ENTITY_PE_INTERNAL; 7133 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7134 "invalid entity type found\n", NULL); 7135 } 7136 7137 /* 7138 * Store the number of entities needing parsing for this entity 7139 * content and do checkings 7140 */ 7141 diff = ctxt->nbentities - oldnbent + 1; 7142 if (diff > INT_MAX / 2) 7143 diff = INT_MAX / 2; 7144 ent->checked = diff * 2; 7145 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7146 ent->checked |= 1; 7147 if (ret == XML_ERR_ENTITY_LOOP) { 7148 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7149 xmlFreeNodeList(list); 7150 return; 7151 } 7152 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 7153 xmlFreeNodeList(list); 7154 return; 7155 } 7156 7157 if ((ret == XML_ERR_OK) && (list != NULL)) { 7158 
if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7159 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7160 (ent->children == NULL)) { 7161 ent->children = list; 7162 if (ctxt->replaceEntities) { 7163 /* 7164 * Prune it directly in the generated document 7165 * except for single text nodes. 7166 */ 7167 if (((list->type == XML_TEXT_NODE) && 7168 (list->next == NULL)) || 7169 (ctxt->parseMode == XML_PARSE_READER)) { 7170 list->parent = (xmlNodePtr) ent; 7171 list = NULL; 7172 ent->owner = 1; 7173 } else { 7174 ent->owner = 0; 7175 while (list != NULL) { 7176 list->parent = (xmlNodePtr) ctxt->node; 7177 list->doc = ctxt->myDoc; 7178 if (list->next == NULL) 7179 ent->last = list; 7180 list = list->next; 7181 } 7182 list = ent->children; 7183 #ifdef LIBXML_LEGACY_ENABLED 7184 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7185 xmlAddEntityReference(ent, list, NULL); 7186 #endif /* LIBXML_LEGACY_ENABLED */ 7187 } 7188 } else { 7189 ent->owner = 1; 7190 while (list != NULL) { 7191 list->parent = (xmlNodePtr) ent; 7192 xmlSetTreeDoc(list, ent->doc); 7193 if (list->next == NULL) 7194 ent->last = list; 7195 list = list->next; 7196 } 7197 } 7198 } else { 7199 xmlFreeNodeList(list); 7200 list = NULL; 7201 } 7202 } else if ((ret != XML_ERR_OK) && 7203 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7204 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7205 "Entity '%s' failed to parse\n", ent->name); 7206 if (ent->content != NULL) 7207 ent->content[0] = 0; 7208 xmlParserEntityCheck(ctxt, 0, ent, 0); 7209 } else if (list != NULL) { 7210 xmlFreeNodeList(list); 7211 list = NULL; 7212 } 7213 if (ent->checked == 0) 7214 ent->checked = 2; 7215 7216 /* Prevent entity from being parsed and expanded twice (Bug 760367). 
*/ 7217 was_checked = 0; 7218 } else if (ent->checked != 1) { 7219 ctxt->nbentities += ent->checked / 2; 7220 } 7221 7222 /* 7223 * Now that the entity content has been gathered 7224 * provide it to the application, this can take different forms based 7225 * on the parsing modes. 7226 */ 7227 if (ent->children == NULL) { 7228 /* 7229 * Probably running in SAX mode and the callbacks don't 7230 * build the entity content. So unless we already went 7231 * though parsing for first checking go though the entity 7232 * content to generate callbacks associated to the entity 7233 */ 7234 if (was_checked != 0) { 7235 void *user_data; 7236 /* 7237 * This is a bit hackish but this seems the best 7238 * way to make sure both SAX and DOM entity support 7239 * behaves okay. 7240 */ 7241 if (ctxt->userData == ctxt) 7242 user_data = NULL; 7243 else 7244 user_data = ctxt->userData; 7245 7246 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7247 ctxt->depth++; 7248 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7249 ent->content, user_data, NULL); 7250 ctxt->depth--; 7251 } else if (ent->etype == 7252 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7253 ctxt->depth++; 7254 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7255 ctxt->sax, user_data, ctxt->depth, 7256 ent->URI, ent->ExternalID, NULL); 7257 ctxt->depth--; 7258 } else { 7259 ret = XML_ERR_ENTITY_PE_INTERNAL; 7260 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7261 "invalid entity type found\n", NULL); 7262 } 7263 if (ret == XML_ERR_ENTITY_LOOP) { 7264 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7265 return; 7266 } 7267 } 7268 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7269 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7270 /* 7271 * Entity reference callback comes second, it's somewhat 7272 * superfluous but a compatibility to historical behaviour 7273 */ 7274 ctxt->sax->reference(ctxt->userData, ent->name); 7275 } 7276 return; 7277 } 7278 7279 /* 7280 * If we didn't get any children for the entity 
being built 7281 */ 7282 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7283 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7284 /* 7285 * Create a node. 7286 */ 7287 ctxt->sax->reference(ctxt->userData, ent->name); 7288 return; 7289 } 7290 7291 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7292 /* 7293 * There is a problem on the handling of _private for entities 7294 * (bug 155816): Should we copy the content of the field from 7295 * the entity (possibly overwriting some value set by the user 7296 * when a copy is created), should we leave it alone, or should 7297 * we try to take care of different situations? The problem 7298 * is exacerbated by the usage of this field by the xmlReader. 7299 * To fix this bug, we look at _private on the created node 7300 * and, if it's NULL, we copy in whatever was in the entity. 7301 * If it's not NULL we leave it alone. This is somewhat of a 7302 * hack - maybe we should have further tests to determine 7303 * what to do. 7304 */ 7305 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7306 /* 7307 * Seems we are generating the DOM content, do 7308 * a simple tree copy for all references except the first 7309 * In the first occurrence list contains the replacement. 7310 */ 7311 if (((list == NULL) && (ent->owner == 0)) || 7312 (ctxt->parseMode == XML_PARSE_READER)) { 7313 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7314 7315 /* 7316 * We are copying here, make sure there is no abuse 7317 */ 7318 ctxt->sizeentcopy += ent->length + 5; 7319 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7320 return; 7321 7322 /* 7323 * when operating on a reader, the entities definitions 7324 * are always owning the entities subtree. 
7325 if (ctxt->parseMode == XML_PARSE_READER) 7326 ent->owner = 1; 7327 */ 7328 7329 cur = ent->children; 7330 while (cur != NULL) { 7331 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7332 if (nw != NULL) { 7333 if (nw->_private == NULL) 7334 nw->_private = cur->_private; 7335 if (firstChild == NULL){ 7336 firstChild = nw; 7337 } 7338 nw = xmlAddChild(ctxt->node, nw); 7339 } 7340 if (cur == ent->last) { 7341 /* 7342 * needed to detect some strange empty 7343 * node cases in the reader tests 7344 */ 7345 if ((ctxt->parseMode == XML_PARSE_READER) && 7346 (nw != NULL) && 7347 (nw->type == XML_ELEMENT_NODE) && 7348 (nw->children == NULL)) 7349 nw->extra = 1; 7350 7351 break; 7352 } 7353 cur = cur->next; 7354 } 7355 #ifdef LIBXML_LEGACY_ENABLED 7356 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7357 xmlAddEntityReference(ent, firstChild, nw); 7358 #endif /* LIBXML_LEGACY_ENABLED */ 7359 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7360 xmlNodePtr nw = NULL, cur, next, last, 7361 firstChild = NULL; 7362 7363 /* 7364 * We are copying here, make sure there is no abuse 7365 */ 7366 ctxt->sizeentcopy += ent->length + 5; 7367 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7368 return; 7369 7370 /* 7371 * Copy the entity child list and make it the new 7372 * entity child list. The goal is to make sure any 7373 * ID or REF referenced will be the one from the 7374 * document content and not the entity copy. 
7375 */ 7376 cur = ent->children; 7377 ent->children = NULL; 7378 last = ent->last; 7379 ent->last = NULL; 7380 while (cur != NULL) { 7381 next = cur->next; 7382 cur->next = NULL; 7383 cur->parent = NULL; 7384 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7385 if (nw != NULL) { 7386 if (nw->_private == NULL) 7387 nw->_private = cur->_private; 7388 if (firstChild == NULL){ 7389 firstChild = cur; 7390 } 7391 xmlAddChild((xmlNodePtr) ent, nw); 7392 xmlAddChild(ctxt->node, cur); 7393 } 7394 if (cur == last) 7395 break; 7396 cur = next; 7397 } 7398 if (ent->owner == 0) 7399 ent->owner = 1; 7400 #ifdef LIBXML_LEGACY_ENABLED 7401 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7402 xmlAddEntityReference(ent, firstChild, nw); 7403 #endif /* LIBXML_LEGACY_ENABLED */ 7404 } else { 7405 const xmlChar *nbktext; 7406 7407 /* 7408 * the name change is to avoid coalescing of the 7409 * node with a possible previous text one which 7410 * would make ent->children a dangling pointer 7411 */ 7412 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7413 -1); 7414 if (ent->children->type == XML_TEXT_NODE) 7415 ent->children->name = nbktext; 7416 if ((ent->last != ent->children) && 7417 (ent->last->type == XML_TEXT_NODE)) 7418 ent->last->name = nbktext; 7419 xmlAddChildList(ctxt->node, ent->children); 7420 } 7421 7422 /* 7423 * This is to avoid a nasty side effect, see 7424 * characters() in SAX.c 7425 */ 7426 ctxt->nodemem = 0; 7427 ctxt->nodelen = 0; 7428 return; 7429 } 7430 } 7431 } 7432 7433 /** 7434 * xmlParseEntityRef: 7435 * @ctxt: an XML parser context 7436 * 7437 * parse ENTITY references declarations 7438 * 7439 * [68] EntityRef ::= '&' Name ';' 7440 * 7441 * [ WFC: Entity Declared ] 7442 * In a document without any DTD, a document with only an internal DTD 7443 * subset which contains no parameter entity references, or a document 7444 * with "standalone='yes'", the Name given in the entity reference 7445 * must match that in an entity declaration, except that 
well-formed 7446 * documents need not declare any of the following entities: amp, lt, 7447 * gt, apos, quot. The declaration of a parameter entity must precede 7448 * any reference to it. Similarly, the declaration of a general entity 7449 * must precede any reference to it which appears in a default value in an 7450 * attribute-list declaration. Note that if entities are declared in the 7451 * external subset or in external parameter entities, a non-validating 7452 * processor is not obligated to read and process their declarations; 7453 * for such documents, the rule that an entity must be declared is a 7454 * well-formedness constraint only if standalone='yes'. 7455 * 7456 * [ WFC: Parsed Entity ] 7457 * An entity reference must not contain the name of an unparsed entity 7458 * 7459 * Returns the xmlEntityPtr if found, or NULL otherwise. 7460 */ 7461 xmlEntityPtr 7462 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7463 const xmlChar *name; 7464 xmlEntityPtr ent = NULL; 7465 7466 GROW; 7467 if (ctxt->instate == XML_PARSER_EOF) 7468 return(NULL); 7469 7470 if (RAW != '&') 7471 return(NULL); 7472 NEXT; 7473 name = xmlParseName(ctxt); 7474 if (name == NULL) { 7475 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7476 "xmlParseEntityRef: no name\n"); 7477 return(NULL); 7478 } 7479 if (RAW != ';') { 7480 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7481 return(NULL); 7482 } 7483 NEXT; 7484 7485 /* 7486 * Predefined entities override any extra definition 7487 */ 7488 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7489 ent = xmlGetPredefinedEntity(name); 7490 if (ent != NULL) 7491 return(ent); 7492 } 7493 7494 /* 7495 * Increase the number of entity references parsed 7496 */ 7497 ctxt->nbentities++; 7498 7499 /* 7500 * Ask first SAX for entity resolution, otherwise try the 7501 * entities which may have stored in the parser context. 
7502 */ 7503 if (ctxt->sax != NULL) { 7504 if (ctxt->sax->getEntity != NULL) 7505 ent = ctxt->sax->getEntity(ctxt->userData, name); 7506 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7507 (ctxt->options & XML_PARSE_OLDSAX)) 7508 ent = xmlGetPredefinedEntity(name); 7509 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7510 (ctxt->userData==ctxt)) { 7511 ent = xmlSAX2GetEntity(ctxt, name); 7512 } 7513 } 7514 if (ctxt->instate == XML_PARSER_EOF) 7515 return(NULL); 7516 /* 7517 * [ WFC: Entity Declared ] 7518 * In a document without any DTD, a document with only an 7519 * internal DTD subset which contains no parameter entity 7520 * references, or a document with "standalone='yes'", the 7521 * Name given in the entity reference must match that in an 7522 * entity declaration, except that well-formed documents 7523 * need not declare any of the following entities: amp, lt, 7524 * gt, apos, quot. 7525 * The declaration of a parameter entity must precede any 7526 * reference to it. 7527 * Similarly, the declaration of a general entity must 7528 * precede any reference to it which appears in a default 7529 * value in an attribute-list declaration. Note that if 7530 * entities are declared in the external subset or in 7531 * external parameter entities, a non-validating processor 7532 * is not obligated to read and process their declarations; 7533 * for such documents, the rule that an entity must be 7534 * declared is a well-formedness constraint only if 7535 * standalone='yes'. 
7536 */ 7537 if (ent == NULL) { 7538 if ((ctxt->standalone == 1) || 7539 ((ctxt->hasExternalSubset == 0) && 7540 (ctxt->hasPErefs == 0))) { 7541 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7542 "Entity '%s' not defined\n", name); 7543 } else { 7544 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7545 "Entity '%s' not defined\n", name); 7546 if ((ctxt->inSubset == 0) && 7547 (ctxt->sax != NULL) && 7548 (ctxt->sax->reference != NULL)) { 7549 ctxt->sax->reference(ctxt->userData, name); 7550 } 7551 } 7552 xmlParserEntityCheck(ctxt, 0, ent, 0); 7553 ctxt->valid = 0; 7554 } 7555 7556 /* 7557 * [ WFC: Parsed Entity ] 7558 * An entity reference must not contain the name of an 7559 * unparsed entity 7560 */ 7561 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7562 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7563 "Entity reference to unparsed entity %s\n", name); 7564 } 7565 7566 /* 7567 * [ WFC: No External Entity References ] 7568 * Attribute values cannot contain direct or indirect 7569 * entity references to external entities. 7570 */ 7571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7572 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7573 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7574 "Attribute references external entity '%s'\n", name); 7575 } 7576 /* 7577 * [ WFC: No < in Attribute Values ] 7578 * The replacement text of any entity referred to directly or 7579 * indirectly in an attribute value (other than "<") must 7580 * not contain a <. 7581 */ 7582 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7583 (ent != NULL) && 7584 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7585 if (((ent->checked & 1) || (ent->checked == 0)) && 7586 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7587 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7588 "'<' in entity '%s' is not allowed in attributes values\n", name); 7589 } 7590 } 7591 7592 /* 7593 * Internal check, no parameter entities here ... 
7594 */ 7595 else { 7596 switch (ent->etype) { 7597 case XML_INTERNAL_PARAMETER_ENTITY: 7598 case XML_EXTERNAL_PARAMETER_ENTITY: 7599 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7600 "Attempt to reference the parameter entity '%s'\n", 7601 name); 7602 break; 7603 default: 7604 break; 7605 } 7606 } 7607 7608 /* 7609 * [ WFC: No Recursion ] 7610 * A parsed entity must not contain a recursive reference 7611 * to itself, either directly or indirectly. 7612 * Done somewhere else 7613 */ 7614 return(ent); 7615 } 7616 7617 /** 7618 * xmlParseStringEntityRef: 7619 * @ctxt: an XML parser context 7620 * @str: a pointer to an index in the string 7621 * 7622 * parse ENTITY references declarations, but this version parses it from 7623 * a string value. 7624 * 7625 * [68] EntityRef ::= '&' Name ';' 7626 * 7627 * [ WFC: Entity Declared ] 7628 * In a document without any DTD, a document with only an internal DTD 7629 * subset which contains no parameter entity references, or a document 7630 * with "standalone='yes'", the Name given in the entity reference 7631 * must match that in an entity declaration, except that well-formed 7632 * documents need not declare any of the following entities: amp, lt, 7633 * gt, apos, quot. The declaration of a parameter entity must precede 7634 * any reference to it. Similarly, the declaration of a general entity 7635 * must precede any reference to it which appears in a default value in an 7636 * attribute-list declaration. Note that if entities are declared in the 7637 * external subset or in external parameter entities, a non-validating 7638 * processor is not obligated to read and process their declarations; 7639 * for such documents, the rule that an entity must be declared is a 7640 * well-formedness constraint only if standalone='yes'. 7641 * 7642 * [ WFC: Parsed Entity ] 7643 * An entity reference must not contain the name of an unparsed entity 7644 * 7645 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7646 * is updated to the current location in the string. 7647 */ 7648 static xmlEntityPtr 7649 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7650 xmlChar *name; 7651 const xmlChar *ptr; 7652 xmlChar cur; 7653 xmlEntityPtr ent = NULL; 7654 7655 if ((str == NULL) || (*str == NULL)) 7656 return(NULL); 7657 ptr = *str; 7658 cur = *ptr; 7659 if (cur != '&') 7660 return(NULL); 7661 7662 ptr++; 7663 name = xmlParseStringName(ctxt, &ptr); 7664 if (name == NULL) { 7665 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7666 "xmlParseStringEntityRef: no name\n"); 7667 *str = ptr; 7668 return(NULL); 7669 } 7670 if (*ptr != ';') { 7671 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7672 xmlFree(name); 7673 *str = ptr; 7674 return(NULL); 7675 } 7676 ptr++; 7677 7678 7679 /* 7680 * Predefined entities override any extra definition 7681 */ 7682 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7683 ent = xmlGetPredefinedEntity(name); 7684 if (ent != NULL) { 7685 xmlFree(name); 7686 *str = ptr; 7687 return(ent); 7688 } 7689 } 7690 7691 /* 7692 * Increase the number of entity references parsed 7693 */ 7694 ctxt->nbentities++; 7695 7696 /* 7697 * Ask first SAX for entity resolution, otherwise try the 7698 * entities which may have stored in the parser context. 
7699 */ 7700 if (ctxt->sax != NULL) { 7701 if (ctxt->sax->getEntity != NULL) 7702 ent = ctxt->sax->getEntity(ctxt->userData, name); 7703 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7704 ent = xmlGetPredefinedEntity(name); 7705 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7706 ent = xmlSAX2GetEntity(ctxt, name); 7707 } 7708 } 7709 if (ctxt->instate == XML_PARSER_EOF) { 7710 xmlFree(name); 7711 return(NULL); 7712 } 7713 7714 /* 7715 * [ WFC: Entity Declared ] 7716 * In a document without any DTD, a document with only an 7717 * internal DTD subset which contains no parameter entity 7718 * references, or a document with "standalone='yes'", the 7719 * Name given in the entity reference must match that in an 7720 * entity declaration, except that well-formed documents 7721 * need not declare any of the following entities: amp, lt, 7722 * gt, apos, quot. 7723 * The declaration of a parameter entity must precede any 7724 * reference to it. 7725 * Similarly, the declaration of a general entity must 7726 * precede any reference to it which appears in a default 7727 * value in an attribute-list declaration. Note that if 7728 * entities are declared in the external subset or in 7729 * external parameter entities, a non-validating processor 7730 * is not obligated to read and process their declarations; 7731 * for such documents, the rule that an entity must be 7732 * declared is a well-formedness constraint only if 7733 * standalone='yes'. 7734 */ 7735 if (ent == NULL) { 7736 if ((ctxt->standalone == 1) || 7737 ((ctxt->hasExternalSubset == 0) && 7738 (ctxt->hasPErefs == 0))) { 7739 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7740 "Entity '%s' not defined\n", name); 7741 } else { 7742 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7743 "Entity '%s' not defined\n", 7744 name); 7745 } 7746 xmlParserEntityCheck(ctxt, 0, ent, 0); 7747 /* TODO ? 
check regressions ctxt->valid = 0; */ 7748 } 7749 7750 /* 7751 * [ WFC: Parsed Entity ] 7752 * An entity reference must not contain the name of an 7753 * unparsed entity 7754 */ 7755 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7756 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7757 "Entity reference to unparsed entity %s\n", name); 7758 } 7759 7760 /* 7761 * [ WFC: No External Entity References ] 7762 * Attribute values cannot contain direct or indirect 7763 * entity references to external entities. 7764 */ 7765 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7766 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7767 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7768 "Attribute references external entity '%s'\n", name); 7769 } 7770 /* 7771 * [ WFC: No < in Attribute Values ] 7772 * The replacement text of any entity referred to directly or 7773 * indirectly in an attribute value (other than "<") must 7774 * not contain a <. 7775 */ 7776 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7777 (ent != NULL) && (ent->content != NULL) && 7778 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7779 (xmlStrchr(ent->content, '<'))) { 7780 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7781 "'<' in entity '%s' is not allowed in attributes values\n", 7782 name); 7783 } 7784 7785 /* 7786 * Internal check, no parameter entities here ... 7787 */ 7788 else { 7789 switch (ent->etype) { 7790 case XML_INTERNAL_PARAMETER_ENTITY: 7791 case XML_EXTERNAL_PARAMETER_ENTITY: 7792 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7793 "Attempt to reference the parameter entity '%s'\n", 7794 name); 7795 break; 7796 default: 7797 break; 7798 } 7799 } 7800 7801 /* 7802 * [ WFC: No Recursion ] 7803 * A parsed entity must not contain a recursive reference 7804 * to itself, either directly or indirectly. 
7805 * Done somewhere else 7806 */ 7807 7808 xmlFree(name); 7809 *str = ptr; 7810 return(ent); 7811 } 7812 7813 /** 7814 * xmlParsePEReference: 7815 * @ctxt: an XML parser context 7816 * 7817 * parse PEReference declarations 7818 * The entity content is handled directly by pushing it's content as 7819 * a new input stream. 7820 * 7821 * [69] PEReference ::= '%' Name ';' 7822 * 7823 * [ WFC: No Recursion ] 7824 * A parsed entity must not contain a recursive 7825 * reference to itself, either directly or indirectly. 7826 * 7827 * [ WFC: Entity Declared ] 7828 * In a document without any DTD, a document with only an internal DTD 7829 * subset which contains no parameter entity references, or a document 7830 * with "standalone='yes'", ... ... The declaration of a parameter 7831 * entity must precede any reference to it... 7832 * 7833 * [ VC: Entity Declared ] 7834 * In a document with an external subset or external parameter entities 7835 * with "standalone='no'", ... ... The declaration of a parameter entity 7836 * must precede any reference to it... 7837 * 7838 * [ WFC: In DTD ] 7839 * Parameter-entity references may only appear in the DTD. 7840 * NOTE: misleading but this is handled. 
7841 */ 7842 void 7843 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7844 { 7845 const xmlChar *name; 7846 xmlEntityPtr entity = NULL; 7847 xmlParserInputPtr input; 7848 7849 if (RAW != '%') 7850 return; 7851 NEXT; 7852 name = xmlParseName(ctxt); 7853 if (name == NULL) { 7854 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n"); 7855 return; 7856 } 7857 if (xmlParserDebugEntities) 7858 xmlGenericError(xmlGenericErrorContext, 7859 "PEReference: %s\n", name); 7860 if (RAW != ';') { 7861 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 7862 return; 7863 } 7864 7865 NEXT; 7866 7867 /* 7868 * Increase the number of entity references parsed 7869 */ 7870 ctxt->nbentities++; 7871 7872 /* 7873 * Request the entity from SAX 7874 */ 7875 if ((ctxt->sax != NULL) && 7876 (ctxt->sax->getParameterEntity != NULL)) 7877 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7878 if (ctxt->instate == XML_PARSER_EOF) 7879 return; 7880 if (entity == NULL) { 7881 /* 7882 * [ WFC: Entity Declared ] 7883 * In a document without any DTD, a document with only an 7884 * internal DTD subset which contains no parameter entity 7885 * references, or a document with "standalone='yes'", ... 7886 * ... The declaration of a parameter entity must precede 7887 * any reference to it... 7888 */ 7889 if ((ctxt->standalone == 1) || 7890 ((ctxt->hasExternalSubset == 0) && 7891 (ctxt->hasPErefs == 0))) { 7892 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7893 "PEReference: %%%s; not found\n", 7894 name); 7895 } else { 7896 /* 7897 * [ VC: Entity Declared ] 7898 * In a document with an external subset or external 7899 * parameter entities with "standalone='no'", ... 7900 * ... The declaration of a parameter entity must 7901 * precede any reference to it... 
7902 */ 7903 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 7904 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 7905 "PEReference: %%%s; not found\n", 7906 name, NULL); 7907 } else 7908 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7909 "PEReference: %%%s; not found\n", 7910 name, NULL); 7911 ctxt->valid = 0; 7912 } 7913 xmlParserEntityCheck(ctxt, 0, NULL, 0); 7914 } else { 7915 /* 7916 * Internal checking in case the entity quest barfed 7917 */ 7918 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7919 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7920 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7921 "Internal: %%%s; is not a parameter entity\n", 7922 name, NULL); 7923 } else { 7924 xmlChar start[4]; 7925 xmlCharEncoding enc; 7926 7927 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7928 ((ctxt->options & XML_PARSE_NOENT) == 0) && 7929 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 7930 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 7931 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 7932 (ctxt->replaceEntities == 0) && 7933 (ctxt->validate == 0)) 7934 return; 7935 7936 input = xmlNewEntityInputStream(ctxt, entity); 7937 if (xmlPushInput(ctxt, input) < 0) { 7938 xmlFreeInputStream(input); 7939 return; 7940 } 7941 7942 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { 7943 /* 7944 * Get the 4 first bytes and decode the charset 7945 * if enc != XML_CHAR_ENCODING_NONE 7946 * plug some encoding conversion routines. 7947 * Note that, since we may have some non-UTF8 7948 * encoding (like UTF16, bug 135229), the 'length' 7949 * is not known, but we can calculate based upon 7950 * the amount of data in the buffer. 
7951 */ 7952 GROW 7953 if (ctxt->instate == XML_PARSER_EOF) 7954 return; 7955 if ((ctxt->input->end - ctxt->input->cur)>=4) { 7956 start[0] = RAW; 7957 start[1] = NXT(1); 7958 start[2] = NXT(2); 7959 start[3] = NXT(3); 7960 enc = xmlDetectCharEncoding(start, 4); 7961 if (enc != XML_CHAR_ENCODING_NONE) { 7962 xmlSwitchEncoding(ctxt, enc); 7963 } 7964 } 7965 7966 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7967 (IS_BLANK_CH(NXT(5)))) { 7968 xmlParseTextDecl(ctxt); 7969 } 7970 } 7971 } 7972 } 7973 ctxt->hasPErefs = 1; 7974 } 7975 7976 /** 7977 * xmlLoadEntityContent: 7978 * @ctxt: an XML parser context 7979 * @entity: an unloaded system entity 7980 * 7981 * Load the original content of the given system entity from the 7982 * ExternalID/SystemID given. This is to be used for Included in Literal 7983 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7984 * 7985 * Returns 0 in case of success and -1 in case of failure 7986 */ 7987 static int 7988 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7989 xmlParserInputPtr input; 7990 xmlBufferPtr buf; 7991 int l, c; 7992 int count = 0; 7993 7994 if ((ctxt == NULL) || (entity == NULL) || 7995 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7996 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7997 (entity->content != NULL)) { 7998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7999 "xmlLoadEntityContent parameter error"); 8000 return(-1); 8001 } 8002 8003 if (xmlParserDebugEntities) 8004 xmlGenericError(xmlGenericErrorContext, 8005 "Reading %s entity content input\n", entity->name); 8006 8007 buf = xmlBufferCreate(); 8008 if (buf == NULL) { 8009 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8010 "xmlLoadEntityContent parameter error"); 8011 return(-1); 8012 } 8013 8014 input = xmlNewEntityInputStream(ctxt, entity); 8015 if (input == NULL) { 8016 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8017 "xmlLoadEntityContent input error"); 8018 xmlBufferFree(buf); 8019 return(-1); 8020 
} 8021 8022 /* 8023 * Push the entity as the current input, read char by char 8024 * saving to the buffer until the end of the entity or an error 8025 */ 8026 if (xmlPushInput(ctxt, input) < 0) { 8027 xmlBufferFree(buf); 8028 return(-1); 8029 } 8030 8031 GROW; 8032 c = CUR_CHAR(l); 8033 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8034 (IS_CHAR(c))) { 8035 xmlBufferAdd(buf, ctxt->input->cur, l); 8036 if (count++ > XML_PARSER_CHUNK_SIZE) { 8037 count = 0; 8038 GROW; 8039 if (ctxt->instate == XML_PARSER_EOF) { 8040 xmlBufferFree(buf); 8041 return(-1); 8042 } 8043 } 8044 NEXTL(l); 8045 c = CUR_CHAR(l); 8046 if (c == 0) { 8047 count = 0; 8048 GROW; 8049 if (ctxt->instate == XML_PARSER_EOF) { 8050 xmlBufferFree(buf); 8051 return(-1); 8052 } 8053 c = CUR_CHAR(l); 8054 } 8055 } 8056 8057 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8058 xmlPopInput(ctxt); 8059 } else if (!IS_CHAR(c)) { 8060 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8061 "xmlLoadEntityContent: invalid char value %d\n", 8062 c); 8063 xmlBufferFree(buf); 8064 return(-1); 8065 } 8066 entity->content = buf->content; 8067 buf->content = NULL; 8068 xmlBufferFree(buf); 8069 8070 return(0); 8071 } 8072 8073 /** 8074 * xmlParseStringPEReference: 8075 * @ctxt: an XML parser context 8076 * @str: a pointer to an index in the string 8077 * 8078 * parse PEReference declarations 8079 * 8080 * [69] PEReference ::= '%' Name ';' 8081 * 8082 * [ WFC: No Recursion ] 8083 * A parsed entity must not contain a recursive 8084 * reference to itself, either directly or indirectly. 8085 * 8086 * [ WFC: Entity Declared ] 8087 * In a document without any DTD, a document with only an internal DTD 8088 * subset which contains no parameter entity references, or a document 8089 * with "standalone='yes'", ... ... The declaration of a parameter 8090 * entity must precede any reference to it... 
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * Returns the entity obtained from the SAX getParameterEntity callback,
 * or NULL if @str does not start with '%', the reference is malformed,
 * the parser was halted, or no entity was found. An entity that is not a
 * parameter entity only triggers a warning and is still returned.
 * Except when @str does not point at a '%', *str is updated to the
 * current value of the index.
 */
static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
    const xmlChar *ptr;
    xmlChar cur;
    xmlChar *name;
    xmlEntityPtr entity = NULL;

    if ((str == NULL) || (*str == NULL)) return(NULL);
    ptr = *str;
    cur = *ptr;
    if (cur != '%')
        return(NULL);
    ptr++;
    name = xmlParseStringName(ctxt, &ptr);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseStringPEReference: no name\n");
        *str = ptr;
        return(NULL);
    }
    cur = *ptr;
    if (cur != ';') {
        xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
        xmlFree(name);
        *str = ptr;
        return(NULL);
    }
    ptr++;

    /*
     * Increase the number of entity references parsed
     */
    ctxt->nbentities++;

    /*
     * Request the entity from SAX
     */
    if ((ctxt->sax != NULL) &&
        (ctxt->sax->getParameterEntity != NULL))
        entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
    /* The parser may have been halted while in the callback. */
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(name);
        *str = ptr;
        return(NULL);
    }
    if (entity == NULL) {
        /*
         * [ WFC: Entity Declared ]
         * In a document without any DTD, a document with only an
         * internal DTD subset which contains no parameter entity
         * references, or a document with "standalone='yes'", ...
         * ... The declaration of a parameter entity must precede
         * any reference to it...
         */
        if ((ctxt->standalone == 1) ||
            ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                 "PEReference: %%%s; not found\n", name);
        } else {
            /*
             * [ VC: Entity Declared ]
             * In a document with an external subset or external
             * parameter entities with "standalone='no'", ...
             * ... The declaration of a parameter entity must
             * precede any reference to it...
             */
            xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
                          "PEReference: %%%s; not found\n",
                          name, NULL);
            ctxt->valid = 0;
        }
        xmlParserEntityCheck(ctxt, 0, NULL, 0);
    } else {
        /*
         * Internal checking in case the entity quest barfed
         */
        if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
            (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
            xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
                          "%%%s; is not a parameter entity\n",
                          name, NULL);
        }
    }
    ctxt->hasPErefs = 1;
    xmlFree(name);
    *str = ptr;
    return(entity);
}

/**
 * xmlParseDocTypeDecl:
 * @ctxt:  an XML parser context
 *
 * parse a DOCTYPE declaration
 *
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
 *
 * [ VC: Root Element Type ]
 * The Name in the document type declaration must match the element
 * type of the root element.
 *
 * The name and external identifiers are recorded in @ctxt and reported
 * through the SAX internalSubset callback. If an internal subset ('[')
 * follows, the function returns without consuming it.
 */

void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name = NULL;
    xmlChar *ExternalID = NULL;
    xmlChar *URI = NULL;

    /*
     * We know that '<!DOCTYPE' has been detected.
     */
    SKIP(9);

    SKIP_BLANKS;

    /*
     * Parse the DOCTYPE name.
     */
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
    }
    /* Recorded even when NULL; parsing carries on after the error. */
    ctxt->intSubName = name;

    SKIP_BLANKS;

    /*
     * Check for SystemID and ExternalID
     */
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);

    if ((URI != NULL) || (ExternalID != NULL)) {
        ctxt->hasExternalSubset = 1;
    }
    ctxt->extSubURI = URI;
    ctxt->extSubSystem = ExternalID;

    SKIP_BLANKS;

    /*
     * Create and update the internal subset.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
        (!ctxt->disableSAX))
        ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
    if (ctxt->instate == XML_PARSER_EOF)
        return;

    /*
     * Is there any internal subset declarations ?
     * they are handled separately in xmlParseInternalSubset()
     */
    if (RAW == '[')
        return;

    /*
     * We should be at the end of the DOCTYPE declaration.
     */
    if (RAW != '>') {
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
    }
    /* Advance one character even after the error above. */
    NEXT;
}

/**
 * xmlParseInternalSubset:
 * @ctxt:  an XML parser context
 *
 * parse the internal subset declaration
 *
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
 */

static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
    /*
     * Is there any DTD definition ?
     */
    if (RAW == '[') {
        /* Input depth on entry; anything above it was pushed by a PERef. */
        int baseInputNr = ctxt->inputNr;
        ctxt->instate = XML_PARSER_DTD;
        NEXT;
        /*
         * Parse the succession of Markup declarations and
         * PEReferences.
         * Subsequence (markupdecl | PEReference | S)*
         */
        while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
               (ctxt->instate != XML_PARSER_EOF)) {
            /* Snapshot of the position, used below to detect no progress. */
            const xmlChar *check = CUR_PTR;
            unsigned int cons = ctxt->input->consumed;

            SKIP_BLANKS;
            xmlParseMarkupDecl(ctxt);
            xmlParsePEReference(ctxt);

            /*
             * Conditional sections are allowed from external entities included
             * by PE References in the internal subset.
             */
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
                xmlParseConditionalSections(ctxt);
            }

            /*
             * Nothing was consumed in this iteration: report it, then
             * either drop the current nested input or give up.
             */
            if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
                xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
             "xmlParseInternalSubset: error detected in Markup declaration\n");
                if (ctxt->inputNr > baseInputNr)
                    xmlPopInput(ctxt);
                else
                    break;
            }
        }
        if (RAW == ']') {
            NEXT;
            SKIP_BLANKS;
        }
    }

    /*
     * We should be at the end of the DOCTYPE declaration.
     */
    if (RAW != '>') {
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
        return;
    }
    NEXT;
}

#ifdef LIBXML_SAX1_ENABLED
/**
 * xmlParseAttribute:
 * @ctxt:  an XML parser context
 * @value:  a xmlChar ** used to store the value of the attribute
 *
 * parse an attribute
 *
 * [41] Attribute ::= Name Eq AttValue
 *
 * [ WFC: No External Entity References ]
 * Attribute values cannot contain direct or indirect entity references
 * to external entities.
 *
 * [ WFC: No < in Attribute Values ]
 * The replacement text of any entity referred to directly or indirectly in
 * an attribute value (other than "&lt;") must not contain a <.
 *
 * [ VC: Attribute Value Type ]
 * The attribute must have been declared; the value must be of the type
 * declared for it.
 *
 * [25] Eq ::= S? '=' S?
8359 * 8360 * With namespace: 8361 * 8362 * [NS 11] Attribute ::= QName Eq AttValue 8363 * 8364 * Also the case QName == xmlns:??? is handled independently as a namespace 8365 * definition. 8366 * 8367 * Returns the attribute name, and the value in *value. 8368 */ 8369 8370 const xmlChar * 8371 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8372 const xmlChar *name; 8373 xmlChar *val; 8374 8375 *value = NULL; 8376 GROW; 8377 name = xmlParseName(ctxt); 8378 if (name == NULL) { 8379 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8380 "error parsing attribute name\n"); 8381 return(NULL); 8382 } 8383 8384 /* 8385 * read the value 8386 */ 8387 SKIP_BLANKS; 8388 if (RAW == '=') { 8389 NEXT; 8390 SKIP_BLANKS; 8391 val = xmlParseAttValue(ctxt); 8392 ctxt->instate = XML_PARSER_CONTENT; 8393 } else { 8394 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8395 "Specification mandates value for attribute %s\n", name); 8396 return(NULL); 8397 } 8398 8399 /* 8400 * Check that xml:lang conforms to the specification 8401 * No more registered as an error, just generate a warning now 8402 * since this was deprecated in XML second edition 8403 */ 8404 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8405 if (!xmlCheckLanguageID(val)) { 8406 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8407 "Malformed value for xml:lang : %s\n", 8408 val, NULL); 8409 } 8410 } 8411 8412 /* 8413 * Check that xml:space conforms to the specification 8414 */ 8415 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8416 if (xmlStrEqual(val, BAD_CAST "default")) 8417 *(ctxt->space) = 0; 8418 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8419 *(ctxt->space) = 1; 8420 else { 8421 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8422 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8423 val, NULL); 8424 } 8425 } 8426 8427 *value = val; 8428 return(name); 8429 } 8430 8431 /** 8432 * xmlParseStartTag: 8433 * @ctxt: an XML parser context 8434 * 8435 * parse a start 
 * of tag either for rule element or
 * EmptyElement. In both case we don't parse the tag closing chars.
 *
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
 *
 * [ WFC: Unique Att Spec ]
 * No attribute name may appear more than once in the same start-tag or
 * empty-element tag.
 *
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
 *
 * [ WFC: Unique Att Spec ]
 * No attribute name may appear more than once in the same start-tag or
 * empty-element tag.
 *
 * With namespace:
 *
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
 *
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
 *
 * The attributes are collected as name/value pairs in the reusable
 * ctxt->atts array, handed to the SAX startElement callback, and the
 * value strings are freed before returning.
 *
 * Returns the element name parsed, or NULL if the input does not start
 * with '<' or no valid element name follows.
 */

const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    const xmlChar *attname;
    xmlChar *attvalue;
    const xmlChar **atts = ctxt->atts;
    int nbatts = 0;
    int maxatts = ctxt->maxatts;
    int i;

    if (RAW != '<') return(NULL);
    NEXT1;

    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
             "xmlParseStartTag: invalid element name\n");
        return(NULL);
    }

    /*
     * Now parse the attributes, it ends up with the ending
     *
     * (S Attribute)* S?
     */
    SKIP_BLANKS;
    GROW;

    while (((RAW != '>') &&
           ((RAW != '/') || (NXT(1) != '>')) &&
           (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
        /* Snapshot of the position, used below to detect no progress. */
        const xmlChar *q = CUR_PTR;
        unsigned int cons = ctxt->input->consumed;

        attname = xmlParseAttribute(ctxt, &attvalue);
        if ((attname != NULL) && (attvalue != NULL)) {
            /*
             * [ WFC: Unique Att Spec ]
             * No attribute name may appear more than once in the same
             * start-tag or empty-element tag.
             */
            for (i = 0; i < nbatts;i += 2) {
                if (xmlStrEqual(atts[i], attname)) {
                    xmlErrAttributeDup(ctxt, NULL, attname);
                    xmlFree(attvalue);
                    goto failed;
                }
            }
            /*
             * Add the pair to atts
             */
            if (atts == NULL) {
                maxatts = 22; /* allow for 10 attrs by default */
                atts = (const xmlChar **)
                       xmlMalloc(maxatts * sizeof(xmlChar *));
                if (atts == NULL) {
                    xmlErrMemory(ctxt, NULL);
                    if (attvalue != NULL)
                        xmlFree(attvalue);
                    goto failed;
                }
                ctxt->atts = atts;
                ctxt->maxatts = maxatts;
            } else if (nbatts + 4 > maxatts) {
                /* Need room for this pair plus the two NULL terminators. */
                const xmlChar **n;

                maxatts *= 2;
                n = (const xmlChar **) xmlRealloc((void *) atts,
                                             maxatts * sizeof(const xmlChar *));
                if (n == NULL) {
                    xmlErrMemory(ctxt, NULL);
                    if (attvalue != NULL)
                        xmlFree(attvalue);
                    goto failed;
                }
                atts = n;
                ctxt->atts = atts;
                ctxt->maxatts = maxatts;
            }
            atts[nbatts++] = attname;
            atts[nbatts++] = attvalue;
            atts[nbatts] = NULL;
            atts[nbatts + 1] = NULL;
        } else {
            /* Incomplete attribute: discard whatever value was parsed. */
            if (attvalue != NULL)
                xmlFree(attvalue);
        }

failed:

        GROW
        if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
            break;
        if (SKIP_BLANKS == 0) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "attributes construct error\n");
        }
        /* Nothing consumed and nothing parsed: stop instead of looping. */
        if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
            (attname == NULL) && (attvalue == NULL)) {
            xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                           "xmlParseStartTag: problem parsing attributes\n");
            break;
        }
        SHRINK;
        GROW;
    }

    /*
     * SAX: Start of Element !
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
        (!ctxt->disableSAX)) {
        if (nbatts > 0)
            ctxt->sax->startElement(ctxt->userData, name, atts);
        else
            ctxt->sax->startElement(ctxt->userData, name, NULL);
    }

    if (atts != NULL) {
        /* Free only the content strings */
        for (i = 1;i < nbatts;i+=2)
            if (atts[i] != NULL)
               xmlFree((xmlChar *) atts[i]);
    }
    return(name);
}

/**
 * xmlParseEndTag1:
 * @ctxt:  an XML parser context
 * @line:  line of the start tag
 *
 * parse an end of tag
 *
 * [42] ETag ::= '</' Name S? '>'
 *
 * With namespace
 *
 * [NS 9] ETag ::= '</' QName S? '>'
 *
 * The name is compared against ctxt->name; a mismatch is reported as a
 * fatal error mentioning @line. Unless the leading '</' is missing, the
 * SAX endElement callback is invoked and the name and space stacks are
 * popped, whether or not the names matched.
 */

static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
    const xmlChar *name;

    GROW;
    if ((RAW != '<') || (NXT(1) != '/')) {
        xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
                       "xmlParseEndTag: '</' not found\n");
        return;
    }
    SKIP(2);

    name = xmlParseNameAndCompare(ctxt,ctxt->name);

    /*
     * We should definitely be at the ending "S? '>'" part
     */
    GROW;
    SKIP_BLANKS;
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
    } else
        NEXT1;

    /*
     * [ WFC: Element Type Match ]
     * The Name in an element's end-tag must match the element type in the
     * start-tag.
     *
     */
    /* (xmlChar*)1 is treated as the "names match" sentinel here. */
    if (name != (xmlChar*)1) {
        if (name == NULL) name = BAD_CAST "unparseable";
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
                     "Opening and ending tag mismatch: %s line %d and %s\n",
                                ctxt->name, line, name);
    }

    /*
     * SAX: End of Tag
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
        (!ctxt->disableSAX))
        ctxt->sax->endElement(ctxt->userData, ctxt->name);

    namePop(ctxt);
    spacePop(ctxt);
    return;
}

/**
 * xmlParseEndTag:
 * @ctxt:  an XML parser context
 *
 * parse an end of tag
 *
 * [42] ETag ::= '</' Name S? '>'
 *
 * With namespace
 *
 * [NS 9] ETag ::= '</' QName S? '>'
 *
 * Thin wrapper around xmlParseEndTag1() passing 0 as the start-tag line.
 */

void
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
    xmlParseEndTag1(ctxt, 0);
}
#endif /* LIBXML_SAX1_ENABLED */

/************************************************************************
 *                                                                      *
 *              SAX 2 specific operations                               *
 *                                                                      *
 ************************************************************************/

/*
 * xmlGetNamespace:
 * @ctxt:  an XML parser context
 * @prefix:  the prefix to lookup
 *
 * Lookup the namespace name for the @prefix (which can be NULL)
 * The prefix must come from the @ctxt->dict dictionary: prefixes are
 * compared by pointer, not by content.
 *
 * ctxt->nsTab is scanned from the most recent prefix/URI pair backwards,
 * so the innermost binding wins. A NULL prefix bound to the empty string
 * is reported as unbound.
 *
 * Returns the namespace name or NULL if not bound
 */
static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
    int i;

    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
        if (ctxt->nsTab[i] == prefix) {
            if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
                return(NULL);
            return(ctxt->nsTab[i + 1]);
        }
    return(NULL);
}

/**
 * xmlParseQName:
 * @ctxt:  an XML parser context
 * @prefix:  pointer to store the prefix part
 *
 * parse an XML Namespace QName
 *
[6] QName ::= (Prefix ':')? LocalPart 8707 * [7] Prefix ::= NCName 8708 * [8] LocalPart ::= NCName 8709 * 8710 * Returns the Name parsed or NULL 8711 */ 8712 8713 static const xmlChar * 8714 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8715 const xmlChar *l, *p; 8716 8717 GROW; 8718 8719 l = xmlParseNCName(ctxt); 8720 if (l == NULL) { 8721 if (CUR == ':') { 8722 l = xmlParseName(ctxt); 8723 if (l != NULL) { 8724 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8725 "Failed to parse QName '%s'\n", l, NULL, NULL); 8726 *prefix = NULL; 8727 return(l); 8728 } 8729 } 8730 return(NULL); 8731 } 8732 if (CUR == ':') { 8733 NEXT; 8734 p = l; 8735 l = xmlParseNCName(ctxt); 8736 if (l == NULL) { 8737 xmlChar *tmp; 8738 8739 if (ctxt->instate == XML_PARSER_EOF) 8740 return(NULL); 8741 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8742 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8743 l = xmlParseNmtoken(ctxt); 8744 if (l == NULL) { 8745 if (ctxt->instate == XML_PARSER_EOF) 8746 return(NULL); 8747 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8748 } else { 8749 tmp = xmlBuildQName(l, p, NULL, 0); 8750 xmlFree((char *)l); 8751 } 8752 p = xmlDictLookup(ctxt->dict, tmp, -1); 8753 if (tmp != NULL) xmlFree(tmp); 8754 *prefix = NULL; 8755 return(p); 8756 } 8757 if (CUR == ':') { 8758 xmlChar *tmp; 8759 8760 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8761 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8762 NEXT; 8763 tmp = (xmlChar *) xmlParseName(ctxt); 8764 if (tmp != NULL) { 8765 tmp = xmlBuildQName(tmp, l, NULL, 0); 8766 l = xmlDictLookup(ctxt->dict, tmp, -1); 8767 if (tmp != NULL) xmlFree(tmp); 8768 *prefix = p; 8769 return(l); 8770 } 8771 if (ctxt->instate == XML_PARSER_EOF) 8772 return(NULL); 8773 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8774 l = xmlDictLookup(ctxt->dict, tmp, -1); 8775 if (tmp != NULL) xmlFree(tmp); 8776 *prefix = p; 8777 return(l); 8778 } 8779 *prefix = p; 8780 } else 8781 *prefix = NULL; 8782 return(l); 8783 } 8784 8785 /** 8786 * xmlParseQNameAndCompare: 8787 * 
@ctxt: an XML parser context 8788 * @name: the localname 8789 * @prefix: the prefix, if any. 8790 * 8791 * parse an XML name and compares for match 8792 * (specialized for endtag parsing) 8793 * 8794 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8795 * and the name for mismatch 8796 */ 8797 8798 static const xmlChar * 8799 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8800 xmlChar const *prefix) { 8801 const xmlChar *cmp; 8802 const xmlChar *in; 8803 const xmlChar *ret; 8804 const xmlChar *prefix2; 8805 8806 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8807 8808 GROW; 8809 in = ctxt->input->cur; 8810 8811 cmp = prefix; 8812 while (*in != 0 && *in == *cmp) { 8813 ++in; 8814 ++cmp; 8815 } 8816 if ((*cmp == 0) && (*in == ':')) { 8817 in++; 8818 cmp = name; 8819 while (*in != 0 && *in == *cmp) { 8820 ++in; 8821 ++cmp; 8822 } 8823 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8824 /* success */ 8825 ctxt->input->cur = in; 8826 return((const xmlChar*) 1); 8827 } 8828 } 8829 /* 8830 * all strings coms from the dictionary, equality can be done directly 8831 */ 8832 ret = xmlParseQName (ctxt, &prefix2); 8833 if ((ret == name) && (prefix == prefix2)) 8834 return((const xmlChar*) 1); 8835 return ret; 8836 } 8837 8838 /** 8839 * xmlParseAttValueInternal: 8840 * @ctxt: an XML parser context 8841 * @len: attribute len result 8842 * @alloc: whether the attribute was reallocated as a new string 8843 * @normalize: if 1 then further non-CDATA normalization must be done 8844 * 8845 * parse a value for an attribute. 8846 * NOTE: if no normalization is needed, the routine will return pointers 8847 * directly from the data buffer. 
8848 * 8849 * 3.3.3 Attribute-Value Normalization: 8850 * Before the value of an attribute is passed to the application or 8851 * checked for validity, the XML processor must normalize it as follows: 8852 * - a character reference is processed by appending the referenced 8853 * character to the attribute value 8854 * - an entity reference is processed by recursively processing the 8855 * replacement text of the entity 8856 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8857 * appending #x20 to the normalized value, except that only a single 8858 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8859 * parsed entity or the literal entity value of an internal parsed entity 8860 * - other characters are processed by appending them to the normalized value 8861 * If the declared value is not CDATA, then the XML processor must further 8862 * process the normalized attribute value by discarding any leading and 8863 * trailing space (#x20) characters, and by replacing sequences of space 8864 * (#x20) characters by a single space (#x20) character. 8865 * All attributes for which no declaration has been read should be treated 8866 * by a non-validating parser as if declared CDATA. 8867 * 8868 * Returns the AttValue parsed or NULL. The value has to be freed by the 8869 * caller if it was copied, this can be detected by val[*len] == 0. 
8870 */ 8871 8872 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \ 8873 const xmlChar *oldbase = ctxt->input->base;\ 8874 GROW;\ 8875 if (ctxt->instate == XML_PARSER_EOF)\ 8876 return(NULL);\ 8877 if (oldbase != ctxt->input->base) {\ 8878 ptrdiff_t delta = ctxt->input->base - oldbase;\ 8879 start = start + delta;\ 8880 in = in + delta;\ 8881 }\ 8882 end = ctxt->input->end; 8883 8884 static xmlChar * 8885 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8886 int normalize) 8887 { 8888 xmlChar limit = 0; 8889 const xmlChar *in = NULL, *start, *end, *last; 8890 xmlChar *ret = NULL; 8891 int line, col; 8892 8893 GROW; 8894 in = (xmlChar *) CUR_PTR; 8895 line = ctxt->input->line; 8896 col = ctxt->input->col; 8897 if (*in != '"' && *in != '\'') { 8898 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8899 return (NULL); 8900 } 8901 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8902 8903 /* 8904 * try to handle in this routine the most common case where no 8905 * allocation of a new string is required and where content is 8906 * pure ASCII. 
8907 */ 8908 limit = *in++; 8909 col++; 8910 end = ctxt->input->end; 8911 start = in; 8912 if (in >= end) { 8913 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 8914 } 8915 if (normalize) { 8916 /* 8917 * Skip any leading spaces 8918 */ 8919 while ((in < end) && (*in != limit) && 8920 ((*in == 0x20) || (*in == 0x9) || 8921 (*in == 0xA) || (*in == 0xD))) { 8922 if (*in == 0xA) { 8923 line++; col = 1; 8924 } else { 8925 col++; 8926 } 8927 in++; 8928 start = in; 8929 if (in >= end) { 8930 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 8931 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8932 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8933 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8934 "AttValue length too long\n"); 8935 return(NULL); 8936 } 8937 } 8938 } 8939 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8940 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8941 col++; 8942 if ((*in++ == 0x20) && (*in == 0x20)) break; 8943 if (in >= end) { 8944 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 8945 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8946 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8947 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8948 "AttValue length too long\n"); 8949 return(NULL); 8950 } 8951 } 8952 } 8953 last = in; 8954 /* 8955 * skip the trailing blanks 8956 */ 8957 while ((last[-1] == 0x20) && (last > start)) last--; 8958 while ((in < end) && (*in != limit) && 8959 ((*in == 0x20) || (*in == 0x9) || 8960 (*in == 0xA) || (*in == 0xD))) { 8961 if (*in == 0xA) { 8962 line++, col = 1; 8963 } else { 8964 col++; 8965 } 8966 in++; 8967 if (in >= end) { 8968 const xmlChar *oldbase = ctxt->input->base; 8969 GROW; 8970 if (ctxt->instate == XML_PARSER_EOF) 8971 return(NULL); 8972 if (oldbase != ctxt->input->base) { 8973 ptrdiff_t delta = ctxt->input->base - oldbase; 8974 start = start + delta; 8975 in = in + delta; 8976 last = last + delta; 8977 } 8978 end = ctxt->input->end; 8979 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8980 
((ctxt->options & XML_PARSE_HUGE) == 0)) { 8981 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8982 "AttValue length too long\n"); 8983 return(NULL); 8984 } 8985 } 8986 } 8987 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8988 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8989 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8990 "AttValue length too long\n"); 8991 return(NULL); 8992 } 8993 if (*in != limit) goto need_complex; 8994 } else { 8995 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8996 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8997 in++; 8998 col++; 8999 if (in >= end) { 9000 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9001 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9002 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9003 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9004 "AttValue length too long\n"); 9005 return(NULL); 9006 } 9007 } 9008 } 9009 last = in; 9010 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9011 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9012 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9013 "AttValue length too long\n"); 9014 return(NULL); 9015 } 9016 if (*in != limit) goto need_complex; 9017 } 9018 in++; 9019 col++; 9020 if (len != NULL) { 9021 *len = last - start; 9022 ret = (xmlChar *) start; 9023 } else { 9024 if (alloc) *alloc = 1; 9025 ret = xmlStrndup(start, last - start); 9026 } 9027 CUR_PTR = in; 9028 ctxt->input->line = line; 9029 ctxt->input->col = col; 9030 if (alloc) *alloc = 0; 9031 return ret; 9032 need_complex: 9033 if (alloc) *alloc = 1; 9034 return xmlParseAttValueComplex(ctxt, len, normalize); 9035 } 9036 9037 /** 9038 * xmlParseAttribute2: 9039 * @ctxt: an XML parser context 9040 * @pref: the element prefix 9041 * @elem: the element name 9042 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9043 * @value: a xmlChar ** used to store the value of the attribute 9044 * @len: an int * to save the length of the attribute 9045 * @alloc: an int * to indicate if 
the attribute was allocated 9046 * 9047 * parse an attribute in the new SAX2 framework. 9048 * 9049 * Returns the attribute name, and the value in *value, . 9050 */ 9051 9052 static const xmlChar * 9053 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9054 const xmlChar * pref, const xmlChar * elem, 9055 const xmlChar ** prefix, xmlChar ** value, 9056 int *len, int *alloc) 9057 { 9058 const xmlChar *name; 9059 xmlChar *val, *internal_val = NULL; 9060 int normalize = 0; 9061 9062 *value = NULL; 9063 GROW; 9064 name = xmlParseQName(ctxt, prefix); 9065 if (name == NULL) { 9066 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9067 "error parsing attribute name\n"); 9068 return (NULL); 9069 } 9070 9071 /* 9072 * get the type if needed 9073 */ 9074 if (ctxt->attsSpecial != NULL) { 9075 int type; 9076 9077 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial, 9078 pref, elem, *prefix, name); 9079 if (type != 0) 9080 normalize = 1; 9081 } 9082 9083 /* 9084 * read the value 9085 */ 9086 SKIP_BLANKS; 9087 if (RAW == '=') { 9088 NEXT; 9089 SKIP_BLANKS; 9090 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9091 if (normalize) { 9092 /* 9093 * Sometimes a second normalisation pass for spaces is needed 9094 * but that only happens if charrefs or entities references 9095 * have been used in the attribute value, i.e. the attribute 9096 * value have been extracted in an allocated string already. 
9097 */ 9098 if (*alloc) { 9099 const xmlChar *val2; 9100 9101 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9102 if ((val2 != NULL) && (val2 != val)) { 9103 xmlFree(val); 9104 val = (xmlChar *) val2; 9105 } 9106 } 9107 } 9108 ctxt->instate = XML_PARSER_CONTENT; 9109 } else { 9110 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9111 "Specification mandates value for attribute %s\n", 9112 name); 9113 return (NULL); 9114 } 9115 9116 if (*prefix == ctxt->str_xml) { 9117 /* 9118 * Check that xml:lang conforms to the specification 9119 * No more registered as an error, just generate a warning now 9120 * since this was deprecated in XML second edition 9121 */ 9122 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9123 internal_val = xmlStrndup(val, *len); 9124 if (!xmlCheckLanguageID(internal_val)) { 9125 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9126 "Malformed value for xml:lang : %s\n", 9127 internal_val, NULL); 9128 } 9129 } 9130 9131 /* 9132 * Check that xml:space conforms to the specification 9133 */ 9134 if (xmlStrEqual(name, BAD_CAST "space")) { 9135 internal_val = xmlStrndup(val, *len); 9136 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9137 *(ctxt->space) = 0; 9138 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9139 *(ctxt->space) = 1; 9140 else { 9141 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9142 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9143 internal_val, NULL); 9144 } 9145 } 9146 if (internal_val) { 9147 xmlFree(internal_val); 9148 } 9149 } 9150 9151 *value = val; 9152 return (name); 9153 } 9154 /** 9155 * xmlParseStartTag2: 9156 * @ctxt: an XML parser context 9157 * 9158 * parse a start of tag either for rule element or 9159 * EmptyElement. In both case we don't parse the tag closing chars. 9160 * This routine is called when running SAX2 parsing 9161 * 9162 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9163 * 9164 * [ WFC: Unique Att Spec ] 9165 * No attribute name may appear more than once in the same start-tag or 9166 * empty-element tag. 9167 * 9168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9169 * 9170 * [ WFC: Unique Att Spec ] 9171 * No attribute name may appear more than once in the same start-tag or 9172 * empty-element tag. 9173 * 9174 * With namespace: 9175 * 9176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9177 * 9178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9179 * 9180 * Returns the element name parsed 9181 */ 9182 9183 static const xmlChar * 9184 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9185 const xmlChar **URI, int *tlen) { 9186 const xmlChar *localname; 9187 const xmlChar *prefix; 9188 const xmlChar *attname; 9189 const xmlChar *aprefix; 9190 const xmlChar *nsname; 9191 xmlChar *attvalue; 9192 const xmlChar **atts = ctxt->atts; 9193 int maxatts = ctxt->maxatts; 9194 int nratts, nbatts, nbdef, inputid; 9195 int i, j, nbNs, attval; 9196 unsigned long cur; 9197 int nsNr = ctxt->nsNr; 9198 9199 if (RAW != '<') return(NULL); 9200 NEXT1; 9201 9202 /* 9203 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9204 * point since the attribute values may be stored as pointers to 9205 * the buffer and calling SHRINK would destroy them ! 9206 * The Shrinking is only possible once the full set of attribute 9207 * callbacks have been done. 9208 */ 9209 SHRINK; 9210 cur = ctxt->input->cur - ctxt->input->base; 9211 inputid = ctxt->input->id; 9212 nbatts = 0; 9213 nratts = 0; 9214 nbdef = 0; 9215 nbNs = 0; 9216 attval = 0; 9217 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9218 ctxt->nsNr = nsNr; 9219 9220 localname = xmlParseQName(ctxt, &prefix); 9221 if (localname == NULL) { 9222 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9223 "StartTag: invalid element name\n"); 9224 return(NULL); 9225 } 9226 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9227 9228 /* 9229 * Now parse the attributes, it ends up with the ending 9230 * 9231 * (S Attribute)* S? 9232 */ 9233 SKIP_BLANKS; 9234 GROW; 9235 9236 while (((RAW != '>') && 9237 ((RAW != '/') || (NXT(1) != '>')) && 9238 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9239 const xmlChar *q = CUR_PTR; 9240 unsigned int cons = ctxt->input->consumed; 9241 int len = -1, alloc = 0; 9242 9243 attname = xmlParseAttribute2(ctxt, prefix, localname, 9244 &aprefix, &attvalue, &len, &alloc); 9245 if ((attname == NULL) || (attvalue == NULL)) 9246 goto next_attr; 9247 if (len < 0) len = xmlStrlen(attvalue); 9248 9249 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9250 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9251 xmlURIPtr uri; 9252 9253 if (URL == NULL) { 9254 xmlErrMemory(ctxt, "dictionary allocation failure"); 9255 if ((attvalue != NULL) && (alloc != 0)) 9256 xmlFree(attvalue); 9257 localname = NULL; 9258 goto done; 9259 } 9260 if (*URL != 0) { 9261 uri = xmlParseURI((const char *) URL); 9262 if (uri == NULL) { 9263 xmlNsErr(ctxt, XML_WAR_NS_URI, 9264 "xmlns: '%s' is not a valid URI\n", 9265 URL, NULL, NULL); 9266 } else { 9267 if (uri->scheme == NULL) { 9268 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9269 "xmlns: URI %s is not absolute\n", 9270 URL, NULL, NULL); 9271 } 9272 xmlFreeURI(uri); 9273 } 9274 if (URL == ctxt->str_xml_ns) { 9275 if (attname != ctxt->str_xml) { 9276 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9277 "xml namespace URI cannot be the default namespace\n", 9278 NULL, NULL, NULL); 9279 } 9280 goto next_attr; 9281 } 9282 if ((len == 29) && 9283 (xmlStrEqual(URL, 9284 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9285 xmlNsErr(ctxt, 
XML_NS_ERR_XML_NAMESPACE, 9286 "reuse of the xmlns namespace name is forbidden\n", 9287 NULL, NULL, NULL); 9288 goto next_attr; 9289 } 9290 } 9291 /* 9292 * check that it's not a defined namespace 9293 */ 9294 for (j = 1;j <= nbNs;j++) 9295 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9296 break; 9297 if (j <= nbNs) 9298 xmlErrAttributeDup(ctxt, NULL, attname); 9299 else 9300 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9301 9302 } else if (aprefix == ctxt->str_xmlns) { 9303 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9304 xmlURIPtr uri; 9305 9306 if (attname == ctxt->str_xml) { 9307 if (URL != ctxt->str_xml_ns) { 9308 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9309 "xml namespace prefix mapped to wrong URI\n", 9310 NULL, NULL, NULL); 9311 } 9312 /* 9313 * Do not keep a namespace definition node 9314 */ 9315 goto next_attr; 9316 } 9317 if (URL == ctxt->str_xml_ns) { 9318 if (attname != ctxt->str_xml) { 9319 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9320 "xml namespace URI mapped to wrong prefix\n", 9321 NULL, NULL, NULL); 9322 } 9323 goto next_attr; 9324 } 9325 if (attname == ctxt->str_xmlns) { 9326 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9327 "redefinition of the xmlns prefix is forbidden\n", 9328 NULL, NULL, NULL); 9329 goto next_attr; 9330 } 9331 if ((len == 29) && 9332 (xmlStrEqual(URL, 9333 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9334 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9335 "reuse of the xmlns namespace name is forbidden\n", 9336 NULL, NULL, NULL); 9337 goto next_attr; 9338 } 9339 if ((URL == NULL) || (URL[0] == 0)) { 9340 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9341 "xmlns:%s: Empty XML namespace is not allowed\n", 9342 attname, NULL, NULL); 9343 goto next_attr; 9344 } else { 9345 uri = xmlParseURI((const char *) URL); 9346 if (uri == NULL) { 9347 xmlNsErr(ctxt, XML_WAR_NS_URI, 9348 "xmlns:%s: '%s' is not a valid URI\n", 9349 attname, URL, NULL); 9350 } else { 9351 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9352 xmlNsWarn(ctxt, 
XML_WAR_NS_URI_RELATIVE, 9353 "xmlns:%s: URI %s is not absolute\n", 9354 attname, URL, NULL); 9355 } 9356 xmlFreeURI(uri); 9357 } 9358 } 9359 9360 /* 9361 * check that it's not a defined namespace 9362 */ 9363 for (j = 1;j <= nbNs;j++) 9364 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9365 break; 9366 if (j <= nbNs) 9367 xmlErrAttributeDup(ctxt, aprefix, attname); 9368 else 9369 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9370 9371 } else { 9372 /* 9373 * Add the pair to atts 9374 */ 9375 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9376 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9377 goto next_attr; 9378 } 9379 maxatts = ctxt->maxatts; 9380 atts = ctxt->atts; 9381 } 9382 ctxt->attallocs[nratts++] = alloc; 9383 atts[nbatts++] = attname; 9384 atts[nbatts++] = aprefix; 9385 /* 9386 * The namespace URI field is used temporarily to point at the 9387 * base of the current input buffer for non-alloced attributes. 9388 * When the input buffer is reallocated, all the pointers become 9389 * invalid, but they can be reconstructed later. 
9390 */ 9391 if (alloc) 9392 atts[nbatts++] = NULL; 9393 else 9394 atts[nbatts++] = ctxt->input->base; 9395 atts[nbatts++] = attvalue; 9396 attvalue += len; 9397 atts[nbatts++] = attvalue; 9398 /* 9399 * tag if some deallocation is needed 9400 */ 9401 if (alloc != 0) attval = 1; 9402 attvalue = NULL; /* moved into atts */ 9403 } 9404 9405 next_attr: 9406 if ((attvalue != NULL) && (alloc != 0)) { 9407 xmlFree(attvalue); 9408 attvalue = NULL; 9409 } 9410 9411 GROW 9412 if (ctxt->instate == XML_PARSER_EOF) 9413 break; 9414 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9415 break; 9416 if (SKIP_BLANKS == 0) { 9417 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9418 "attributes construct error\n"); 9419 break; 9420 } 9421 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9422 (attname == NULL) && (attvalue == NULL)) { 9423 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9424 "xmlParseStartTag: problem parsing attributes\n"); 9425 break; 9426 } 9427 GROW; 9428 } 9429 9430 if (ctxt->input->id != inputid) { 9431 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9432 "Unexpected change of input\n"); 9433 localname = NULL; 9434 goto done; 9435 } 9436 9437 /* Reconstruct attribute value pointers. */ 9438 for (i = 0, j = 0; j < nratts; i += 5, j++) { 9439 if (atts[i+2] != NULL) { 9440 /* 9441 * Arithmetic on dangling pointers is technically undefined 9442 * behavior, but well... 
9443 */ 9444 ptrdiff_t offset = ctxt->input->base - atts[i+2]; 9445 atts[i+2] = NULL; /* Reset repurposed namespace URI */ 9446 atts[i+3] += offset; /* value */ 9447 atts[i+4] += offset; /* valuend */ 9448 } 9449 } 9450 9451 /* 9452 * The attributes defaulting 9453 */ 9454 if (ctxt->attsDefault != NULL) { 9455 xmlDefAttrsPtr defaults; 9456 9457 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9458 if (defaults != NULL) { 9459 for (i = 0;i < defaults->nbAttrs;i++) { 9460 attname = defaults->values[5 * i]; 9461 aprefix = defaults->values[5 * i + 1]; 9462 9463 /* 9464 * special work for namespaces defaulted defs 9465 */ 9466 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9467 /* 9468 * check that it's not a defined namespace 9469 */ 9470 for (j = 1;j <= nbNs;j++) 9471 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9472 break; 9473 if (j <= nbNs) continue; 9474 9475 nsname = xmlGetNamespace(ctxt, NULL); 9476 if (nsname != defaults->values[5 * i + 2]) { 9477 if (nsPush(ctxt, NULL, 9478 defaults->values[5 * i + 2]) > 0) 9479 nbNs++; 9480 } 9481 } else if (aprefix == ctxt->str_xmlns) { 9482 /* 9483 * check that it's not a defined namespace 9484 */ 9485 for (j = 1;j <= nbNs;j++) 9486 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9487 break; 9488 if (j <= nbNs) continue; 9489 9490 nsname = xmlGetNamespace(ctxt, attname); 9491 if (nsname != defaults->values[2]) { 9492 if (nsPush(ctxt, attname, 9493 defaults->values[5 * i + 2]) > 0) 9494 nbNs++; 9495 } 9496 } else { 9497 /* 9498 * check that it's not a defined attribute 9499 */ 9500 for (j = 0;j < nbatts;j+=5) { 9501 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9502 break; 9503 } 9504 if (j < nbatts) continue; 9505 9506 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9507 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9508 localname = NULL; 9509 goto done; 9510 } 9511 maxatts = ctxt->maxatts; 9512 atts = ctxt->atts; 9513 } 9514 atts[nbatts++] = attname; 9515 atts[nbatts++] = aprefix; 9516 if 
(aprefix == NULL) 9517 atts[nbatts++] = NULL; 9518 else 9519 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9520 atts[nbatts++] = defaults->values[5 * i + 2]; 9521 atts[nbatts++] = defaults->values[5 * i + 3]; 9522 if ((ctxt->standalone == 1) && 9523 (defaults->values[5 * i + 4] != NULL)) { 9524 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9525 "standalone: attribute %s on %s defaulted from external subset\n", 9526 attname, localname); 9527 } 9528 nbdef++; 9529 } 9530 } 9531 } 9532 } 9533 9534 /* 9535 * The attributes checkings 9536 */ 9537 for (i = 0; i < nbatts;i += 5) { 9538 /* 9539 * The default namespace does not apply to attribute names. 9540 */ 9541 if (atts[i + 1] != NULL) { 9542 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9543 if (nsname == NULL) { 9544 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9545 "Namespace prefix %s for %s on %s is not defined\n", 9546 atts[i + 1], atts[i], localname); 9547 } 9548 atts[i + 2] = nsname; 9549 } else 9550 nsname = NULL; 9551 /* 9552 * [ WFC: Unique Att Spec ] 9553 * No attribute name may appear more than once in the same 9554 * start-tag or empty-element tag. 9555 * As extended by the Namespace in XML REC. 9556 */ 9557 for (j = 0; j < i;j += 5) { 9558 if (atts[i] == atts[j]) { 9559 if (atts[i+1] == atts[j+1]) { 9560 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9561 break; 9562 } 9563 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9564 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9565 "Namespaced Attribute %s in '%s' redefined\n", 9566 atts[i], nsname, NULL); 9567 break; 9568 } 9569 } 9570 } 9571 } 9572 9573 nsname = xmlGetNamespace(ctxt, prefix); 9574 if ((prefix != NULL) && (nsname == NULL)) { 9575 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9576 "Namespace prefix %s on %s is not defined\n", 9577 prefix, localname, NULL); 9578 } 9579 *pref = prefix; 9580 *URI = nsname; 9581 9582 /* 9583 * SAX: Start of Element ! 
9584 */ 9585 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9586 (!ctxt->disableSAX)) { 9587 if (nbNs > 0) 9588 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9589 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9590 nbatts / 5, nbdef, atts); 9591 else 9592 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9593 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9594 } 9595 9596 done: 9597 /* 9598 * Free up attribute allocated strings if needed 9599 */ 9600 if (attval != 0) { 9601 for (i = 3,j = 0; j < nratts;i += 5,j++) 9602 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9603 xmlFree((xmlChar *) atts[i]); 9604 } 9605 9606 return(localname); 9607 } 9608 9609 /** 9610 * xmlParseEndTag2: 9611 * @ctxt: an XML parser context 9612 * @line: line of the start tag 9613 * @nsNr: number of namespaces on the start tag 9614 * 9615 * parse an end of tag 9616 * 9617 * [42] ETag ::= '</' Name S? '>' 9618 * 9619 * With namespace 9620 * 9621 * [NS 9] ETag ::= '</' QName S? 
'>' 9622 */ 9623 9624 static void 9625 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9626 const xmlChar *URI, int line, int nsNr, int tlen) { 9627 const xmlChar *name; 9628 size_t curLength; 9629 9630 GROW; 9631 if ((RAW != '<') || (NXT(1) != '/')) { 9632 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9633 return; 9634 } 9635 SKIP(2); 9636 9637 curLength = ctxt->input->end - ctxt->input->cur; 9638 if ((tlen > 0) && (curLength >= (size_t)tlen) && 9639 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9640 if ((curLength >= (size_t)(tlen + 1)) && 9641 (ctxt->input->cur[tlen] == '>')) { 9642 ctxt->input->cur += tlen + 1; 9643 ctxt->input->col += tlen + 1; 9644 goto done; 9645 } 9646 ctxt->input->cur += tlen; 9647 ctxt->input->col += tlen; 9648 name = (xmlChar*)1; 9649 } else { 9650 if (prefix == NULL) 9651 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9652 else 9653 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9654 } 9655 9656 /* 9657 * We should definitely be at the ending "S? '>'" part 9658 */ 9659 GROW; 9660 if (ctxt->instate == XML_PARSER_EOF) 9661 return; 9662 SKIP_BLANKS; 9663 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9664 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9665 } else 9666 NEXT1; 9667 9668 /* 9669 * [ WFC: Element Type Match ] 9670 * The Name in an element's end-tag must match the element type in the 9671 * start-tag. 
9672 * 9673 */ 9674 if (name != (xmlChar*)1) { 9675 if (name == NULL) name = BAD_CAST "unparseable"; 9676 if ((line == 0) && (ctxt->node != NULL)) 9677 line = ctxt->node->line; 9678 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9679 "Opening and ending tag mismatch: %s line %d and %s\n", 9680 ctxt->name, line, name); 9681 } 9682 9683 /* 9684 * SAX: End of Tag 9685 */ 9686 done: 9687 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9688 (!ctxt->disableSAX)) 9689 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9690 9691 spacePop(ctxt); 9692 if (nsNr != 0) 9693 nsPop(ctxt, nsNr); 9694 return; 9695 } 9696 9697 /** 9698 * xmlParseCDSect: 9699 * @ctxt: an XML parser context 9700 * 9701 * Parse escaped pure raw content. 9702 * 9703 * [18] CDSect ::= CDStart CData CDEnd 9704 * 9705 * [19] CDStart ::= '<![CDATA[' 9706 * 9707 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9708 * 9709 * [21] CDEnd ::= ']]>' 9710 */ 9711 void 9712 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9713 xmlChar *buf = NULL; 9714 int len = 0; 9715 int size = XML_PARSER_BUFFER_SIZE; 9716 int r, rl; 9717 int s, sl; 9718 int cur, l; 9719 int count = 0; 9720 9721 /* Check 2.6.0 was NXT(0) not RAW */ 9722 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9723 SKIP(9); 9724 } else 9725 return; 9726 9727 ctxt->instate = XML_PARSER_CDATA_SECTION; 9728 r = CUR_CHAR(rl); 9729 if (!IS_CHAR(r)) { 9730 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9731 ctxt->instate = XML_PARSER_CONTENT; 9732 return; 9733 } 9734 NEXTL(rl); 9735 s = CUR_CHAR(sl); 9736 if (!IS_CHAR(s)) { 9737 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9738 ctxt->instate = XML_PARSER_CONTENT; 9739 return; 9740 } 9741 NEXTL(sl); 9742 cur = CUR_CHAR(l); 9743 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9744 if (buf == NULL) { 9745 xmlErrMemory(ctxt, NULL); 9746 return; 9747 } 9748 while (IS_CHAR(cur) && 9749 ((r != ']') || (s != ']') || (cur != '>'))) { 9750 if (len + 
5 >= size) { 9751 xmlChar *tmp; 9752 9753 if ((size > XML_MAX_TEXT_LENGTH) && 9754 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9755 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9756 "CData section too big found", NULL); 9757 xmlFree (buf); 9758 return; 9759 } 9760 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9761 if (tmp == NULL) { 9762 xmlFree(buf); 9763 xmlErrMemory(ctxt, NULL); 9764 return; 9765 } 9766 buf = tmp; 9767 size *= 2; 9768 } 9769 COPY_BUF(rl,buf,len,r); 9770 r = s; 9771 rl = sl; 9772 s = cur; 9773 sl = l; 9774 count++; 9775 if (count > 50) { 9776 GROW; 9777 if (ctxt->instate == XML_PARSER_EOF) { 9778 xmlFree(buf); 9779 return; 9780 } 9781 count = 0; 9782 } 9783 NEXTL(l); 9784 cur = CUR_CHAR(l); 9785 } 9786 buf[len] = 0; 9787 ctxt->instate = XML_PARSER_CONTENT; 9788 if (cur != '>') { 9789 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9790 "CData section not finished\n%.50s\n", buf); 9791 xmlFree(buf); 9792 return; 9793 } 9794 NEXTL(l); 9795 9796 /* 9797 * OK the buffer is to be consumed as cdata. 9798 */ 9799 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9800 if (ctxt->sax->cdataBlock != NULL) 9801 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9802 else if (ctxt->sax->characters != NULL) 9803 ctxt->sax->characters(ctxt->userData, buf, len); 9804 } 9805 xmlFree(buf); 9806 } 9807 9808 /** 9809 * xmlParseContent: 9810 * @ctxt: an XML parser context 9811 * 9812 * Parse a content: 9813 * 9814 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9815 */ 9816 9817 void 9818 xmlParseContent(xmlParserCtxtPtr ctxt) { 9819 int nameNr = ctxt->nameNr; 9820 9821 GROW; 9822 while ((RAW != 0) && 9823 (ctxt->instate != XML_PARSER_EOF)) { 9824 const xmlChar *test = CUR_PTR; 9825 unsigned int cons = ctxt->input->consumed; 9826 const xmlChar *cur = ctxt->input->cur; 9827 9828 /* 9829 * First case : a Processing Instruction. 
9830 */ 9831 if ((*cur == '<') && (cur[1] == '?')) { 9832 xmlParsePI(ctxt); 9833 } 9834 9835 /* 9836 * Second case : a CDSection 9837 */ 9838 /* 2.6.0 test was *cur not RAW */ 9839 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9840 xmlParseCDSect(ctxt); 9841 } 9842 9843 /* 9844 * Third case : a comment 9845 */ 9846 else if ((*cur == '<') && (NXT(1) == '!') && 9847 (NXT(2) == '-') && (NXT(3) == '-')) { 9848 xmlParseComment(ctxt); 9849 ctxt->instate = XML_PARSER_CONTENT; 9850 } 9851 9852 /* 9853 * Fourth case : a sub-element. 9854 */ 9855 else if (*cur == '<') { 9856 if (NXT(1) == '/') { 9857 if (ctxt->nameNr <= nameNr) 9858 break; 9859 xmlParseElementEnd(ctxt); 9860 } else { 9861 xmlParseElementStart(ctxt); 9862 } 9863 } 9864 9865 /* 9866 * Fifth case : a reference. If if has not been resolved, 9867 * parsing returns it's Name, create the node 9868 */ 9869 9870 else if (*cur == '&') { 9871 xmlParseReference(ctxt); 9872 } 9873 9874 /* 9875 * Last case, text. Note that References are handled directly. 9876 */ 9877 else { 9878 xmlParseCharData(ctxt, 0); 9879 } 9880 9881 GROW; 9882 SHRINK; 9883 9884 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9885 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9886 "detected an error in element content\n"); 9887 xmlHaltParser(ctxt); 9888 break; 9889 } 9890 } 9891 } 9892 9893 /** 9894 * xmlParseElement: 9895 * @ctxt: an XML parser context 9896 * 9897 * parse an XML element 9898 * 9899 * [39] element ::= EmptyElemTag | STag content ETag 9900 * 9901 * [ WFC: Element Type Match ] 9902 * The Name in an element's end-tag must match the element type in the 9903 * start-tag. 
9904 * 9905 */ 9906 9907 void 9908 xmlParseElement(xmlParserCtxtPtr ctxt) { 9909 if (xmlParseElementStart(ctxt) != 0) 9910 return; 9911 xmlParseContent(ctxt); 9912 if (ctxt->instate == XML_PARSER_EOF) 9913 return; 9914 xmlParseElementEnd(ctxt); 9915 } 9916 9917 /** 9918 * xmlParseElementStart: 9919 * @ctxt: an XML parser context 9920 * 9921 * Parse the start of an XML element. Returns -1 in case of error, 0 if an 9922 * opening tag was parsed, 1 if an empty element was parsed. 9923 */ 9924 static int 9925 xmlParseElementStart(xmlParserCtxtPtr ctxt) { 9926 const xmlChar *name; 9927 const xmlChar *prefix = NULL; 9928 const xmlChar *URI = NULL; 9929 xmlParserNodeInfo node_info; 9930 int line, tlen = 0; 9931 xmlNodePtr ret; 9932 int nsNr = ctxt->nsNr; 9933 9934 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9935 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9936 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9937 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9938 xmlParserMaxDepth); 9939 xmlHaltParser(ctxt); 9940 return(-1); 9941 } 9942 9943 /* Capture start position */ 9944 if (ctxt->record_info) { 9945 node_info.begin_pos = ctxt->input->consumed + 9946 (CUR_PTR - ctxt->input->base); 9947 node_info.begin_line = ctxt->input->line; 9948 } 9949 9950 if (ctxt->spaceNr == 0) 9951 spacePush(ctxt, -1); 9952 else if (*ctxt->space == -2) 9953 spacePush(ctxt, -1); 9954 else 9955 spacePush(ctxt, *ctxt->space); 9956 9957 line = ctxt->input->line; 9958 #ifdef LIBXML_SAX1_ENABLED 9959 if (ctxt->sax2) 9960 #endif /* LIBXML_SAX1_ENABLED */ 9961 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9962 #ifdef LIBXML_SAX1_ENABLED 9963 else 9964 name = xmlParseStartTag(ctxt); 9965 #endif /* LIBXML_SAX1_ENABLED */ 9966 if (ctxt->instate == XML_PARSER_EOF) 9967 return(-1); 9968 if (name == NULL) { 9969 spacePop(ctxt); 9970 return(-1); 9971 } 9972 if (ctxt->sax2) 9973 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 9974 #ifdef LIBXML_SAX1_ENABLED 9975 else 
9976 namePush(ctxt, name); 9977 #endif /* LIBXML_SAX1_ENABLED */ 9978 ret = ctxt->node; 9979 9980 #ifdef LIBXML_VALID_ENABLED 9981 /* 9982 * [ VC: Root Element Type ] 9983 * The Name in the document type declaration must match the element 9984 * type of the root element. 9985 */ 9986 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9987 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9988 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9989 #endif /* LIBXML_VALID_ENABLED */ 9990 9991 /* 9992 * Check for an Empty Element. 9993 */ 9994 if ((RAW == '/') && (NXT(1) == '>')) { 9995 SKIP(2); 9996 if (ctxt->sax2) { 9997 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9998 (!ctxt->disableSAX)) 9999 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10000 #ifdef LIBXML_SAX1_ENABLED 10001 } else { 10002 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10003 (!ctxt->disableSAX)) 10004 ctxt->sax->endElement(ctxt->userData, name); 10005 #endif /* LIBXML_SAX1_ENABLED */ 10006 } 10007 namePop(ctxt); 10008 spacePop(ctxt); 10009 if (nsNr != ctxt->nsNr) 10010 nsPop(ctxt, ctxt->nsNr - nsNr); 10011 if ( ret != NULL && ctxt->record_info ) { 10012 node_info.end_pos = ctxt->input->consumed + 10013 (CUR_PTR - ctxt->input->base); 10014 node_info.end_line = ctxt->input->line; 10015 node_info.node = ret; 10016 xmlParserAddNodeInfo(ctxt, &node_info); 10017 } 10018 return(1); 10019 } 10020 if (RAW == '>') { 10021 NEXT1; 10022 } else { 10023 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10024 "Couldn't find end of Start Tag %s line %d\n", 10025 name, line, NULL); 10026 10027 /* 10028 * end of parsing of this node. 
10029 */ 10030 nodePop(ctxt); 10031 namePop(ctxt); 10032 spacePop(ctxt); 10033 if (nsNr != ctxt->nsNr) 10034 nsPop(ctxt, ctxt->nsNr - nsNr); 10035 10036 /* 10037 * Capture end position and add node 10038 */ 10039 if ( ret != NULL && ctxt->record_info ) { 10040 node_info.end_pos = ctxt->input->consumed + 10041 (CUR_PTR - ctxt->input->base); 10042 node_info.end_line = ctxt->input->line; 10043 node_info.node = ret; 10044 xmlParserAddNodeInfo(ctxt, &node_info); 10045 } 10046 return(-1); 10047 } 10048 10049 return(0); 10050 } 10051 10052 /** 10053 * xmlParseElementEnd: 10054 * @ctxt: an XML parser context 10055 * 10056 * Parse the end of an XML element. 10057 */ 10058 static void 10059 xmlParseElementEnd(xmlParserCtxtPtr ctxt) { 10060 xmlParserNodeInfo node_info; 10061 xmlNodePtr ret = ctxt->node; 10062 10063 if (ctxt->nameNr <= 0) 10064 return; 10065 10066 /* 10067 * parse the end of tag: '</' should be here. 10068 */ 10069 if (ctxt->sax2) { 10070 const xmlChar *prefix = ctxt->pushTab[ctxt->nameNr * 3 - 3]; 10071 const xmlChar *URI = ctxt->pushTab[ctxt->nameNr * 3 - 2]; 10072 int nsNr = (ptrdiff_t) ctxt->pushTab[ctxt->nameNr * 3 - 1]; 10073 xmlParseEndTag2(ctxt, prefix, URI, 0, nsNr, 0); 10074 namePop(ctxt); 10075 } 10076 #ifdef LIBXML_SAX1_ENABLED 10077 else 10078 xmlParseEndTag1(ctxt, 0); 10079 #endif /* LIBXML_SAX1_ENABLED */ 10080 10081 /* 10082 * Capture end position and add node 10083 */ 10084 if ( ret != NULL && ctxt->record_info ) { 10085 node_info.end_pos = ctxt->input->consumed + 10086 (CUR_PTR - ctxt->input->base); 10087 node_info.end_line = ctxt->input->line; 10088 node_info.node = ret; 10089 xmlParserAddNodeInfo(ctxt, &node_info); 10090 } 10091 } 10092 10093 /** 10094 * xmlParseVersionNum: 10095 * @ctxt: an XML parser context 10096 * 10097 * parse the XML version value. 10098 * 10099 * [26] VersionNum ::= '1.' 
[0-9]+ 10100 * 10101 * In practice allow [0-9].[0-9]+ at that level 10102 * 10103 * Returns the string giving the XML version number, or NULL 10104 */ 10105 xmlChar * 10106 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10107 xmlChar *buf = NULL; 10108 int len = 0; 10109 int size = 10; 10110 xmlChar cur; 10111 10112 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10113 if (buf == NULL) { 10114 xmlErrMemory(ctxt, NULL); 10115 return(NULL); 10116 } 10117 cur = CUR; 10118 if (!((cur >= '0') && (cur <= '9'))) { 10119 xmlFree(buf); 10120 return(NULL); 10121 } 10122 buf[len++] = cur; 10123 NEXT; 10124 cur=CUR; 10125 if (cur != '.') { 10126 xmlFree(buf); 10127 return(NULL); 10128 } 10129 buf[len++] = cur; 10130 NEXT; 10131 cur=CUR; 10132 while ((cur >= '0') && (cur <= '9')) { 10133 if (len + 1 >= size) { 10134 xmlChar *tmp; 10135 10136 size *= 2; 10137 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10138 if (tmp == NULL) { 10139 xmlFree(buf); 10140 xmlErrMemory(ctxt, NULL); 10141 return(NULL); 10142 } 10143 buf = tmp; 10144 } 10145 buf[len++] = cur; 10146 NEXT; 10147 cur=CUR; 10148 } 10149 buf[len] = 0; 10150 return(buf); 10151 } 10152 10153 /** 10154 * xmlParseVersionInfo: 10155 * @ctxt: an XML parser context 10156 * 10157 * parse the XML version. 10158 * 10159 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10160 * 10161 * [25] Eq ::= S? '=' S? 10162 * 10163 * Returns the version string, e.g. 
"1.0" 10164 */ 10165 10166 xmlChar * 10167 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10168 xmlChar *version = NULL; 10169 10170 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10171 SKIP(7); 10172 SKIP_BLANKS; 10173 if (RAW != '=') { 10174 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10175 return(NULL); 10176 } 10177 NEXT; 10178 SKIP_BLANKS; 10179 if (RAW == '"') { 10180 NEXT; 10181 version = xmlParseVersionNum(ctxt); 10182 if (RAW != '"') { 10183 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10184 } else 10185 NEXT; 10186 } else if (RAW == '\''){ 10187 NEXT; 10188 version = xmlParseVersionNum(ctxt); 10189 if (RAW != '\'') { 10190 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10191 } else 10192 NEXT; 10193 } else { 10194 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10195 } 10196 } 10197 return(version); 10198 } 10199 10200 /** 10201 * xmlParseEncName: 10202 * @ctxt: an XML parser context 10203 * 10204 * parse the XML encoding name 10205 * 10206 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10207 * 10208 * Returns the encoding name value or NULL 10209 */ 10210 xmlChar * 10211 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10212 xmlChar *buf = NULL; 10213 int len = 0; 10214 int size = 10; 10215 xmlChar cur; 10216 10217 cur = CUR; 10218 if (((cur >= 'a') && (cur <= 'z')) || 10219 ((cur >= 'A') && (cur <= 'Z'))) { 10220 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10221 if (buf == NULL) { 10222 xmlErrMemory(ctxt, NULL); 10223 return(NULL); 10224 } 10225 10226 buf[len++] = cur; 10227 NEXT; 10228 cur = CUR; 10229 while (((cur >= 'a') && (cur <= 'z')) || 10230 ((cur >= 'A') && (cur <= 'Z')) || 10231 ((cur >= '0') && (cur <= '9')) || 10232 (cur == '.') || (cur == '_') || 10233 (cur == '-')) { 10234 if (len + 1 >= size) { 10235 xmlChar *tmp; 10236 10237 size *= 2; 10238 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10239 if (tmp == NULL) { 10240 xmlErrMemory(ctxt, NULL); 10241 xmlFree(buf); 10242 return(NULL); 
10243 } 10244 buf = tmp; 10245 } 10246 buf[len++] = cur; 10247 NEXT; 10248 cur = CUR; 10249 if (cur == 0) { 10250 SHRINK; 10251 GROW; 10252 cur = CUR; 10253 } 10254 } 10255 buf[len] = 0; 10256 } else { 10257 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10258 } 10259 return(buf); 10260 } 10261 10262 /** 10263 * xmlParseEncodingDecl: 10264 * @ctxt: an XML parser context 10265 * 10266 * parse the XML encoding declaration 10267 * 10268 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10269 * 10270 * this setups the conversion filters. 10271 * 10272 * Returns the encoding value or NULL 10273 */ 10274 10275 const xmlChar * 10276 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10277 xmlChar *encoding = NULL; 10278 10279 SKIP_BLANKS; 10280 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10281 SKIP(8); 10282 SKIP_BLANKS; 10283 if (RAW != '=') { 10284 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10285 return(NULL); 10286 } 10287 NEXT; 10288 SKIP_BLANKS; 10289 if (RAW == '"') { 10290 NEXT; 10291 encoding = xmlParseEncName(ctxt); 10292 if (RAW != '"') { 10293 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10294 xmlFree((xmlChar *) encoding); 10295 return(NULL); 10296 } else 10297 NEXT; 10298 } else if (RAW == '\''){ 10299 NEXT; 10300 encoding = xmlParseEncName(ctxt); 10301 if (RAW != '\'') { 10302 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10303 xmlFree((xmlChar *) encoding); 10304 return(NULL); 10305 } else 10306 NEXT; 10307 } else { 10308 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10309 } 10310 10311 /* 10312 * Non standard parsing, allowing the user to ignore encoding 10313 */ 10314 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10315 xmlFree((xmlChar *) encoding); 10316 return(NULL); 10317 } 10318 10319 /* 10320 * UTF-16 encoding switch has already taken place at this stage, 10321 * more over the little-endian/big-endian selection is already done 10322 */ 10323 if ((encoding != NULL) && 10324 
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10325 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10326 /* 10327 * If no encoding was passed to the parser, that we are 10328 * using UTF-16 and no decoder is present i.e. the 10329 * document is apparently UTF-8 compatible, then raise an 10330 * encoding mismatch fatal error 10331 */ 10332 if ((ctxt->encoding == NULL) && 10333 (ctxt->input->buf != NULL) && 10334 (ctxt->input->buf->encoder == NULL)) { 10335 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10336 "Document labelled UTF-16 but has UTF-8 content\n"); 10337 } 10338 if (ctxt->encoding != NULL) 10339 xmlFree((xmlChar *) ctxt->encoding); 10340 ctxt->encoding = encoding; 10341 } 10342 /* 10343 * UTF-8 encoding is handled natively 10344 */ 10345 else if ((encoding != NULL) && 10346 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10347 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10348 if (ctxt->encoding != NULL) 10349 xmlFree((xmlChar *) ctxt->encoding); 10350 ctxt->encoding = encoding; 10351 } 10352 else if (encoding != NULL) { 10353 xmlCharEncodingHandlerPtr handler; 10354 10355 if (ctxt->input->encoding != NULL) 10356 xmlFree((xmlChar *) ctxt->input->encoding); 10357 ctxt->input->encoding = encoding; 10358 10359 handler = xmlFindCharEncodingHandler((const char *) encoding); 10360 if (handler != NULL) { 10361 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10362 /* failed to convert */ 10363 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10364 return(NULL); 10365 } 10366 } else { 10367 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10368 "Unsupported encoding %s\n", encoding); 10369 return(NULL); 10370 } 10371 } 10372 } 10373 return(encoding); 10374 } 10375 10376 /** 10377 * xmlParseSDDecl: 10378 * @ctxt: an XML parser context 10379 * 10380 * parse the XML standalone declaration 10381 * 10382 * [32] SDDecl ::= S 'standalone' Eq 10383 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10384 * 10385 * [ VC: Standalone Document Declaration ] 10386 
* TODO The standalone document declaration must have the value "no" 10387 * if any external markup declarations contain declarations of: 10388 * - attributes with default values, if elements to which these 10389 * attributes apply appear in the document without specifications 10390 * of values for these attributes, or 10391 * - entities (other than amp, lt, gt, apos, quot), if references 10392 * to those entities appear in the document, or 10393 * - attributes with values subject to normalization, where the 10394 * attribute appears in the document with a value which will change 10395 * as a result of normalization, or 10396 * - element types with element content, if white space occurs directly 10397 * within any instance of those types. 10398 * 10399 * Returns: 10400 * 1 if standalone="yes" 10401 * 0 if standalone="no" 10402 * -2 if standalone attribute is missing or invalid 10403 * (A standalone value of -2 means that the XML declaration was found, 10404 * but no value was specified for the standalone attribute). 
10405 */ 10406 10407 int 10408 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10409 int standalone = -2; 10410 10411 SKIP_BLANKS; 10412 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10413 SKIP(10); 10414 SKIP_BLANKS; 10415 if (RAW != '=') { 10416 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10417 return(standalone); 10418 } 10419 NEXT; 10420 SKIP_BLANKS; 10421 if (RAW == '\''){ 10422 NEXT; 10423 if ((RAW == 'n') && (NXT(1) == 'o')) { 10424 standalone = 0; 10425 SKIP(2); 10426 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10427 (NXT(2) == 's')) { 10428 standalone = 1; 10429 SKIP(3); 10430 } else { 10431 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10432 } 10433 if (RAW != '\'') { 10434 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10435 } else 10436 NEXT; 10437 } else if (RAW == '"'){ 10438 NEXT; 10439 if ((RAW == 'n') && (NXT(1) == 'o')) { 10440 standalone = 0; 10441 SKIP(2); 10442 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10443 (NXT(2) == 's')) { 10444 standalone = 1; 10445 SKIP(3); 10446 } else { 10447 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10448 } 10449 if (RAW != '"') { 10450 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10451 } else 10452 NEXT; 10453 } else { 10454 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10455 } 10456 } 10457 return(standalone); 10458 } 10459 10460 /** 10461 * xmlParseXMLDecl: 10462 * @ctxt: an XML parser context 10463 * 10464 * parse an XML declaration header 10465 * 10466 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10467 */ 10468 10469 void 10470 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10471 xmlChar *version; 10472 10473 /* 10474 * This value for standalone indicates that the document has an 10475 * XML declaration but it does not have a standalone attribute. 10476 * It will be overwritten later if a standalone attribute is found. 10477 */ 10478 ctxt->input->standalone = -2; 10479 10480 /* 10481 * We know that '<?xml' is here. 
10482 */ 10483 SKIP(5); 10484 10485 if (!IS_BLANK_CH(RAW)) { 10486 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10487 "Blank needed after '<?xml'\n"); 10488 } 10489 SKIP_BLANKS; 10490 10491 /* 10492 * We must have the VersionInfo here. 10493 */ 10494 version = xmlParseVersionInfo(ctxt); 10495 if (version == NULL) { 10496 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10497 } else { 10498 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10499 /* 10500 * Changed here for XML-1.0 5th edition 10501 */ 10502 if (ctxt->options & XML_PARSE_OLD10) { 10503 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10504 "Unsupported version '%s'\n", 10505 version); 10506 } else { 10507 if ((version[0] == '1') && ((version[1] == '.'))) { 10508 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10509 "Unsupported version '%s'\n", 10510 version, NULL); 10511 } else { 10512 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10513 "Unsupported version '%s'\n", 10514 version); 10515 } 10516 } 10517 } 10518 if (ctxt->version != NULL) 10519 xmlFree((void *) ctxt->version); 10520 ctxt->version = version; 10521 } 10522 10523 /* 10524 * We may have the encoding declaration 10525 */ 10526 if (!IS_BLANK_CH(RAW)) { 10527 if ((RAW == '?') && (NXT(1) == '>')) { 10528 SKIP(2); 10529 return; 10530 } 10531 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10532 } 10533 xmlParseEncodingDecl(ctxt); 10534 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10535 (ctxt->instate == XML_PARSER_EOF)) { 10536 /* 10537 * The XML REC instructs us to stop parsing right here 10538 */ 10539 return; 10540 } 10541 10542 /* 10543 * We may have the standalone status. 
10544 */ 10545 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10546 if ((RAW == '?') && (NXT(1) == '>')) { 10547 SKIP(2); 10548 return; 10549 } 10550 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10551 } 10552 10553 /* 10554 * We can grow the input buffer freely at that point 10555 */ 10556 GROW; 10557 10558 SKIP_BLANKS; 10559 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10560 10561 SKIP_BLANKS; 10562 if ((RAW == '?') && (NXT(1) == '>')) { 10563 SKIP(2); 10564 } else if (RAW == '>') { 10565 /* Deprecated old WD ... */ 10566 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10567 NEXT; 10568 } else { 10569 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10570 MOVETO_ENDTAG(CUR_PTR); 10571 NEXT; 10572 } 10573 } 10574 10575 /** 10576 * xmlParseMisc: 10577 * @ctxt: an XML parser context 10578 * 10579 * parse an XML Misc* optional field. 10580 * 10581 * [27] Misc ::= Comment | PI | S 10582 */ 10583 10584 void 10585 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10586 while ((ctxt->instate != XML_PARSER_EOF) && 10587 (((RAW == '<') && (NXT(1) == '?')) || 10588 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10589 IS_BLANK_CH(CUR))) { 10590 if ((RAW == '<') && (NXT(1) == '?')) { 10591 xmlParsePI(ctxt); 10592 } else if (IS_BLANK_CH(CUR)) { 10593 NEXT; 10594 } else 10595 xmlParseComment(ctxt); 10596 } 10597 } 10598 10599 /** 10600 * xmlParseDocument: 10601 * @ctxt: an XML parser context 10602 * 10603 * parse an XML document (and build a tree if using the standard SAX 10604 * interface). 10605 * 10606 * [1] document ::= prolog element Misc* 10607 * 10608 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10609 * 10610 * Returns 0, -1 in case of error. the parser context is augmented 10611 * as a result of the parsing. 
10612 */ 10613 10614 int 10615 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10616 xmlChar start[4]; 10617 xmlCharEncoding enc; 10618 10619 xmlInitParser(); 10620 10621 if ((ctxt == NULL) || (ctxt->input == NULL)) 10622 return(-1); 10623 10624 GROW; 10625 10626 /* 10627 * SAX: detecting the level. 10628 */ 10629 xmlDetectSAX2(ctxt); 10630 10631 /* 10632 * SAX: beginning of the document processing. 10633 */ 10634 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10635 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10636 if (ctxt->instate == XML_PARSER_EOF) 10637 return(-1); 10638 10639 if ((ctxt->encoding == NULL) && 10640 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10641 /* 10642 * Get the 4 first bytes and decode the charset 10643 * if enc != XML_CHAR_ENCODING_NONE 10644 * plug some encoding conversion routines. 10645 */ 10646 start[0] = RAW; 10647 start[1] = NXT(1); 10648 start[2] = NXT(2); 10649 start[3] = NXT(3); 10650 enc = xmlDetectCharEncoding(&start[0], 4); 10651 if (enc != XML_CHAR_ENCODING_NONE) { 10652 xmlSwitchEncoding(ctxt, enc); 10653 } 10654 } 10655 10656 10657 if (CUR == 0) { 10658 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10659 return(-1); 10660 } 10661 10662 /* 10663 * Check for the XMLDecl in the Prolog. 10664 * do not GROW here to avoid the detected encoder to decode more 10665 * than just the first line, unless the amount of data is really 10666 * too small to hold "<?xml version="1.0" encoding="foo" 10667 */ 10668 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10669 GROW; 10670 } 10671 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10672 10673 /* 10674 * Note that we will switch encoding on the fly. 
10675 */ 10676 xmlParseXMLDecl(ctxt); 10677 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10678 (ctxt->instate == XML_PARSER_EOF)) { 10679 /* 10680 * The XML REC instructs us to stop parsing right here 10681 */ 10682 return(-1); 10683 } 10684 ctxt->standalone = ctxt->input->standalone; 10685 SKIP_BLANKS; 10686 } else { 10687 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10688 } 10689 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10690 ctxt->sax->startDocument(ctxt->userData); 10691 if (ctxt->instate == XML_PARSER_EOF) 10692 return(-1); 10693 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10694 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10695 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10696 } 10697 10698 /* 10699 * The Misc part of the Prolog 10700 */ 10701 GROW; 10702 xmlParseMisc(ctxt); 10703 10704 /* 10705 * Then possibly doc type declaration(s) and more Misc 10706 * (doctypedecl Misc*)? 10707 */ 10708 GROW; 10709 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10710 10711 ctxt->inSubset = 1; 10712 xmlParseDocTypeDecl(ctxt); 10713 if (RAW == '[') { 10714 ctxt->instate = XML_PARSER_DTD; 10715 xmlParseInternalSubset(ctxt); 10716 if (ctxt->instate == XML_PARSER_EOF) 10717 return(-1); 10718 } 10719 10720 /* 10721 * Create and update the external subset. 
10722 */ 10723 ctxt->inSubset = 2; 10724 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10725 (!ctxt->disableSAX)) 10726 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10727 ctxt->extSubSystem, ctxt->extSubURI); 10728 if (ctxt->instate == XML_PARSER_EOF) 10729 return(-1); 10730 ctxt->inSubset = 0; 10731 10732 xmlCleanSpecialAttr(ctxt); 10733 10734 ctxt->instate = XML_PARSER_PROLOG; 10735 xmlParseMisc(ctxt); 10736 } 10737 10738 /* 10739 * Time to start parsing the tree itself 10740 */ 10741 GROW; 10742 if (RAW != '<') { 10743 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10744 "Start tag expected, '<' not found\n"); 10745 } else { 10746 ctxt->instate = XML_PARSER_CONTENT; 10747 xmlParseElement(ctxt); 10748 ctxt->instate = XML_PARSER_EPILOG; 10749 10750 10751 /* 10752 * The Misc part at the end 10753 */ 10754 xmlParseMisc(ctxt); 10755 10756 if (RAW != 0) { 10757 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10758 } 10759 ctxt->instate = XML_PARSER_EOF; 10760 } 10761 10762 /* 10763 * SAX: end of the document processing. 10764 */ 10765 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10766 ctxt->sax->endDocument(ctxt->userData); 10767 10768 /* 10769 * Remove locally kept entity definitions if the tree was not built 10770 */ 10771 if ((ctxt->myDoc != NULL) && 10772 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10773 xmlFreeDoc(ctxt->myDoc); 10774 ctxt->myDoc = NULL; 10775 } 10776 10777 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10778 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10779 if (ctxt->valid) 10780 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10781 if (ctxt->nsWellFormed) 10782 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10783 if (ctxt->options & XML_PARSE_OLD10) 10784 ctxt->myDoc->properties |= XML_DOC_OLD10; 10785 } 10786 if (! 
ctxt->wellFormed) { 10787 ctxt->valid = 0; 10788 return(-1); 10789 } 10790 return(0); 10791 } 10792 10793 /** 10794 * xmlParseExtParsedEnt: 10795 * @ctxt: an XML parser context 10796 * 10797 * parse a general parsed entity 10798 * An external general parsed entity is well-formed if it matches the 10799 * production labeled extParsedEnt. 10800 * 10801 * [78] extParsedEnt ::= TextDecl? content 10802 * 10803 * Returns 0, -1 in case of error. the parser context is augmented 10804 * as a result of the parsing. 10805 */ 10806 10807 int 10808 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10809 xmlChar start[4]; 10810 xmlCharEncoding enc; 10811 10812 if ((ctxt == NULL) || (ctxt->input == NULL)) 10813 return(-1); 10814 10815 xmlDefaultSAXHandlerInit(); 10816 10817 xmlDetectSAX2(ctxt); 10818 10819 GROW; 10820 10821 /* 10822 * SAX: beginning of the document processing. 10823 */ 10824 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10825 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10826 10827 /* 10828 * Get the 4 first bytes and decode the charset 10829 * if enc != XML_CHAR_ENCODING_NONE 10830 * plug some encoding conversion routines. 10831 */ 10832 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10833 start[0] = RAW; 10834 start[1] = NXT(1); 10835 start[2] = NXT(2); 10836 start[3] = NXT(3); 10837 enc = xmlDetectCharEncoding(start, 4); 10838 if (enc != XML_CHAR_ENCODING_NONE) { 10839 xmlSwitchEncoding(ctxt, enc); 10840 } 10841 } 10842 10843 10844 if (CUR == 0) { 10845 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10846 } 10847 10848 /* 10849 * Check for the XMLDecl in the Prolog. 10850 */ 10851 GROW; 10852 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10853 10854 /* 10855 * Note that we will switch encoding on the fly. 
10856 */ 10857 xmlParseXMLDecl(ctxt); 10858 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10859 /* 10860 * The XML REC instructs us to stop parsing right here 10861 */ 10862 return(-1); 10863 } 10864 SKIP_BLANKS; 10865 } else { 10866 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10867 } 10868 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10869 ctxt->sax->startDocument(ctxt->userData); 10870 if (ctxt->instate == XML_PARSER_EOF) 10871 return(-1); 10872 10873 /* 10874 * Doing validity checking on chunk doesn't make sense 10875 */ 10876 ctxt->instate = XML_PARSER_CONTENT; 10877 ctxt->validate = 0; 10878 ctxt->loadsubset = 0; 10879 ctxt->depth = 0; 10880 10881 xmlParseContent(ctxt); 10882 if (ctxt->instate == XML_PARSER_EOF) 10883 return(-1); 10884 10885 if ((RAW == '<') && (NXT(1) == '/')) { 10886 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10887 } else if (RAW != 0) { 10888 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10889 } 10890 10891 /* 10892 * SAX: end of the document processing. 10893 */ 10894 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10895 ctxt->sax->endDocument(ctxt->userData); 10896 10897 if (! ctxt->wellFormed) return(-1); 10898 return(0); 10899 } 10900 10901 #ifdef LIBXML_PUSH_ENABLED 10902 /************************************************************************ 10903 * * 10904 * Progressive parsing interfaces * 10905 * * 10906 ************************************************************************/ 10907 10908 /** 10909 * xmlParseLookupSequence: 10910 * @ctxt: an XML parser context 10911 * @first: the first char to lookup 10912 * @next: the next char to lookup or zero 10913 * @third: the next char to lookup or zero 10914 * 10915 * Try to find if a sequence (first, next, third) or just (first next) or 10916 * (first) is available in the input stream. 
10917 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10918 * to avoid rescanning sequences of bytes, it DOES change the state of the 10919 * parser, do not use liberally. 10920 * 10921 * Returns the index to the current parsing point if the full sequence 10922 * is available, -1 otherwise. 10923 */ 10924 static int 10925 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10926 xmlChar next, xmlChar third) { 10927 int base, len; 10928 xmlParserInputPtr in; 10929 const xmlChar *buf; 10930 10931 in = ctxt->input; 10932 if (in == NULL) return(-1); 10933 base = in->cur - in->base; 10934 if (base < 0) return(-1); 10935 if (ctxt->checkIndex > base) 10936 base = ctxt->checkIndex; 10937 if (in->buf == NULL) { 10938 buf = in->base; 10939 len = in->length; 10940 } else { 10941 buf = xmlBufContent(in->buf->buffer); 10942 len = xmlBufUse(in->buf->buffer); 10943 } 10944 /* take into account the sequence length */ 10945 if (third) len -= 2; 10946 else if (next) len --; 10947 for (;base < len;base++) { 10948 if (buf[base] == first) { 10949 if (third != 0) { 10950 if ((buf[base + 1] != next) || 10951 (buf[base + 2] != third)) continue; 10952 } else if (next != 0) { 10953 if (buf[base + 1] != next) continue; 10954 } 10955 ctxt->checkIndex = 0; 10956 #ifdef DEBUG_PUSH 10957 if (next == 0) 10958 xmlGenericError(xmlGenericErrorContext, 10959 "PP: lookup '%c' found at %d\n", 10960 first, base); 10961 else if (third == 0) 10962 xmlGenericError(xmlGenericErrorContext, 10963 "PP: lookup '%c%c' found at %d\n", 10964 first, next, base); 10965 else 10966 xmlGenericError(xmlGenericErrorContext, 10967 "PP: lookup '%c%c%c' found at %d\n", 10968 first, next, third, base); 10969 #endif 10970 return(base - (in->cur - in->base)); 10971 } 10972 } 10973 ctxt->checkIndex = base; 10974 #ifdef DEBUG_PUSH 10975 if (next == 0) 10976 xmlGenericError(xmlGenericErrorContext, 10977 "PP: lookup '%c' failed\n", first); 10978 else if (third == 0) 10979 
xmlGenericError(xmlGenericErrorContext, 10980 "PP: lookup '%c%c' failed\n", first, next); 10981 else 10982 xmlGenericError(xmlGenericErrorContext, 10983 "PP: lookup '%c%c%c' failed\n", first, next, third); 10984 #endif 10985 return(-1); 10986 } 10987 10988 /** 10989 * xmlParseGetLasts: 10990 * @ctxt: an XML parser context 10991 * @lastlt: pointer to store the last '<' from the input 10992 * @lastgt: pointer to store the last '>' from the input 10993 * 10994 * Lookup the last < and > in the current chunk 10995 */ 10996 static void 10997 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10998 const xmlChar **lastgt) { 10999 const xmlChar *tmp; 11000 11001 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11002 xmlGenericError(xmlGenericErrorContext, 11003 "Internal error: xmlParseGetLasts\n"); 11004 return; 11005 } 11006 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11007 tmp = ctxt->input->end; 11008 tmp--; 11009 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11010 if (tmp < ctxt->input->base) { 11011 *lastlt = NULL; 11012 *lastgt = NULL; 11013 } else { 11014 *lastlt = tmp; 11015 tmp++; 11016 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11017 if (*tmp == '\'') { 11018 tmp++; 11019 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11020 if (tmp < ctxt->input->end) tmp++; 11021 } else if (*tmp == '"') { 11022 tmp++; 11023 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11024 if (tmp < ctxt->input->end) tmp++; 11025 } else 11026 tmp++; 11027 } 11028 if (tmp < ctxt->input->end) 11029 *lastgt = tmp; 11030 else { 11031 tmp = *lastlt; 11032 tmp--; 11033 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11034 if (tmp >= ctxt->input->base) 11035 *lastgt = tmp; 11036 else 11037 *lastgt = NULL; 11038 } 11039 } 11040 } else { 11041 *lastlt = NULL; 11042 *lastgt = NULL; 11043 } 11044 } 11045 /** 11046 * xmlCheckCdataPush: 11047 * @cur: pointer to the block of characters 11048 * @len: length of the 
block in bytes 11049 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11050 * 11051 * Check that the block of characters is okay as SCdata content [20] 11052 * 11053 * Returns the number of bytes to pass if okay, a negative index where an 11054 * UTF-8 error occurred otherwise 11055 */ 11056 static int 11057 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11058 int ix; 11059 unsigned char c; 11060 int codepoint; 11061 11062 if ((utf == NULL) || (len <= 0)) 11063 return(0); 11064 11065 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11066 c = utf[ix]; 11067 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11068 if (c >= 0x20) 11069 ix++; 11070 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11071 ix++; 11072 else 11073 return(-ix); 11074 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11075 if (ix + 2 > len) return(complete ? -ix : ix); 11076 if ((utf[ix+1] & 0xc0 ) != 0x80) 11077 return(-ix); 11078 codepoint = (utf[ix] & 0x1f) << 6; 11079 codepoint |= utf[ix+1] & 0x3f; 11080 if (!xmlIsCharQ(codepoint)) 11081 return(-ix); 11082 ix += 2; 11083 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11084 if (ix + 3 > len) return(complete ? -ix : ix); 11085 if (((utf[ix+1] & 0xc0) != 0x80) || 11086 ((utf[ix+2] & 0xc0) != 0x80)) 11087 return(-ix); 11088 codepoint = (utf[ix] & 0xf) << 12; 11089 codepoint |= (utf[ix+1] & 0x3f) << 6; 11090 codepoint |= utf[ix+2] & 0x3f; 11091 if (!xmlIsCharQ(codepoint)) 11092 return(-ix); 11093 ix += 3; 11094 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11095 if (ix + 4 > len) return(complete ? 
-ix : ix); 11096 if (((utf[ix+1] & 0xc0) != 0x80) || 11097 ((utf[ix+2] & 0xc0) != 0x80) || 11098 ((utf[ix+3] & 0xc0) != 0x80)) 11099 return(-ix); 11100 codepoint = (utf[ix] & 0x7) << 18; 11101 codepoint |= (utf[ix+1] & 0x3f) << 12; 11102 codepoint |= (utf[ix+2] & 0x3f) << 6; 11103 codepoint |= utf[ix+3] & 0x3f; 11104 if (!xmlIsCharQ(codepoint)) 11105 return(-ix); 11106 ix += 4; 11107 } else /* unknown encoding */ 11108 return(-ix); 11109 } 11110 return(ix); 11111 } 11112 11113 /** 11114 * xmlParseTryOrFinish: 11115 * @ctxt: an XML parser context 11116 * @terminate: last chunk indicator 11117 * 11118 * Try to progress on parsing 11119 * 11120 * Returns zero if no parsing was possible 11121 */ 11122 static int 11123 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11124 int ret = 0; 11125 int avail, tlen; 11126 xmlChar cur, next; 11127 const xmlChar *lastlt, *lastgt; 11128 11129 if (ctxt->input == NULL) 11130 return(0); 11131 11132 #ifdef DEBUG_PUSH 11133 switch (ctxt->instate) { 11134 case XML_PARSER_EOF: 11135 xmlGenericError(xmlGenericErrorContext, 11136 "PP: try EOF\n"); break; 11137 case XML_PARSER_START: 11138 xmlGenericError(xmlGenericErrorContext, 11139 "PP: try START\n"); break; 11140 case XML_PARSER_MISC: 11141 xmlGenericError(xmlGenericErrorContext, 11142 "PP: try MISC\n");break; 11143 case XML_PARSER_COMMENT: 11144 xmlGenericError(xmlGenericErrorContext, 11145 "PP: try COMMENT\n");break; 11146 case XML_PARSER_PROLOG: 11147 xmlGenericError(xmlGenericErrorContext, 11148 "PP: try PROLOG\n");break; 11149 case XML_PARSER_START_TAG: 11150 xmlGenericError(xmlGenericErrorContext, 11151 "PP: try START_TAG\n");break; 11152 case XML_PARSER_CONTENT: 11153 xmlGenericError(xmlGenericErrorContext, 11154 "PP: try CONTENT\n");break; 11155 case XML_PARSER_CDATA_SECTION: 11156 xmlGenericError(xmlGenericErrorContext, 11157 "PP: try CDATA_SECTION\n");break; 11158 case XML_PARSER_END_TAG: 11159 xmlGenericError(xmlGenericErrorContext, 11160 "PP: try 
END_TAG\n");break; 11161 case XML_PARSER_ENTITY_DECL: 11162 xmlGenericError(xmlGenericErrorContext, 11163 "PP: try ENTITY_DECL\n");break; 11164 case XML_PARSER_ENTITY_VALUE: 11165 xmlGenericError(xmlGenericErrorContext, 11166 "PP: try ENTITY_VALUE\n");break; 11167 case XML_PARSER_ATTRIBUTE_VALUE: 11168 xmlGenericError(xmlGenericErrorContext, 11169 "PP: try ATTRIBUTE_VALUE\n");break; 11170 case XML_PARSER_DTD: 11171 xmlGenericError(xmlGenericErrorContext, 11172 "PP: try DTD\n");break; 11173 case XML_PARSER_EPILOG: 11174 xmlGenericError(xmlGenericErrorContext, 11175 "PP: try EPILOG\n");break; 11176 case XML_PARSER_PI: 11177 xmlGenericError(xmlGenericErrorContext, 11178 "PP: try PI\n");break; 11179 case XML_PARSER_IGNORE: 11180 xmlGenericError(xmlGenericErrorContext, 11181 "PP: try IGNORE\n");break; 11182 } 11183 #endif 11184 11185 if ((ctxt->input != NULL) && 11186 (ctxt->input->cur - ctxt->input->base > 4096)) { 11187 xmlSHRINK(ctxt); 11188 ctxt->checkIndex = 0; 11189 } 11190 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11191 11192 while (ctxt->instate != XML_PARSER_EOF) { 11193 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11194 return(0); 11195 11196 if (ctxt->input == NULL) break; 11197 if (ctxt->input->buf == NULL) 11198 avail = ctxt->input->length - 11199 (ctxt->input->cur - ctxt->input->base); 11200 else { 11201 /* 11202 * If we are operating on converted input, try to flush 11203 * remaining chars to avoid them stalling in the non-converted 11204 * buffer. But do not do this in document start where 11205 * encoding="..." may not have been read and we work on a 11206 * guessed encoding. 
11207 */ 11208 if ((ctxt->instate != XML_PARSER_START) && 11209 (ctxt->input->buf->raw != NULL) && 11210 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11211 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11212 ctxt->input); 11213 size_t current = ctxt->input->cur - ctxt->input->base; 11214 11215 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11216 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11217 base, current); 11218 } 11219 avail = xmlBufUse(ctxt->input->buf->buffer) - 11220 (ctxt->input->cur - ctxt->input->base); 11221 } 11222 if (avail < 1) 11223 goto done; 11224 switch (ctxt->instate) { 11225 case XML_PARSER_EOF: 11226 /* 11227 * Document parsing is done ! 11228 */ 11229 goto done; 11230 case XML_PARSER_START: 11231 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11232 xmlChar start[4]; 11233 xmlCharEncoding enc; 11234 11235 /* 11236 * Very first chars read from the document flow. 11237 */ 11238 if (avail < 4) 11239 goto done; 11240 11241 /* 11242 * Get the 4 first bytes and decode the charset 11243 * if enc != XML_CHAR_ENCODING_NONE 11244 * plug some encoding conversion routines, 11245 * else xmlSwitchEncoding will set to (default) 11246 * UTF8. 
11247 */ 11248 start[0] = RAW; 11249 start[1] = NXT(1); 11250 start[2] = NXT(2); 11251 start[3] = NXT(3); 11252 enc = xmlDetectCharEncoding(start, 4); 11253 xmlSwitchEncoding(ctxt, enc); 11254 break; 11255 } 11256 11257 if (avail < 2) 11258 goto done; 11259 cur = ctxt->input->cur[0]; 11260 next = ctxt->input->cur[1]; 11261 if (cur == 0) { 11262 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11263 ctxt->sax->setDocumentLocator(ctxt->userData, 11264 &xmlDefaultSAXLocator); 11265 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11266 xmlHaltParser(ctxt); 11267 #ifdef DEBUG_PUSH 11268 xmlGenericError(xmlGenericErrorContext, 11269 "PP: entering EOF\n"); 11270 #endif 11271 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11272 ctxt->sax->endDocument(ctxt->userData); 11273 goto done; 11274 } 11275 if ((cur == '<') && (next == '?')) { 11276 /* PI or XML decl */ 11277 if (avail < 5) return(ret); 11278 if ((!terminate) && 11279 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11280 return(ret); 11281 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11282 ctxt->sax->setDocumentLocator(ctxt->userData, 11283 &xmlDefaultSAXLocator); 11284 if ((ctxt->input->cur[2] == 'x') && 11285 (ctxt->input->cur[3] == 'm') && 11286 (ctxt->input->cur[4] == 'l') && 11287 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11288 ret += 5; 11289 #ifdef DEBUG_PUSH 11290 xmlGenericError(xmlGenericErrorContext, 11291 "PP: Parsing XML Decl\n"); 11292 #endif 11293 xmlParseXMLDecl(ctxt); 11294 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11295 /* 11296 * The XML REC instructs us to stop parsing right 11297 * here 11298 */ 11299 xmlHaltParser(ctxt); 11300 return(0); 11301 } 11302 ctxt->standalone = ctxt->input->standalone; 11303 if ((ctxt->encoding == NULL) && 11304 (ctxt->input->encoding != NULL)) 11305 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11306 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11307 (!ctxt->disableSAX)) 11308 ctxt->sax->startDocument(ctxt->userData); 11309 
ctxt->instate = XML_PARSER_MISC; 11310 #ifdef DEBUG_PUSH 11311 xmlGenericError(xmlGenericErrorContext, 11312 "PP: entering MISC\n"); 11313 #endif 11314 } else { 11315 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11316 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11317 (!ctxt->disableSAX)) 11318 ctxt->sax->startDocument(ctxt->userData); 11319 ctxt->instate = XML_PARSER_MISC; 11320 #ifdef DEBUG_PUSH 11321 xmlGenericError(xmlGenericErrorContext, 11322 "PP: entering MISC\n"); 11323 #endif 11324 } 11325 } else { 11326 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11327 ctxt->sax->setDocumentLocator(ctxt->userData, 11328 &xmlDefaultSAXLocator); 11329 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11330 if (ctxt->version == NULL) { 11331 xmlErrMemory(ctxt, NULL); 11332 break; 11333 } 11334 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11335 (!ctxt->disableSAX)) 11336 ctxt->sax->startDocument(ctxt->userData); 11337 ctxt->instate = XML_PARSER_MISC; 11338 #ifdef DEBUG_PUSH 11339 xmlGenericError(xmlGenericErrorContext, 11340 "PP: entering MISC\n"); 11341 #endif 11342 } 11343 break; 11344 case XML_PARSER_START_TAG: { 11345 const xmlChar *name; 11346 const xmlChar *prefix = NULL; 11347 const xmlChar *URI = NULL; 11348 int nsNr = ctxt->nsNr; 11349 11350 if ((avail < 2) && (ctxt->inputNr == 1)) 11351 goto done; 11352 cur = ctxt->input->cur[0]; 11353 if (cur != '<') { 11354 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11355 xmlHaltParser(ctxt); 11356 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11357 ctxt->sax->endDocument(ctxt->userData); 11358 goto done; 11359 } 11360 if (!terminate) { 11361 if (ctxt->progressive) { 11362 /* > can be found unescaped in attribute values */ 11363 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11364 goto done; 11365 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11366 goto done; 11367 } 11368 } 11369 if (ctxt->spaceNr == 0) 11370 spacePush(ctxt, -1); 11371 else if (*ctxt->space == -2) 11372 
spacePush(ctxt, -1); 11373 else 11374 spacePush(ctxt, *ctxt->space); 11375 #ifdef LIBXML_SAX1_ENABLED 11376 if (ctxt->sax2) 11377 #endif /* LIBXML_SAX1_ENABLED */ 11378 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11379 #ifdef LIBXML_SAX1_ENABLED 11380 else 11381 name = xmlParseStartTag(ctxt); 11382 #endif /* LIBXML_SAX1_ENABLED */ 11383 if (ctxt->instate == XML_PARSER_EOF) 11384 goto done; 11385 if (name == NULL) { 11386 spacePop(ctxt); 11387 xmlHaltParser(ctxt); 11388 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11389 ctxt->sax->endDocument(ctxt->userData); 11390 goto done; 11391 } 11392 #ifdef LIBXML_VALID_ENABLED 11393 /* 11394 * [ VC: Root Element Type ] 11395 * The Name in the document type declaration must match 11396 * the element type of the root element. 11397 */ 11398 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11399 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11400 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11401 #endif /* LIBXML_VALID_ENABLED */ 11402 11403 /* 11404 * Check for an Empty Element. 
11405 */ 11406 if ((RAW == '/') && (NXT(1) == '>')) { 11407 SKIP(2); 11408 11409 if (ctxt->sax2) { 11410 if ((ctxt->sax != NULL) && 11411 (ctxt->sax->endElementNs != NULL) && 11412 (!ctxt->disableSAX)) 11413 ctxt->sax->endElementNs(ctxt->userData, name, 11414 prefix, URI); 11415 if (ctxt->nsNr - nsNr > 0) 11416 nsPop(ctxt, ctxt->nsNr - nsNr); 11417 #ifdef LIBXML_SAX1_ENABLED 11418 } else { 11419 if ((ctxt->sax != NULL) && 11420 (ctxt->sax->endElement != NULL) && 11421 (!ctxt->disableSAX)) 11422 ctxt->sax->endElement(ctxt->userData, name); 11423 #endif /* LIBXML_SAX1_ENABLED */ 11424 } 11425 if (ctxt->instate == XML_PARSER_EOF) 11426 goto done; 11427 spacePop(ctxt); 11428 if (ctxt->nameNr == 0) { 11429 ctxt->instate = XML_PARSER_EPILOG; 11430 } else { 11431 ctxt->instate = XML_PARSER_CONTENT; 11432 } 11433 ctxt->progressive = 1; 11434 break; 11435 } 11436 if (RAW == '>') { 11437 NEXT; 11438 } else { 11439 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11440 "Couldn't find end of Start Tag %s\n", 11441 name); 11442 nodePop(ctxt); 11443 spacePop(ctxt); 11444 } 11445 if (ctxt->sax2) 11446 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11447 #ifdef LIBXML_SAX1_ENABLED 11448 else 11449 namePush(ctxt, name); 11450 #endif /* LIBXML_SAX1_ENABLED */ 11451 11452 ctxt->instate = XML_PARSER_CONTENT; 11453 ctxt->progressive = 1; 11454 break; 11455 } 11456 case XML_PARSER_CONTENT: { 11457 const xmlChar *test; 11458 unsigned int cons; 11459 if ((avail < 2) && (ctxt->inputNr == 1)) 11460 goto done; 11461 cur = ctxt->input->cur[0]; 11462 next = ctxt->input->cur[1]; 11463 11464 test = CUR_PTR; 11465 cons = ctxt->input->consumed; 11466 if ((cur == '<') && (next == '/')) { 11467 ctxt->instate = XML_PARSER_END_TAG; 11468 break; 11469 } else if ((cur == '<') && (next == '?')) { 11470 if ((!terminate) && 11471 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11472 ctxt->progressive = XML_PARSER_PI; 11473 goto done; 11474 } 11475 xmlParsePI(ctxt); 11476 ctxt->instate = 
XML_PARSER_CONTENT; 11477 ctxt->progressive = 1; 11478 } else if ((cur == '<') && (next != '!')) { 11479 ctxt->instate = XML_PARSER_START_TAG; 11480 break; 11481 } else if ((cur == '<') && (next == '!') && 11482 (ctxt->input->cur[2] == '-') && 11483 (ctxt->input->cur[3] == '-')) { 11484 int term; 11485 11486 if (avail < 4) 11487 goto done; 11488 ctxt->input->cur += 4; 11489 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11490 ctxt->input->cur -= 4; 11491 if ((!terminate) && (term < 0)) { 11492 ctxt->progressive = XML_PARSER_COMMENT; 11493 goto done; 11494 } 11495 xmlParseComment(ctxt); 11496 ctxt->instate = XML_PARSER_CONTENT; 11497 ctxt->progressive = 1; 11498 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11499 (ctxt->input->cur[2] == '[') && 11500 (ctxt->input->cur[3] == 'C') && 11501 (ctxt->input->cur[4] == 'D') && 11502 (ctxt->input->cur[5] == 'A') && 11503 (ctxt->input->cur[6] == 'T') && 11504 (ctxt->input->cur[7] == 'A') && 11505 (ctxt->input->cur[8] == '[')) { 11506 SKIP(9); 11507 ctxt->instate = XML_PARSER_CDATA_SECTION; 11508 break; 11509 } else if ((cur == '<') && (next == '!') && 11510 (avail < 9)) { 11511 goto done; 11512 } else if (cur == '&') { 11513 if ((!terminate) && 11514 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11515 goto done; 11516 xmlParseReference(ctxt); 11517 } else { 11518 /* TODO Avoid the extra copy, handle directly !!! */ 11519 /* 11520 * Goal of the following test is: 11521 * - minimize calls to the SAX 'character' callback 11522 * when they are mergeable 11523 * - handle an problem for isBlank when we only parse 11524 * a sequence of blank chars and the next one is 11525 * not available to check against '<' presence. 11526 * - tries to homogenize the differences in SAX 11527 * callbacks between the push and pull versions 11528 * of the parser. 
11529 */ 11530 if ((ctxt->inputNr == 1) && 11531 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11532 if (!terminate) { 11533 if (ctxt->progressive) { 11534 if ((lastlt == NULL) || 11535 (ctxt->input->cur > lastlt)) 11536 goto done; 11537 } else if (xmlParseLookupSequence(ctxt, 11538 '<', 0, 0) < 0) { 11539 goto done; 11540 } 11541 } 11542 } 11543 ctxt->checkIndex = 0; 11544 xmlParseCharData(ctxt, 0); 11545 } 11546 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11547 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11548 "detected an error in element content\n"); 11549 xmlHaltParser(ctxt); 11550 break; 11551 } 11552 break; 11553 } 11554 case XML_PARSER_END_TAG: 11555 if (avail < 2) 11556 goto done; 11557 if (!terminate) { 11558 if (ctxt->progressive) { 11559 /* > can be found unescaped in attribute values */ 11560 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11561 goto done; 11562 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11563 goto done; 11564 } 11565 } 11566 if (ctxt->sax2) { 11567 xmlParseEndTag2(ctxt, 11568 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11569 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11570 (int) (ptrdiff_t) 11571 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11572 nameNsPop(ctxt); 11573 } 11574 #ifdef LIBXML_SAX1_ENABLED 11575 else 11576 xmlParseEndTag1(ctxt, 0); 11577 #endif /* LIBXML_SAX1_ENABLED */ 11578 if (ctxt->instate == XML_PARSER_EOF) { 11579 /* Nothing */ 11580 } else if (ctxt->nameNr == 0) { 11581 ctxt->instate = XML_PARSER_EPILOG; 11582 } else { 11583 ctxt->instate = XML_PARSER_CONTENT; 11584 } 11585 break; 11586 case XML_PARSER_CDATA_SECTION: { 11587 /* 11588 * The Push mode need to have the SAX callback for 11589 * cdataBlock merge back contiguous callbacks. 
11590 */ 11591 int base; 11592 11593 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11594 if (base < 0) { 11595 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11596 int tmp; 11597 11598 tmp = xmlCheckCdataPush(ctxt->input->cur, 11599 XML_PARSER_BIG_BUFFER_SIZE, 0); 11600 if (tmp < 0) { 11601 tmp = -tmp; 11602 ctxt->input->cur += tmp; 11603 goto encoding_error; 11604 } 11605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11606 if (ctxt->sax->cdataBlock != NULL) 11607 ctxt->sax->cdataBlock(ctxt->userData, 11608 ctxt->input->cur, tmp); 11609 else if (ctxt->sax->characters != NULL) 11610 ctxt->sax->characters(ctxt->userData, 11611 ctxt->input->cur, tmp); 11612 } 11613 if (ctxt->instate == XML_PARSER_EOF) 11614 goto done; 11615 SKIPL(tmp); 11616 ctxt->checkIndex = 0; 11617 } 11618 goto done; 11619 } else { 11620 int tmp; 11621 11622 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11623 if ((tmp < 0) || (tmp != base)) { 11624 tmp = -tmp; 11625 ctxt->input->cur += tmp; 11626 goto encoding_error; 11627 } 11628 if ((ctxt->sax != NULL) && (base == 0) && 11629 (ctxt->sax->cdataBlock != NULL) && 11630 (!ctxt->disableSAX)) { 11631 /* 11632 * Special case to provide identical behaviour 11633 * between pull and push parsers on enpty CDATA 11634 * sections 11635 */ 11636 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11637 (!strncmp((const char *)&ctxt->input->cur[-9], 11638 "<![CDATA[", 9))) 11639 ctxt->sax->cdataBlock(ctxt->userData, 11640 BAD_CAST "", 0); 11641 } else if ((ctxt->sax != NULL) && (base > 0) && 11642 (!ctxt->disableSAX)) { 11643 if (ctxt->sax->cdataBlock != NULL) 11644 ctxt->sax->cdataBlock(ctxt->userData, 11645 ctxt->input->cur, base); 11646 else if (ctxt->sax->characters != NULL) 11647 ctxt->sax->characters(ctxt->userData, 11648 ctxt->input->cur, base); 11649 } 11650 if (ctxt->instate == XML_PARSER_EOF) 11651 goto done; 11652 SKIPL(base + 3); 11653 ctxt->checkIndex = 0; 11654 ctxt->instate = XML_PARSER_CONTENT; 11655 #ifdef DEBUG_PUSH 11656 
xmlGenericError(xmlGenericErrorContext, 11657 "PP: entering CONTENT\n"); 11658 #endif 11659 } 11660 break; 11661 } 11662 case XML_PARSER_MISC: 11663 SKIP_BLANKS; 11664 if (ctxt->input->buf == NULL) 11665 avail = ctxt->input->length - 11666 (ctxt->input->cur - ctxt->input->base); 11667 else 11668 avail = xmlBufUse(ctxt->input->buf->buffer) - 11669 (ctxt->input->cur - ctxt->input->base); 11670 if (avail < 2) 11671 goto done; 11672 cur = ctxt->input->cur[0]; 11673 next = ctxt->input->cur[1]; 11674 if ((cur == '<') && (next == '?')) { 11675 if ((!terminate) && 11676 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11677 ctxt->progressive = XML_PARSER_PI; 11678 goto done; 11679 } 11680 #ifdef DEBUG_PUSH 11681 xmlGenericError(xmlGenericErrorContext, 11682 "PP: Parsing PI\n"); 11683 #endif 11684 xmlParsePI(ctxt); 11685 if (ctxt->instate == XML_PARSER_EOF) 11686 goto done; 11687 ctxt->instate = XML_PARSER_MISC; 11688 ctxt->progressive = 1; 11689 ctxt->checkIndex = 0; 11690 } else if ((cur == '<') && (next == '!') && 11691 (ctxt->input->cur[2] == '-') && 11692 (ctxt->input->cur[3] == '-')) { 11693 if ((!terminate) && 11694 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11695 ctxt->progressive = XML_PARSER_COMMENT; 11696 goto done; 11697 } 11698 #ifdef DEBUG_PUSH 11699 xmlGenericError(xmlGenericErrorContext, 11700 "PP: Parsing Comment\n"); 11701 #endif 11702 xmlParseComment(ctxt); 11703 if (ctxt->instate == XML_PARSER_EOF) 11704 goto done; 11705 ctxt->instate = XML_PARSER_MISC; 11706 ctxt->progressive = 1; 11707 ctxt->checkIndex = 0; 11708 } else if ((cur == '<') && (next == '!') && 11709 (ctxt->input->cur[2] == 'D') && 11710 (ctxt->input->cur[3] == 'O') && 11711 (ctxt->input->cur[4] == 'C') && 11712 (ctxt->input->cur[5] == 'T') && 11713 (ctxt->input->cur[6] == 'Y') && 11714 (ctxt->input->cur[7] == 'P') && 11715 (ctxt->input->cur[8] == 'E')) { 11716 if ((!terminate) && 11717 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11718 ctxt->progressive = XML_PARSER_DTD; 
11719 goto done; 11720 } 11721 #ifdef DEBUG_PUSH 11722 xmlGenericError(xmlGenericErrorContext, 11723 "PP: Parsing internal subset\n"); 11724 #endif 11725 ctxt->inSubset = 1; 11726 ctxt->progressive = 0; 11727 ctxt->checkIndex = 0; 11728 xmlParseDocTypeDecl(ctxt); 11729 if (ctxt->instate == XML_PARSER_EOF) 11730 goto done; 11731 if (RAW == '[') { 11732 ctxt->instate = XML_PARSER_DTD; 11733 #ifdef DEBUG_PUSH 11734 xmlGenericError(xmlGenericErrorContext, 11735 "PP: entering DTD\n"); 11736 #endif 11737 } else { 11738 /* 11739 * Create and update the external subset. 11740 */ 11741 ctxt->inSubset = 2; 11742 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11743 (ctxt->sax->externalSubset != NULL)) 11744 ctxt->sax->externalSubset(ctxt->userData, 11745 ctxt->intSubName, ctxt->extSubSystem, 11746 ctxt->extSubURI); 11747 ctxt->inSubset = 0; 11748 xmlCleanSpecialAttr(ctxt); 11749 ctxt->instate = XML_PARSER_PROLOG; 11750 #ifdef DEBUG_PUSH 11751 xmlGenericError(xmlGenericErrorContext, 11752 "PP: entering PROLOG\n"); 11753 #endif 11754 } 11755 } else if ((cur == '<') && (next == '!') && 11756 (avail < 9)) { 11757 goto done; 11758 } else { 11759 ctxt->instate = XML_PARSER_START_TAG; 11760 ctxt->progressive = XML_PARSER_START_TAG; 11761 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11762 #ifdef DEBUG_PUSH 11763 xmlGenericError(xmlGenericErrorContext, 11764 "PP: entering START_TAG\n"); 11765 #endif 11766 } 11767 break; 11768 case XML_PARSER_PROLOG: 11769 SKIP_BLANKS; 11770 if (ctxt->input->buf == NULL) 11771 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11772 else 11773 avail = xmlBufUse(ctxt->input->buf->buffer) - 11774 (ctxt->input->cur - ctxt->input->base); 11775 if (avail < 2) 11776 goto done; 11777 cur = ctxt->input->cur[0]; 11778 next = ctxt->input->cur[1]; 11779 if ((cur == '<') && (next == '?')) { 11780 if ((!terminate) && 11781 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11782 ctxt->progressive = XML_PARSER_PI; 11783 goto done; 11784 } 11785 
#ifdef DEBUG_PUSH 11786 xmlGenericError(xmlGenericErrorContext, 11787 "PP: Parsing PI\n"); 11788 #endif 11789 xmlParsePI(ctxt); 11790 if (ctxt->instate == XML_PARSER_EOF) 11791 goto done; 11792 ctxt->instate = XML_PARSER_PROLOG; 11793 ctxt->progressive = 1; 11794 } else if ((cur == '<') && (next == '!') && 11795 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11796 if ((!terminate) && 11797 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11798 ctxt->progressive = XML_PARSER_COMMENT; 11799 goto done; 11800 } 11801 #ifdef DEBUG_PUSH 11802 xmlGenericError(xmlGenericErrorContext, 11803 "PP: Parsing Comment\n"); 11804 #endif 11805 xmlParseComment(ctxt); 11806 if (ctxt->instate == XML_PARSER_EOF) 11807 goto done; 11808 ctxt->instate = XML_PARSER_PROLOG; 11809 ctxt->progressive = 1; 11810 } else if ((cur == '<') && (next == '!') && 11811 (avail < 4)) { 11812 goto done; 11813 } else { 11814 ctxt->instate = XML_PARSER_START_TAG; 11815 if (ctxt->progressive == 0) 11816 ctxt->progressive = XML_PARSER_START_TAG; 11817 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11818 #ifdef DEBUG_PUSH 11819 xmlGenericError(xmlGenericErrorContext, 11820 "PP: entering START_TAG\n"); 11821 #endif 11822 } 11823 break; 11824 case XML_PARSER_EPILOG: 11825 SKIP_BLANKS; 11826 if (ctxt->input->buf == NULL) 11827 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11828 else 11829 avail = xmlBufUse(ctxt->input->buf->buffer) - 11830 (ctxt->input->cur - ctxt->input->base); 11831 if (avail < 2) 11832 goto done; 11833 cur = ctxt->input->cur[0]; 11834 next = ctxt->input->cur[1]; 11835 if ((cur == '<') && (next == '?')) { 11836 if ((!terminate) && 11837 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11838 ctxt->progressive = XML_PARSER_PI; 11839 goto done; 11840 } 11841 #ifdef DEBUG_PUSH 11842 xmlGenericError(xmlGenericErrorContext, 11843 "PP: Parsing PI\n"); 11844 #endif 11845 xmlParsePI(ctxt); 11846 if (ctxt->instate == XML_PARSER_EOF) 11847 goto done; 11848 
ctxt->instate = XML_PARSER_EPILOG; 11849 ctxt->progressive = 1; 11850 } else if ((cur == '<') && (next == '!') && 11851 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11852 if ((!terminate) && 11853 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11854 ctxt->progressive = XML_PARSER_COMMENT; 11855 goto done; 11856 } 11857 #ifdef DEBUG_PUSH 11858 xmlGenericError(xmlGenericErrorContext, 11859 "PP: Parsing Comment\n"); 11860 #endif 11861 xmlParseComment(ctxt); 11862 if (ctxt->instate == XML_PARSER_EOF) 11863 goto done; 11864 ctxt->instate = XML_PARSER_EPILOG; 11865 ctxt->progressive = 1; 11866 } else if ((cur == '<') && (next == '!') && 11867 (avail < 4)) { 11868 goto done; 11869 } else { 11870 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11871 xmlHaltParser(ctxt); 11872 #ifdef DEBUG_PUSH 11873 xmlGenericError(xmlGenericErrorContext, 11874 "PP: entering EOF\n"); 11875 #endif 11876 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11877 ctxt->sax->endDocument(ctxt->userData); 11878 goto done; 11879 } 11880 break; 11881 case XML_PARSER_DTD: { 11882 /* 11883 * Sorry but progressive parsing of the internal subset 11884 * is not expected to be supported. We first check that 11885 * the full content of the internal subset is available and 11886 * the parsing is launched only at that point. 11887 * Internal subset ends up with "']' S? '>'" in an unescaped 11888 * section and not in a ']]>' sequence which are conditional 11889 * sections (whoever argued to keep that crap in XML deserve 11890 * a place in hell !). 
11891 */ 11892 int base, i; 11893 xmlChar *buf; 11894 xmlChar quote = 0; 11895 size_t use; 11896 11897 base = ctxt->input->cur - ctxt->input->base; 11898 if (base < 0) return(0); 11899 if (ctxt->checkIndex > base) 11900 base = ctxt->checkIndex; 11901 buf = xmlBufContent(ctxt->input->buf->buffer); 11902 use = xmlBufUse(ctxt->input->buf->buffer); 11903 for (;(unsigned int) base < use; base++) { 11904 if (quote != 0) { 11905 if (buf[base] == quote) 11906 quote = 0; 11907 continue; 11908 } 11909 if ((quote == 0) && (buf[base] == '<')) { 11910 int found = 0; 11911 /* special handling of comments */ 11912 if (((unsigned int) base + 4 < use) && 11913 (buf[base + 1] == '!') && 11914 (buf[base + 2] == '-') && 11915 (buf[base + 3] == '-')) { 11916 for (;(unsigned int) base + 3 < use; base++) { 11917 if ((buf[base] == '-') && 11918 (buf[base + 1] == '-') && 11919 (buf[base + 2] == '>')) { 11920 found = 1; 11921 base += 2; 11922 break; 11923 } 11924 } 11925 if (!found) { 11926 #if 0 11927 fprintf(stderr, "unfinished comment\n"); 11928 #endif 11929 break; /* for */ 11930 } 11931 continue; 11932 } 11933 } 11934 if (buf[base] == '"') { 11935 quote = '"'; 11936 continue; 11937 } 11938 if (buf[base] == '\'') { 11939 quote = '\''; 11940 continue; 11941 } 11942 if (buf[base] == ']') { 11943 #if 0 11944 fprintf(stderr, "%c%c%c%c: ", buf[base], 11945 buf[base + 1], buf[base + 2], buf[base + 3]); 11946 #endif 11947 if ((unsigned int) base +1 >= use) 11948 break; 11949 if (buf[base + 1] == ']') { 11950 /* conditional crap, skip both ']' ! 
*/ 11951 base++; 11952 continue; 11953 } 11954 for (i = 1; (unsigned int) base + i < use; i++) { 11955 if (buf[base + i] == '>') { 11956 #if 0 11957 fprintf(stderr, "found\n"); 11958 #endif 11959 goto found_end_int_subset; 11960 } 11961 if (!IS_BLANK_CH(buf[base + i])) { 11962 #if 0 11963 fprintf(stderr, "not found\n"); 11964 #endif 11965 goto not_end_of_int_subset; 11966 } 11967 } 11968 #if 0 11969 fprintf(stderr, "end of stream\n"); 11970 #endif 11971 break; 11972 11973 } 11974 not_end_of_int_subset: 11975 continue; /* for */ 11976 } 11977 /* 11978 * We didn't found the end of the Internal subset 11979 */ 11980 if (quote == 0) 11981 ctxt->checkIndex = base; 11982 else 11983 ctxt->checkIndex = 0; 11984 #ifdef DEBUG_PUSH 11985 if (next == 0) 11986 xmlGenericError(xmlGenericErrorContext, 11987 "PP: lookup of int subset end filed\n"); 11988 #endif 11989 goto done; 11990 11991 found_end_int_subset: 11992 ctxt->checkIndex = 0; 11993 xmlParseInternalSubset(ctxt); 11994 if (ctxt->instate == XML_PARSER_EOF) 11995 goto done; 11996 ctxt->inSubset = 2; 11997 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11998 (ctxt->sax->externalSubset != NULL)) 11999 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12000 ctxt->extSubSystem, ctxt->extSubURI); 12001 ctxt->inSubset = 0; 12002 xmlCleanSpecialAttr(ctxt); 12003 if (ctxt->instate == XML_PARSER_EOF) 12004 goto done; 12005 ctxt->instate = XML_PARSER_PROLOG; 12006 ctxt->checkIndex = 0; 12007 #ifdef DEBUG_PUSH 12008 xmlGenericError(xmlGenericErrorContext, 12009 "PP: entering PROLOG\n"); 12010 #endif 12011 break; 12012 } 12013 case XML_PARSER_COMMENT: 12014 xmlGenericError(xmlGenericErrorContext, 12015 "PP: internal error, state == COMMENT\n"); 12016 ctxt->instate = XML_PARSER_CONTENT; 12017 #ifdef DEBUG_PUSH 12018 xmlGenericError(xmlGenericErrorContext, 12019 "PP: entering CONTENT\n"); 12020 #endif 12021 break; 12022 case XML_PARSER_IGNORE: 12023 xmlGenericError(xmlGenericErrorContext, 12024 "PP: internal error, state 
== IGNORE"); 12025 ctxt->instate = XML_PARSER_DTD; 12026 #ifdef DEBUG_PUSH 12027 xmlGenericError(xmlGenericErrorContext, 12028 "PP: entering DTD\n"); 12029 #endif 12030 break; 12031 case XML_PARSER_PI: 12032 xmlGenericError(xmlGenericErrorContext, 12033 "PP: internal error, state == PI\n"); 12034 ctxt->instate = XML_PARSER_CONTENT; 12035 #ifdef DEBUG_PUSH 12036 xmlGenericError(xmlGenericErrorContext, 12037 "PP: entering CONTENT\n"); 12038 #endif 12039 break; 12040 case XML_PARSER_ENTITY_DECL: 12041 xmlGenericError(xmlGenericErrorContext, 12042 "PP: internal error, state == ENTITY_DECL\n"); 12043 ctxt->instate = XML_PARSER_DTD; 12044 #ifdef DEBUG_PUSH 12045 xmlGenericError(xmlGenericErrorContext, 12046 "PP: entering DTD\n"); 12047 #endif 12048 break; 12049 case XML_PARSER_ENTITY_VALUE: 12050 xmlGenericError(xmlGenericErrorContext, 12051 "PP: internal error, state == ENTITY_VALUE\n"); 12052 ctxt->instate = XML_PARSER_CONTENT; 12053 #ifdef DEBUG_PUSH 12054 xmlGenericError(xmlGenericErrorContext, 12055 "PP: entering DTD\n"); 12056 #endif 12057 break; 12058 case XML_PARSER_ATTRIBUTE_VALUE: 12059 xmlGenericError(xmlGenericErrorContext, 12060 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12061 ctxt->instate = XML_PARSER_START_TAG; 12062 #ifdef DEBUG_PUSH 12063 xmlGenericError(xmlGenericErrorContext, 12064 "PP: entering START_TAG\n"); 12065 #endif 12066 break; 12067 case XML_PARSER_SYSTEM_LITERAL: 12068 xmlGenericError(xmlGenericErrorContext, 12069 "PP: internal error, state == SYSTEM_LITERAL\n"); 12070 ctxt->instate = XML_PARSER_START_TAG; 12071 #ifdef DEBUG_PUSH 12072 xmlGenericError(xmlGenericErrorContext, 12073 "PP: entering START_TAG\n"); 12074 #endif 12075 break; 12076 case XML_PARSER_PUBLIC_LITERAL: 12077 xmlGenericError(xmlGenericErrorContext, 12078 "PP: internal error, state == PUBLIC_LITERAL\n"); 12079 ctxt->instate = XML_PARSER_START_TAG; 12080 #ifdef DEBUG_PUSH 12081 xmlGenericError(xmlGenericErrorContext, 12082 "PP: entering START_TAG\n"); 12083 #endif 
12084 break; 12085 } 12086 } 12087 done: 12088 #ifdef DEBUG_PUSH 12089 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12090 #endif 12091 return(ret); 12092 encoding_error: 12093 { 12094 char buffer[150]; 12095 12096 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12097 ctxt->input->cur[0], ctxt->input->cur[1], 12098 ctxt->input->cur[2], ctxt->input->cur[3]); 12099 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12100 "Input is not proper UTF-8, indicate encoding !\n%s", 12101 BAD_CAST buffer, NULL); 12102 } 12103 return(0); 12104 } 12105 12106 /** 12107 * xmlParseCheckTransition: 12108 * @ctxt: an XML parser context 12109 * @chunk: a char array 12110 * @size: the size in byte of the chunk 12111 * 12112 * Check depending on the current parser state if the chunk given must be 12113 * processed immediately or one need more data to advance on parsing. 12114 * 12115 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12116 */ 12117 static int 12118 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12119 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12120 return(-1); 12121 if (ctxt->instate == XML_PARSER_START_TAG) { 12122 if (memchr(chunk, '>', size) != NULL) 12123 return(1); 12124 return(0); 12125 } 12126 if (ctxt->progressive == XML_PARSER_COMMENT) { 12127 if (memchr(chunk, '>', size) != NULL) 12128 return(1); 12129 return(0); 12130 } 12131 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12132 if (memchr(chunk, '>', size) != NULL) 12133 return(1); 12134 return(0); 12135 } 12136 if (ctxt->progressive == XML_PARSER_PI) { 12137 if (memchr(chunk, '>', size) != NULL) 12138 return(1); 12139 return(0); 12140 } 12141 if (ctxt->instate == XML_PARSER_END_TAG) { 12142 if (memchr(chunk, '>', size) != NULL) 12143 return(1); 12144 return(0); 12145 } 12146 if ((ctxt->progressive == XML_PARSER_DTD) || 12147 (ctxt->instate == XML_PARSER_DTD)) { 12148 if (memchr(chunk, '>', size) != NULL) 12149 
return(1); 12150 return(0); 12151 } 12152 return(1); 12153 } 12154 12155 /** 12156 * xmlParseChunk: 12157 * @ctxt: an XML parser context 12158 * @chunk: an char array 12159 * @size: the size in byte of the chunk 12160 * @terminate: last chunk indicator 12161 * 12162 * Parse a Chunk of memory 12163 * 12164 * Returns zero if no error, the xmlParserErrors otherwise. 12165 */ 12166 int 12167 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12168 int terminate) { 12169 int end_in_lf = 0; 12170 int remain = 0; 12171 size_t old_avail = 0; 12172 size_t avail = 0; 12173 12174 if (ctxt == NULL) 12175 return(XML_ERR_INTERNAL_ERROR); 12176 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12177 return(ctxt->errNo); 12178 if (ctxt->instate == XML_PARSER_EOF) 12179 return(-1); 12180 if (ctxt->instate == XML_PARSER_START) 12181 xmlDetectSAX2(ctxt); 12182 if ((size > 0) && (chunk != NULL) && (!terminate) && 12183 (chunk[size - 1] == '\r')) { 12184 end_in_lf = 1; 12185 size--; 12186 } 12187 12188 xmldecl_done: 12189 12190 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12191 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12192 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12193 size_t cur = ctxt->input->cur - ctxt->input->base; 12194 int res; 12195 12196 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12197 /* 12198 * Specific handling if we autodetected an encoding, we should not 12199 * push more than the first line ... 
which depend on the encoding 12200 * And only push the rest once the final encoding was detected 12201 */ 12202 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12203 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12204 unsigned int len = 45; 12205 12206 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12207 BAD_CAST "UTF-16")) || 12208 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12209 BAD_CAST "UTF16"))) 12210 len = 90; 12211 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12212 BAD_CAST "UCS-4")) || 12213 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12214 BAD_CAST "UCS4"))) 12215 len = 180; 12216 12217 if (ctxt->input->buf->rawconsumed < len) 12218 len -= ctxt->input->buf->rawconsumed; 12219 12220 /* 12221 * Change size for reading the initial declaration only 12222 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12223 * will blindly copy extra bytes from memory. 12224 */ 12225 if ((unsigned int) size > len) { 12226 remain = size - len; 12227 size = len; 12228 } else { 12229 remain = 0; 12230 } 12231 } 12232 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12233 if (res < 0) { 12234 ctxt->errNo = XML_PARSER_EOF; 12235 xmlHaltParser(ctxt); 12236 return (XML_PARSER_EOF); 12237 } 12238 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12239 #ifdef DEBUG_PUSH 12240 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12241 #endif 12242 12243 } else if (ctxt->instate != XML_PARSER_EOF) { 12244 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12245 xmlParserInputBufferPtr in = ctxt->input->buf; 12246 if ((in->encoder != NULL) && (in->buffer != NULL) && 12247 (in->raw != NULL)) { 12248 int nbchars; 12249 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12250 size_t current = ctxt->input->cur - ctxt->input->base; 12251 12252 nbchars = xmlCharEncInput(in, terminate); 12253 if (nbchars < 0) { 
12254 /* TODO 2.6.0 */ 12255 xmlGenericError(xmlGenericErrorContext, 12256 "xmlParseChunk: encoder error\n"); 12257 xmlHaltParser(ctxt); 12258 return(XML_ERR_INVALID_ENCODING); 12259 } 12260 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12261 } 12262 } 12263 } 12264 if (remain != 0) { 12265 xmlParseTryOrFinish(ctxt, 0); 12266 } else { 12267 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12268 avail = xmlBufUse(ctxt->input->buf->buffer); 12269 /* 12270 * Depending on the current state it may not be such 12271 * a good idea to try parsing if there is nothing in the chunk 12272 * which would be worth doing a parser state transition and we 12273 * need to wait for more data 12274 */ 12275 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12276 (old_avail == 0) || (avail == 0) || 12277 (xmlParseCheckTransition(ctxt, 12278 (const char *)&ctxt->input->base[old_avail], 12279 avail - old_avail))) 12280 xmlParseTryOrFinish(ctxt, terminate); 12281 } 12282 if (ctxt->instate == XML_PARSER_EOF) 12283 return(ctxt->errNo); 12284 12285 if ((ctxt->input != NULL) && 12286 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12287 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12288 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12289 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12290 xmlHaltParser(ctxt); 12291 } 12292 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12293 return(ctxt->errNo); 12294 12295 if (remain != 0) { 12296 chunk += size; 12297 size = remain; 12298 remain = 0; 12299 goto xmldecl_done; 12300 } 12301 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12302 (ctxt->input->buf != NULL)) { 12303 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12304 ctxt->input); 12305 size_t current = ctxt->input->cur - ctxt->input->base; 12306 12307 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12308 12309 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12310 base, 
current); 12311 } 12312 if (terminate) { 12313 /* 12314 * Check for termination 12315 */ 12316 int cur_avail = 0; 12317 12318 if (ctxt->input != NULL) { 12319 if (ctxt->input->buf == NULL) 12320 cur_avail = ctxt->input->length - 12321 (ctxt->input->cur - ctxt->input->base); 12322 else 12323 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12324 (ctxt->input->cur - ctxt->input->base); 12325 } 12326 12327 if ((ctxt->instate != XML_PARSER_EOF) && 12328 (ctxt->instate != XML_PARSER_EPILOG)) { 12329 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12330 } 12331 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12332 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12333 } 12334 if (ctxt->instate != XML_PARSER_EOF) { 12335 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12336 ctxt->sax->endDocument(ctxt->userData); 12337 } 12338 ctxt->instate = XML_PARSER_EOF; 12339 } 12340 if (ctxt->wellFormed == 0) 12341 return((xmlParserErrors) ctxt->errNo); 12342 else 12343 return(0); 12344 } 12345 12346 /************************************************************************ 12347 * * 12348 * I/O front end functions to the parser * 12349 * * 12350 ************************************************************************/ 12351 12352 /** 12353 * xmlCreatePushParserCtxt: 12354 * @sax: a SAX handler 12355 * @user_data: The user data returned on SAX callbacks 12356 * @chunk: a pointer to an array of chars 12357 * @size: number of chars in the array 12358 * @filename: an optional file name or URI 12359 * 12360 * Create a parser context for using the XML parser in push mode. 12361 * If @buffer and @size are non-NULL, the data is used to detect 12362 * the encoding. The remaining characters will be parsed so they 12363 * don't need to be fed in again through xmlParseChunk. 12364 * To allow content encoding detection, @size should be >= 4 12365 * The value of @filename is used for fetching external entities 12366 * and error/warning reports. 
12367 * 12368 * Returns the new parser context or NULL 12369 */ 12370 12371 xmlParserCtxtPtr 12372 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12373 const char *chunk, int size, const char *filename) { 12374 xmlParserCtxtPtr ctxt; 12375 xmlParserInputPtr inputStream; 12376 xmlParserInputBufferPtr buf; 12377 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12378 12379 /* 12380 * plug some encoding conversion routines 12381 */ 12382 if ((chunk != NULL) && (size >= 4)) 12383 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12384 12385 buf = xmlAllocParserInputBuffer(enc); 12386 if (buf == NULL) return(NULL); 12387 12388 ctxt = xmlNewParserCtxt(); 12389 if (ctxt == NULL) { 12390 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12391 xmlFreeParserInputBuffer(buf); 12392 return(NULL); 12393 } 12394 ctxt->dictNames = 1; 12395 if (sax != NULL) { 12396 #ifdef LIBXML_SAX1_ENABLED 12397 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12398 #endif /* LIBXML_SAX1_ENABLED */ 12399 xmlFree(ctxt->sax); 12400 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12401 if (ctxt->sax == NULL) { 12402 xmlErrMemory(ctxt, NULL); 12403 xmlFreeParserInputBuffer(buf); 12404 xmlFreeParserCtxt(ctxt); 12405 return(NULL); 12406 } 12407 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12408 if (sax->initialized == XML_SAX2_MAGIC) 12409 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12410 else 12411 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12412 if (user_data != NULL) 12413 ctxt->userData = user_data; 12414 } 12415 if (filename == NULL) { 12416 ctxt->directory = NULL; 12417 } else { 12418 ctxt->directory = xmlParserGetDirectory(filename); 12419 } 12420 12421 inputStream = xmlNewInputStream(ctxt); 12422 if (inputStream == NULL) { 12423 xmlFreeParserCtxt(ctxt); 12424 xmlFreeParserInputBuffer(buf); 12425 return(NULL); 12426 } 12427 12428 if (filename == NULL) 12429 inputStream->filename = NULL; 12430 else { 12431 inputStream->filename = 
(char *) 12432 xmlCanonicPath((const xmlChar *) filename); 12433 if (inputStream->filename == NULL) { 12434 xmlFreeParserCtxt(ctxt); 12435 xmlFreeParserInputBuffer(buf); 12436 return(NULL); 12437 } 12438 } 12439 inputStream->buf = buf; 12440 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12441 inputPush(ctxt, inputStream); 12442 12443 /* 12444 * If the caller didn't provide an initial 'chunk' for determining 12445 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12446 * that it can be automatically determined later 12447 */ 12448 if ((size == 0) || (chunk == NULL)) { 12449 ctxt->charset = XML_CHAR_ENCODING_NONE; 12450 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12451 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12452 size_t cur = ctxt->input->cur - ctxt->input->base; 12453 12454 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12455 12456 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12457 #ifdef DEBUG_PUSH 12458 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12459 #endif 12460 } 12461 12462 if (enc != XML_CHAR_ENCODING_NONE) { 12463 xmlSwitchEncoding(ctxt, enc); 12464 } 12465 12466 return(ctxt); 12467 } 12468 #endif /* LIBXML_PUSH_ENABLED */ 12469 12470 /** 12471 * xmlHaltParser: 12472 * @ctxt: an XML parser context 12473 * 12474 * Blocks further parser processing don't override error 12475 * for internal use 12476 */ 12477 static void 12478 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12479 if (ctxt == NULL) 12480 return; 12481 ctxt->instate = XML_PARSER_EOF; 12482 ctxt->disableSAX = 1; 12483 while (ctxt->inputNr > 1) 12484 xmlFreeInputStream(inputPop(ctxt)); 12485 if (ctxt->input != NULL) { 12486 /* 12487 * in case there was a specific allocation deallocate before 12488 * overriding base 12489 */ 12490 if (ctxt->input->free != NULL) { 12491 ctxt->input->free((xmlChar *) ctxt->input->base); 12492 ctxt->input->free = NULL; 12493 } 12494 if 
(ctxt->input->buf != NULL) { 12495 xmlFreeParserInputBuffer(ctxt->input->buf); 12496 ctxt->input->buf = NULL; 12497 } 12498 ctxt->input->cur = BAD_CAST""; 12499 ctxt->input->length = 0; 12500 ctxt->input->base = ctxt->input->cur; 12501 ctxt->input->end = ctxt->input->cur; 12502 } 12503 } 12504 12505 /** 12506 * xmlStopParser: 12507 * @ctxt: an XML parser context 12508 * 12509 * Blocks further parser processing 12510 */ 12511 void 12512 xmlStopParser(xmlParserCtxtPtr ctxt) { 12513 if (ctxt == NULL) 12514 return; 12515 xmlHaltParser(ctxt); 12516 ctxt->errNo = XML_ERR_USER_STOP; 12517 } 12518 12519 /** 12520 * xmlCreateIOParserCtxt: 12521 * @sax: a SAX handler 12522 * @user_data: The user data returned on SAX callbacks 12523 * @ioread: an I/O read function 12524 * @ioclose: an I/O close function 12525 * @ioctx: an I/O handler 12526 * @enc: the charset encoding if known 12527 * 12528 * Create a parser context for using the XML parser with an existing 12529 * I/O stream 12530 * 12531 * Returns the new parser context or NULL 12532 */ 12533 xmlParserCtxtPtr 12534 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12535 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12536 void *ioctx, xmlCharEncoding enc) { 12537 xmlParserCtxtPtr ctxt; 12538 xmlParserInputPtr inputStream; 12539 xmlParserInputBufferPtr buf; 12540 12541 if (ioread == NULL) return(NULL); 12542 12543 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12544 if (buf == NULL) { 12545 if (ioclose != NULL) 12546 ioclose(ioctx); 12547 return (NULL); 12548 } 12549 12550 ctxt = xmlNewParserCtxt(); 12551 if (ctxt == NULL) { 12552 xmlFreeParserInputBuffer(buf); 12553 return(NULL); 12554 } 12555 if (sax != NULL) { 12556 #ifdef LIBXML_SAX1_ENABLED 12557 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12558 #endif /* LIBXML_SAX1_ENABLED */ 12559 xmlFree(ctxt->sax); 12560 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12561 if (ctxt->sax == NULL) { 12562 
xmlErrMemory(ctxt, NULL); 12563 xmlFreeParserCtxt(ctxt); 12564 return(NULL); 12565 } 12566 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12567 if (sax->initialized == XML_SAX2_MAGIC) 12568 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12569 else 12570 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12571 if (user_data != NULL) 12572 ctxt->userData = user_data; 12573 } 12574 12575 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12576 if (inputStream == NULL) { 12577 xmlFreeParserCtxt(ctxt); 12578 return(NULL); 12579 } 12580 inputPush(ctxt, inputStream); 12581 12582 return(ctxt); 12583 } 12584 12585 #ifdef LIBXML_VALID_ENABLED 12586 /************************************************************************ 12587 * * 12588 * Front ends when parsing a DTD * 12589 * * 12590 ************************************************************************/ 12591 12592 /** 12593 * xmlIOParseDTD: 12594 * @sax: the SAX handler block or NULL 12595 * @input: an Input Buffer 12596 * @enc: the charset encoding if known 12597 * 12598 * Load and parse a DTD 12599 * 12600 * Returns the resulting xmlDtdPtr or NULL in case of error. 12601 * @input will be freed by the function in any case. 
12602 */ 12603 12604 xmlDtdPtr 12605 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12606 xmlCharEncoding enc) { 12607 xmlDtdPtr ret = NULL; 12608 xmlParserCtxtPtr ctxt; 12609 xmlParserInputPtr pinput = NULL; 12610 xmlChar start[4]; 12611 12612 if (input == NULL) 12613 return(NULL); 12614 12615 ctxt = xmlNewParserCtxt(); 12616 if (ctxt == NULL) { 12617 xmlFreeParserInputBuffer(input); 12618 return(NULL); 12619 } 12620 12621 /* We are loading a DTD */ 12622 ctxt->options |= XML_PARSE_DTDLOAD; 12623 12624 /* 12625 * Set-up the SAX context 12626 */ 12627 if (sax != NULL) { 12628 if (ctxt->sax != NULL) 12629 xmlFree(ctxt->sax); 12630 ctxt->sax = sax; 12631 ctxt->userData = ctxt; 12632 } 12633 xmlDetectSAX2(ctxt); 12634 12635 /* 12636 * generate a parser input from the I/O handler 12637 */ 12638 12639 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12640 if (pinput == NULL) { 12641 if (sax != NULL) ctxt->sax = NULL; 12642 xmlFreeParserInputBuffer(input); 12643 xmlFreeParserCtxt(ctxt); 12644 return(NULL); 12645 } 12646 12647 /* 12648 * plug some encoding conversion routines here. 12649 */ 12650 if (xmlPushInput(ctxt, pinput) < 0) { 12651 if (sax != NULL) ctxt->sax = NULL; 12652 xmlFreeParserCtxt(ctxt); 12653 return(NULL); 12654 } 12655 if (enc != XML_CHAR_ENCODING_NONE) { 12656 xmlSwitchEncoding(ctxt, enc); 12657 } 12658 12659 pinput->filename = NULL; 12660 pinput->line = 1; 12661 pinput->col = 1; 12662 pinput->base = ctxt->input->cur; 12663 pinput->cur = ctxt->input->cur; 12664 pinput->free = NULL; 12665 12666 /* 12667 * let's parse that entity knowing it's an external subset. 
12668 */ 12669 ctxt->inSubset = 2; 12670 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12671 if (ctxt->myDoc == NULL) { 12672 xmlErrMemory(ctxt, "New Doc failed"); 12673 return(NULL); 12674 } 12675 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12676 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12677 BAD_CAST "none", BAD_CAST "none"); 12678 12679 if ((enc == XML_CHAR_ENCODING_NONE) && 12680 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12681 /* 12682 * Get the 4 first bytes and decode the charset 12683 * if enc != XML_CHAR_ENCODING_NONE 12684 * plug some encoding conversion routines. 12685 */ 12686 start[0] = RAW; 12687 start[1] = NXT(1); 12688 start[2] = NXT(2); 12689 start[3] = NXT(3); 12690 enc = xmlDetectCharEncoding(start, 4); 12691 if (enc != XML_CHAR_ENCODING_NONE) { 12692 xmlSwitchEncoding(ctxt, enc); 12693 } 12694 } 12695 12696 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12697 12698 if (ctxt->myDoc != NULL) { 12699 if (ctxt->wellFormed) { 12700 ret = ctxt->myDoc->extSubset; 12701 ctxt->myDoc->extSubset = NULL; 12702 if (ret != NULL) { 12703 xmlNodePtr tmp; 12704 12705 ret->doc = NULL; 12706 tmp = ret->children; 12707 while (tmp != NULL) { 12708 tmp->doc = NULL; 12709 tmp = tmp->next; 12710 } 12711 } 12712 } else { 12713 ret = NULL; 12714 } 12715 xmlFreeDoc(ctxt->myDoc); 12716 ctxt->myDoc = NULL; 12717 } 12718 if (sax != NULL) ctxt->sax = NULL; 12719 xmlFreeParserCtxt(ctxt); 12720 12721 return(ret); 12722 } 12723 12724 /** 12725 * xmlSAXParseDTD: 12726 * @sax: the SAX handler block 12727 * @ExternalID: a NAME* containing the External ID of the DTD 12728 * @SystemID: a NAME* containing the URL to the DTD 12729 * 12730 * Load and parse an external subset. 12731 * 12732 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12733 */ 12734 12735 xmlDtdPtr 12736 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12737 const xmlChar *SystemID) { 12738 xmlDtdPtr ret = NULL; 12739 xmlParserCtxtPtr ctxt; 12740 xmlParserInputPtr input = NULL; 12741 xmlCharEncoding enc; 12742 xmlChar* systemIdCanonic; 12743 12744 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12745 12746 ctxt = xmlNewParserCtxt(); 12747 if (ctxt == NULL) { 12748 return(NULL); 12749 } 12750 12751 /* We are loading a DTD */ 12752 ctxt->options |= XML_PARSE_DTDLOAD; 12753 12754 /* 12755 * Set-up the SAX context 12756 */ 12757 if (sax != NULL) { 12758 if (ctxt->sax != NULL) 12759 xmlFree(ctxt->sax); 12760 ctxt->sax = sax; 12761 ctxt->userData = ctxt; 12762 } 12763 12764 /* 12765 * Canonicalise the system ID 12766 */ 12767 systemIdCanonic = xmlCanonicPath(SystemID); 12768 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12769 xmlFreeParserCtxt(ctxt); 12770 return(NULL); 12771 } 12772 12773 /* 12774 * Ask the Entity resolver to load the damn thing 12775 */ 12776 12777 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12778 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12779 systemIdCanonic); 12780 if (input == NULL) { 12781 if (sax != NULL) ctxt->sax = NULL; 12782 xmlFreeParserCtxt(ctxt); 12783 if (systemIdCanonic != NULL) 12784 xmlFree(systemIdCanonic); 12785 return(NULL); 12786 } 12787 12788 /* 12789 * plug some encoding conversion routines here. 
12790 */ 12791 if (xmlPushInput(ctxt, input) < 0) { 12792 if (sax != NULL) ctxt->sax = NULL; 12793 xmlFreeParserCtxt(ctxt); 12794 if (systemIdCanonic != NULL) 12795 xmlFree(systemIdCanonic); 12796 return(NULL); 12797 } 12798 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12799 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12800 xmlSwitchEncoding(ctxt, enc); 12801 } 12802 12803 if (input->filename == NULL) 12804 input->filename = (char *) systemIdCanonic; 12805 else 12806 xmlFree(systemIdCanonic); 12807 input->line = 1; 12808 input->col = 1; 12809 input->base = ctxt->input->cur; 12810 input->cur = ctxt->input->cur; 12811 input->free = NULL; 12812 12813 /* 12814 * let's parse that entity knowing it's an external subset. 12815 */ 12816 ctxt->inSubset = 2; 12817 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12818 if (ctxt->myDoc == NULL) { 12819 xmlErrMemory(ctxt, "New Doc failed"); 12820 if (sax != NULL) ctxt->sax = NULL; 12821 xmlFreeParserCtxt(ctxt); 12822 return(NULL); 12823 } 12824 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12825 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12826 ExternalID, SystemID); 12827 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12828 12829 if (ctxt->myDoc != NULL) { 12830 if (ctxt->wellFormed) { 12831 ret = ctxt->myDoc->extSubset; 12832 ctxt->myDoc->extSubset = NULL; 12833 if (ret != NULL) { 12834 xmlNodePtr tmp; 12835 12836 ret->doc = NULL; 12837 tmp = ret->children; 12838 while (tmp != NULL) { 12839 tmp->doc = NULL; 12840 tmp = tmp->next; 12841 } 12842 } 12843 } else { 12844 ret = NULL; 12845 } 12846 xmlFreeDoc(ctxt->myDoc); 12847 ctxt->myDoc = NULL; 12848 } 12849 if (sax != NULL) ctxt->sax = NULL; 12850 xmlFreeParserCtxt(ctxt); 12851 12852 return(ret); 12853 } 12854 12855 12856 /** 12857 * xmlParseDTD: 12858 * @ExternalID: a NAME* containing the External ID of the DTD 12859 * @SystemID: a NAME* containing the URL to the DTD 12860 * 12861 * Load and parse an external subset. 
12862 * 12863 * Returns the resulting xmlDtdPtr or NULL in case of error. 12864 */ 12865 12866 xmlDtdPtr 12867 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12868 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12869 } 12870 #endif /* LIBXML_VALID_ENABLED */ 12871 12872 /************************************************************************ 12873 * * 12874 * Front ends when parsing an Entity * 12875 * * 12876 ************************************************************************/ 12877 12878 /** 12879 * xmlParseCtxtExternalEntity: 12880 * @ctx: the existing parsing context 12881 * @URL: the URL for the entity to load 12882 * @ID: the System ID for the entity to load 12883 * @lst: the return value for the set of parsed nodes 12884 * 12885 * Parse an external general entity within an existing parsing context 12886 * An external general parsed entity is well-formed if it matches the 12887 * production labeled extParsedEnt. 12888 * 12889 * [78] extParsedEnt ::= TextDecl? 
content 12890 * 12891 * Returns 0 if the entity is well formed, -1 in case of args problem and 12892 * the parser error code otherwise 12893 */ 12894 12895 int 12896 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12897 const xmlChar *ID, xmlNodePtr *lst) { 12898 xmlParserCtxtPtr ctxt; 12899 xmlDocPtr newDoc; 12900 xmlNodePtr newRoot; 12901 xmlSAXHandlerPtr oldsax = NULL; 12902 int ret = 0; 12903 xmlChar start[4]; 12904 xmlCharEncoding enc; 12905 12906 if (ctx == NULL) return(-1); 12907 12908 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12909 (ctx->depth > 1024)) { 12910 return(XML_ERR_ENTITY_LOOP); 12911 } 12912 12913 if (lst != NULL) 12914 *lst = NULL; 12915 if ((URL == NULL) && (ID == NULL)) 12916 return(-1); 12917 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12918 return(-1); 12919 12920 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12921 if (ctxt == NULL) { 12922 return(-1); 12923 } 12924 12925 oldsax = ctxt->sax; 12926 ctxt->sax = ctx->sax; 12927 xmlDetectSAX2(ctxt); 12928 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12929 if (newDoc == NULL) { 12930 xmlFreeParserCtxt(ctxt); 12931 return(-1); 12932 } 12933 newDoc->properties = XML_DOC_INTERNAL; 12934 if (ctx->myDoc->dict) { 12935 newDoc->dict = ctx->myDoc->dict; 12936 xmlDictReference(newDoc->dict); 12937 } 12938 if (ctx->myDoc != NULL) { 12939 newDoc->intSubset = ctx->myDoc->intSubset; 12940 newDoc->extSubset = ctx->myDoc->extSubset; 12941 } 12942 if (ctx->myDoc->URL != NULL) { 12943 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12944 } 12945 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12946 if (newRoot == NULL) { 12947 ctxt->sax = oldsax; 12948 xmlFreeParserCtxt(ctxt); 12949 newDoc->intSubset = NULL; 12950 newDoc->extSubset = NULL; 12951 xmlFreeDoc(newDoc); 12952 return(-1); 12953 } 12954 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12955 nodePush(ctxt, newDoc->children); 12956 if (ctx->myDoc == NULL) { 12957 
ctxt->myDoc = newDoc; 12958 } else { 12959 ctxt->myDoc = ctx->myDoc; 12960 newDoc->children->doc = ctx->myDoc; 12961 } 12962 12963 /* 12964 * Get the 4 first bytes and decode the charset 12965 * if enc != XML_CHAR_ENCODING_NONE 12966 * plug some encoding conversion routines. 12967 */ 12968 GROW 12969 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12970 start[0] = RAW; 12971 start[1] = NXT(1); 12972 start[2] = NXT(2); 12973 start[3] = NXT(3); 12974 enc = xmlDetectCharEncoding(start, 4); 12975 if (enc != XML_CHAR_ENCODING_NONE) { 12976 xmlSwitchEncoding(ctxt, enc); 12977 } 12978 } 12979 12980 /* 12981 * Parse a possible text declaration first 12982 */ 12983 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12984 xmlParseTextDecl(ctxt); 12985 /* 12986 * An XML-1.0 document can't reference an entity not XML-1.0 12987 */ 12988 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12989 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12990 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12991 "Version mismatch between document and entity\n"); 12992 } 12993 } 12994 12995 /* 12996 * If the user provided its own SAX callbacks then reuse the 12997 * useData callback field, otherwise the expected setup in a 12998 * DOM builder is to have userData == ctxt 12999 */ 13000 if (ctx->userData == ctx) 13001 ctxt->userData = ctxt; 13002 else 13003 ctxt->userData = ctx->userData; 13004 13005 /* 13006 * Doing validity checking on chunk doesn't make sense 13007 */ 13008 ctxt->instate = XML_PARSER_CONTENT; 13009 ctxt->validate = ctx->validate; 13010 ctxt->valid = ctx->valid; 13011 ctxt->loadsubset = ctx->loadsubset; 13012 ctxt->depth = ctx->depth + 1; 13013 ctxt->replaceEntities = ctx->replaceEntities; 13014 if (ctxt->validate) { 13015 ctxt->vctxt.error = ctx->vctxt.error; 13016 ctxt->vctxt.warning = ctx->vctxt.warning; 13017 } else { 13018 ctxt->vctxt.error = NULL; 13019 ctxt->vctxt.warning = NULL; 13020 } 13021 ctxt->vctxt.nodeTab = NULL; 13022 
ctxt->vctxt.nodeNr = 0; 13023 ctxt->vctxt.nodeMax = 0; 13024 ctxt->vctxt.node = NULL; 13025 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13026 ctxt->dict = ctx->dict; 13027 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13028 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13029 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13030 ctxt->dictNames = ctx->dictNames; 13031 ctxt->attsDefault = ctx->attsDefault; 13032 ctxt->attsSpecial = ctx->attsSpecial; 13033 ctxt->linenumbers = ctx->linenumbers; 13034 13035 xmlParseContent(ctxt); 13036 13037 ctx->validate = ctxt->validate; 13038 ctx->valid = ctxt->valid; 13039 if ((RAW == '<') && (NXT(1) == '/')) { 13040 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13041 } else if (RAW != 0) { 13042 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13043 } 13044 if (ctxt->node != newDoc->children) { 13045 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13046 } 13047 13048 if (!ctxt->wellFormed) { 13049 if (ctxt->errNo == 0) 13050 ret = 1; 13051 else 13052 ret = ctxt->errNo; 13053 } else { 13054 if (lst != NULL) { 13055 xmlNodePtr cur; 13056 13057 /* 13058 * Return the newly created nodeset after unlinking it from 13059 * they pseudo parent. 
13060 */ 13061 cur = newDoc->children->children; 13062 *lst = cur; 13063 while (cur != NULL) { 13064 cur->parent = NULL; 13065 cur = cur->next; 13066 } 13067 newDoc->children->children = NULL; 13068 } 13069 ret = 0; 13070 } 13071 ctxt->sax = oldsax; 13072 ctxt->dict = NULL; 13073 ctxt->attsDefault = NULL; 13074 ctxt->attsSpecial = NULL; 13075 xmlFreeParserCtxt(ctxt); 13076 newDoc->intSubset = NULL; 13077 newDoc->extSubset = NULL; 13078 xmlFreeDoc(newDoc); 13079 13080 return(ret); 13081 } 13082 13083 /** 13084 * xmlParseExternalEntityPrivate: 13085 * @doc: the document the chunk pertains to 13086 * @oldctxt: the previous parser context if available 13087 * @sax: the SAX handler bloc (possibly NULL) 13088 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13089 * @depth: Used for loop detection, use 0 13090 * @URL: the URL for the entity to load 13091 * @ID: the System ID for the entity to load 13092 * @list: the return value for the set of parsed nodes 13093 * 13094 * Private version of xmlParseExternalEntity() 13095 * 13096 * Returns 0 if the entity is well formed, -1 in case of args problem and 13097 * the parser error code otherwise 13098 */ 13099 13100 static xmlParserErrors 13101 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13102 xmlSAXHandlerPtr sax, 13103 void *user_data, int depth, const xmlChar *URL, 13104 const xmlChar *ID, xmlNodePtr *list) { 13105 xmlParserCtxtPtr ctxt; 13106 xmlDocPtr newDoc; 13107 xmlNodePtr newRoot; 13108 xmlSAXHandlerPtr oldsax = NULL; 13109 xmlParserErrors ret = XML_ERR_OK; 13110 xmlChar start[4]; 13111 xmlCharEncoding enc; 13112 13113 if (((depth > 40) && 13114 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13115 (depth > 1024)) { 13116 return(XML_ERR_ENTITY_LOOP); 13117 } 13118 13119 if (list != NULL) 13120 *list = NULL; 13121 if ((URL == NULL) && (ID == NULL)) 13122 return(XML_ERR_INTERNAL_ERROR); 13123 if (doc == NULL) 13124 return(XML_ERR_INTERNAL_ERROR); 13125 
13126 13127 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13128 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13129 ctxt->userData = ctxt; 13130 if (oldctxt != NULL) { 13131 ctxt->_private = oldctxt->_private; 13132 ctxt->loadsubset = oldctxt->loadsubset; 13133 ctxt->validate = oldctxt->validate; 13134 ctxt->external = oldctxt->external; 13135 ctxt->record_info = oldctxt->record_info; 13136 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13137 ctxt->node_seq.length = oldctxt->node_seq.length; 13138 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13139 } else { 13140 /* 13141 * Doing validity checking on chunk without context 13142 * doesn't make sense 13143 */ 13144 ctxt->_private = NULL; 13145 ctxt->validate = 0; 13146 ctxt->external = 2; 13147 ctxt->loadsubset = 0; 13148 } 13149 if (sax != NULL) { 13150 oldsax = ctxt->sax; 13151 ctxt->sax = sax; 13152 if (user_data != NULL) 13153 ctxt->userData = user_data; 13154 } 13155 xmlDetectSAX2(ctxt); 13156 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13157 if (newDoc == NULL) { 13158 ctxt->node_seq.maximum = 0; 13159 ctxt->node_seq.length = 0; 13160 ctxt->node_seq.buffer = NULL; 13161 xmlFreeParserCtxt(ctxt); 13162 return(XML_ERR_INTERNAL_ERROR); 13163 } 13164 newDoc->properties = XML_DOC_INTERNAL; 13165 newDoc->intSubset = doc->intSubset; 13166 newDoc->extSubset = doc->extSubset; 13167 newDoc->dict = doc->dict; 13168 xmlDictReference(newDoc->dict); 13169 13170 if (doc->URL != NULL) { 13171 newDoc->URL = xmlStrdup(doc->URL); 13172 } 13173 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13174 if (newRoot == NULL) { 13175 if (sax != NULL) 13176 ctxt->sax = oldsax; 13177 ctxt->node_seq.maximum = 0; 13178 ctxt->node_seq.length = 0; 13179 ctxt->node_seq.buffer = NULL; 13180 xmlFreeParserCtxt(ctxt); 13181 newDoc->intSubset = NULL; 13182 newDoc->extSubset = NULL; 13183 xmlFreeDoc(newDoc); 13184 return(XML_ERR_INTERNAL_ERROR); 13185 } 13186 xmlAddChild((xmlNodePtr) newDoc, newRoot); 
13187 nodePush(ctxt, newDoc->children); 13188 ctxt->myDoc = doc; 13189 newRoot->doc = doc; 13190 13191 /* 13192 * Get the 4 first bytes and decode the charset 13193 * if enc != XML_CHAR_ENCODING_NONE 13194 * plug some encoding conversion routines. 13195 */ 13196 GROW; 13197 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13198 start[0] = RAW; 13199 start[1] = NXT(1); 13200 start[2] = NXT(2); 13201 start[3] = NXT(3); 13202 enc = xmlDetectCharEncoding(start, 4); 13203 if (enc != XML_CHAR_ENCODING_NONE) { 13204 xmlSwitchEncoding(ctxt, enc); 13205 } 13206 } 13207 13208 /* 13209 * Parse a possible text declaration first 13210 */ 13211 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13212 xmlParseTextDecl(ctxt); 13213 } 13214 13215 ctxt->instate = XML_PARSER_CONTENT; 13216 ctxt->depth = depth; 13217 13218 xmlParseContent(ctxt); 13219 13220 if ((RAW == '<') && (NXT(1) == '/')) { 13221 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13222 } else if (RAW != 0) { 13223 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13224 } 13225 if (ctxt->node != newDoc->children) { 13226 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13227 } 13228 13229 if (!ctxt->wellFormed) { 13230 if (ctxt->errNo == 0) 13231 ret = XML_ERR_INTERNAL_ERROR; 13232 else 13233 ret = (xmlParserErrors)ctxt->errNo; 13234 } else { 13235 if (list != NULL) { 13236 xmlNodePtr cur; 13237 13238 /* 13239 * Return the newly created nodeset after unlinking it from 13240 * they pseudo parent. 13241 */ 13242 cur = newDoc->children->children; 13243 *list = cur; 13244 while (cur != NULL) { 13245 cur->parent = NULL; 13246 cur = cur->next; 13247 } 13248 newDoc->children->children = NULL; 13249 } 13250 ret = XML_ERR_OK; 13251 } 13252 13253 /* 13254 * Record in the parent context the number of entities replacement 13255 * done when parsing that reference. 
13256 */ 13257 if (oldctxt != NULL) 13258 oldctxt->nbentities += ctxt->nbentities; 13259 13260 /* 13261 * Also record the size of the entity parsed 13262 */ 13263 if (ctxt->input != NULL && oldctxt != NULL) { 13264 oldctxt->sizeentities += ctxt->input->consumed; 13265 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13266 } 13267 /* 13268 * And record the last error if any 13269 */ 13270 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK)) 13271 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13272 13273 if (sax != NULL) 13274 ctxt->sax = oldsax; 13275 if (oldctxt != NULL) { 13276 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13277 oldctxt->node_seq.length = ctxt->node_seq.length; 13278 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13279 } 13280 ctxt->node_seq.maximum = 0; 13281 ctxt->node_seq.length = 0; 13282 ctxt->node_seq.buffer = NULL; 13283 xmlFreeParserCtxt(ctxt); 13284 newDoc->intSubset = NULL; 13285 newDoc->extSubset = NULL; 13286 xmlFreeDoc(newDoc); 13287 13288 return(ret); 13289 } 13290 13291 #ifdef LIBXML_SAX1_ENABLED 13292 /** 13293 * xmlParseExternalEntity: 13294 * @doc: the document the chunk pertains to 13295 * @sax: the SAX handler bloc (possibly NULL) 13296 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13297 * @depth: Used for loop detection, use 0 13298 * @URL: the URL for the entity to load 13299 * @ID: the System ID for the entity to load 13300 * @lst: the return value for the set of parsed nodes 13301 * 13302 * Parse an external general entity 13303 * An external general parsed entity is well-formed if it matches the 13304 * production labeled extParsedEnt. 13305 * 13306 * [78] extParsedEnt ::= TextDecl? 
content 13307 * 13308 * Returns 0 if the entity is well formed, -1 in case of args problem and 13309 * the parser error code otherwise 13310 */ 13311 13312 int 13313 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13314 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13315 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13316 ID, lst)); 13317 } 13318 13319 /** 13320 * xmlParseBalancedChunkMemory: 13321 * @doc: the document the chunk pertains to 13322 * @sax: the SAX handler bloc (possibly NULL) 13323 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13324 * @depth: Used for loop detection, use 0 13325 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13326 * @lst: the return value for the set of parsed nodes 13327 * 13328 * Parse a well-balanced chunk of an XML document 13329 * called by the parser 13330 * The allowed sequence for the Well Balanced Chunk is the one defined by 13331 * the content production in the XML grammar: 13332 * 13333 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13334 * 13335 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13336 * the parser error code otherwise 13337 */ 13338 13339 int 13340 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13341 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13342 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13343 depth, string, lst, 0 ); 13344 } 13345 #endif /* LIBXML_SAX1_ENABLED */ 13346 13347 /** 13348 * xmlParseBalancedChunkMemoryInternal: 13349 * @oldctxt: the existing parsing context 13350 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13351 * @user_data: the user data field for the parser context 13352 * @lst: the return value for the set of parsed nodes 13353 * 13354 * 13355 * Parse a well-balanced chunk of an XML document 13356 * called by the parser 
13357 * The allowed sequence for the Well Balanced Chunk is the one defined by 13358 * the content production in the XML grammar: 13359 * 13360 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13361 * 13362 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13363 * error code otherwise 13364 * 13365 * In case recover is set to 1, the nodelist will not be empty even if 13366 * the parsed chunk is not well balanced. 13367 */ 13368 static xmlParserErrors 13369 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13370 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13371 xmlParserCtxtPtr ctxt; 13372 xmlDocPtr newDoc = NULL; 13373 xmlNodePtr newRoot; 13374 xmlSAXHandlerPtr oldsax = NULL; 13375 xmlNodePtr content = NULL; 13376 xmlNodePtr last = NULL; 13377 int size; 13378 xmlParserErrors ret = XML_ERR_OK; 13379 #ifdef SAX2 13380 int i; 13381 #endif 13382 13383 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13384 (oldctxt->depth > 1024)) { 13385 return(XML_ERR_ENTITY_LOOP); 13386 } 13387 13388 13389 if (lst != NULL) 13390 *lst = NULL; 13391 if (string == NULL) 13392 return(XML_ERR_INTERNAL_ERROR); 13393 13394 size = xmlStrlen(string); 13395 13396 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13397 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13398 if (user_data != NULL) 13399 ctxt->userData = user_data; 13400 else 13401 ctxt->userData = ctxt; 13402 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13403 ctxt->dict = oldctxt->dict; 13404 ctxt->input_id = oldctxt->input_id + 1; 13405 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13406 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13407 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13408 13409 #ifdef SAX2 13410 /* propagate namespaces down the entity */ 13411 for (i = 0;i < oldctxt->nsNr;i += 2) { 13412 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 
13413 } 13414 #endif 13415 13416 oldsax = ctxt->sax; 13417 ctxt->sax = oldctxt->sax; 13418 xmlDetectSAX2(ctxt); 13419 ctxt->replaceEntities = oldctxt->replaceEntities; 13420 ctxt->options = oldctxt->options; 13421 13422 ctxt->_private = oldctxt->_private; 13423 if (oldctxt->myDoc == NULL) { 13424 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13425 if (newDoc == NULL) { 13426 ctxt->sax = oldsax; 13427 ctxt->dict = NULL; 13428 xmlFreeParserCtxt(ctxt); 13429 return(XML_ERR_INTERNAL_ERROR); 13430 } 13431 newDoc->properties = XML_DOC_INTERNAL; 13432 newDoc->dict = ctxt->dict; 13433 xmlDictReference(newDoc->dict); 13434 ctxt->myDoc = newDoc; 13435 } else { 13436 ctxt->myDoc = oldctxt->myDoc; 13437 content = ctxt->myDoc->children; 13438 last = ctxt->myDoc->last; 13439 } 13440 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13441 if (newRoot == NULL) { 13442 ctxt->sax = oldsax; 13443 ctxt->dict = NULL; 13444 xmlFreeParserCtxt(ctxt); 13445 if (newDoc != NULL) { 13446 xmlFreeDoc(newDoc); 13447 } 13448 return(XML_ERR_INTERNAL_ERROR); 13449 } 13450 ctxt->myDoc->children = NULL; 13451 ctxt->myDoc->last = NULL; 13452 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13453 nodePush(ctxt, ctxt->myDoc->children); 13454 ctxt->instate = XML_PARSER_CONTENT; 13455 ctxt->depth = oldctxt->depth + 1; 13456 13457 ctxt->validate = 0; 13458 ctxt->loadsubset = oldctxt->loadsubset; 13459 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13460 /* 13461 * ID/IDREF registration will be done in xmlValidateElement below 13462 */ 13463 ctxt->loadsubset |= XML_SKIP_IDS; 13464 } 13465 ctxt->dictNames = oldctxt->dictNames; 13466 ctxt->attsDefault = oldctxt->attsDefault; 13467 ctxt->attsSpecial = oldctxt->attsSpecial; 13468 13469 xmlParseContent(ctxt); 13470 if ((RAW == '<') && (NXT(1) == '/')) { 13471 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13472 } else if (RAW != 0) { 13473 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13474 } 13475 if (ctxt->node != 
ctxt->myDoc->children) { 13476 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13477 } 13478 13479 if (!ctxt->wellFormed) { 13480 if (ctxt->errNo == 0) 13481 ret = XML_ERR_INTERNAL_ERROR; 13482 else 13483 ret = (xmlParserErrors)ctxt->errNo; 13484 } else { 13485 ret = XML_ERR_OK; 13486 } 13487 13488 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13489 xmlNodePtr cur; 13490 13491 /* 13492 * Return the newly created nodeset after unlinking it from 13493 * they pseudo parent. 13494 */ 13495 cur = ctxt->myDoc->children->children; 13496 *lst = cur; 13497 while (cur != NULL) { 13498 #ifdef LIBXML_VALID_ENABLED 13499 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13500 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13501 (cur->type == XML_ELEMENT_NODE)) { 13502 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13503 oldctxt->myDoc, cur); 13504 } 13505 #endif /* LIBXML_VALID_ENABLED */ 13506 cur->parent = NULL; 13507 cur = cur->next; 13508 } 13509 ctxt->myDoc->children->children = NULL; 13510 } 13511 if (ctxt->myDoc != NULL) { 13512 xmlFreeNode(ctxt->myDoc->children); 13513 ctxt->myDoc->children = content; 13514 ctxt->myDoc->last = last; 13515 } 13516 13517 /* 13518 * Record in the parent context the number of entities replacement 13519 * done when parsing that reference. 
13520 */ 13521 if (oldctxt != NULL) 13522 oldctxt->nbentities += ctxt->nbentities; 13523 13524 /* 13525 * Also record the last error if any 13526 */ 13527 if (ctxt->lastError.code != XML_ERR_OK) 13528 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13529 13530 ctxt->sax = oldsax; 13531 ctxt->dict = NULL; 13532 ctxt->attsDefault = NULL; 13533 ctxt->attsSpecial = NULL; 13534 xmlFreeParserCtxt(ctxt); 13535 if (newDoc != NULL) { 13536 xmlFreeDoc(newDoc); 13537 } 13538 13539 return(ret); 13540 } 13541 13542 /** 13543 * xmlParseInNodeContext: 13544 * @node: the context node 13545 * @data: the input string 13546 * @datalen: the input string length in bytes 13547 * @options: a combination of xmlParserOption 13548 * @lst: the return value for the set of parsed nodes 13549 * 13550 * Parse a well-balanced chunk of an XML document 13551 * within the context (DTD, namespaces, etc ...) of the given node. 13552 * 13553 * The allowed sequence for the data is a Well Balanced Chunk defined by 13554 * the content production in the XML grammar: 13555 * 13556 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13557 * 13558 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13559 * error code otherwise 13560 */ 13561 xmlParserErrors 13562 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13563 int options, xmlNodePtr *lst) { 13564 #ifdef SAX2 13565 xmlParserCtxtPtr ctxt; 13566 xmlDocPtr doc = NULL; 13567 xmlNodePtr fake, cur; 13568 int nsnr = 0; 13569 13570 xmlParserErrors ret = XML_ERR_OK; 13571 13572 /* 13573 * check all input parameters, grab the document 13574 */ 13575 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13576 return(XML_ERR_INTERNAL_ERROR); 13577 switch (node->type) { 13578 case XML_ELEMENT_NODE: 13579 case XML_ATTRIBUTE_NODE: 13580 case XML_TEXT_NODE: 13581 case XML_CDATA_SECTION_NODE: 13582 case XML_ENTITY_REF_NODE: 13583 case XML_PI_NODE: 13584 case XML_COMMENT_NODE: 13585 case 
XML_DOCUMENT_NODE: 13586 case XML_HTML_DOCUMENT_NODE: 13587 break; 13588 default: 13589 return(XML_ERR_INTERNAL_ERROR); 13590 13591 } 13592 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13593 (node->type != XML_DOCUMENT_NODE) && 13594 (node->type != XML_HTML_DOCUMENT_NODE)) 13595 node = node->parent; 13596 if (node == NULL) 13597 return(XML_ERR_INTERNAL_ERROR); 13598 if (node->type == XML_ELEMENT_NODE) 13599 doc = node->doc; 13600 else 13601 doc = (xmlDocPtr) node; 13602 if (doc == NULL) 13603 return(XML_ERR_INTERNAL_ERROR); 13604 13605 /* 13606 * allocate a context and set-up everything not related to the 13607 * node position in the tree 13608 */ 13609 if (doc->type == XML_DOCUMENT_NODE) 13610 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13611 #ifdef LIBXML_HTML_ENABLED 13612 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13613 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13614 /* 13615 * When parsing in context, it makes no sense to add implied 13616 * elements like html/body/etc... 13617 */ 13618 options |= HTML_PARSE_NOIMPLIED; 13619 } 13620 #endif 13621 else 13622 return(XML_ERR_INTERNAL_ERROR); 13623 13624 if (ctxt == NULL) 13625 return(XML_ERR_NO_MEMORY); 13626 13627 /* 13628 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13629 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13630 * we must wait until the last moment to free the original one. 
13631 */ 13632 if (doc->dict != NULL) { 13633 if (ctxt->dict != NULL) 13634 xmlDictFree(ctxt->dict); 13635 ctxt->dict = doc->dict; 13636 } else 13637 options |= XML_PARSE_NODICT; 13638 13639 if (doc->encoding != NULL) { 13640 xmlCharEncodingHandlerPtr hdlr; 13641 13642 if (ctxt->encoding != NULL) 13643 xmlFree((xmlChar *) ctxt->encoding); 13644 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13645 13646 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13647 if (hdlr != NULL) { 13648 xmlSwitchToEncoding(ctxt, hdlr); 13649 } else { 13650 return(XML_ERR_UNSUPPORTED_ENCODING); 13651 } 13652 } 13653 13654 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13655 xmlDetectSAX2(ctxt); 13656 ctxt->myDoc = doc; 13657 /* parsing in context, i.e. as within existing content */ 13658 ctxt->input_id = 2; 13659 ctxt->instate = XML_PARSER_CONTENT; 13660 13661 fake = xmlNewComment(NULL); 13662 if (fake == NULL) { 13663 xmlFreeParserCtxt(ctxt); 13664 return(XML_ERR_NO_MEMORY); 13665 } 13666 xmlAddChild(node, fake); 13667 13668 if (node->type == XML_ELEMENT_NODE) { 13669 nodePush(ctxt, node); 13670 /* 13671 * initialize the SAX2 namespaces stack 13672 */ 13673 cur = node; 13674 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13675 xmlNsPtr ns = cur->nsDef; 13676 const xmlChar *iprefix, *ihref; 13677 13678 while (ns != NULL) { 13679 if (ctxt->dict) { 13680 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13681 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13682 } else { 13683 iprefix = ns->prefix; 13684 ihref = ns->href; 13685 } 13686 13687 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13688 nsPush(ctxt, iprefix, ihref); 13689 nsnr++; 13690 } 13691 ns = ns->next; 13692 } 13693 cur = cur->parent; 13694 } 13695 } 13696 13697 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13698 /* 13699 * ID/IDREF registration will be done in xmlValidateElement below 13700 */ 13701 ctxt->loadsubset |= XML_SKIP_IDS; 13702 } 13703 13704 #ifdef 
LIBXML_HTML_ENABLED 13705 if (doc->type == XML_HTML_DOCUMENT_NODE) 13706 __htmlParseContent(ctxt); 13707 else 13708 #endif 13709 xmlParseContent(ctxt); 13710 13711 nsPop(ctxt, nsnr); 13712 if ((RAW == '<') && (NXT(1) == '/')) { 13713 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13714 } else if (RAW != 0) { 13715 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13716 } 13717 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13718 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13719 ctxt->wellFormed = 0; 13720 } 13721 13722 if (!ctxt->wellFormed) { 13723 if (ctxt->errNo == 0) 13724 ret = XML_ERR_INTERNAL_ERROR; 13725 else 13726 ret = (xmlParserErrors)ctxt->errNo; 13727 } else { 13728 ret = XML_ERR_OK; 13729 } 13730 13731 /* 13732 * Return the newly created nodeset after unlinking it from 13733 * the pseudo sibling. 13734 */ 13735 13736 cur = fake->next; 13737 fake->next = NULL; 13738 node->last = fake; 13739 13740 if (cur != NULL) { 13741 cur->prev = NULL; 13742 } 13743 13744 *lst = cur; 13745 13746 while (cur != NULL) { 13747 cur->parent = NULL; 13748 cur = cur->next; 13749 } 13750 13751 xmlUnlinkNode(fake); 13752 xmlFreeNode(fake); 13753 13754 13755 if (ret != XML_ERR_OK) { 13756 xmlFreeNodeList(*lst); 13757 *lst = NULL; 13758 } 13759 13760 if (doc->dict != NULL) 13761 ctxt->dict = NULL; 13762 xmlFreeParserCtxt(ctxt); 13763 13764 return(ret); 13765 #else /* !SAX2 */ 13766 return(XML_ERR_INTERNAL_ERROR); 13767 #endif 13768 } 13769 13770 #ifdef LIBXML_SAX1_ENABLED 13771 /** 13772 * xmlParseBalancedChunkMemoryRecover: 13773 * @doc: the document the chunk pertains to 13774 * @sax: the SAX handler bloc (possibly NULL) 13775 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13776 * @depth: Used for loop detection, use 0 13777 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13778 * @lst: the return value for the set of parsed nodes 13779 * @recover: return nodes even if the data is broken (use 0) 13780 * 13781 * 13782 * 
Parse a well-balanced chunk of an XML document 13783 * called by the parser 13784 * The allowed sequence for the Well Balanced Chunk is the one defined by 13785 * the content production in the XML grammar: 13786 * 13787 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13788 * 13789 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13790 * the parser error code otherwise 13791 * 13792 * In case recover is set to 1, the nodelist will not be empty even if 13793 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13794 * some extent. 13795 */ 13796 int 13797 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13798 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13799 int recover) { 13800 xmlParserCtxtPtr ctxt; 13801 xmlDocPtr newDoc; 13802 xmlSAXHandlerPtr oldsax = NULL; 13803 xmlNodePtr content, newRoot; 13804 int size; 13805 int ret = 0; 13806 13807 if (depth > 40) { 13808 return(XML_ERR_ENTITY_LOOP); 13809 } 13810 13811 13812 if (lst != NULL) 13813 *lst = NULL; 13814 if (string == NULL) 13815 return(-1); 13816 13817 size = xmlStrlen(string); 13818 13819 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13820 if (ctxt == NULL) return(-1); 13821 ctxt->userData = ctxt; 13822 if (sax != NULL) { 13823 oldsax = ctxt->sax; 13824 ctxt->sax = sax; 13825 if (user_data != NULL) 13826 ctxt->userData = user_data; 13827 } 13828 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13829 if (newDoc == NULL) { 13830 xmlFreeParserCtxt(ctxt); 13831 return(-1); 13832 } 13833 newDoc->properties = XML_DOC_INTERNAL; 13834 if ((doc != NULL) && (doc->dict != NULL)) { 13835 xmlDictFree(ctxt->dict); 13836 ctxt->dict = doc->dict; 13837 xmlDictReference(ctxt->dict); 13838 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13839 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13840 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13841 
ctxt->dictNames = 1; 13842 } else { 13843 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13844 } 13845 if (doc != NULL) { 13846 newDoc->intSubset = doc->intSubset; 13847 newDoc->extSubset = doc->extSubset; 13848 } 13849 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13850 if (newRoot == NULL) { 13851 if (sax != NULL) 13852 ctxt->sax = oldsax; 13853 xmlFreeParserCtxt(ctxt); 13854 newDoc->intSubset = NULL; 13855 newDoc->extSubset = NULL; 13856 xmlFreeDoc(newDoc); 13857 return(-1); 13858 } 13859 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13860 nodePush(ctxt, newRoot); 13861 if (doc == NULL) { 13862 ctxt->myDoc = newDoc; 13863 } else { 13864 ctxt->myDoc = newDoc; 13865 newDoc->children->doc = doc; 13866 /* Ensure that doc has XML spec namespace */ 13867 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13868 newDoc->oldNs = doc->oldNs; 13869 } 13870 ctxt->instate = XML_PARSER_CONTENT; 13871 ctxt->input_id = 2; 13872 ctxt->depth = depth; 13873 13874 /* 13875 * Doing validity checking on chunk doesn't make sense 13876 */ 13877 ctxt->validate = 0; 13878 ctxt->loadsubset = 0; 13879 xmlDetectSAX2(ctxt); 13880 13881 if ( doc != NULL ){ 13882 content = doc->children; 13883 doc->children = NULL; 13884 xmlParseContent(ctxt); 13885 doc->children = content; 13886 } 13887 else { 13888 xmlParseContent(ctxt); 13889 } 13890 if ((RAW == '<') && (NXT(1) == '/')) { 13891 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13892 } else if (RAW != 0) { 13893 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13894 } 13895 if (ctxt->node != newDoc->children) { 13896 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13897 } 13898 13899 if (!ctxt->wellFormed) { 13900 if (ctxt->errNo == 0) 13901 ret = 1; 13902 else 13903 ret = ctxt->errNo; 13904 } else { 13905 ret = 0; 13906 } 13907 13908 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13909 xmlNodePtr cur; 13910 13911 /* 13912 * Return the newly created nodeset after unlinking it from 13913 * 
they pseudo parent. 13914 */ 13915 cur = newDoc->children->children; 13916 *lst = cur; 13917 while (cur != NULL) { 13918 xmlSetTreeDoc(cur, doc); 13919 cur->parent = NULL; 13920 cur = cur->next; 13921 } 13922 newDoc->children->children = NULL; 13923 } 13924 13925 if (sax != NULL) 13926 ctxt->sax = oldsax; 13927 xmlFreeParserCtxt(ctxt); 13928 newDoc->intSubset = NULL; 13929 newDoc->extSubset = NULL; 13930 if(doc != NULL) 13931 newDoc->oldNs = NULL; 13932 xmlFreeDoc(newDoc); 13933 13934 return(ret); 13935 } 13936 13937 /** 13938 * xmlSAXParseEntity: 13939 * @sax: the SAX handler block 13940 * @filename: the filename 13941 * 13942 * parse an XML external entity out of context and build a tree. 13943 * It use the given SAX function block to handle the parsing callback. 13944 * If sax is NULL, fallback to the default DOM tree building routines. 13945 * 13946 * [78] extParsedEnt ::= TextDecl? content 13947 * 13948 * This correspond to a "Well Balanced" chunk 13949 * 13950 * Returns the resulting document tree 13951 */ 13952 13953 xmlDocPtr 13954 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13955 xmlDocPtr ret; 13956 xmlParserCtxtPtr ctxt; 13957 13958 ctxt = xmlCreateFileParserCtxt(filename); 13959 if (ctxt == NULL) { 13960 return(NULL); 13961 } 13962 if (sax != NULL) { 13963 if (ctxt->sax != NULL) 13964 xmlFree(ctxt->sax); 13965 ctxt->sax = sax; 13966 ctxt->userData = NULL; 13967 } 13968 13969 xmlParseExtParsedEnt(ctxt); 13970 13971 if (ctxt->wellFormed) 13972 ret = ctxt->myDoc; 13973 else { 13974 ret = NULL; 13975 xmlFreeDoc(ctxt->myDoc); 13976 ctxt->myDoc = NULL; 13977 } 13978 if (sax != NULL) 13979 ctxt->sax = NULL; 13980 xmlFreeParserCtxt(ctxt); 13981 13982 return(ret); 13983 } 13984 13985 /** 13986 * xmlParseEntity: 13987 * @filename: the filename 13988 * 13989 * parse an XML external entity out of context and build a tree. 13990 * 13991 * [78] extParsedEnt ::= TextDecl? 
content 13992 * 13993 * This correspond to a "Well Balanced" chunk 13994 * 13995 * Returns the resulting document tree 13996 */ 13997 13998 xmlDocPtr 13999 xmlParseEntity(const char *filename) { 14000 return(xmlSAXParseEntity(NULL, filename)); 14001 } 14002 #endif /* LIBXML_SAX1_ENABLED */ 14003 14004 /** 14005 * xmlCreateEntityParserCtxtInternal: 14006 * @URL: the entity URL 14007 * @ID: the entity PUBLIC ID 14008 * @base: a possible base for the target URI 14009 * @pctx: parser context used to set options on new context 14010 * 14011 * Create a parser context for an external entity 14012 * Automatic support for ZLIB/Compress compressed document is provided 14013 * by default if found at compile-time. 14014 * 14015 * Returns the new parser context or NULL 14016 */ 14017 static xmlParserCtxtPtr 14018 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 14019 const xmlChar *base, xmlParserCtxtPtr pctx) { 14020 xmlParserCtxtPtr ctxt; 14021 xmlParserInputPtr inputStream; 14022 char *directory = NULL; 14023 xmlChar *uri; 14024 14025 ctxt = xmlNewParserCtxt(); 14026 if (ctxt == NULL) { 14027 return(NULL); 14028 } 14029 14030 if (pctx != NULL) { 14031 ctxt->options = pctx->options; 14032 ctxt->_private = pctx->_private; 14033 /* 14034 * this is a subparser of pctx, so the input_id should be 14035 * incremented to distinguish from main entity 14036 */ 14037 ctxt->input_id = pctx->input_id + 1; 14038 } 14039 14040 /* Don't read from stdin. 
*/ 14041 if (xmlStrcmp(URL, BAD_CAST "-") == 0) 14042 URL = BAD_CAST "./-"; 14043 14044 uri = xmlBuildURI(URL, base); 14045 14046 if (uri == NULL) { 14047 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14048 if (inputStream == NULL) { 14049 xmlFreeParserCtxt(ctxt); 14050 return(NULL); 14051 } 14052 14053 inputPush(ctxt, inputStream); 14054 14055 if ((ctxt->directory == NULL) && (directory == NULL)) 14056 directory = xmlParserGetDirectory((char *)URL); 14057 if ((ctxt->directory == NULL) && (directory != NULL)) 14058 ctxt->directory = directory; 14059 } else { 14060 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14061 if (inputStream == NULL) { 14062 xmlFree(uri); 14063 xmlFreeParserCtxt(ctxt); 14064 return(NULL); 14065 } 14066 14067 inputPush(ctxt, inputStream); 14068 14069 if ((ctxt->directory == NULL) && (directory == NULL)) 14070 directory = xmlParserGetDirectory((char *)uri); 14071 if ((ctxt->directory == NULL) && (directory != NULL)) 14072 ctxt->directory = directory; 14073 xmlFree(uri); 14074 } 14075 return(ctxt); 14076 } 14077 14078 /** 14079 * xmlCreateEntityParserCtxt: 14080 * @URL: the entity URL 14081 * @ID: the entity PUBLIC ID 14082 * @base: a possible base for the target URI 14083 * 14084 * Create a parser context for an external entity 14085 * Automatic support for ZLIB/Compress compressed document is provided 14086 * by default if found at compile-time. 
14087 * 14088 * Returns the new parser context or NULL 14089 */ 14090 xmlParserCtxtPtr 14091 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14092 const xmlChar *base) { 14093 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14094 14095 } 14096 14097 /************************************************************************ 14098 * * 14099 * Front ends when parsing from a file * 14100 * * 14101 ************************************************************************/ 14102 14103 /** 14104 * xmlCreateURLParserCtxt: 14105 * @filename: the filename or URL 14106 * @options: a combination of xmlParserOption 14107 * 14108 * Create a parser context for a file or URL content. 14109 * Automatic support for ZLIB/Compress compressed document is provided 14110 * by default if found at compile-time and for file accesses 14111 * 14112 * Returns the new parser context or NULL 14113 */ 14114 xmlParserCtxtPtr 14115 xmlCreateURLParserCtxt(const char *filename, int options) 14116 { 14117 xmlParserCtxtPtr ctxt; 14118 xmlParserInputPtr inputStream; 14119 char *directory = NULL; 14120 14121 ctxt = xmlNewParserCtxt(); 14122 if (ctxt == NULL) { 14123 xmlErrMemory(NULL, "cannot allocate parser context"); 14124 return(NULL); 14125 } 14126 14127 if (options) 14128 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14129 ctxt->linenumbers = 1; 14130 14131 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14132 if (inputStream == NULL) { 14133 xmlFreeParserCtxt(ctxt); 14134 return(NULL); 14135 } 14136 14137 inputPush(ctxt, inputStream); 14138 if ((ctxt->directory == NULL) && (directory == NULL)) 14139 directory = xmlParserGetDirectory(filename); 14140 if ((ctxt->directory == NULL) && (directory != NULL)) 14141 ctxt->directory = directory; 14142 14143 return(ctxt); 14144 } 14145 14146 /** 14147 * xmlCreateFileParserCtxt: 14148 * @filename: the filename 14149 * 14150 * Create a parser context for a file content. 
14151 * Automatic support for ZLIB/Compress compressed document is provided 14152 * by default if found at compile-time. 14153 * 14154 * Returns the new parser context or NULL 14155 */ 14156 xmlParserCtxtPtr 14157 xmlCreateFileParserCtxt(const char *filename) 14158 { 14159 return(xmlCreateURLParserCtxt(filename, 0)); 14160 } 14161 14162 #ifdef LIBXML_SAX1_ENABLED 14163 /** 14164 * xmlSAXParseFileWithData: 14165 * @sax: the SAX handler block 14166 * @filename: the filename 14167 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14168 * documents 14169 * @data: the userdata 14170 * 14171 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14172 * compressed document is provided by default if found at compile-time. 14173 * It use the given SAX function block to handle the parsing callback. 14174 * If sax is NULL, fallback to the default DOM tree building routines. 14175 * 14176 * User data (void *) is stored within the parser context in the 14177 * context's _private member, so it is available nearly everywhere in libxml 14178 * 14179 * Returns the resulting document tree 14180 */ 14181 14182 xmlDocPtr 14183 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14184 int recovery, void *data) { 14185 xmlDocPtr ret; 14186 xmlParserCtxtPtr ctxt; 14187 14188 xmlInitParser(); 14189 14190 ctxt = xmlCreateFileParserCtxt(filename); 14191 if (ctxt == NULL) { 14192 return(NULL); 14193 } 14194 if (sax != NULL) { 14195 if (ctxt->sax != NULL) 14196 xmlFree(ctxt->sax); 14197 ctxt->sax = sax; 14198 } 14199 xmlDetectSAX2(ctxt); 14200 if (data!=NULL) { 14201 ctxt->_private = data; 14202 } 14203 14204 if (ctxt->directory == NULL) 14205 ctxt->directory = xmlParserGetDirectory(filename); 14206 14207 ctxt->recovery = recovery; 14208 14209 xmlParseDocument(ctxt); 14210 14211 if ((ctxt->wellFormed) || recovery) { 14212 ret = ctxt->myDoc; 14213 if (ret != NULL) { 14214 if (ctxt->input->buf->compressed > 0) 14215 ret->compression = 9; 
14216 else 14217 ret->compression = ctxt->input->buf->compressed; 14218 } 14219 } 14220 else { 14221 ret = NULL; 14222 xmlFreeDoc(ctxt->myDoc); 14223 ctxt->myDoc = NULL; 14224 } 14225 if (sax != NULL) 14226 ctxt->sax = NULL; 14227 xmlFreeParserCtxt(ctxt); 14228 14229 return(ret); 14230 } 14231 14232 /** 14233 * xmlSAXParseFile: 14234 * @sax: the SAX handler block 14235 * @filename: the filename 14236 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14237 * documents 14238 * 14239 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14240 * compressed document is provided by default if found at compile-time. 14241 * It use the given SAX function block to handle the parsing callback. 14242 * If sax is NULL, fallback to the default DOM tree building routines. 14243 * 14244 * Returns the resulting document tree 14245 */ 14246 14247 xmlDocPtr 14248 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14249 int recovery) { 14250 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14251 } 14252 14253 /** 14254 * xmlRecoverDoc: 14255 * @cur: a pointer to an array of xmlChar 14256 * 14257 * parse an XML in-memory document and build a tree. 14258 * In the case the document is not Well Formed, a attempt to build a 14259 * tree is tried anyway 14260 * 14261 * Returns the resulting document tree or NULL in case of failure 14262 */ 14263 14264 xmlDocPtr 14265 xmlRecoverDoc(const xmlChar *cur) { 14266 return(xmlSAXParseDoc(NULL, cur, 1)); 14267 } 14268 14269 /** 14270 * xmlParseFile: 14271 * @filename: the filename 14272 * 14273 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14274 * compressed document is provided by default if found at compile-time. 14275 * 14276 * Returns the resulting document tree if the file was wellformed, 14277 * NULL otherwise. 
14278 */ 14279 14280 xmlDocPtr 14281 xmlParseFile(const char *filename) { 14282 return(xmlSAXParseFile(NULL, filename, 0)); 14283 } 14284 14285 /** 14286 * xmlRecoverFile: 14287 * @filename: the filename 14288 * 14289 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14290 * compressed document is provided by default if found at compile-time. 14291 * In the case the document is not Well Formed, it attempts to build 14292 * a tree anyway 14293 * 14294 * Returns the resulting document tree or NULL in case of failure 14295 */ 14296 14297 xmlDocPtr 14298 xmlRecoverFile(const char *filename) { 14299 return(xmlSAXParseFile(NULL, filename, 1)); 14300 } 14301 14302 14303 /** 14304 * xmlSetupParserForBuffer: 14305 * @ctxt: an XML parser context 14306 * @buffer: a xmlChar * buffer 14307 * @filename: a file name 14308 * 14309 * Setup the parser context to parse a new buffer; Clears any prior 14310 * contents from the parser context. The buffer parameter must not be 14311 * NULL, but the filename parameter can be 14312 */ 14313 void 14314 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14315 const char* filename) 14316 { 14317 xmlParserInputPtr input; 14318 14319 if ((ctxt == NULL) || (buffer == NULL)) 14320 return; 14321 14322 input = xmlNewInputStream(ctxt); 14323 if (input == NULL) { 14324 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14325 xmlClearParserCtxt(ctxt); 14326 return; 14327 } 14328 14329 xmlClearParserCtxt(ctxt); 14330 if (filename != NULL) 14331 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14332 input->base = buffer; 14333 input->cur = buffer; 14334 input->end = &buffer[xmlStrlen(buffer)]; 14335 inputPush(ctxt, input); 14336 } 14337 14338 /** 14339 * xmlSAXUserParseFile: 14340 * @sax: a SAX handler 14341 * @user_data: The user data returned on SAX callbacks 14342 * @filename: a file name 14343 * 14344 * parse an XML file and call the given SAX handler routines. 
14345 * Automatic support for ZLIB/Compress compressed document is provided 14346 * 14347 * Returns 0 in case of success or a error number otherwise 14348 */ 14349 int 14350 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14351 const char *filename) { 14352 int ret = 0; 14353 xmlParserCtxtPtr ctxt; 14354 14355 ctxt = xmlCreateFileParserCtxt(filename); 14356 if (ctxt == NULL) return -1; 14357 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14358 xmlFree(ctxt->sax); 14359 ctxt->sax = sax; 14360 xmlDetectSAX2(ctxt); 14361 14362 if (user_data != NULL) 14363 ctxt->userData = user_data; 14364 14365 xmlParseDocument(ctxt); 14366 14367 if (ctxt->wellFormed) 14368 ret = 0; 14369 else { 14370 if (ctxt->errNo != 0) 14371 ret = ctxt->errNo; 14372 else 14373 ret = -1; 14374 } 14375 if (sax != NULL) 14376 ctxt->sax = NULL; 14377 if (ctxt->myDoc != NULL) { 14378 xmlFreeDoc(ctxt->myDoc); 14379 ctxt->myDoc = NULL; 14380 } 14381 xmlFreeParserCtxt(ctxt); 14382 14383 return ret; 14384 } 14385 #endif /* LIBXML_SAX1_ENABLED */ 14386 14387 /************************************************************************ 14388 * * 14389 * Front ends when parsing from memory * 14390 * * 14391 ************************************************************************/ 14392 14393 /** 14394 * xmlCreateMemoryParserCtxt: 14395 * @buffer: a pointer to a char array 14396 * @size: the size of the array 14397 * 14398 * Create a parser context for an XML in-memory document. 
14399 * 14400 * Returns the new parser context or NULL 14401 */ 14402 xmlParserCtxtPtr 14403 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14404 xmlParserCtxtPtr ctxt; 14405 xmlParserInputPtr input; 14406 xmlParserInputBufferPtr buf; 14407 14408 if (buffer == NULL) 14409 return(NULL); 14410 if (size <= 0) 14411 return(NULL); 14412 14413 ctxt = xmlNewParserCtxt(); 14414 if (ctxt == NULL) 14415 return(NULL); 14416 14417 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14418 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14419 if (buf == NULL) { 14420 xmlFreeParserCtxt(ctxt); 14421 return(NULL); 14422 } 14423 14424 input = xmlNewInputStream(ctxt); 14425 if (input == NULL) { 14426 xmlFreeParserInputBuffer(buf); 14427 xmlFreeParserCtxt(ctxt); 14428 return(NULL); 14429 } 14430 14431 input->filename = NULL; 14432 input->buf = buf; 14433 xmlBufResetInput(input->buf->buffer, input); 14434 14435 inputPush(ctxt, input); 14436 return(ctxt); 14437 } 14438 14439 #ifdef LIBXML_SAX1_ENABLED 14440 /** 14441 * xmlSAXParseMemoryWithData: 14442 * @sax: the SAX handler block 14443 * @buffer: an pointer to a char array 14444 * @size: the size of the array 14445 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14446 * documents 14447 * @data: the userdata 14448 * 14449 * parse an XML in-memory block and use the given SAX function block 14450 * to handle the parsing callback. If sax is NULL, fallback to the default 14451 * DOM tree building routines. 
14452 * 14453 * User data (void *) is stored within the parser context in the 14454 * context's _private member, so it is available nearly everywhere in libxml 14455 * 14456 * Returns the resulting document tree 14457 */ 14458 14459 xmlDocPtr 14460 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14461 int size, int recovery, void *data) { 14462 xmlDocPtr ret; 14463 xmlParserCtxtPtr ctxt; 14464 14465 xmlInitParser(); 14466 14467 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14468 if (ctxt == NULL) return(NULL); 14469 if (sax != NULL) { 14470 if (ctxt->sax != NULL) 14471 xmlFree(ctxt->sax); 14472 ctxt->sax = sax; 14473 } 14474 xmlDetectSAX2(ctxt); 14475 if (data!=NULL) { 14476 ctxt->_private=data; 14477 } 14478 14479 ctxt->recovery = recovery; 14480 14481 xmlParseDocument(ctxt); 14482 14483 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14484 else { 14485 ret = NULL; 14486 xmlFreeDoc(ctxt->myDoc); 14487 ctxt->myDoc = NULL; 14488 } 14489 if (sax != NULL) 14490 ctxt->sax = NULL; 14491 xmlFreeParserCtxt(ctxt); 14492 14493 return(ret); 14494 } 14495 14496 /** 14497 * xmlSAXParseMemory: 14498 * @sax: the SAX handler block 14499 * @buffer: an pointer to a char array 14500 * @size: the size of the array 14501 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14502 * documents 14503 * 14504 * parse an XML in-memory block and use the given SAX function block 14505 * to handle the parsing callback. If sax is NULL, fallback to the default 14506 * DOM tree building routines. 14507 * 14508 * Returns the resulting document tree 14509 */ 14510 xmlDocPtr 14511 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14512 int size, int recovery) { 14513 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14514 } 14515 14516 /** 14517 * xmlParseMemory: 14518 * @buffer: an pointer to a char array 14519 * @size: the size of the array 14520 * 14521 * parse an XML in-memory block and build a tree. 
14522 * 14523 * Returns the resulting document tree 14524 */ 14525 14526 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14527 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14528 } 14529 14530 /** 14531 * xmlRecoverMemory: 14532 * @buffer: an pointer to a char array 14533 * @size: the size of the array 14534 * 14535 * parse an XML in-memory block and build a tree. 14536 * In the case the document is not Well Formed, an attempt to 14537 * build a tree is tried anyway 14538 * 14539 * Returns the resulting document tree or NULL in case of error 14540 */ 14541 14542 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14543 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14544 } 14545 14546 /** 14547 * xmlSAXUserParseMemory: 14548 * @sax: a SAX handler 14549 * @user_data: The user data returned on SAX callbacks 14550 * @buffer: an in-memory XML document input 14551 * @size: the length of the XML document in bytes 14552 * 14553 * A better SAX parsing routine. 14554 * parse an XML in-memory buffer and call the given SAX handler routines. 
14555 * 14556 * Returns 0 in case of success or a error number otherwise 14557 */ 14558 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14559 const char *buffer, int size) { 14560 int ret = 0; 14561 xmlParserCtxtPtr ctxt; 14562 14563 xmlInitParser(); 14564 14565 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14566 if (ctxt == NULL) return -1; 14567 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14568 xmlFree(ctxt->sax); 14569 ctxt->sax = sax; 14570 xmlDetectSAX2(ctxt); 14571 14572 if (user_data != NULL) 14573 ctxt->userData = user_data; 14574 14575 xmlParseDocument(ctxt); 14576 14577 if (ctxt->wellFormed) 14578 ret = 0; 14579 else { 14580 if (ctxt->errNo != 0) 14581 ret = ctxt->errNo; 14582 else 14583 ret = -1; 14584 } 14585 if (sax != NULL) 14586 ctxt->sax = NULL; 14587 if (ctxt->myDoc != NULL) { 14588 xmlFreeDoc(ctxt->myDoc); 14589 ctxt->myDoc = NULL; 14590 } 14591 xmlFreeParserCtxt(ctxt); 14592 14593 return ret; 14594 } 14595 #endif /* LIBXML_SAX1_ENABLED */ 14596 14597 /** 14598 * xmlCreateDocParserCtxt: 14599 * @cur: a pointer to an array of xmlChar 14600 * 14601 * Creates a parser context for an XML in-memory document. 14602 * 14603 * Returns the new parser context or NULL 14604 */ 14605 xmlParserCtxtPtr 14606 xmlCreateDocParserCtxt(const xmlChar *cur) { 14607 int len; 14608 14609 if (cur == NULL) 14610 return(NULL); 14611 len = xmlStrlen(cur); 14612 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14613 } 14614 14615 #ifdef LIBXML_SAX1_ENABLED 14616 /** 14617 * xmlSAXParseDoc: 14618 * @sax: the SAX handler block 14619 * @cur: a pointer to an array of xmlChar 14620 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14621 * documents 14622 * 14623 * parse an XML in-memory document and build a tree. 14624 * It use the given SAX function block to handle the parsing callback. 14625 * If sax is NULL, fallback to the default DOM tree building routines. 
14626 * 14627 * Returns the resulting document tree 14628 */ 14629 14630 xmlDocPtr 14631 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14632 xmlDocPtr ret; 14633 xmlParserCtxtPtr ctxt; 14634 xmlSAXHandlerPtr oldsax = NULL; 14635 14636 if (cur == NULL) return(NULL); 14637 14638 14639 ctxt = xmlCreateDocParserCtxt(cur); 14640 if (ctxt == NULL) return(NULL); 14641 if (sax != NULL) { 14642 oldsax = ctxt->sax; 14643 ctxt->sax = sax; 14644 ctxt->userData = NULL; 14645 } 14646 xmlDetectSAX2(ctxt); 14647 14648 xmlParseDocument(ctxt); 14649 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14650 else { 14651 ret = NULL; 14652 xmlFreeDoc(ctxt->myDoc); 14653 ctxt->myDoc = NULL; 14654 } 14655 if (sax != NULL) 14656 ctxt->sax = oldsax; 14657 xmlFreeParserCtxt(ctxt); 14658 14659 return(ret); 14660 } 14661 14662 /** 14663 * xmlParseDoc: 14664 * @cur: a pointer to an array of xmlChar 14665 * 14666 * parse an XML in-memory document and build a tree. 14667 * 14668 * Returns the resulting document tree 14669 */ 14670 14671 xmlDocPtr 14672 xmlParseDoc(const xmlChar *cur) { 14673 return(xmlSAXParseDoc(NULL, cur, 0)); 14674 } 14675 #endif /* LIBXML_SAX1_ENABLED */ 14676 14677 #ifdef LIBXML_LEGACY_ENABLED 14678 /************************************************************************ 14679 * * 14680 * Specific function to keep track of entities references * 14681 * and used by the XSLT debugger * 14682 * * 14683 ************************************************************************/ 14684 14685 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14686 14687 /** 14688 * xmlAddEntityReference: 14689 * @ent : A valid entity 14690 * @firstNode : A valid first node for children of entity 14691 * @lastNode : A valid last node of children entity 14692 * 14693 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14694 */ 14695 static void 14696 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14697 xmlNodePtr lastNode) 
14698 { 14699 if (xmlEntityRefFunc != NULL) { 14700 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14701 } 14702 } 14703 14704 14705 /** 14706 * xmlSetEntityReferenceFunc: 14707 * @func: A valid function 14708 * 14709 * Set the function to call call back when a xml reference has been made 14710 */ 14711 void 14712 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14713 { 14714 xmlEntityRefFunc = func; 14715 } 14716 #endif /* LIBXML_LEGACY_ENABLED */ 14717 14718 /************************************************************************ 14719 * * 14720 * Miscellaneous * 14721 * * 14722 ************************************************************************/ 14723 14724 #ifdef LIBXML_XPATH_ENABLED 14725 #include <libxml/xpath.h> 14726 #endif 14727 14728 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14729 static int xmlParserInitialized = 0; 14730 14731 /** 14732 * xmlInitParser: 14733 * 14734 * Initialization function for the XML parser. 14735 * This is not reentrant. Call once before processing in case of 14736 * use in multithreaded programs. 
14737 */ 14738 14739 void 14740 xmlInitParser(void) { 14741 if (xmlParserInitialized != 0) 14742 return; 14743 14744 #ifdef LIBXML_THREAD_ENABLED 14745 __xmlGlobalInitMutexLock(); 14746 if (xmlParserInitialized == 0) { 14747 #endif 14748 xmlInitThreads(); 14749 xmlInitGlobals(); 14750 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14751 (xmlGenericError == NULL)) 14752 initGenericErrorDefaultFunc(NULL); 14753 xmlInitMemory(); 14754 xmlInitializeDict(); 14755 xmlInitCharEncodingHandlers(); 14756 xmlDefaultSAXHandlerInit(); 14757 xmlRegisterDefaultInputCallbacks(); 14758 #ifdef LIBXML_OUTPUT_ENABLED 14759 xmlRegisterDefaultOutputCallbacks(); 14760 #endif /* LIBXML_OUTPUT_ENABLED */ 14761 #ifdef LIBXML_HTML_ENABLED 14762 htmlInitAutoClose(); 14763 htmlDefaultSAXHandlerInit(); 14764 #endif 14765 #ifdef LIBXML_XPATH_ENABLED 14766 xmlXPathInit(); 14767 #endif 14768 xmlParserInitialized = 1; 14769 #ifdef LIBXML_THREAD_ENABLED 14770 } 14771 __xmlGlobalInitMutexUnlock(); 14772 #endif 14773 } 14774 14775 /** 14776 * xmlCleanupParser: 14777 * 14778 * This function name is somewhat misleading. It does not clean up 14779 * parser state, it cleans up memory allocated by the library itself. 14780 * It is a cleanup function for the XML library. It tries to reclaim all 14781 * related global memory allocated for the library processing. 14782 * It doesn't deallocate any document related memory. One should 14783 * call xmlCleanupParser() only when the process has finished using 14784 * the library and all XML/HTML documents built with it. 14785 * See also xmlInitParser() which has the opposite function of preparing 14786 * the library for operations. 14787 * 14788 * WARNING: if your application is multithreaded or has plugin support 14789 * calling this may crash the application if another thread or 14790 * a plugin is still using libxml2. 
It's sometimes very hard to 14791 * guess if libxml2 is in use in the application, some libraries 14792 * or plugins may use it without notice. In case of doubt abstain 14793 * from calling this function or do it just before calling exit() 14794 * to avoid leak reports from valgrind ! 14795 */ 14796 14797 void 14798 xmlCleanupParser(void) { 14799 if (!xmlParserInitialized) 14800 return; 14801 14802 xmlCleanupCharEncodingHandlers(); 14803 #ifdef LIBXML_CATALOG_ENABLED 14804 xmlCatalogCleanup(); 14805 #endif 14806 xmlDictCleanup(); 14807 xmlCleanupInputCallbacks(); 14808 #ifdef LIBXML_OUTPUT_ENABLED 14809 xmlCleanupOutputCallbacks(); 14810 #endif 14811 #ifdef LIBXML_SCHEMAS_ENABLED 14812 xmlSchemaCleanupTypes(); 14813 xmlRelaxNGCleanupTypes(); 14814 #endif 14815 xmlResetLastError(); 14816 xmlCleanupGlobals(); 14817 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14818 xmlCleanupMemory(); 14819 xmlParserInitialized = 0; 14820 } 14821 14822 /************************************************************************ 14823 * * 14824 * New set (2.6.0) of simpler and more flexible APIs * 14825 * * 14826 ************************************************************************/ 14827 14828 /** 14829 * DICT_FREE: 14830 * @str: a string 14831 * 14832 * Free a string if it is not owned by the "dict" dictionary in the 14833 * current scope 14834 */ 14835 #define DICT_FREE(str) \ 14836 if ((str) && ((!dict) || \ 14837 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14838 xmlFree((char *)(str)); 14839 14840 /** 14841 * xmlCtxtReset: 14842 * @ctxt: an XML parser context 14843 * 14844 * Reset a parser context 14845 */ 14846 void 14847 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14848 { 14849 xmlParserInputPtr input; 14850 xmlDictPtr dict; 14851 14852 if (ctxt == NULL) 14853 return; 14854 14855 dict = ctxt->dict; 14856 14857 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14858 xmlFreeInputStream(input); 14859 } 14860 ctxt->inputNr = 0; 14861 
ctxt->input = NULL; 14862 14863 ctxt->spaceNr = 0; 14864 if (ctxt->spaceTab != NULL) { 14865 ctxt->spaceTab[0] = -1; 14866 ctxt->space = &ctxt->spaceTab[0]; 14867 } else { 14868 ctxt->space = NULL; 14869 } 14870 14871 14872 ctxt->nodeNr = 0; 14873 ctxt->node = NULL; 14874 14875 ctxt->nameNr = 0; 14876 ctxt->name = NULL; 14877 14878 DICT_FREE(ctxt->version); 14879 ctxt->version = NULL; 14880 DICT_FREE(ctxt->encoding); 14881 ctxt->encoding = NULL; 14882 DICT_FREE(ctxt->directory); 14883 ctxt->directory = NULL; 14884 DICT_FREE(ctxt->extSubURI); 14885 ctxt->extSubURI = NULL; 14886 DICT_FREE(ctxt->extSubSystem); 14887 ctxt->extSubSystem = NULL; 14888 if (ctxt->myDoc != NULL) 14889 xmlFreeDoc(ctxt->myDoc); 14890 ctxt->myDoc = NULL; 14891 14892 ctxt->standalone = -1; 14893 ctxt->hasExternalSubset = 0; 14894 ctxt->hasPErefs = 0; 14895 ctxt->html = 0; 14896 ctxt->external = 0; 14897 ctxt->instate = XML_PARSER_START; 14898 ctxt->token = 0; 14899 14900 ctxt->wellFormed = 1; 14901 ctxt->nsWellFormed = 1; 14902 ctxt->disableSAX = 0; 14903 ctxt->valid = 1; 14904 #if 0 14905 ctxt->vctxt.userData = ctxt; 14906 ctxt->vctxt.error = xmlParserValidityError; 14907 ctxt->vctxt.warning = xmlParserValidityWarning; 14908 #endif 14909 ctxt->record_info = 0; 14910 ctxt->nbChars = 0; 14911 ctxt->checkIndex = 0; 14912 ctxt->inSubset = 0; 14913 ctxt->errNo = XML_ERR_OK; 14914 ctxt->depth = 0; 14915 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14916 ctxt->catalogs = NULL; 14917 ctxt->nbentities = 0; 14918 ctxt->sizeentities = 0; 14919 ctxt->sizeentcopy = 0; 14920 xmlInitNodeInfoSeq(&ctxt->node_seq); 14921 14922 if (ctxt->attsDefault != NULL) { 14923 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 14924 ctxt->attsDefault = NULL; 14925 } 14926 if (ctxt->attsSpecial != NULL) { 14927 xmlHashFree(ctxt->attsSpecial, NULL); 14928 ctxt->attsSpecial = NULL; 14929 } 14930 14931 #ifdef LIBXML_CATALOG_ENABLED 14932 if (ctxt->catalogs != NULL) 14933 xmlCatalogFreeLocal(ctxt->catalogs); 14934 #endif 
14935 if (ctxt->lastError.code != XML_ERR_OK) 14936 xmlResetError(&ctxt->lastError); 14937 } 14938 14939 /** 14940 * xmlCtxtResetPush: 14941 * @ctxt: an XML parser context 14942 * @chunk: a pointer to an array of chars 14943 * @size: number of chars in the array 14944 * @filename: an optional file name or URI 14945 * @encoding: the document encoding, or NULL 14946 * 14947 * Reset a push parser context 14948 * 14949 * Returns 0 in case of success and 1 in case of error 14950 */ 14951 int 14952 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14953 int size, const char *filename, const char *encoding) 14954 { 14955 xmlParserInputPtr inputStream; 14956 xmlParserInputBufferPtr buf; 14957 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14958 14959 if (ctxt == NULL) 14960 return(1); 14961 14962 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14963 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14964 14965 buf = xmlAllocParserInputBuffer(enc); 14966 if (buf == NULL) 14967 return(1); 14968 14969 if (ctxt == NULL) { 14970 xmlFreeParserInputBuffer(buf); 14971 return(1); 14972 } 14973 14974 xmlCtxtReset(ctxt); 14975 14976 if (filename == NULL) { 14977 ctxt->directory = NULL; 14978 } else { 14979 ctxt->directory = xmlParserGetDirectory(filename); 14980 } 14981 14982 inputStream = xmlNewInputStream(ctxt); 14983 if (inputStream == NULL) { 14984 xmlFreeParserInputBuffer(buf); 14985 return(1); 14986 } 14987 14988 if (filename == NULL) 14989 inputStream->filename = NULL; 14990 else 14991 inputStream->filename = (char *) 14992 xmlCanonicPath((const xmlChar *) filename); 14993 inputStream->buf = buf; 14994 xmlBufResetInput(buf->buffer, inputStream); 14995 14996 inputPush(ctxt, inputStream); 14997 14998 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14999 (ctxt->input->buf != NULL)) { 15000 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 15001 size_t cur = ctxt->input->cur - ctxt->input->base; 15002 15003 
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 15004 15005 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 15006 #ifdef DEBUG_PUSH 15007 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 15008 #endif 15009 } 15010 15011 if (encoding != NULL) { 15012 xmlCharEncodingHandlerPtr hdlr; 15013 15014 if (ctxt->encoding != NULL) 15015 xmlFree((xmlChar *) ctxt->encoding); 15016 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15017 15018 hdlr = xmlFindCharEncodingHandler(encoding); 15019 if (hdlr != NULL) { 15020 xmlSwitchToEncoding(ctxt, hdlr); 15021 } else { 15022 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 15023 "Unsupported encoding %s\n", BAD_CAST encoding); 15024 } 15025 } else if (enc != XML_CHAR_ENCODING_NONE) { 15026 xmlSwitchEncoding(ctxt, enc); 15027 } 15028 15029 return(0); 15030 } 15031 15032 15033 /** 15034 * xmlCtxtUseOptionsInternal: 15035 * @ctxt: an XML parser context 15036 * @options: a combination of xmlParserOption 15037 * @encoding: the user provided encoding to use 15038 * 15039 * Applies the options to the parser context 15040 * 15041 * Returns 0 in case of success, the set of unknown or unimplemented options 15042 * in case of error. 
15043 */ 15044 static int 15045 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15046 { 15047 if (ctxt == NULL) 15048 return(-1); 15049 if (encoding != NULL) { 15050 if (ctxt->encoding != NULL) 15051 xmlFree((xmlChar *) ctxt->encoding); 15052 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15053 } 15054 if (options & XML_PARSE_RECOVER) { 15055 ctxt->recovery = 1; 15056 options -= XML_PARSE_RECOVER; 15057 ctxt->options |= XML_PARSE_RECOVER; 15058 } else 15059 ctxt->recovery = 0; 15060 if (options & XML_PARSE_DTDLOAD) { 15061 ctxt->loadsubset = XML_DETECT_IDS; 15062 options -= XML_PARSE_DTDLOAD; 15063 ctxt->options |= XML_PARSE_DTDLOAD; 15064 } else 15065 ctxt->loadsubset = 0; 15066 if (options & XML_PARSE_DTDATTR) { 15067 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15068 options -= XML_PARSE_DTDATTR; 15069 ctxt->options |= XML_PARSE_DTDATTR; 15070 } 15071 if (options & XML_PARSE_NOENT) { 15072 ctxt->replaceEntities = 1; 15073 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15074 options -= XML_PARSE_NOENT; 15075 ctxt->options |= XML_PARSE_NOENT; 15076 } else 15077 ctxt->replaceEntities = 0; 15078 if (options & XML_PARSE_PEDANTIC) { 15079 ctxt->pedantic = 1; 15080 options -= XML_PARSE_PEDANTIC; 15081 ctxt->options |= XML_PARSE_PEDANTIC; 15082 } else 15083 ctxt->pedantic = 0; 15084 if (options & XML_PARSE_NOBLANKS) { 15085 ctxt->keepBlanks = 0; 15086 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15087 options -= XML_PARSE_NOBLANKS; 15088 ctxt->options |= XML_PARSE_NOBLANKS; 15089 } else 15090 ctxt->keepBlanks = 1; 15091 if (options & XML_PARSE_DTDVALID) { 15092 ctxt->validate = 1; 15093 if (options & XML_PARSE_NOWARNING) 15094 ctxt->vctxt.warning = NULL; 15095 if (options & XML_PARSE_NOERROR) 15096 ctxt->vctxt.error = NULL; 15097 options -= XML_PARSE_DTDVALID; 15098 ctxt->options |= XML_PARSE_DTDVALID; 15099 } else 15100 ctxt->validate = 0; 15101 if (options & XML_PARSE_NOWARNING) { 15102 ctxt->sax->warning = NULL; 
15103 options -= XML_PARSE_NOWARNING; 15104 } 15105 if (options & XML_PARSE_NOERROR) { 15106 ctxt->sax->error = NULL; 15107 ctxt->sax->fatalError = NULL; 15108 options -= XML_PARSE_NOERROR; 15109 } 15110 #ifdef LIBXML_SAX1_ENABLED 15111 if (options & XML_PARSE_SAX1) { 15112 ctxt->sax->startElement = xmlSAX2StartElement; 15113 ctxt->sax->endElement = xmlSAX2EndElement; 15114 ctxt->sax->startElementNs = NULL; 15115 ctxt->sax->endElementNs = NULL; 15116 ctxt->sax->initialized = 1; 15117 options -= XML_PARSE_SAX1; 15118 ctxt->options |= XML_PARSE_SAX1; 15119 } 15120 #endif /* LIBXML_SAX1_ENABLED */ 15121 if (options & XML_PARSE_NODICT) { 15122 ctxt->dictNames = 0; 15123 options -= XML_PARSE_NODICT; 15124 ctxt->options |= XML_PARSE_NODICT; 15125 } else { 15126 ctxt->dictNames = 1; 15127 } 15128 if (options & XML_PARSE_NOCDATA) { 15129 ctxt->sax->cdataBlock = NULL; 15130 options -= XML_PARSE_NOCDATA; 15131 ctxt->options |= XML_PARSE_NOCDATA; 15132 } 15133 if (options & XML_PARSE_NSCLEAN) { 15134 ctxt->options |= XML_PARSE_NSCLEAN; 15135 options -= XML_PARSE_NSCLEAN; 15136 } 15137 if (options & XML_PARSE_NONET) { 15138 ctxt->options |= XML_PARSE_NONET; 15139 options -= XML_PARSE_NONET; 15140 } 15141 if (options & XML_PARSE_COMPACT) { 15142 ctxt->options |= XML_PARSE_COMPACT; 15143 options -= XML_PARSE_COMPACT; 15144 } 15145 if (options & XML_PARSE_OLD10) { 15146 ctxt->options |= XML_PARSE_OLD10; 15147 options -= XML_PARSE_OLD10; 15148 } 15149 if (options & XML_PARSE_NOBASEFIX) { 15150 ctxt->options |= XML_PARSE_NOBASEFIX; 15151 options -= XML_PARSE_NOBASEFIX; 15152 } 15153 if (options & XML_PARSE_HUGE) { 15154 ctxt->options |= XML_PARSE_HUGE; 15155 options -= XML_PARSE_HUGE; 15156 if (ctxt->dict != NULL) 15157 xmlDictSetLimit(ctxt->dict, 0); 15158 } 15159 if (options & XML_PARSE_OLDSAX) { 15160 ctxt->options |= XML_PARSE_OLDSAX; 15161 options -= XML_PARSE_OLDSAX; 15162 } 15163 if (options & XML_PARSE_IGNORE_ENC) { 15164 ctxt->options |= XML_PARSE_IGNORE_ENC; 15165 options 
-= XML_PARSE_IGNORE_ENC; 15166 } 15167 if (options & XML_PARSE_BIG_LINES) { 15168 ctxt->options |= XML_PARSE_BIG_LINES; 15169 options -= XML_PARSE_BIG_LINES; 15170 } 15171 ctxt->linenumbers = 1; 15172 return (options); 15173 } 15174 15175 /** 15176 * xmlCtxtUseOptions: 15177 * @ctxt: an XML parser context 15178 * @options: a combination of xmlParserOption 15179 * 15180 * Applies the options to the parser context 15181 * 15182 * Returns 0 in case of success, the set of unknown or unimplemented options 15183 * in case of error. 15184 */ 15185 int 15186 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15187 { 15188 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15189 } 15190 15191 /** 15192 * xmlDoRead: 15193 * @ctxt: an XML parser context 15194 * @URL: the base URL to use for the document 15195 * @encoding: the document encoding, or NULL 15196 * @options: a combination of xmlParserOption 15197 * @reuse: keep the context for reuse 15198 * 15199 * Common front-end for the xmlRead functions 15200 * 15201 * Returns the resulting document tree or NULL 15202 */ 15203 static xmlDocPtr 15204 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15205 int options, int reuse) 15206 { 15207 xmlDocPtr ret; 15208 15209 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15210 if (encoding != NULL) { 15211 xmlCharEncodingHandlerPtr hdlr; 15212 15213 hdlr = xmlFindCharEncodingHandler(encoding); 15214 if (hdlr != NULL) 15215 xmlSwitchToEncoding(ctxt, hdlr); 15216 } 15217 if ((URL != NULL) && (ctxt->input != NULL) && 15218 (ctxt->input->filename == NULL)) 15219 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15220 xmlParseDocument(ctxt); 15221 if ((ctxt->wellFormed) || ctxt->recovery) 15222 ret = ctxt->myDoc; 15223 else { 15224 ret = NULL; 15225 if (ctxt->myDoc != NULL) { 15226 xmlFreeDoc(ctxt->myDoc); 15227 } 15228 } 15229 ctxt->myDoc = NULL; 15230 if (!reuse) { 15231 xmlFreeParserCtxt(ctxt); 15232 } 15233 15234 return (ret); 15235 
} 15236 15237 /** 15238 * xmlReadDoc: 15239 * @cur: a pointer to a zero terminated string 15240 * @URL: the base URL to use for the document 15241 * @encoding: the document encoding, or NULL 15242 * @options: a combination of xmlParserOption 15243 * 15244 * parse an XML in-memory document and build a tree. 15245 * 15246 * Returns the resulting document tree 15247 */ 15248 xmlDocPtr 15249 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15250 { 15251 xmlParserCtxtPtr ctxt; 15252 15253 if (cur == NULL) 15254 return (NULL); 15255 xmlInitParser(); 15256 15257 ctxt = xmlCreateDocParserCtxt(cur); 15258 if (ctxt == NULL) 15259 return (NULL); 15260 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15261 } 15262 15263 /** 15264 * xmlReadFile: 15265 * @filename: a file or URL 15266 * @encoding: the document encoding, or NULL 15267 * @options: a combination of xmlParserOption 15268 * 15269 * parse an XML file from the filesystem or the network. 15270 * 15271 * Returns the resulting document tree 15272 */ 15273 xmlDocPtr 15274 xmlReadFile(const char *filename, const char *encoding, int options) 15275 { 15276 xmlParserCtxtPtr ctxt; 15277 15278 xmlInitParser(); 15279 ctxt = xmlCreateURLParserCtxt(filename, options); 15280 if (ctxt == NULL) 15281 return (NULL); 15282 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15283 } 15284 15285 /** 15286 * xmlReadMemory: 15287 * @buffer: a pointer to a char array 15288 * @size: the size of the array 15289 * @URL: the base URL to use for the document 15290 * @encoding: the document encoding, or NULL 15291 * @options: a combination of xmlParserOption 15292 * 15293 * parse an XML in-memory document and build a tree. 
15294 * 15295 * Returns the resulting document tree 15296 */ 15297 xmlDocPtr 15298 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15299 { 15300 xmlParserCtxtPtr ctxt; 15301 15302 xmlInitParser(); 15303 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15304 if (ctxt == NULL) 15305 return (NULL); 15306 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15307 } 15308 15309 /** 15310 * xmlReadFd: 15311 * @fd: an open file descriptor 15312 * @URL: the base URL to use for the document 15313 * @encoding: the document encoding, or NULL 15314 * @options: a combination of xmlParserOption 15315 * 15316 * parse an XML from a file descriptor and build a tree. 15317 * NOTE that the file descriptor will not be closed when the 15318 * reader is closed or reset. 15319 * 15320 * Returns the resulting document tree 15321 */ 15322 xmlDocPtr 15323 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15324 { 15325 xmlParserCtxtPtr ctxt; 15326 xmlParserInputBufferPtr input; 15327 xmlParserInputPtr stream; 15328 15329 if (fd < 0) 15330 return (NULL); 15331 xmlInitParser(); 15332 15333 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15334 if (input == NULL) 15335 return (NULL); 15336 input->closecallback = NULL; 15337 ctxt = xmlNewParserCtxt(); 15338 if (ctxt == NULL) { 15339 xmlFreeParserInputBuffer(input); 15340 return (NULL); 15341 } 15342 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15343 if (stream == NULL) { 15344 xmlFreeParserInputBuffer(input); 15345 xmlFreeParserCtxt(ctxt); 15346 return (NULL); 15347 } 15348 inputPush(ctxt, stream); 15349 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15350 } 15351 15352 /** 15353 * xmlReadIO: 15354 * @ioread: an I/O read function 15355 * @ioclose: an I/O close function 15356 * @ioctx: an I/O handler 15357 * @URL: the base URL to use for the document 15358 * @encoding: the document encoding, or NULL 15359 * @options: a combination of 
xmlParserOption 15360 * 15361 * parse an XML document from I/O functions and source and build a tree. 15362 * 15363 * Returns the resulting document tree 15364 */ 15365 xmlDocPtr 15366 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15367 void *ioctx, const char *URL, const char *encoding, int options) 15368 { 15369 xmlParserCtxtPtr ctxt; 15370 xmlParserInputBufferPtr input; 15371 xmlParserInputPtr stream; 15372 15373 if (ioread == NULL) 15374 return (NULL); 15375 xmlInitParser(); 15376 15377 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15378 XML_CHAR_ENCODING_NONE); 15379 if (input == NULL) { 15380 if (ioclose != NULL) 15381 ioclose(ioctx); 15382 return (NULL); 15383 } 15384 ctxt = xmlNewParserCtxt(); 15385 if (ctxt == NULL) { 15386 xmlFreeParserInputBuffer(input); 15387 return (NULL); 15388 } 15389 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15390 if (stream == NULL) { 15391 xmlFreeParserInputBuffer(input); 15392 xmlFreeParserCtxt(ctxt); 15393 return (NULL); 15394 } 15395 inputPush(ctxt, stream); 15396 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15397 } 15398 15399 /** 15400 * xmlCtxtReadDoc: 15401 * @ctxt: an XML parser context 15402 * @cur: a pointer to a zero terminated string 15403 * @URL: the base URL to use for the document 15404 * @encoding: the document encoding, or NULL 15405 * @options: a combination of xmlParserOption 15406 * 15407 * parse an XML in-memory document and build a tree. 
15408 * This reuses the existing @ctxt parser context 15409 * 15410 * Returns the resulting document tree 15411 */ 15412 xmlDocPtr 15413 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15414 const char *URL, const char *encoding, int options) 15415 { 15416 xmlParserInputPtr stream; 15417 15418 if (cur == NULL) 15419 return (NULL); 15420 if (ctxt == NULL) 15421 return (NULL); 15422 xmlInitParser(); 15423 15424 xmlCtxtReset(ctxt); 15425 15426 stream = xmlNewStringInputStream(ctxt, cur); 15427 if (stream == NULL) { 15428 return (NULL); 15429 } 15430 inputPush(ctxt, stream); 15431 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15432 } 15433 15434 /** 15435 * xmlCtxtReadFile: 15436 * @ctxt: an XML parser context 15437 * @filename: a file or URL 15438 * @encoding: the document encoding, or NULL 15439 * @options: a combination of xmlParserOption 15440 * 15441 * parse an XML file from the filesystem or the network. 15442 * This reuses the existing @ctxt parser context 15443 * 15444 * Returns the resulting document tree 15445 */ 15446 xmlDocPtr 15447 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15448 const char *encoding, int options) 15449 { 15450 xmlParserInputPtr stream; 15451 15452 if (filename == NULL) 15453 return (NULL); 15454 if (ctxt == NULL) 15455 return (NULL); 15456 xmlInitParser(); 15457 15458 xmlCtxtReset(ctxt); 15459 15460 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15461 if (stream == NULL) { 15462 return (NULL); 15463 } 15464 inputPush(ctxt, stream); 15465 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15466 } 15467 15468 /** 15469 * xmlCtxtReadMemory: 15470 * @ctxt: an XML parser context 15471 * @buffer: a pointer to a char array 15472 * @size: the size of the array 15473 * @URL: the base URL to use for the document 15474 * @encoding: the document encoding, or NULL 15475 * @options: a combination of xmlParserOption 15476 * 15477 * parse an XML in-memory document and build a tree. 
15478 * This reuses the existing @ctxt parser context 15479 * 15480 * Returns the resulting document tree 15481 */ 15482 xmlDocPtr 15483 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15484 const char *URL, const char *encoding, int options) 15485 { 15486 xmlParserInputBufferPtr input; 15487 xmlParserInputPtr stream; 15488 15489 if (ctxt == NULL) 15490 return (NULL); 15491 if (buffer == NULL) 15492 return (NULL); 15493 xmlInitParser(); 15494 15495 xmlCtxtReset(ctxt); 15496 15497 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15498 if (input == NULL) { 15499 return(NULL); 15500 } 15501 15502 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15503 if (stream == NULL) { 15504 xmlFreeParserInputBuffer(input); 15505 return(NULL); 15506 } 15507 15508 inputPush(ctxt, stream); 15509 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15510 } 15511 15512 /** 15513 * xmlCtxtReadFd: 15514 * @ctxt: an XML parser context 15515 * @fd: an open file descriptor 15516 * @URL: the base URL to use for the document 15517 * @encoding: the document encoding, or NULL 15518 * @options: a combination of xmlParserOption 15519 * 15520 * parse an XML from a file descriptor and build a tree. 15521 * This reuses the existing @ctxt parser context 15522 * NOTE that the file descriptor will not be closed when the 15523 * reader is closed or reset. 
15524 * 15525 * Returns the resulting document tree 15526 */ 15527 xmlDocPtr 15528 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15529 const char *URL, const char *encoding, int options) 15530 { 15531 xmlParserInputBufferPtr input; 15532 xmlParserInputPtr stream; 15533 15534 if (fd < 0) 15535 return (NULL); 15536 if (ctxt == NULL) 15537 return (NULL); 15538 xmlInitParser(); 15539 15540 xmlCtxtReset(ctxt); 15541 15542 15543 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15544 if (input == NULL) 15545 return (NULL); 15546 input->closecallback = NULL; 15547 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15548 if (stream == NULL) { 15549 xmlFreeParserInputBuffer(input); 15550 return (NULL); 15551 } 15552 inputPush(ctxt, stream); 15553 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15554 } 15555 15556 /** 15557 * xmlCtxtReadIO: 15558 * @ctxt: an XML parser context 15559 * @ioread: an I/O read function 15560 * @ioclose: an I/O close function 15561 * @ioctx: an I/O handler 15562 * @URL: the base URL to use for the document 15563 * @encoding: the document encoding, or NULL 15564 * @options: a combination of xmlParserOption 15565 * 15566 * parse an XML document from I/O functions and source and build a tree. 
15567 * This reuses the existing @ctxt parser context 15568 * 15569 * Returns the resulting document tree 15570 */ 15571 xmlDocPtr 15572 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15573 xmlInputCloseCallback ioclose, void *ioctx, 15574 const char *URL, 15575 const char *encoding, int options) 15576 { 15577 xmlParserInputBufferPtr input; 15578 xmlParserInputPtr stream; 15579 15580 if (ioread == NULL) 15581 return (NULL); 15582 if (ctxt == NULL) 15583 return (NULL); 15584 xmlInitParser(); 15585 15586 xmlCtxtReset(ctxt); 15587 15588 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15589 XML_CHAR_ENCODING_NONE); 15590 if (input == NULL) { 15591 if (ioclose != NULL) 15592 ioclose(ioctx); 15593 return (NULL); 15594 } 15595 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15596 if (stream == NULL) { 15597 xmlFreeParserInputBuffer(input); 15598 return (NULL); 15599 } 15600 inputPush(ctxt, stream); 15601 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15602 } 15603 15604 #define bottom_parser 15605 #include "elfgcchack.h" 15606