1 /* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33 /* To avoid EBCDIC trouble when parsing on zOS */ 34 #if defined(__MVS__) 35 #pragma convert("ISO8859-1") 36 #endif 37 38 #define IN_LIBXML 39 #include "libxml.h" 40 41 #if defined(_WIN32) && !defined (__CYGWIN__) 42 #define XML_DIR_SEP '\\' 43 #else 44 #define XML_DIR_SEP '/' 45 #endif 46 47 #include <stdlib.h> 48 #include <limits.h> 49 #include <string.h> 50 #include <stdarg.h> 51 #include <stddef.h> 52 #include <libxml/xmlmemory.h> 53 #include <libxml/threads.h> 54 #include <libxml/globals.h> 55 #include <libxml/tree.h> 56 #include <libxml/parser.h> 57 #include <libxml/parserInternals.h> 58 #include <libxml/valid.h> 59 #include <libxml/entities.h> 60 #include <libxml/xmlerror.h> 61 #include <libxml/encoding.h> 62 #include <libxml/xmlIO.h> 63 #include <libxml/uri.h> 64 #ifdef LIBXML_CATALOG_ENABLED 65 #include <libxml/catalog.h> 66 #endif 67 #ifdef LIBXML_SCHEMAS_ENABLED 68 #include <libxml/xmlschemastypes.h> 69 #include <libxml/relaxng.h> 70 #endif 71 #ifdef HAVE_CTYPE_H 72 #include <ctype.h> 73 #endif 74 #ifdef HAVE_STDLIB_H 75 #include <stdlib.h> 76 #endif 77 #ifdef HAVE_SYS_STAT_H 78 #include <sys/stat.h> 79 #endif 80 #ifdef HAVE_FCNTL_H 81 #include <fcntl.h> 82 #endif 83 #ifdef HAVE_UNISTD_H 84 #include <unistd.h> 85 #endif 86 #ifdef HAVE_ZLIB_H 87 #include <zlib.h> 88 #endif 89 #ifdef HAVE_LZMA_H 90 #include <lzma.h> 91 #endif 92 93 #include "buf.h" 94 #include "enc.h" 95 96 static void 97 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 98 99 static xmlParserCtxtPtr 100 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 101 const xmlChar *base, xmlParserCtxtPtr pctx); 102 103 static void xmlHaltParser(xmlParserCtxtPtr ctxt); 104 105 /************************************************************************ 106 * * 107 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

/* Entity size/count thresholds used by the entity expansion guards. */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in byte of the input indicates that you have
 *    an exponential behaviour. A value of 10 correspond to at least 3 entity
 *    replacement per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10

/*
 * xmlParserEntityCheck
 * @ctxt:  the parser context (may be NULL)
 * @size:  size of the entity content being checked, or 0
 * @ent:  the entity being referenced, or NULL
 * @replacement:  accumulated size of replacement text, or 0
 *
 * Function to check non-linear entity expansion behaviour
 * This is here to detect and stop exponential entity expansion
 * This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * Exactly one heuristic is applied, chosen in this order: @replacement
 * when non-zero, else @size when non-zero, else @ent when non-NULL, else
 * a fallback based on the last recorded error and ctxt->nbentities.
 *
 * Returns 0 when @ctxt is NULL, XML_PARSE_HUGE is set, or the selected
 * heuristic is satisfied; returns 1 when an entity loop was already
 * recorded in ctxt->lastError or when the heuristic trips, in which case
 * XML_ERR_ENTITY_LOOP is raised through xmlFatalErr().
 */
static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
                     xmlEntityPtr ent, size_t replacement)
{
    /* Bytes of input taken into account so far, filled in per branch. */
    size_t consumed = 0;

    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
        return (0);
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
        return (1);

    /*
     * This may look absurd but is needed to detect
     * entities problems
     *
     * First time this entity is seen (checked == 0): expand its content
     * once to count how many entities the expansion pulls in.
     */
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
        (ent->content != NULL) && (ent->checked == 0) &&
        (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
        unsigned long oldnbent = ctxt->nbentities;
        xmlChar *rep;

        /*
         * Mark as non-zero before expanding; presumably this keeps a
         * nested reference to the same entity from re-entering this
         * block -- NOTE(review): confirm against xmlStringDecodeEntities.
         */
        ent->checked = 1;

        ++ctxt->depth;
        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                      XML_SUBSTITUTE_REF, 0, 0, 0);
        --ctxt->depth;
        if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
            /* Loop detected during expansion: truncate the content. */
            ent->content[0] = 0;
        }

        /*
         * Store the entity count of the expansion in the upper bits
         * (value * 2); the low bit is set below when the replacement
         * text contains a '<'.
         */
        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
        if (rep != NULL) {
            if (xmlStrchr(rep, '<'))
                ent->checked |= 1;
            xmlFree(rep);
            rep = NULL;
        }
    }
    if (replacement != 0) {
        /* Small replacement volumes are always accepted. */
        if (replacement < XML_MAX_TEXT_LENGTH)
            return(0);

        /*
         * If the volume of entity copy reaches 10 times the
         * amount of parsed data and over the large text threshold
         * then that's very likely to be an abuse.
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                       (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        if (replacement < XML_PARSER_NON_LINEAR * consumed)
            return(0);
    } else if (size != 0) {
        /*
         * Do the check based on the replacement size of the entity
         */
        if (size < XML_PARSER_BIG_ENTITY)
            return(0);

        /*
         * A limit on the amount of text data reasonably used
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                       (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        /* Both the size and the entity count must stay under the ratio. */
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
            (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
            return (0);
    } else if (ent != NULL) {
        /*
         * use the number of parsed entities in the replacement
         * (upper bits of ent->checked, see above)
         */
        size = ent->checked / 2;

        /*
         * The amount of data parsed counting entities size only once
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                       (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        /*
         * Check the density of entities for the amount of data
         * knowing an entity reference will take at least 3 bytes
         */
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
            return (0);
    } else {
        /*
         * strange we got no data for checking
         *
         * Only fail when the last error was an undeclared entity
         * (error or warning) and more than 10000 entities were counted.
         */
        if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
             (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
            (ctxt->nbentities <= 10000))
            return (0);
    }
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
    return (1);
}

/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process.
This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;



#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100

/*
 * List of XML prefixed PI allowed by W3C specs
 * (the array is terminated by a NULL entry)
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};


/* Forward declarations of static helpers defined later in this file. */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);

/************************************************************************
 *									*
 *		Some
factorized error routines * 308 * * 309 ************************************************************************/ 310 311 /** 312 * xmlErrAttributeDup: 313 * @ctxt: an XML parser context 314 * @prefix: the attribute prefix 315 * @localname: the attribute localname 316 * 317 * Handle a redefinition of attribute error 318 */ 319 static void 320 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 321 const xmlChar * localname) 322 { 323 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 324 (ctxt->instate == XML_PARSER_EOF)) 325 return; 326 if (ctxt != NULL) 327 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 328 329 if (prefix == NULL) 330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 332 (const char *) localname, NULL, NULL, 0, 0, 333 "Attribute %s redefined\n", localname); 334 else 335 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 336 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 337 (const char *) prefix, (const char *) localname, 338 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 339 localname); 340 if (ctxt != NULL) { 341 ctxt->wellFormed = 0; 342 if (ctxt->recovery == 0) 343 ctxt->disableSAX = 1; 344 } 345 } 346 347 /** 348 * xmlFatalErr: 349 * @ctxt: an XML parser context 350 * @error: the error number 351 * @extra: extra information string 352 * 353 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 354 */ 355 static void 356 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 357 { 358 const char *errmsg; 359 360 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 361 (ctxt->instate == XML_PARSER_EOF)) 362 return; 363 switch (error) { 364 case XML_ERR_INVALID_HEX_CHARREF: 365 errmsg = "CharRef: invalid hexadecimal value"; 366 break; 367 case XML_ERR_INVALID_DEC_CHARREF: 368 errmsg = "CharRef: invalid decimal value"; 369 break; 370 case XML_ERR_INVALID_CHARREF: 371 errmsg = "CharRef: invalid value"; 372 break; 373 case XML_ERR_INTERNAL_ERROR: 374 errmsg = "internal error"; 375 break; 376 case XML_ERR_PEREF_AT_EOF: 377 errmsg = "PEReference at end of document"; 378 break; 379 case XML_ERR_PEREF_IN_PROLOG: 380 errmsg = "PEReference in prolog"; 381 break; 382 case XML_ERR_PEREF_IN_EPILOG: 383 errmsg = "PEReference in epilog"; 384 break; 385 case XML_ERR_PEREF_NO_NAME: 386 errmsg = "PEReference: no name"; 387 break; 388 case XML_ERR_PEREF_SEMICOL_MISSING: 389 errmsg = "PEReference: expecting ';'"; 390 break; 391 case XML_ERR_ENTITY_LOOP: 392 errmsg = "Detected an entity reference loop"; 393 break; 394 case XML_ERR_ENTITY_NOT_STARTED: 395 errmsg = "EntityValue: \" or ' expected"; 396 break; 397 case XML_ERR_ENTITY_PE_INTERNAL: 398 errmsg = "PEReferences forbidden in internal subset"; 399 break; 400 case XML_ERR_ENTITY_NOT_FINISHED: 401 errmsg = "EntityValue: \" or ' expected"; 402 break; 403 case XML_ERR_ATTRIBUTE_NOT_STARTED: 404 errmsg = "AttValue: \" or ' expected"; 405 break; 406 case XML_ERR_LT_IN_ATTRIBUTE: 407 errmsg = "Unescaped '<' not allowed in attributes values"; 408 break; 409 case XML_ERR_LITERAL_NOT_STARTED: 410 errmsg = "SystemLiteral \" or ' expected"; 411 break; 412 case XML_ERR_LITERAL_NOT_FINISHED: 413 errmsg = "Unfinished System or Public ID \" or ' expected"; 414 break; 415 case XML_ERR_MISPLACED_CDATA_END: 416 errmsg = "Sequence ']]>' not allowed in content"; 417 break; 418 case 
XML_ERR_URI_REQUIRED: 419 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 420 break; 421 case XML_ERR_PUBID_REQUIRED: 422 errmsg = "PUBLIC, the Public Identifier is missing"; 423 break; 424 case XML_ERR_HYPHEN_IN_COMMENT: 425 errmsg = "Comment must not contain '--' (double-hyphen)"; 426 break; 427 case XML_ERR_PI_NOT_STARTED: 428 errmsg = "xmlParsePI : no target name"; 429 break; 430 case XML_ERR_RESERVED_XML_NAME: 431 errmsg = "Invalid PI name"; 432 break; 433 case XML_ERR_NOTATION_NOT_STARTED: 434 errmsg = "NOTATION: Name expected here"; 435 break; 436 case XML_ERR_NOTATION_NOT_FINISHED: 437 errmsg = "'>' required to close NOTATION declaration"; 438 break; 439 case XML_ERR_VALUE_REQUIRED: 440 errmsg = "Entity value required"; 441 break; 442 case XML_ERR_URI_FRAGMENT: 443 errmsg = "Fragment not allowed"; 444 break; 445 case XML_ERR_ATTLIST_NOT_STARTED: 446 errmsg = "'(' required to start ATTLIST enumeration"; 447 break; 448 case XML_ERR_NMTOKEN_REQUIRED: 449 errmsg = "NmToken expected in ATTLIST enumeration"; 450 break; 451 case XML_ERR_ATTLIST_NOT_FINISHED: 452 errmsg = "')' required to finish ATTLIST enumeration"; 453 break; 454 case XML_ERR_MIXED_NOT_STARTED: 455 errmsg = "MixedContentDecl : '|' or ')*' expected"; 456 break; 457 case XML_ERR_PCDATA_REQUIRED: 458 errmsg = "MixedContentDecl : '#PCDATA' expected"; 459 break; 460 case XML_ERR_ELEMCONTENT_NOT_STARTED: 461 errmsg = "ContentDecl : Name or '(' expected"; 462 break; 463 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 464 errmsg = "ContentDecl : ',' '|' or ')' expected"; 465 break; 466 case XML_ERR_PEREF_IN_INT_SUBSET: 467 errmsg = 468 "PEReference: forbidden within markup decl in internal subset"; 469 break; 470 case XML_ERR_GT_REQUIRED: 471 errmsg = "expected '>'"; 472 break; 473 case XML_ERR_CONDSEC_INVALID: 474 errmsg = "XML conditional section '[' expected"; 475 break; 476 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 477 errmsg = "Content error in the external subset"; 478 break; 479 case 
XML_ERR_CONDSEC_INVALID_KEYWORD: 480 errmsg = 481 "conditional section INCLUDE or IGNORE keyword expected"; 482 break; 483 case XML_ERR_CONDSEC_NOT_FINISHED: 484 errmsg = "XML conditional section not closed"; 485 break; 486 case XML_ERR_XMLDECL_NOT_STARTED: 487 errmsg = "Text declaration '<?xml' required"; 488 break; 489 case XML_ERR_XMLDECL_NOT_FINISHED: 490 errmsg = "parsing XML declaration: '?>' expected"; 491 break; 492 case XML_ERR_EXT_ENTITY_STANDALONE: 493 errmsg = "external parsed entities cannot be standalone"; 494 break; 495 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 496 errmsg = "EntityRef: expecting ';'"; 497 break; 498 case XML_ERR_DOCTYPE_NOT_FINISHED: 499 errmsg = "DOCTYPE improperly terminated"; 500 break; 501 case XML_ERR_LTSLASH_REQUIRED: 502 errmsg = "EndTag: '</' not found"; 503 break; 504 case XML_ERR_EQUAL_REQUIRED: 505 errmsg = "expected '='"; 506 break; 507 case XML_ERR_STRING_NOT_CLOSED: 508 errmsg = "String not closed expecting \" or '"; 509 break; 510 case XML_ERR_STRING_NOT_STARTED: 511 errmsg = "String not started expecting ' or \""; 512 break; 513 case XML_ERR_ENCODING_NAME: 514 errmsg = "Invalid XML encoding name"; 515 break; 516 case XML_ERR_STANDALONE_VALUE: 517 errmsg = "standalone accepts only 'yes' or 'no'"; 518 break; 519 case XML_ERR_DOCUMENT_EMPTY: 520 errmsg = "Document is empty"; 521 break; 522 case XML_ERR_DOCUMENT_END: 523 errmsg = "Extra content at the end of the document"; 524 break; 525 case XML_ERR_NOT_WELL_BALANCED: 526 errmsg = "chunk is not well balanced"; 527 break; 528 case XML_ERR_EXTRA_CONTENT: 529 errmsg = "extra content at the end of well balanced chunk"; 530 break; 531 case XML_ERR_VERSION_MISSING: 532 errmsg = "Malformed declaration expecting version"; 533 break; 534 case XML_ERR_NAME_TOO_LONG: 535 errmsg = "Name too long use XML_PARSE_HUGE option"; 536 break; 537 #if 0 538 case: 539 errmsg = ""; 540 break; 541 #endif 542 default: 543 errmsg = "Unregistered error message"; 544 } 545 if (ctxt != NULL) 546 
ctxt->errNo = error; 547 if (info == NULL) { 548 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 549 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 550 errmsg); 551 } else { 552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 553 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 554 errmsg, info); 555 } 556 if (ctxt != NULL) { 557 ctxt->wellFormed = 0; 558 if (ctxt->recovery == 0) 559 ctxt->disableSAX = 1; 560 } 561 } 562 563 /** 564 * xmlFatalErrMsg: 565 * @ctxt: an XML parser context 566 * @error: the error number 567 * @msg: the error message 568 * 569 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 570 */ 571 static void LIBXML_ATTR_FORMAT(3,0) 572 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 573 const char *msg) 574 { 575 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 576 (ctxt->instate == XML_PARSER_EOF)) 577 return; 578 if (ctxt != NULL) 579 ctxt->errNo = error; 580 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 581 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 582 if (ctxt != NULL) { 583 ctxt->wellFormed = 0; 584 if (ctxt->recovery == 0) 585 ctxt->disableSAX = 1; 586 } 587 } 588 589 /** 590 * xmlWarningMsg: 591 * @ctxt: an XML parser context 592 * @error: the error number 593 * @msg: the error message 594 * @str1: extra data 595 * @str2: extra data 596 * 597 * Handle a warning. 598 */ 599 static void LIBXML_ATTR_FORMAT(3,0) 600 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 601 const char *msg, const xmlChar *str1, const xmlChar *str2) 602 { 603 xmlStructuredErrorFunc schannel = NULL; 604 605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 606 (ctxt->instate == XML_PARSER_EOF)) 607 return; 608 if ((ctxt != NULL) && (ctxt->sax != NULL) && 609 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 610 schannel = ctxt->sax->serror; 611 if (ctxt != NULL) { 612 __xmlRaiseError(schannel, 613 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 614 ctxt->userData, 615 ctxt, NULL, XML_FROM_PARSER, error, 616 XML_ERR_WARNING, NULL, 0, 617 (const char *) str1, (const char *) str2, NULL, 0, 0, 618 msg, (const char *) str1, (const char *) str2); 619 } else { 620 __xmlRaiseError(schannel, NULL, NULL, 621 ctxt, NULL, XML_FROM_PARSER, error, 622 XML_ERR_WARNING, NULL, 0, 623 (const char *) str1, (const char *) str2, NULL, 0, 0, 624 msg, (const char *) str1, (const char *) str2); 625 } 626 } 627 628 /** 629 * xmlValidityError: 630 * @ctxt: an XML parser context 631 * @error: the error number 632 * @msg: the error message 633 * @str1: extra data 634 * 635 * Handle a validity error. 636 */ 637 static void LIBXML_ATTR_FORMAT(3,0) 638 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 639 const char *msg, const xmlChar *str1, const xmlChar *str2) 640 { 641 xmlStructuredErrorFunc schannel = NULL; 642 643 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 644 (ctxt->instate == XML_PARSER_EOF)) 645 return; 646 if (ctxt != NULL) { 647 ctxt->errNo = error; 648 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 649 schannel = ctxt->sax->serror; 650 } 651 if (ctxt != NULL) { 652 __xmlRaiseError(schannel, 653 ctxt->vctxt.error, ctxt->vctxt.userData, 654 ctxt, NULL, XML_FROM_DTD, error, 655 XML_ERR_ERROR, NULL, 0, (const char *) str1, 656 (const char *) str2, NULL, 0, 0, 657 msg, (const char *) str1, (const char *) str2); 658 ctxt->valid = 0; 659 } else { 660 __xmlRaiseError(schannel, NULL, NULL, 661 ctxt, NULL, XML_FROM_DTD, error, 662 XML_ERR_ERROR, NULL, 0, (const char *) str1, 663 (const char *) str2, NULL, 0, 0, 664 msg, (const char *) str1, (const char *) str2); 665 } 666 } 667 668 /** 669 * xmlFatalErrMsgInt: 670 * @ctxt: an XML parser context 671 * @error: the error number 672 * @msg: the error message 673 * @val: an integer value 674 * 675 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 676 */ 677 static void LIBXML_ATTR_FORMAT(3,0) 678 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 679 const char *msg, int val) 680 { 681 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 682 (ctxt->instate == XML_PARSER_EOF)) 683 return; 684 if (ctxt != NULL) 685 ctxt->errNo = error; 686 __xmlRaiseError(NULL, NULL, NULL, 687 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 688 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 689 if (ctxt != NULL) { 690 ctxt->wellFormed = 0; 691 if (ctxt->recovery == 0) 692 ctxt->disableSAX = 1; 693 } 694 } 695 696 /** 697 * xmlFatalErrMsgStrIntStr: 698 * @ctxt: an XML parser context 699 * @error: the error number 700 * @msg: the error message 701 * @str1: an string info 702 * @val: an integer value 703 * @str2: an string info 704 * 705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 706 */ 707 static void LIBXML_ATTR_FORMAT(3,0) 708 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 709 const char *msg, const xmlChar *str1, int val, 710 const xmlChar *str2) 711 { 712 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 713 (ctxt->instate == XML_PARSER_EOF)) 714 return; 715 if (ctxt != NULL) 716 ctxt->errNo = error; 717 __xmlRaiseError(NULL, NULL, NULL, 718 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 719 NULL, 0, (const char *) str1, (const char *) str2, 720 NULL, val, 0, msg, str1, val, str2); 721 if (ctxt != NULL) { 722 ctxt->wellFormed = 0; 723 if (ctxt->recovery == 0) 724 ctxt->disableSAX = 1; 725 } 726 } 727 728 /** 729 * xmlFatalErrMsgStr: 730 * @ctxt: an XML parser context 731 * @error: the error number 732 * @msg: the error message 733 * @val: a string value 734 * 735 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 736 */ 737 static void LIBXML_ATTR_FORMAT(3,0) 738 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 739 const char *msg, const xmlChar * val) 740 { 741 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 742 (ctxt->instate == XML_PARSER_EOF)) 743 return; 744 if (ctxt != NULL) 745 ctxt->errNo = error; 746 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 747 XML_FROM_PARSER, error, XML_ERR_FATAL, 748 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 749 val); 750 if (ctxt != NULL) { 751 ctxt->wellFormed = 0; 752 if (ctxt->recovery == 0) 753 ctxt->disableSAX = 1; 754 } 755 } 756 757 /** 758 * xmlErrMsgStr: 759 * @ctxt: an XML parser context 760 * @error: the error number 761 * @msg: the error message 762 * @val: a string value 763 * 764 * Handle a non fatal parser error 765 */ 766 static void LIBXML_ATTR_FORMAT(3,0) 767 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 768 const char *msg, const xmlChar * val) 769 { 770 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 771 (ctxt->instate == XML_PARSER_EOF)) 772 return; 773 if (ctxt != NULL) 774 ctxt->errNo = error; 775 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 776 XML_FROM_PARSER, error, XML_ERR_ERROR, 777 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 778 val); 779 } 780 781 /** 782 * xmlNsErr: 783 * @ctxt: an XML parser context 784 * @error: the error number 785 * @msg: the message 786 * @info1: extra information string 787 * @info2: extra information string 788 * 789 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 790 */ 791 static void LIBXML_ATTR_FORMAT(3,0) 792 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 793 const char *msg, 794 const xmlChar * info1, const xmlChar * info2, 795 const xmlChar * info3) 796 { 797 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 798 (ctxt->instate == XML_PARSER_EOF)) 799 return; 800 if (ctxt != NULL) 801 ctxt->errNo = error; 802 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 803 XML_ERR_ERROR, NULL, 0, (const char *) info1, 804 (const char *) info2, (const char *) info3, 0, 0, msg, 805 info1, info2, info3); 806 if (ctxt != NULL) 807 ctxt->nsWellFormed = 0; 808 } 809 810 /** 811 * xmlNsWarn 812 * @ctxt: an XML parser context 813 * @error: the error number 814 * @msg: the message 815 * @info1: extra information string 816 * @info2: extra information string 817 * 818 * Handle a namespace warning error 819 */ 820 static void LIBXML_ATTR_FORMAT(3,0) 821 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 822 const char *msg, 823 const xmlChar * info1, const xmlChar * info2, 824 const xmlChar * info3) 825 { 826 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 827 (ctxt->instate == XML_PARSER_EOF)) 828 return; 829 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 830 XML_ERR_WARNING, NULL, 0, (const char *) info1, 831 (const char *) info2, (const char *) info3, 0, 0, msg, 832 info1, info2, info3); 833 } 834 835 /************************************************************************ 836 * * 837 * Library wide options * 838 * * 839 ************************************************************************/ 840 841 /** 842 * xmlHasFeature: 843 * @feature: the feature to be examined 844 * 845 * Examines if the library has been compiled with a given feature. 846 * 847 * Returns a non-zero value if the feature exist, otherwise zero. 848 * Returns zero (0) if the feature does not exist or an unknown 849 * unknown feature is requested, non-zero otherwise. 
850 */ 851 int 852 xmlHasFeature(xmlFeature feature) 853 { 854 switch (feature) { 855 case XML_WITH_THREAD: 856 #ifdef LIBXML_THREAD_ENABLED 857 return(1); 858 #else 859 return(0); 860 #endif 861 case XML_WITH_TREE: 862 #ifdef LIBXML_TREE_ENABLED 863 return(1); 864 #else 865 return(0); 866 #endif 867 case XML_WITH_OUTPUT: 868 #ifdef LIBXML_OUTPUT_ENABLED 869 return(1); 870 #else 871 return(0); 872 #endif 873 case XML_WITH_PUSH: 874 #ifdef LIBXML_PUSH_ENABLED 875 return(1); 876 #else 877 return(0); 878 #endif 879 case XML_WITH_READER: 880 #ifdef LIBXML_READER_ENABLED 881 return(1); 882 #else 883 return(0); 884 #endif 885 case XML_WITH_PATTERN: 886 #ifdef LIBXML_PATTERN_ENABLED 887 return(1); 888 #else 889 return(0); 890 #endif 891 case XML_WITH_WRITER: 892 #ifdef LIBXML_WRITER_ENABLED 893 return(1); 894 #else 895 return(0); 896 #endif 897 case XML_WITH_SAX1: 898 #ifdef LIBXML_SAX1_ENABLED 899 return(1); 900 #else 901 return(0); 902 #endif 903 case XML_WITH_FTP: 904 #ifdef LIBXML_FTP_ENABLED 905 return(1); 906 #else 907 return(0); 908 #endif 909 case XML_WITH_HTTP: 910 #ifdef LIBXML_HTTP_ENABLED 911 return(1); 912 #else 913 return(0); 914 #endif 915 case XML_WITH_VALID: 916 #ifdef LIBXML_VALID_ENABLED 917 return(1); 918 #else 919 return(0); 920 #endif 921 case XML_WITH_HTML: 922 #ifdef LIBXML_HTML_ENABLED 923 return(1); 924 #else 925 return(0); 926 #endif 927 case XML_WITH_LEGACY: 928 #ifdef LIBXML_LEGACY_ENABLED 929 return(1); 930 #else 931 return(0); 932 #endif 933 case XML_WITH_C14N: 934 #ifdef LIBXML_C14N_ENABLED 935 return(1); 936 #else 937 return(0); 938 #endif 939 case XML_WITH_CATALOG: 940 #ifdef LIBXML_CATALOG_ENABLED 941 return(1); 942 #else 943 return(0); 944 #endif 945 case XML_WITH_XPATH: 946 #ifdef LIBXML_XPATH_ENABLED 947 return(1); 948 #else 949 return(0); 950 #endif 951 case XML_WITH_XPTR: 952 #ifdef LIBXML_XPTR_ENABLED 953 return(1); 954 #else 955 return(0); 956 #endif 957 case XML_WITH_XINCLUDE: 958 #ifdef LIBXML_XINCLUDE_ENABLED 959 return(1); 960 
#else 961 return(0); 962 #endif 963 case XML_WITH_ICONV: 964 #ifdef LIBXML_ICONV_ENABLED 965 return(1); 966 #else 967 return(0); 968 #endif 969 case XML_WITH_ISO8859X: 970 #ifdef LIBXML_ISO8859X_ENABLED 971 return(1); 972 #else 973 return(0); 974 #endif 975 case XML_WITH_UNICODE: 976 #ifdef LIBXML_UNICODE_ENABLED 977 return(1); 978 #else 979 return(0); 980 #endif 981 case XML_WITH_REGEXP: 982 #ifdef LIBXML_REGEXP_ENABLED 983 return(1); 984 #else 985 return(0); 986 #endif 987 case XML_WITH_AUTOMATA: 988 #ifdef LIBXML_AUTOMATA_ENABLED 989 return(1); 990 #else 991 return(0); 992 #endif 993 case XML_WITH_EXPR: 994 #ifdef LIBXML_EXPR_ENABLED 995 return(1); 996 #else 997 return(0); 998 #endif 999 case XML_WITH_SCHEMAS: 1000 #ifdef LIBXML_SCHEMAS_ENABLED 1001 return(1); 1002 #else 1003 return(0); 1004 #endif 1005 case XML_WITH_SCHEMATRON: 1006 #ifdef LIBXML_SCHEMATRON_ENABLED 1007 return(1); 1008 #else 1009 return(0); 1010 #endif 1011 case XML_WITH_MODULES: 1012 #ifdef LIBXML_MODULES_ENABLED 1013 return(1); 1014 #else 1015 return(0); 1016 #endif 1017 case XML_WITH_DEBUG: 1018 #ifdef LIBXML_DEBUG_ENABLED 1019 return(1); 1020 #else 1021 return(0); 1022 #endif 1023 case XML_WITH_DEBUG_MEM: 1024 #ifdef DEBUG_MEMORY_LOCATION 1025 return(1); 1026 #else 1027 return(0); 1028 #endif 1029 case XML_WITH_DEBUG_RUN: 1030 #ifdef LIBXML_DEBUG_RUNTIME 1031 return(1); 1032 #else 1033 return(0); 1034 #endif 1035 case XML_WITH_ZLIB: 1036 #ifdef LIBXML_ZLIB_ENABLED 1037 return(1); 1038 #else 1039 return(0); 1040 #endif 1041 case XML_WITH_LZMA: 1042 #ifdef LIBXML_LZMA_ENABLED 1043 return(1); 1044 #else 1045 return(0); 1046 #endif 1047 case XML_WITH_ICU: 1048 #ifdef LIBXML_ICU_ENABLED 1049 return(1); 1050 #else 1051 return(0); 1052 #endif 1053 default: 1054 break; 1055 } 1056 return(0); 1057 } 1058 1059 /************************************************************************ 1060 * * 1061 * SAX2 defaulted attributes handling * 1062 * * 1063 
************************************************************************/ 1064 1065 /** 1066 * xmlDetectSAX2: 1067 * @ctxt: an XML parser context 1068 * 1069 * Do the SAX2 detection and specific intialization 1070 */ 1071 static void 1072 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1073 if (ctxt == NULL) return; 1074 #ifdef LIBXML_SAX1_ENABLED 1075 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1076 ((ctxt->sax->startElementNs != NULL) || 1077 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1078 #else 1079 ctxt->sax2 = 1; 1080 #endif /* LIBXML_SAX1_ENABLED */ 1081 1082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1085 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1086 (ctxt->str_xml_ns == NULL)) { 1087 xmlErrMemory(ctxt, NULL); 1088 } 1089 } 1090 1091 typedef struct _xmlDefAttrs xmlDefAttrs; 1092 typedef xmlDefAttrs *xmlDefAttrsPtr; 1093 struct _xmlDefAttrs { 1094 int nbAttrs; /* number of defaulted attributes on that element */ 1095 int maxAttrs; /* the size of the array */ 1096 #if __STDC_VERSION__ >= 199901L 1097 /* Using a C99 flexible array member avoids UBSan errors. */ 1098 const xmlChar *values[]; /* array of localname/prefix/values/external */ 1099 #else 1100 const xmlChar *values[5]; 1101 #endif 1102 }; 1103 1104 /** 1105 * xmlAttrNormalizeSpace: 1106 * @src: the source string 1107 * @dst: the target string 1108 * 1109 * Normalize the space in non CDATA attribute values: 1110 * If the attribute type is not CDATA, then the XML processor MUST further 1111 * process the normalized attribute value by discarding any leading and 1112 * trailing space (#x20) characters, and by replacing sequences of space 1113 * (#x20) characters by a single space (#x20) character. 
1114 * Note that the size of dst need to be at least src, and if one doesn't need 1115 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1116 * passing src as dst is just fine. 1117 * 1118 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1119 * is needed. 1120 */ 1121 static xmlChar * 1122 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1123 { 1124 if ((src == NULL) || (dst == NULL)) 1125 return(NULL); 1126 1127 while (*src == 0x20) src++; 1128 while (*src != 0) { 1129 if (*src == 0x20) { 1130 while (*src == 0x20) src++; 1131 if (*src != 0) 1132 *dst++ = 0x20; 1133 } else { 1134 *dst++ = *src++; 1135 } 1136 } 1137 *dst = 0; 1138 if (dst == src) 1139 return(NULL); 1140 return(dst); 1141 } 1142 1143 /** 1144 * xmlAttrNormalizeSpace2: 1145 * @src: the source string 1146 * 1147 * Normalize the space in non CDATA attribute values, a slightly more complex 1148 * front end to avoid allocation problems when running on attribute values 1149 * coming from the input. 1150 * 1151 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1152 * is needed. 
1153 */ 1154 static const xmlChar * 1155 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1156 { 1157 int i; 1158 int remove_head = 0; 1159 int need_realloc = 0; 1160 const xmlChar *cur; 1161 1162 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1163 return(NULL); 1164 i = *len; 1165 if (i <= 0) 1166 return(NULL); 1167 1168 cur = src; 1169 while (*cur == 0x20) { 1170 cur++; 1171 remove_head++; 1172 } 1173 while (*cur != 0) { 1174 if (*cur == 0x20) { 1175 cur++; 1176 if ((*cur == 0x20) || (*cur == 0)) { 1177 need_realloc = 1; 1178 break; 1179 } 1180 } else 1181 cur++; 1182 } 1183 if (need_realloc) { 1184 xmlChar *ret; 1185 1186 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1187 if (ret == NULL) { 1188 xmlErrMemory(ctxt, NULL); 1189 return(NULL); 1190 } 1191 xmlAttrNormalizeSpace(ret, ret); 1192 *len = (int) strlen((const char *)ret); 1193 return(ret); 1194 } else if (remove_head) { 1195 *len -= remove_head; 1196 memmove(src, src + remove_head, 1 + *len); 1197 return(src); 1198 } 1199 return(NULL); 1200 } 1201 1202 /** 1203 * xmlAddDefAttrs: 1204 * @ctxt: an XML parser context 1205 * @fullname: the element fullname 1206 * @fullattr: the attribute fullname 1207 * @value: the attribute value 1208 * 1209 * Add a defaulted attribute for an element 1210 */ 1211 static void 1212 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1213 const xmlChar *fullname, 1214 const xmlChar *fullattr, 1215 const xmlChar *value) { 1216 xmlDefAttrsPtr defaults; 1217 int len; 1218 const xmlChar *name; 1219 const xmlChar *prefix; 1220 1221 /* 1222 * Allows to detect attribute redefinitions 1223 */ 1224 if (ctxt->attsSpecial != NULL) { 1225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1226 return; 1227 } 1228 1229 if (ctxt->attsDefault == NULL) { 1230 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1231 if (ctxt->attsDefault == NULL) 1232 goto mem_error; 1233 } 1234 1235 /* 1236 * split the element name into prefix:localname , the string 
found 1237 * are within the DTD and then not associated to namespace names. 1238 */ 1239 name = xmlSplitQName3(fullname, &len); 1240 if (name == NULL) { 1241 name = xmlDictLookup(ctxt->dict, fullname, -1); 1242 prefix = NULL; 1243 } else { 1244 name = xmlDictLookup(ctxt->dict, name, -1); 1245 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1246 } 1247 1248 /* 1249 * make sure there is some storage 1250 */ 1251 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1252 if (defaults == NULL) { 1253 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1254 (4 * 5) * sizeof(const xmlChar *)); 1255 if (defaults == NULL) 1256 goto mem_error; 1257 defaults->nbAttrs = 0; 1258 defaults->maxAttrs = 4; 1259 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1260 defaults, NULL) < 0) { 1261 xmlFree(defaults); 1262 goto mem_error; 1263 } 1264 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1265 xmlDefAttrsPtr temp; 1266 1267 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1268 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1269 if (temp == NULL) 1270 goto mem_error; 1271 defaults = temp; 1272 defaults->maxAttrs *= 2; 1273 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1274 defaults, NULL) < 0) { 1275 xmlFree(defaults); 1276 goto mem_error; 1277 } 1278 } 1279 1280 /* 1281 * Split the element name into prefix:localname , the string found 1282 * are within the DTD and hen not associated to namespace names. 
1283 */ 1284 name = xmlSplitQName3(fullattr, &len); 1285 if (name == NULL) { 1286 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1287 prefix = NULL; 1288 } else { 1289 name = xmlDictLookup(ctxt->dict, name, -1); 1290 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1291 } 1292 1293 defaults->values[5 * defaults->nbAttrs] = name; 1294 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1295 /* intern the string and precompute the end */ 1296 len = xmlStrlen(value); 1297 value = xmlDictLookup(ctxt->dict, value, len); 1298 defaults->values[5 * defaults->nbAttrs + 2] = value; 1299 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1300 if (ctxt->external) 1301 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1302 else 1303 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1304 defaults->nbAttrs++; 1305 1306 return; 1307 1308 mem_error: 1309 xmlErrMemory(ctxt, NULL); 1310 return; 1311 } 1312 1313 /** 1314 * xmlAddSpecialAttr: 1315 * @ctxt: an XML parser context 1316 * @fullname: the element fullname 1317 * @fullattr: the attribute fullname 1318 * @type: the attribute type 1319 * 1320 * Register this attribute type 1321 */ 1322 static void 1323 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1324 const xmlChar *fullname, 1325 const xmlChar *fullattr, 1326 int type) 1327 { 1328 if (ctxt->attsSpecial == NULL) { 1329 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1330 if (ctxt->attsSpecial == NULL) 1331 goto mem_error; 1332 } 1333 1334 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1335 return; 1336 1337 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1338 (void *) (ptrdiff_t) type); 1339 return; 1340 1341 mem_error: 1342 xmlErrMemory(ctxt, NULL); 1343 return; 1344 } 1345 1346 /** 1347 * xmlCleanSpecialAttrCallback: 1348 * 1349 * Removes CDATA attributes from the special attribute table 1350 */ 1351 static void 1352 xmlCleanSpecialAttrCallback(void *payload, void *data, 1353 const xmlChar *fullname, const xmlChar 
*fullattr, 1354 const xmlChar *unused ATTRIBUTE_UNUSED) { 1355 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1356 1357 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { 1358 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1359 } 1360 } 1361 1362 /** 1363 * xmlCleanSpecialAttr: 1364 * @ctxt: an XML parser context 1365 * 1366 * Trim the list of attributes defined to remove all those of type 1367 * CDATA as they are not special. This call should be done when finishing 1368 * to parse the DTD and before starting to parse the document root. 1369 */ 1370 static void 1371 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1372 { 1373 if (ctxt->attsSpecial == NULL) 1374 return; 1375 1376 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1377 1378 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1379 xmlHashFree(ctxt->attsSpecial, NULL); 1380 ctxt->attsSpecial = NULL; 1381 } 1382 return; 1383 } 1384 1385 /** 1386 * xmlCheckLanguageID: 1387 * @lang: pointer to the string value 1388 * 1389 * Checks that the value conforms to the LanguageID production: 1390 * 1391 * NOTE: this is somewhat deprecated, those productions were removed from 1392 * the XML Second edition. 
1393 * 1394 * [33] LanguageID ::= Langcode ('-' Subcode)* 1395 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1396 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1397 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1398 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1399 * [38] Subcode ::= ([a-z] | [A-Z])+ 1400 * 1401 * The current REC reference the sucessors of RFC 1766, currently 5646 1402 * 1403 * http://www.rfc-editor.org/rfc/rfc5646.txt 1404 * langtag = language 1405 * ["-" script] 1406 * ["-" region] 1407 * *("-" variant) 1408 * *("-" extension) 1409 * ["-" privateuse] 1410 * language = 2*3ALPHA ; shortest ISO 639 code 1411 * ["-" extlang] ; sometimes followed by 1412 * ; extended language subtags 1413 * / 4ALPHA ; or reserved for future use 1414 * / 5*8ALPHA ; or registered language subtag 1415 * 1416 * extlang = 3ALPHA ; selected ISO 639 codes 1417 * *2("-" 3ALPHA) ; permanently reserved 1418 * 1419 * script = 4ALPHA ; ISO 15924 code 1420 * 1421 * region = 2ALPHA ; ISO 3166-1 code 1422 * / 3DIGIT ; UN M.49 code 1423 * 1424 * variant = 5*8alphanum ; registered variants 1425 * / (DIGIT 3alphanum) 1426 * 1427 * extension = singleton 1*("-" (2*8alphanum)) 1428 * 1429 * ; Single alphanumerics 1430 * ; "x" reserved for private use 1431 * singleton = DIGIT ; 0 - 9 1432 * / %x41-57 ; A - W 1433 * / %x59-5A ; Y - Z 1434 * / %x61-77 ; a - w 1435 * / %x79-7A ; y - z 1436 * 1437 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1438 * The parser below doesn't try to cope with extension or privateuse 1439 * that could be added but that's not interoperable anyway 1440 * 1441 * Returns 1 if correct 0 otherwise 1442 **/ 1443 int 1444 xmlCheckLanguageID(const xmlChar * lang) 1445 { 1446 const xmlChar *cur = lang, *nxt; 1447 1448 if (cur == NULL) 1449 return (0); 1450 if (((cur[0] == 'i') && (cur[1] == '-')) || 1451 ((cur[0] == 'I') && (cur[1] == '-')) || 1452 ((cur[0] == 'x') && (cur[1] == '-')) || 1453 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1454 /* 1455 * Still allow IANA code and user code which were coming 1456 * from the previous version of the XML-1.0 specification 1457 * it's deprecated but we should not fail 1458 */ 1459 cur += 2; 1460 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1461 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1462 cur++; 1463 return(cur[0] == 0); 1464 } 1465 nxt = cur; 1466 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1467 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1468 nxt++; 1469 if (nxt - cur >= 4) { 1470 /* 1471 * Reserved 1472 */ 1473 if ((nxt - cur > 8) || (nxt[0] != 0)) 1474 return(0); 1475 return(1); 1476 } 1477 if (nxt - cur < 2) 1478 return(0); 1479 /* we got an ISO 639 code */ 1480 if (nxt[0] == 0) 1481 return(1); 1482 if (nxt[0] != '-') 1483 return(0); 1484 1485 nxt++; 1486 cur = nxt; 1487 /* now we can have extlang or script or region or variant */ 1488 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1489 goto region_m49; 1490 1491 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1492 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1493 nxt++; 1494 if (nxt - cur == 4) 1495 goto script; 1496 if (nxt - cur == 2) 1497 goto region; 1498 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1499 goto variant; 1500 if (nxt - cur != 3) 1501 return(0); 1502 /* we parsed an extlang */ 1503 if (nxt[0] == 0) 1504 return(1); 1505 if (nxt[0] != '-') 1506 return(0); 1507 1508 nxt++; 1509 cur = nxt; 1510 /* now we can have script or region or variant */ 1511 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1512 goto region_m49; 1513 1514 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1515 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1516 nxt++; 1517 if (nxt - cur == 2) 1518 goto region; 1519 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1520 goto variant; 1521 if (nxt - cur != 4) 1522 return(0); 1523 /* we parsed a script */ 1524 script: 1525 if (nxt[0] == 0) 1526 return(1); 1527 if (nxt[0] != '-') 1528 return(0); 1529 1530 nxt++; 1531 cur = nxt; 1532 /* now we can have region or variant */ 1533 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1534 goto region_m49; 1535 1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1538 nxt++; 1539 1540 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1541 goto variant; 1542 if (nxt - cur != 2) 1543 return(0); 1544 /* we parsed a region */ 1545 region: 1546 if (nxt[0] == 0) 1547 return(1); 1548 if (nxt[0] != '-') 1549 return(0); 1550 1551 nxt++; 1552 cur = nxt; 1553 /* now we can just have a variant */ 1554 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1555 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1556 nxt++; 1557 1558 if ((nxt - cur < 5) || (nxt - cur > 8)) 1559 return(0); 1560 1561 /* we parsed a variant */ 1562 variant: 1563 if (nxt[0] == 0) 1564 return(1); 1565 if (nxt[0] != '-') 1566 return(0); 1567 /* extensions and private use subtags not checked */ 1568 return (1); 1569 1570 region_m49: 1571 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1572 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1573 nxt += 3; 1574 goto region; 1575 } 1576 return(0); 1577 } 1578 1579 /************************************************************************ 1580 * * 1581 * Parser stacks related functions and macros * 1582 * * 1583 ************************************************************************/ 1584 1585 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1586 const xmlChar ** str); 1587 1588 #ifdef SAX2 1589 /** 1590 * nsPush: 1591 * @ctxt: an XML parser context 1592 * @prefix: the namespace prefix or NULL 1593 * @URL: the namespace name 1594 * 1595 * Pushes a new parser namespace on top of the ns stack 1596 * 1597 * Returns -1 in case of error, -2 if the namespace should be discarded 1598 * and the index in the stack otherwise. 
1599 */ 1600 static int 1601 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1602 { 1603 if (ctxt->options & XML_PARSE_NSCLEAN) { 1604 int i; 1605 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1606 if (ctxt->nsTab[i] == prefix) { 1607 /* in scope */ 1608 if (ctxt->nsTab[i + 1] == URL) 1609 return(-2); 1610 /* out of scope keep it */ 1611 break; 1612 } 1613 } 1614 } 1615 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1616 ctxt->nsMax = 10; 1617 ctxt->nsNr = 0; 1618 ctxt->nsTab = (const xmlChar **) 1619 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1620 if (ctxt->nsTab == NULL) { 1621 xmlErrMemory(ctxt, NULL); 1622 ctxt->nsMax = 0; 1623 return (-1); 1624 } 1625 } else if (ctxt->nsNr >= ctxt->nsMax) { 1626 const xmlChar ** tmp; 1627 ctxt->nsMax *= 2; 1628 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1629 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1630 if (tmp == NULL) { 1631 xmlErrMemory(ctxt, NULL); 1632 ctxt->nsMax /= 2; 1633 return (-1); 1634 } 1635 ctxt->nsTab = tmp; 1636 } 1637 ctxt->nsTab[ctxt->nsNr++] = prefix; 1638 ctxt->nsTab[ctxt->nsNr++] = URL; 1639 return (ctxt->nsNr); 1640 } 1641 /** 1642 * nsPop: 1643 * @ctxt: an XML parser context 1644 * @nr: the number to pop 1645 * 1646 * Pops the top @nr parser prefix/namespace from the ns stack 1647 * 1648 * Returns the number of namespaces removed 1649 */ 1650 static int 1651 nsPop(xmlParserCtxtPtr ctxt, int nr) 1652 { 1653 int i; 1654 1655 if (ctxt->nsTab == NULL) return(0); 1656 if (ctxt->nsNr < nr) { 1657 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1658 nr = ctxt->nsNr; 1659 } 1660 if (ctxt->nsNr <= 0) 1661 return (0); 1662 1663 for (i = 0;i < nr;i++) { 1664 ctxt->nsNr--; 1665 ctxt->nsTab[ctxt->nsNr] = NULL; 1666 } 1667 return(nr); 1668 } 1669 #endif 1670 1671 static int 1672 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1673 const xmlChar **atts; 1674 int *attallocs; 1675 int maxatts; 1676 1677 if (ctxt->atts == NULL) { 1678 maxatts = 55; /* allow for 
10 attrs by default */ 1679 atts = (const xmlChar **) 1680 xmlMalloc(maxatts * sizeof(xmlChar *)); 1681 if (atts == NULL) goto mem_error; 1682 ctxt->atts = atts; 1683 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1684 if (attallocs == NULL) goto mem_error; 1685 ctxt->attallocs = attallocs; 1686 ctxt->maxatts = maxatts; 1687 } else if (nr + 5 > ctxt->maxatts) { 1688 maxatts = (nr + 5) * 2; 1689 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1690 maxatts * sizeof(const xmlChar *)); 1691 if (atts == NULL) goto mem_error; 1692 ctxt->atts = atts; 1693 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1694 (maxatts / 5) * sizeof(int)); 1695 if (attallocs == NULL) goto mem_error; 1696 ctxt->attallocs = attallocs; 1697 ctxt->maxatts = maxatts; 1698 } 1699 return(ctxt->maxatts); 1700 mem_error: 1701 xmlErrMemory(ctxt, NULL); 1702 return(-1); 1703 } 1704 1705 /** 1706 * inputPush: 1707 * @ctxt: an XML parser context 1708 * @value: the parser input 1709 * 1710 * Pushes a new parser input on top of the input stack 1711 * 1712 * Returns -1 in case of error, the index in the stack otherwise 1713 */ 1714 int 1715 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1716 { 1717 if ((ctxt == NULL) || (value == NULL)) 1718 return(-1); 1719 if (ctxt->inputNr >= ctxt->inputMax) { 1720 ctxt->inputMax *= 2; 1721 ctxt->inputTab = 1722 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1723 ctxt->inputMax * 1724 sizeof(ctxt->inputTab[0])); 1725 if (ctxt->inputTab == NULL) { 1726 xmlErrMemory(ctxt, NULL); 1727 xmlFreeInputStream(value); 1728 ctxt->inputMax /= 2; 1729 value = NULL; 1730 return (-1); 1731 } 1732 } 1733 ctxt->inputTab[ctxt->inputNr] = value; 1734 ctxt->input = value; 1735 return (ctxt->inputNr++); 1736 } 1737 /** 1738 * inputPop: 1739 * @ctxt: an XML parser context 1740 * 1741 * Pops the top parser input from the input stack 1742 * 1743 * Returns the input just removed 1744 */ 1745 xmlParserInputPtr 1746 inputPop(xmlParserCtxtPtr ctxt) 1747 
{ 1748 xmlParserInputPtr ret; 1749 1750 if (ctxt == NULL) 1751 return(NULL); 1752 if (ctxt->inputNr <= 0) 1753 return (NULL); 1754 ctxt->inputNr--; 1755 if (ctxt->inputNr > 0) 1756 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1757 else 1758 ctxt->input = NULL; 1759 ret = ctxt->inputTab[ctxt->inputNr]; 1760 ctxt->inputTab[ctxt->inputNr] = NULL; 1761 return (ret); 1762 } 1763 /** 1764 * nodePush: 1765 * @ctxt: an XML parser context 1766 * @value: the element node 1767 * 1768 * Pushes a new element node on top of the node stack 1769 * 1770 * Returns -1 in case of error, the index in the stack otherwise 1771 */ 1772 int 1773 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1774 { 1775 if (ctxt == NULL) return(0); 1776 if (ctxt->nodeNr >= ctxt->nodeMax) { 1777 xmlNodePtr *tmp; 1778 1779 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1780 ctxt->nodeMax * 2 * 1781 sizeof(ctxt->nodeTab[0])); 1782 if (tmp == NULL) { 1783 xmlErrMemory(ctxt, NULL); 1784 return (-1); 1785 } 1786 ctxt->nodeTab = tmp; 1787 ctxt->nodeMax *= 2; 1788 } 1789 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1790 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1791 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1792 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1793 xmlParserMaxDepth); 1794 xmlHaltParser(ctxt); 1795 return(-1); 1796 } 1797 ctxt->nodeTab[ctxt->nodeNr] = value; 1798 ctxt->node = value; 1799 return (ctxt->nodeNr++); 1800 } 1801 1802 /** 1803 * nodePop: 1804 * @ctxt: an XML parser context 1805 * 1806 * Pops the top element node from the node stack 1807 * 1808 * Returns the node just removed 1809 */ 1810 xmlNodePtr 1811 nodePop(xmlParserCtxtPtr ctxt) 1812 { 1813 xmlNodePtr ret; 1814 1815 if (ctxt == NULL) return(NULL); 1816 if (ctxt->nodeNr <= 0) 1817 return (NULL); 1818 ctxt->nodeNr--; 1819 if (ctxt->nodeNr > 0) 1820 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1821 else 1822 ctxt->node = NULL; 1823 ret = ctxt->nodeTab[ctxt->nodeNr]; 1824 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1825 return (ret); 1826 } 1827 1828 #ifdef LIBXML_PUSH_ENABLED 1829 /** 1830 * nameNsPush: 1831 * @ctxt: an XML parser context 1832 * @value: the element name 1833 * @prefix: the element prefix 1834 * @URI: the element namespace name 1835 * 1836 * Pushes a new element name/prefix/URL on top of the name stack 1837 * 1838 * Returns -1 in case of error, the index in the stack otherwise 1839 */ 1840 static int 1841 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1842 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1843 { 1844 if (ctxt->nameNr >= ctxt->nameMax) { 1845 const xmlChar * *tmp; 1846 void **tmp2; 1847 ctxt->nameMax *= 2; 1848 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1849 ctxt->nameMax * 1850 sizeof(ctxt->nameTab[0])); 1851 if (tmp == NULL) { 1852 ctxt->nameMax /= 2; 1853 goto mem_error; 1854 } 1855 ctxt->nameTab = tmp; 1856 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1857 ctxt->nameMax * 3 * 1858 sizeof(ctxt->pushTab[0])); 1859 if (tmp2 == NULL) { 1860 ctxt->nameMax /= 2; 1861 goto mem_error; 1862 } 1863 ctxt->pushTab = tmp2; 1864 } 1865 ctxt->nameTab[ctxt->nameNr] = value; 1866 ctxt->name = value; 1867 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1868 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1869 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr; 1870 return (ctxt->nameNr++); 1871 mem_error: 1872 xmlErrMemory(ctxt, NULL); 1873 return (-1); 1874 } 1875 /** 1876 * nameNsPop: 1877 * @ctxt: an XML parser context 1878 * 1879 * Pops the top element/prefix/URI name from the name stack 1880 * 1881 * Returns the name just removed 1882 */ 1883 static const xmlChar * 1884 nameNsPop(xmlParserCtxtPtr ctxt) 1885 { 1886 const xmlChar *ret; 1887 1888 if (ctxt->nameNr <= 0) 1889 return (NULL); 1890 ctxt->nameNr--; 1891 if (ctxt->nameNr > 0) 1892 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1893 else 1894 ctxt->name = NULL; 1895 ret = ctxt->nameTab[ctxt->nameNr]; 
1896 ctxt->nameTab[ctxt->nameNr] = NULL; 1897 return (ret); 1898 } 1899 #endif /* LIBXML_PUSH_ENABLED */ 1900 1901 /** 1902 * namePush: 1903 * @ctxt: an XML parser context 1904 * @value: the element name 1905 * 1906 * Pushes a new element name on top of the name stack 1907 * 1908 * Returns -1 in case of error, the index in the stack otherwise 1909 */ 1910 int 1911 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1912 { 1913 if (ctxt == NULL) return (-1); 1914 1915 if (ctxt->nameNr >= ctxt->nameMax) { 1916 const xmlChar * *tmp; 1917 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1918 ctxt->nameMax * 2 * 1919 sizeof(ctxt->nameTab[0])); 1920 if (tmp == NULL) { 1921 goto mem_error; 1922 } 1923 ctxt->nameTab = tmp; 1924 ctxt->nameMax *= 2; 1925 } 1926 ctxt->nameTab[ctxt->nameNr] = value; 1927 ctxt->name = value; 1928 return (ctxt->nameNr++); 1929 mem_error: 1930 xmlErrMemory(ctxt, NULL); 1931 return (-1); 1932 } 1933 /** 1934 * namePop: 1935 * @ctxt: an XML parser context 1936 * 1937 * Pops the top element name from the name stack 1938 * 1939 * Returns the name just removed 1940 */ 1941 const xmlChar * 1942 namePop(xmlParserCtxtPtr ctxt) 1943 { 1944 const xmlChar *ret; 1945 1946 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1947 return (NULL); 1948 ctxt->nameNr--; 1949 if (ctxt->nameNr > 0) 1950 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1951 else 1952 ctxt->name = NULL; 1953 ret = ctxt->nameTab[ctxt->nameNr]; 1954 ctxt->nameTab[ctxt->nameNr] = NULL; 1955 return (ret); 1956 } 1957 1958 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1959 if (ctxt->spaceNr >= ctxt->spaceMax) { 1960 int *tmp; 1961 1962 ctxt->spaceMax *= 2; 1963 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1964 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1965 if (tmp == NULL) { 1966 xmlErrMemory(ctxt, NULL); 1967 ctxt->spaceMax /=2; 1968 return(-1); 1969 } 1970 ctxt->spaceTab = tmp; 1971 } 1972 ctxt->spaceTab[ctxt->spaceNr] = val; 1973 ctxt->space = 
&ctxt->spaceTab[ctxt->spaceNr]; 1974 return(ctxt->spaceNr++); 1975 } 1976 1977 static int spacePop(xmlParserCtxtPtr ctxt) { 1978 int ret; 1979 if (ctxt->spaceNr <= 0) return(0); 1980 ctxt->spaceNr--; 1981 if (ctxt->spaceNr > 0) 1982 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1983 else 1984 ctxt->space = &ctxt->spaceTab[0]; 1985 ret = ctxt->spaceTab[ctxt->spaceNr]; 1986 ctxt->spaceTab[ctxt->spaceNr] = -1; 1987 return(ret); 1988 } 1989 1990 /* 1991 * Macros for accessing the content. Those should be used only by the parser, 1992 * and not exported. 1993 * 1994 * Dirty macros, i.e. one often need to make assumption on the context to 1995 * use them 1996 * 1997 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1998 * To be used with extreme caution since operations consuming 1999 * characters may move the input buffer to a different location ! 2000 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 2001 * This should be used internally by the parser 2002 * only to compare to ASCII values otherwise it would break when 2003 * running with UTF-8 encoding. 2004 * RAW same as CUR but in the input buffer, bypass any token 2005 * extraction that may have been done 2006 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 2007 * to compare on ASCII based substring. 2008 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 2009 * strings without newlines within the parser. 2010 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2011 * defined char within the parser. 2012 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2013 * 2014 * NEXT Skip to the next character, this does the proper decoding 2015 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2016 * NEXTL(l) Skip the current unicode character of l xmlChars long. 
2017 * CUR_CHAR(l) returns the current unicode character (int), set l 2018 * to the number of xmlChars used for the encoding [0-5]. 2019 * CUR_SCHAR same but operate on a string instead of the context 2020 * COPY_BUF copy the current unicode char to the target buffer, increment 2021 * the index 2022 * GROW, SHRINK handling of input buffers 2023 */ 2024 2025 #define RAW (*ctxt->input->cur) 2026 #define CUR (*ctxt->input->cur) 2027 #define NXT(val) ctxt->input->cur[(val)] 2028 #define CUR_PTR ctxt->input->cur 2029 #define BASE_PTR ctxt->input->base 2030 2031 #define CMP4( s, c1, c2, c3, c4 ) \ 2032 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2033 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2034 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 2035 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2036 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2037 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2038 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2039 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2040 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2041 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2042 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2043 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2044 ((unsigned char *) s)[ 8 ] == c9 ) 2045 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2046 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2047 ((unsigned char *) s)[ 9 ] == c10 ) 2048 2049 #define SKIP(val) do { \ 2050 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2051 if (*ctxt->input->cur == 0) \ 2052 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2053 } while (0) 2054 2055 #define SKIPL(val) do { \ 2056 int skipl; \ 2057 for(skipl=0; skipl<val; skipl++) { \ 2058 if (*(ctxt->input->cur) == '\n') { \ 2059 ctxt->input->line++; ctxt->input->col = 1; \ 
2060 } else ctxt->input->col++; \ 2061 ctxt->nbChars++; \ 2062 ctxt->input->cur++; \ 2063 } \ 2064 if (*ctxt->input->cur == 0) \ 2065 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2066 } while (0) 2067 2068 #define SHRINK if ((ctxt->progressive == 0) && \ 2069 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2070 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2071 xmlSHRINK (ctxt); 2072 2073 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2074 xmlParserInputShrink(ctxt->input); 2075 if (*ctxt->input->cur == 0) 2076 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2077 } 2078 2079 #define GROW if ((ctxt->progressive == 0) && \ 2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2081 xmlGROW (ctxt); 2082 2083 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur; 2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base; 2086 2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) || 2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) && 2089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && 2090 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2092 xmlHaltParser(ctxt); 2093 return; 2094 } 2095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2096 if ((ctxt->input->cur > ctxt->input->end) || 2097 (ctxt->input->cur < ctxt->input->base)) { 2098 xmlHaltParser(ctxt); 2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); 2100 return; 2101 } 2102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0)) 2103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2104 } 2105 2106 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2107 2108 #define NEXT xmlNextChar(ctxt) 2109 2110 #define NEXT1 { \ 2111 ctxt->input->col++; \ 2112 ctxt->input->cur++; \ 2113 ctxt->nbChars++; \ 2114 if (*ctxt->input->cur == 0) \ 2115 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2116 } 
2117 2118 #define NEXTL(l) do { \ 2119 if (*(ctxt->input->cur) == '\n') { \ 2120 ctxt->input->line++; ctxt->input->col = 1; \ 2121 } else ctxt->input->col++; \ 2122 ctxt->input->cur += l; \ 2123 } while (0) 2124 2125 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2126 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2127 2128 #define COPY_BUF(l,b,i,v) \ 2129 if (l == 1) b[i++] = (xmlChar) v; \ 2130 else i += xmlCopyCharMultiByte(&b[i],v) 2131 2132 /** 2133 * xmlSkipBlankChars: 2134 * @ctxt: the XML parser context 2135 * 2136 * skip all blanks character found at that point in the input streams. 2137 * It pops up finished entities in the process if allowable at that point. 2138 * 2139 * Returns the number of space chars skipped 2140 */ 2141 2142 int 2143 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2144 int res = 0; 2145 2146 /* 2147 * It's Okay to use CUR/NEXT here since all the blanks are on 2148 * the ASCII range. 2149 */ 2150 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2151 const xmlChar *cur; 2152 /* 2153 * if we are in the document content, go really fast 2154 */ 2155 cur = ctxt->input->cur; 2156 while (IS_BLANK_CH(*cur)) { 2157 if (*cur == '\n') { 2158 ctxt->input->line++; ctxt->input->col = 1; 2159 } else { 2160 ctxt->input->col++; 2161 } 2162 cur++; 2163 res++; 2164 if (*cur == 0) { 2165 ctxt->input->cur = cur; 2166 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2167 cur = ctxt->input->cur; 2168 } 2169 } 2170 ctxt->input->cur = cur; 2171 } else { 2172 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1)); 2173 2174 while (1) { 2175 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */ 2176 NEXT; 2177 } else if (CUR == '%') { 2178 /* 2179 * Need to handle support of entities branching here 2180 */ 2181 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0)) 2182 break; 2183 xmlParsePEReference(ctxt); 2184 } else if (CUR == 0) { 2185 if (ctxt->inputNr <= 1) 2186 break; 2187 xmlPopInput(ctxt); 2188 } else { 2189 break; 
2190 } 2191 2192 /* 2193 * Also increase the counter when entering or exiting a PERef. 2194 * The spec says: "When a parameter-entity reference is recognized 2195 * in the DTD and included, its replacement text MUST be enlarged 2196 * by the attachment of one leading and one following space (#x20) 2197 * character." 2198 */ 2199 res++; 2200 } 2201 } 2202 return(res); 2203 } 2204 2205 /************************************************************************ 2206 * * 2207 * Commodity functions to handle entities * 2208 * * 2209 ************************************************************************/ 2210 2211 /** 2212 * xmlPopInput: 2213 * @ctxt: an XML parser context 2214 * 2215 * xmlPopInput: the current input pointed by ctxt->input came to an end 2216 * pop it and return the next char. 2217 * 2218 * Returns the current xmlChar in the parser context 2219 */ 2220 xmlChar 2221 xmlPopInput(xmlParserCtxtPtr ctxt) { 2222 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2223 if (xmlParserDebugEntities) 2224 xmlGenericError(xmlGenericErrorContext, 2225 "Popping input %d\n", ctxt->inputNr); 2226 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) && 2227 (ctxt->instate != XML_PARSER_EOF)) 2228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2229 "Unfinished entity outside the DTD"); 2230 xmlFreeInputStream(inputPop(ctxt)); 2231 if (*ctxt->input->cur == 0) 2232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2233 return(CUR); 2234 } 2235 2236 /** 2237 * xmlPushInput: 2238 * @ctxt: an XML parser context 2239 * @input: an XML parser input fragment (entity, XML fragment ...). 2240 * 2241 * xmlPushInput: switch to a new input stream which is stacked on top 2242 * of the previous one(s). 
2243 * Returns -1 in case of error or the index in the input stack 2244 */ 2245 int 2246 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2247 int ret; 2248 if (input == NULL) return(-1); 2249 2250 if (xmlParserDebugEntities) { 2251 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2252 xmlGenericError(xmlGenericErrorContext, 2253 "%s(%d): ", ctxt->input->filename, 2254 ctxt->input->line); 2255 xmlGenericError(xmlGenericErrorContext, 2256 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2257 } 2258 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2259 (ctxt->inputNr > 1024)) { 2260 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2261 while (ctxt->inputNr > 1) 2262 xmlFreeInputStream(inputPop(ctxt)); 2263 return(-1); 2264 } 2265 ret = inputPush(ctxt, input); 2266 if (ctxt->instate == XML_PARSER_EOF) 2267 return(-1); 2268 GROW; 2269 return(ret); 2270 } 2271 2272 /** 2273 * xmlParseCharRef: 2274 * @ctxt: an XML parser context 2275 * 2276 * parse Reference declarations 2277 * 2278 * [66] CharRef ::= '&#' [0-9]+ ';' | 2279 * '&#x' [0-9a-fA-F]+ ';' 2280 * 2281 * [ WFC: Legal Character ] 2282 * Characters referred to using character references must match the 2283 * production for Char. 
2284 * 2285 * Returns the value parsed (as an int), 0 in case of error 2286 */ 2287 int 2288 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2289 unsigned int val = 0; 2290 int count = 0; 2291 unsigned int outofrange = 0; 2292 2293 /* 2294 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2295 */ 2296 if ((RAW == '&') && (NXT(1) == '#') && 2297 (NXT(2) == 'x')) { 2298 SKIP(3); 2299 GROW; 2300 while (RAW != ';') { /* loop blocked by count */ 2301 if (count++ > 20) { 2302 count = 0; 2303 GROW; 2304 if (ctxt->instate == XML_PARSER_EOF) 2305 return(0); 2306 } 2307 if ((RAW >= '0') && (RAW <= '9')) 2308 val = val * 16 + (CUR - '0'); 2309 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2310 val = val * 16 + (CUR - 'a') + 10; 2311 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2312 val = val * 16 + (CUR - 'A') + 10; 2313 else { 2314 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2315 val = 0; 2316 break; 2317 } 2318 if (val > 0x10FFFF) 2319 outofrange = val; 2320 2321 NEXT; 2322 count++; 2323 } 2324 if (RAW == ';') { 2325 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2326 ctxt->input->col++; 2327 ctxt->nbChars ++; 2328 ctxt->input->cur++; 2329 } 2330 } else if ((RAW == '&') && (NXT(1) == '#')) { 2331 SKIP(2); 2332 GROW; 2333 while (RAW != ';') { /* loop blocked by count */ 2334 if (count++ > 20) { 2335 count = 0; 2336 GROW; 2337 if (ctxt->instate == XML_PARSER_EOF) 2338 return(0); 2339 } 2340 if ((RAW >= '0') && (RAW <= '9')) 2341 val = val * 10 + (CUR - '0'); 2342 else { 2343 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2344 val = 0; 2345 break; 2346 } 2347 if (val > 0x10FFFF) 2348 outofrange = val; 2349 2350 NEXT; 2351 count++; 2352 } 2353 if (RAW == ';') { 2354 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2355 ctxt->input->col++; 2356 ctxt->nbChars ++; 2357 ctxt->input->cur++; 2358 } 2359 } else { 2360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2361 } 2362 2363 /* 2364 * [ WFC: 
Legal Character ] 2365 * Characters referred to using character references must match the 2366 * production for Char. 2367 */ 2368 if ((IS_CHAR(val) && (outofrange == 0))) { 2369 return(val); 2370 } else { 2371 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2372 "xmlParseCharRef: invalid xmlChar value %d\n", 2373 val); 2374 } 2375 return(0); 2376 } 2377 2378 /** 2379 * xmlParseStringCharRef: 2380 * @ctxt: an XML parser context 2381 * @str: a pointer to an index in the string 2382 * 2383 * parse Reference declarations, variant parsing from a string rather 2384 * than an an input flow. 2385 * 2386 * [66] CharRef ::= '&#' [0-9]+ ';' | 2387 * '&#x' [0-9a-fA-F]+ ';' 2388 * 2389 * [ WFC: Legal Character ] 2390 * Characters referred to using character references must match the 2391 * production for Char. 2392 * 2393 * Returns the value parsed (as an int), 0 in case of error, str will be 2394 * updated to the current value of the index 2395 */ 2396 static int 2397 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2398 const xmlChar *ptr; 2399 xmlChar cur; 2400 unsigned int val = 0; 2401 unsigned int outofrange = 0; 2402 2403 if ((str == NULL) || (*str == NULL)) return(0); 2404 ptr = *str; 2405 cur = *ptr; 2406 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2407 ptr += 3; 2408 cur = *ptr; 2409 while (cur != ';') { /* Non input consuming loop */ 2410 if ((cur >= '0') && (cur <= '9')) 2411 val = val * 16 + (cur - '0'); 2412 else if ((cur >= 'a') && (cur <= 'f')) 2413 val = val * 16 + (cur - 'a') + 10; 2414 else if ((cur >= 'A') && (cur <= 'F')) 2415 val = val * 16 + (cur - 'A') + 10; 2416 else { 2417 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2418 val = 0; 2419 break; 2420 } 2421 if (val > 0x10FFFF) 2422 outofrange = val; 2423 2424 ptr++; 2425 cur = *ptr; 2426 } 2427 if (cur == ';') 2428 ptr++; 2429 } else if ((cur == '&') && (ptr[1] == '#')){ 2430 ptr += 2; 2431 cur = *ptr; 2432 while (cur != ';') { /* Non input consuming loops */ 2433 
if ((cur >= '0') && (cur <= '9')) 2434 val = val * 10 + (cur - '0'); 2435 else { 2436 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2437 val = 0; 2438 break; 2439 } 2440 if (val > 0x10FFFF) 2441 outofrange = val; 2442 2443 ptr++; 2444 cur = *ptr; 2445 } 2446 if (cur == ';') 2447 ptr++; 2448 } else { 2449 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2450 return(0); 2451 } 2452 *str = ptr; 2453 2454 /* 2455 * [ WFC: Legal Character ] 2456 * Characters referred to using character references must match the 2457 * production for Char. 2458 */ 2459 if ((IS_CHAR(val) && (outofrange == 0))) { 2460 return(val); 2461 } else { 2462 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2463 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2464 val); 2465 } 2466 return(0); 2467 } 2468 2469 /** 2470 * xmlParserHandlePEReference: 2471 * @ctxt: the parser context 2472 * 2473 * [69] PEReference ::= '%' Name ';' 2474 * 2475 * [ WFC: No Recursion ] 2476 * A parsed entity must not contain a recursive 2477 * reference to itself, either directly or indirectly. 2478 * 2479 * [ WFC: Entity Declared ] 2480 * In a document without any DTD, a document with only an internal DTD 2481 * subset which contains no parameter entity references, or a document 2482 * with "standalone='yes'", ... ... The declaration of a parameter 2483 * entity must precede any reference to it... 2484 * 2485 * [ VC: Entity Declared ] 2486 * In a document with an external subset or external parameter entities 2487 * with "standalone='no'", ... ... The declaration of a parameter entity 2488 * must precede any reference to it... 2489 * 2490 * [ WFC: In DTD ] 2491 * Parameter-entity references may only appear in the DTD. 2492 * NOTE: misleading but this is handled. 2493 * 2494 * A PEReference may have been detected in the current input stream 2495 * the handling is done accordingly to 2496 * http://www.w3.org/TR/REC-xml#entproc 2497 * i.e. 
2498 * - Included in literal in entity values 2499 * - Included as Parameter Entity reference within DTDs 2500 */ 2501 void 2502 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2503 switch(ctxt->instate) { 2504 case XML_PARSER_CDATA_SECTION: 2505 return; 2506 case XML_PARSER_COMMENT: 2507 return; 2508 case XML_PARSER_START_TAG: 2509 return; 2510 case XML_PARSER_END_TAG: 2511 return; 2512 case XML_PARSER_EOF: 2513 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2514 return; 2515 case XML_PARSER_PROLOG: 2516 case XML_PARSER_START: 2517 case XML_PARSER_MISC: 2518 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2519 return; 2520 case XML_PARSER_ENTITY_DECL: 2521 case XML_PARSER_CONTENT: 2522 case XML_PARSER_ATTRIBUTE_VALUE: 2523 case XML_PARSER_PI: 2524 case XML_PARSER_SYSTEM_LITERAL: 2525 case XML_PARSER_PUBLIC_LITERAL: 2526 /* we just ignore it there */ 2527 return; 2528 case XML_PARSER_EPILOG: 2529 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2530 return; 2531 case XML_PARSER_ENTITY_VALUE: 2532 /* 2533 * NOTE: in the case of entity values, we don't do the 2534 * substitution here since we need the literal 2535 * entity value to be able to save the internal 2536 * subset of the document. 2537 * This will be handled by xmlStringDecodeEntities 2538 */ 2539 return; 2540 case XML_PARSER_DTD: 2541 /* 2542 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2543 * In the internal DTD subset, parameter-entity references 2544 * can occur only where markup declarations can occur, not 2545 * within markup declarations. 2546 * In that case this is handled in xmlParseMarkupDecl 2547 */ 2548 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2549 return; 2550 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2551 return; 2552 break; 2553 case XML_PARSER_IGNORE: 2554 return; 2555 } 2556 2557 xmlParsePEReference(ctxt); 2558 } 2559 2560 /* 2561 * Macro used to grow the current buffer. 
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 * The new size is twice the old one plus n; both a size wrap-around and
 * a failed xmlRealloc() jump to mem_error with the old buffer untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}

/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions. Decoding stops at @len bytes, at a 0 character or at
 * one of the end markers, whichever comes first.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned when @ctxt or @str is NULL,
 *      when @len is negative, when the nesting depth limit is exceeded,
 *      or on a memory or decoding error.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
	return(NULL);
    last = str + len;

    /*
     * Recursion guard: depth above 40 is an error unless XML_PARSE_HUGE
     * is set, depth above 1024 is always an error.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 1024)) {
	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
	   (c != end2) && (c != end3)) {

	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /* character reference: append the decoded code point */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val == 0)
                goto int_error;
	    COPY_BUF(0,buffer,nbchars,val);
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    /* general entity reference */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: only the first content char is copied */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
                    goto int_error;
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
		/* decode the entity content recursively, then append it */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL)
                    goto int_error;

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    } else if (ent != NULL) {
		/* entity without content: emit "&name;" unchanged */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    /* parameter entity reference */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEreferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		/* also taken when ent->content is still NULL at this point */
		if (rep == NULL)
                    goto int_error;
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    }
	} else {
	    /* ordinary character: copy it and step over its l bytes */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    /* allocation failure: report it, then share the cleanup below */
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions. Same as xmlStringLenDecodeEntities() with the length
 * taken from xmlStrlen(@str).
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !
 */
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
		        xmlChar end, xmlChar  end2, xmlChar end3) {
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
           end, end2, end3));
}

/************************************************************************
 *									*
 *		Commodity functions, cleanup needed ?			*
 *									*
 ************************************************************************/

/**
 * areBlanks:
 * @ctxt:  an XML parser context
 * @str:  a xmlChar *
 * @len:  the size of @str
 * @blank_chars: we know the chars are blanks
 *
 * Is this a sequence of blank chars that one can ignore ?
 *
 * Returns 1 if ignorable 0 otherwise.
2808 */ 2809 2810 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2811 int blank_chars) { 2812 int i, ret; 2813 xmlNodePtr lastChild; 2814 2815 /* 2816 * Don't spend time trying to differentiate them, the same callback is 2817 * used ! 2818 */ 2819 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2820 return(0); 2821 2822 /* 2823 * Check for xml:space value. 2824 */ 2825 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2826 (*(ctxt->space) == -2)) 2827 return(0); 2828 2829 /* 2830 * Check that the string is made of blanks 2831 */ 2832 if (blank_chars == 0) { 2833 for (i = 0;i < len;i++) 2834 if (!(IS_BLANK_CH(str[i]))) return(0); 2835 } 2836 2837 /* 2838 * Look if the element is mixed content in the DTD if available 2839 */ 2840 if (ctxt->node == NULL) return(0); 2841 if (ctxt->myDoc != NULL) { 2842 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2843 if (ret == 0) return(1); 2844 if (ret == 1) return(0); 2845 } 2846 2847 /* 2848 * Otherwise, heuristic :-\ 2849 */ 2850 if ((RAW != '<') && (RAW != 0xD)) return(0); 2851 if ((ctxt->node->children == NULL) && 2852 (RAW == '<') && (NXT(1) == '/')) return(0); 2853 2854 lastChild = xmlGetLastChild(ctxt->node); 2855 if (lastChild == NULL) { 2856 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2857 (ctxt->node->content != NULL)) return(0); 2858 } else if (xmlNodeIsText(lastChild)) 2859 return(0); 2860 else if ((ctxt->node->children != NULL) && 2861 (xmlNodeIsText(ctxt->node->children))) 2862 return(0); 2863 return(1); 2864 } 2865 2866 /************************************************************************ 2867 * * 2868 * Extra stuff for namespace support * 2869 * Relates to http://www.w3.org/TR/WD-xml-names * 2870 * * 2871 ************************************************************************/ 2872 2873 /** 2874 * xmlSplitQName: 2875 * @ctxt: an XML parser context 2876 * @name: an XML parser context 2877 * @prefix: a xmlChar ** 2878 * 2879 * parse an UTF8 encoded XML 
 * qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. Both strings are newly allocated. NULL is
 *   returned when @prefix or @name is NULL or on allocation failure.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    /* short names are collected on the stack, long ones move to buffer */
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
	return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
	return(xmlStrdup(name));

    /*
     * First part: everything up to the first ':' or the end of the
     * string. cur always points one past the character held in c.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
	buf[len++] = c;
	c = *cur++;
    }
    if (len >= max) {
	/*
	 * Okay someone managed to make a huge name, so he's ready to pay
	 * for the processing speed.
	 */
	max = len * 2;

	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	if (buffer == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    return(NULL);
	}
	memcpy(buffer, buf, len);
	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
	    /* keep 10 bytes of slack, doubling the buffer when needed */
	    if (len + 10 > max) {
	        xmlChar *tmp;

		max *= 2;
		tmp = (xmlChar *) xmlRealloc(buffer,
						max * sizeof(xmlChar));
		if (tmp == NULL) {
		    xmlFree(buffer);
		    xmlErrMemory(ctxt, NULL);
		    return(NULL);
		}
		buffer = tmp;
	    }
	    buffer[len++] = c;
	    c = *cur++;
	}
	buffer[len] = 0;
    }

    /* a name ending with ':' is returned whole, without any prefix */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
	    xmlFree(buffer);
	*prefix = NULL;
	return(xmlStrdup(name));
    }

    /* ret holds the first part; the heap buffer, if any, is handed over */
    if (buffer == NULL)
	ret = xmlStrndup(buf, len);
    else {
	ret = buffer;
	buffer = NULL;
	max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
	/* the first part was the prefix; now collect the local part */
	c = *cur;
        *prefix = ret;
	/*
	 * NOTE(review): looks unreachable, a name ending with ':' already
	 * returned above - confirm.
	 */
	if (c == 0) {
	    return(xmlStrndup(BAD_CAST "", 0));
	}
	len = 0;

	/*
	 * Check that the first character is proper to start
	 * a new name
	 */
	if (!(((c >= 0x61) && (c <= 0x7A)) ||
	      ((c >= 0x41) && (c <= 0x5A)) ||
	      (c == '_') || (c == ':'))) {
	    int l;
	    int first = CUR_SCHAR(cur, l);

	    /* reported, but the split still proceeds */
	    if (!IS_LETTER(first) && (first != '_')) {
		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
			    "Name %s is not XML Namespace compliant\n",
				  name);
	    }
	}
	cur++;

	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
	    buf[len++] = c;
	    c = *cur++;
	}
	if (len >= max) {
	    /*
	     * Okay someone managed to make a huge name, so he's ready to pay
	     * for the processing speed.
	     */
	    max = len * 2;

	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	    if (buffer == NULL) {
	        xmlErrMemory(ctxt, NULL);
		return(NULL);
	    }
	    memcpy(buffer, buf, len);
	    while (c != 0) { /* tested bigname2.xml */
		if (len + 10 > max) {
		    xmlChar *tmp;

		    max *= 2;
		    tmp = (xmlChar *) xmlRealloc(buffer,
						    max * sizeof(xmlChar));
		    if (tmp == NULL) {
			xmlErrMemory(ctxt, NULL);
			xmlFree(buffer);
			return(NULL);
		    }
		    buffer = tmp;
		}
		buffer[len++] = c;
		c = *cur++;
	    }
	    buffer[len] = 0;
	}

	if (buffer == NULL)
	    ret = xmlStrndup(buf, len);
	else {
	    ret = buffer;
	}
    }

    return(ret);
}

/************************************************************************
 *									*
 *			The parser itself				*
 *	Relates to http://www.w3.org/TR/REC-xml				*
 *									*
 ************************************************************************/

/************************************************************************
 *									*
 *	Routines to parse Name, NCName and NmToken			*
 *									*
 ************************************************************************/
#ifdef DEBUG
/* call counters for the name parsing routines, DEBUG builds only */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif

/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in the Revision 5 of XML-1.0
 * They correspond to the modified production [4] and the new production [4a]
 * changes in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
3072 * We still keep compatibility to pre-revision5 parsing semantic if the 3073 * new XML_PARSE_OLD10 option is given to the parser. 3074 */ 3075 static int 3076 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3077 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3078 /* 3079 * Use the new checks of production [4] [4a] amd [5] of the 3080 * Update 5 of XML-1.0 3081 */ 3082 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3083 (((c >= 'a') && (c <= 'z')) || 3084 ((c >= 'A') && (c <= 'Z')) || 3085 (c == '_') || (c == ':') || 3086 ((c >= 0xC0) && (c <= 0xD6)) || 3087 ((c >= 0xD8) && (c <= 0xF6)) || 3088 ((c >= 0xF8) && (c <= 0x2FF)) || 3089 ((c >= 0x370) && (c <= 0x37D)) || 3090 ((c >= 0x37F) && (c <= 0x1FFF)) || 3091 ((c >= 0x200C) && (c <= 0x200D)) || 3092 ((c >= 0x2070) && (c <= 0x218F)) || 3093 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3094 ((c >= 0x3001) && (c <= 0xD7FF)) || 3095 ((c >= 0xF900) && (c <= 0xFDCF)) || 3096 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3097 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3098 return(1); 3099 } else { 3100 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3101 return(1); 3102 } 3103 return(0); 3104 } 3105 3106 static int 3107 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3108 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3109 /* 3110 * Use the new checks of production [4] [4a] amd [5] of the 3111 * Update 5 of XML-1.0 3112 */ 3113 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3114 (((c >= 'a') && (c <= 'z')) || 3115 ((c >= 'A') && (c <= 'Z')) || 3116 ((c >= '0') && (c <= '9')) || /* !start */ 3117 (c == '_') || (c == ':') || 3118 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3119 ((c >= 0xC0) && (c <= 0xD6)) || 3120 ((c >= 0xD8) && (c <= 0xF6)) || 3121 ((c >= 0xF8) && (c <= 0x2FF)) || 3122 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3123 ((c >= 0x370) && (c <= 0x37D)) || 3124 ((c >= 0x37F) && (c <= 0x1FFF)) || 3125 ((c >= 0x200C) && (c <= 0x200D)) || 3126 ((c >= 0x203F) && (c <= 0x2040)) || /* !start 
*/ 3127 ((c >= 0x2070) && (c <= 0x218F)) || 3128 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3129 ((c >= 0x3001) && (c <= 0xD7FF)) || 3130 ((c >= 0xF900) && (c <= 0xFDCF)) || 3131 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3132 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3133 return(1); 3134 } else { 3135 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3136 (c == '.') || (c == '-') || 3137 (c == '_') || (c == ':') || 3138 (IS_COMBINING(c)) || 3139 (IS_EXTENDER(c))) 3140 return(1); 3141 } 3142 return(0); 3143 } 3144 3145 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3146 int *len, int *alloc, int normalize); 3147 3148 static const xmlChar * 3149 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3150 int len = 0, l; 3151 int c; 3152 int count = 0; 3153 3154 #ifdef DEBUG 3155 nbParseNameComplex++; 3156 #endif 3157 3158 /* 3159 * Handler for more complex cases 3160 */ 3161 GROW; 3162 if (ctxt->instate == XML_PARSER_EOF) 3163 return(NULL); 3164 c = CUR_CHAR(l); 3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3166 /* 3167 * Use the new checks of production [4] [4a] amd [5] of the 3168 * Update 5 of XML-1.0 3169 */ 3170 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3171 (!(((c >= 'a') && (c <= 'z')) || 3172 ((c >= 'A') && (c <= 'Z')) || 3173 (c == '_') || (c == ':') || 3174 ((c >= 0xC0) && (c <= 0xD6)) || 3175 ((c >= 0xD8) && (c <= 0xF6)) || 3176 ((c >= 0xF8) && (c <= 0x2FF)) || 3177 ((c >= 0x370) && (c <= 0x37D)) || 3178 ((c >= 0x37F) && (c <= 0x1FFF)) || 3179 ((c >= 0x200C) && (c <= 0x200D)) || 3180 ((c >= 0x2070) && (c <= 0x218F)) || 3181 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3182 ((c >= 0x3001) && (c <= 0xD7FF)) || 3183 ((c >= 0xF900) && (c <= 0xFDCF)) || 3184 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3185 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3186 return(NULL); 3187 } 3188 len += l; 3189 NEXTL(l); 3190 c = CUR_CHAR(l); 3191 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3192 (((c >= 'a') && (c <= 'z')) || 3193 ((c >= 'A') && (c <= 'Z')) || 3194 
((c >= '0') && (c <= '9')) || /* !start */ 3195 (c == '_') || (c == ':') || 3196 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3197 ((c >= 0xC0) && (c <= 0xD6)) || 3198 ((c >= 0xD8) && (c <= 0xF6)) || 3199 ((c >= 0xF8) && (c <= 0x2FF)) || 3200 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3201 ((c >= 0x370) && (c <= 0x37D)) || 3202 ((c >= 0x37F) && (c <= 0x1FFF)) || 3203 ((c >= 0x200C) && (c <= 0x200D)) || 3204 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3205 ((c >= 0x2070) && (c <= 0x218F)) || 3206 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3207 ((c >= 0x3001) && (c <= 0xD7FF)) || 3208 ((c >= 0xF900) && (c <= 0xFDCF)) || 3209 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3210 ((c >= 0x10000) && (c <= 0xEFFFF)) 3211 )) { 3212 if (count++ > XML_PARSER_CHUNK_SIZE) { 3213 count = 0; 3214 GROW; 3215 if (ctxt->instate == XML_PARSER_EOF) 3216 return(NULL); 3217 } 3218 len += l; 3219 NEXTL(l); 3220 c = CUR_CHAR(l); 3221 } 3222 } else { 3223 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3224 (!IS_LETTER(c) && (c != '_') && 3225 (c != ':'))) { 3226 return(NULL); 3227 } 3228 len += l; 3229 NEXTL(l); 3230 c = CUR_CHAR(l); 3231 3232 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3233 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3234 (c == '.') || (c == '-') || 3235 (c == '_') || (c == ':') || 3236 (IS_COMBINING(c)) || 3237 (IS_EXTENDER(c)))) { 3238 if (count++ > XML_PARSER_CHUNK_SIZE) { 3239 count = 0; 3240 GROW; 3241 if (ctxt->instate == XML_PARSER_EOF) 3242 return(NULL); 3243 } 3244 len += l; 3245 NEXTL(l); 3246 c = CUR_CHAR(l); 3247 } 3248 } 3249 if ((len > XML_MAX_NAME_LENGTH) && 3250 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3251 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3252 return(NULL); 3253 } 3254 if (ctxt->input->cur - ctxt->input->base < len) { 3255 /* 3256 * There were a couple of bugs where PERefs lead to to a change 3257 * of the buffer. Check the buffer size to avoid passing an invalid 3258 * pointer to xmlDictLookup. 
3259 */ 3260 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 3261 "unexpected change of input buffer"); 3262 return (NULL); 3263 } 3264 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3265 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3266 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3267 } 3268 3269 /** 3270 * xmlParseName: 3271 * @ctxt: an XML parser context 3272 * 3273 * parse an XML name. 3274 * 3275 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3276 * CombiningChar | Extender 3277 * 3278 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3279 * 3280 * [6] Names ::= Name (#x20 Name)* 3281 * 3282 * Returns the Name parsed or NULL 3283 */ 3284 3285 const xmlChar * 3286 xmlParseName(xmlParserCtxtPtr ctxt) { 3287 const xmlChar *in; 3288 const xmlChar *ret; 3289 int count = 0; 3290 3291 GROW; 3292 3293 #ifdef DEBUG 3294 nbParseName++; 3295 #endif 3296 3297 /* 3298 * Accelerator for simple ASCII names 3299 */ 3300 in = ctxt->input->cur; 3301 if (((*in >= 0x61) && (*in <= 0x7A)) || 3302 ((*in >= 0x41) && (*in <= 0x5A)) || 3303 (*in == '_') || (*in == ':')) { 3304 in++; 3305 while (((*in >= 0x61) && (*in <= 0x7A)) || 3306 ((*in >= 0x41) && (*in <= 0x5A)) || 3307 ((*in >= 0x30) && (*in <= 0x39)) || 3308 (*in == '_') || (*in == '-') || 3309 (*in == ':') || (*in == '.')) 3310 in++; 3311 if ((*in > 0) && (*in < 0x80)) { 3312 count = in - ctxt->input->cur; 3313 if ((count > XML_MAX_NAME_LENGTH) && 3314 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3316 return(NULL); 3317 } 3318 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3319 ctxt->input->cur = in; 3320 ctxt->nbChars += count; 3321 ctxt->input->col += count; 3322 if (ret == NULL) 3323 xmlErrMemory(ctxt, NULL); 3324 return(ret); 3325 } 3326 } 3327 /* accelerator for special cases */ 3328 return(xmlParseNameComplex(ctxt)); 3329 } 3330 3331 static const xmlChar * 3332 
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3333 int len = 0, l; 3334 int c; 3335 int count = 0; 3336 size_t startPosition = 0; 3337 3338 #ifdef DEBUG 3339 nbParseNCNameComplex++; 3340 #endif 3341 3342 /* 3343 * Handler for more complex cases 3344 */ 3345 GROW; 3346 startPosition = CUR_PTR - BASE_PTR; 3347 c = CUR_CHAR(l); 3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3349 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3350 return(NULL); 3351 } 3352 3353 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3354 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3355 if (count++ > XML_PARSER_CHUNK_SIZE) { 3356 if ((len > XML_MAX_NAME_LENGTH) && 3357 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3358 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3359 return(NULL); 3360 } 3361 count = 0; 3362 GROW; 3363 if (ctxt->instate == XML_PARSER_EOF) 3364 return(NULL); 3365 } 3366 len += l; 3367 NEXTL(l); 3368 c = CUR_CHAR(l); 3369 if (c == 0) { 3370 count = 0; 3371 /* 3372 * when shrinking to extend the buffer we really need to preserve 3373 * the part of the name we already parsed. Hence rolling back 3374 * by current lenght. 3375 */ 3376 ctxt->input->cur -= l; 3377 GROW; 3378 ctxt->input->cur += l; 3379 if (ctxt->instate == XML_PARSER_EOF) 3380 return(NULL); 3381 c = CUR_CHAR(l); 3382 } 3383 } 3384 if ((len > XML_MAX_NAME_LENGTH) && 3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3387 return(NULL); 3388 } 3389 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); 3390 } 3391 3392 /** 3393 * xmlParseNCName: 3394 * @ctxt: an XML parser context 3395 * @len: length of the string parsed 3396 * 3397 * parse an XML name. 3398 * 3399 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3400 * CombiningChar | Extender 3401 * 3402 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3403 * 3404 * Returns the Name parsed or NULL 3405 */ 3406 3407 static const xmlChar * 3408 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3409 const xmlChar *in, *e; 3410 const xmlChar *ret; 3411 int count = 0; 3412 3413 #ifdef DEBUG 3414 nbParseNCName++; 3415 #endif 3416 3417 /* 3418 * Accelerator for simple ASCII names 3419 */ 3420 in = ctxt->input->cur; 3421 e = ctxt->input->end; 3422 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3423 ((*in >= 0x41) && (*in <= 0x5A)) || 3424 (*in == '_')) && (in < e)) { 3425 in++; 3426 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3427 ((*in >= 0x41) && (*in <= 0x5A)) || 3428 ((*in >= 0x30) && (*in <= 0x39)) || 3429 (*in == '_') || (*in == '-') || 3430 (*in == '.')) && (in < e)) 3431 in++; 3432 if (in >= e) 3433 goto complex; 3434 if ((*in > 0) && (*in < 0x80)) { 3435 count = in - ctxt->input->cur; 3436 if ((count > XML_MAX_NAME_LENGTH) && 3437 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3438 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3439 return(NULL); 3440 } 3441 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3442 ctxt->input->cur = in; 3443 ctxt->nbChars += count; 3444 ctxt->input->col += count; 3445 if (ret == NULL) { 3446 xmlErrMemory(ctxt, NULL); 3447 } 3448 return(ret); 3449 } 3450 } 3451 complex: 3452 return(xmlParseNCNameComplex(ctxt)); 3453 } 3454 3455 /** 3456 * xmlParseNameAndCompare: 3457 * @ctxt: an XML parser context 3458 * 3459 * parse an XML name and compares for match 3460 * (specialized for endtag parsing) 3461 * 3462 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3463 * and the name for mismatch 3464 */ 3465 3466 static const xmlChar * 3467 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3468 register const xmlChar *cmp = other; 3469 register const xmlChar *in; 3470 const xmlChar *ret; 3471 3472 GROW; 3473 if (ctxt->instate == XML_PARSER_EOF) 3474 return(NULL); 
3475 3476 in = ctxt->input->cur; 3477 while (*in != 0 && *in == *cmp) { 3478 ++in; 3479 ++cmp; 3480 ctxt->input->col++; 3481 } 3482 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3483 /* success */ 3484 ctxt->input->cur = in; 3485 return (const xmlChar*) 1; 3486 } 3487 /* failure (or end of input buffer), check with full function */ 3488 ret = xmlParseName (ctxt); 3489 /* strings coming from the dictionary direct compare possible */ 3490 if (ret == other) { 3491 return (const xmlChar*) 1; 3492 } 3493 return ret; 3494 } 3495 3496 /** 3497 * xmlParseStringName: 3498 * @ctxt: an XML parser context 3499 * @str: a pointer to the string pointer (IN/OUT) 3500 * 3501 * parse an XML name. 3502 * 3503 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3504 * CombiningChar | Extender 3505 * 3506 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3507 * 3508 * [6] Names ::= Name (#x20 Name)* 3509 * 3510 * Returns the Name parsed or NULL. The @str pointer 3511 * is updated to the current location in the string. 3512 */ 3513 3514 static xmlChar * 3515 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3516 xmlChar buf[XML_MAX_NAMELEN + 5]; 3517 const xmlChar *cur = *str; 3518 int len = 0, l; 3519 int c; 3520 3521 #ifdef DEBUG 3522 nbParseStringName++; 3523 #endif 3524 3525 c = CUR_SCHAR(cur, l); 3526 if (!xmlIsNameStartChar(ctxt, c)) { 3527 return(NULL); 3528 } 3529 3530 COPY_BUF(l,buf,len,c); 3531 cur += l; 3532 c = CUR_SCHAR(cur, l); 3533 while (xmlIsNameChar(ctxt, c)) { 3534 COPY_BUF(l,buf,len,c); 3535 cur += l; 3536 c = CUR_SCHAR(cur, l); 3537 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3538 /* 3539 * Okay someone managed to make a huge name, so he's ready to pay 3540 * for the processing speed. 
3541 */ 3542 xmlChar *buffer; 3543 int max = len * 2; 3544 3545 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3546 if (buffer == NULL) { 3547 xmlErrMemory(ctxt, NULL); 3548 return(NULL); 3549 } 3550 memcpy(buffer, buf, len); 3551 while (xmlIsNameChar(ctxt, c)) { 3552 if (len + 10 > max) { 3553 xmlChar *tmp; 3554 3555 if ((len > XML_MAX_NAME_LENGTH) && 3556 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3557 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3558 xmlFree(buffer); 3559 return(NULL); 3560 } 3561 max *= 2; 3562 tmp = (xmlChar *) xmlRealloc(buffer, 3563 max * sizeof(xmlChar)); 3564 if (tmp == NULL) { 3565 xmlErrMemory(ctxt, NULL); 3566 xmlFree(buffer); 3567 return(NULL); 3568 } 3569 buffer = tmp; 3570 } 3571 COPY_BUF(l,buffer,len,c); 3572 cur += l; 3573 c = CUR_SCHAR(cur, l); 3574 } 3575 buffer[len] = 0; 3576 *str = cur; 3577 return(buffer); 3578 } 3579 } 3580 if ((len > XML_MAX_NAME_LENGTH) && 3581 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3582 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3583 return(NULL); 3584 } 3585 *str = cur; 3586 return(xmlStrndup(buf, len)); 3587 } 3588 3589 /** 3590 * xmlParseNmtoken: 3591 * @ctxt: an XML parser context 3592 * 3593 * parse an XML Nmtoken. 
3594 * 3595 * [7] Nmtoken ::= (NameChar)+ 3596 * 3597 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3598 * 3599 * Returns the Nmtoken parsed or NULL 3600 */ 3601 3602 xmlChar * 3603 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3604 xmlChar buf[XML_MAX_NAMELEN + 5]; 3605 int len = 0, l; 3606 int c; 3607 int count = 0; 3608 3609 #ifdef DEBUG 3610 nbParseNmToken++; 3611 #endif 3612 3613 GROW; 3614 if (ctxt->instate == XML_PARSER_EOF) 3615 return(NULL); 3616 c = CUR_CHAR(l); 3617 3618 while (xmlIsNameChar(ctxt, c)) { 3619 if (count++ > XML_PARSER_CHUNK_SIZE) { 3620 count = 0; 3621 GROW; 3622 } 3623 COPY_BUF(l,buf,len,c); 3624 NEXTL(l); 3625 c = CUR_CHAR(l); 3626 if (c == 0) { 3627 count = 0; 3628 GROW; 3629 if (ctxt->instate == XML_PARSER_EOF) 3630 return(NULL); 3631 c = CUR_CHAR(l); 3632 } 3633 if (len >= XML_MAX_NAMELEN) { 3634 /* 3635 * Okay someone managed to make a huge token, so he's ready to pay 3636 * for the processing speed. 3637 */ 3638 xmlChar *buffer; 3639 int max = len * 2; 3640 3641 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3642 if (buffer == NULL) { 3643 xmlErrMemory(ctxt, NULL); 3644 return(NULL); 3645 } 3646 memcpy(buffer, buf, len); 3647 while (xmlIsNameChar(ctxt, c)) { 3648 if (count++ > XML_PARSER_CHUNK_SIZE) { 3649 count = 0; 3650 GROW; 3651 if (ctxt->instate == XML_PARSER_EOF) { 3652 xmlFree(buffer); 3653 return(NULL); 3654 } 3655 } 3656 if (len + 10 > max) { 3657 xmlChar *tmp; 3658 3659 if ((max > XML_MAX_NAME_LENGTH) && 3660 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3661 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3662 xmlFree(buffer); 3663 return(NULL); 3664 } 3665 max *= 2; 3666 tmp = (xmlChar *) xmlRealloc(buffer, 3667 max * sizeof(xmlChar)); 3668 if (tmp == NULL) { 3669 xmlErrMemory(ctxt, NULL); 3670 xmlFree(buffer); 3671 return(NULL); 3672 } 3673 buffer = tmp; 3674 } 3675 COPY_BUF(l,buffer,len,c); 3676 NEXTL(l); 3677 c = CUR_CHAR(l); 3678 } 3679 buffer[len] = 0; 3680 return(buffer); 3681 } 3682 } 3683 if (len == 0) 
3684 return(NULL); 3685 if ((len > XML_MAX_NAME_LENGTH) && 3686 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3687 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3688 return(NULL); 3689 } 3690 return(xmlStrndup(buf, len)); 3691 } 3692 3693 /** 3694 * xmlParseEntityValue: 3695 * @ctxt: an XML parser context 3696 * @orig: if non-NULL store a copy of the original entity value 3697 * 3698 * parse a value for ENTITY declarations 3699 * 3700 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3701 * "'" ([^%&'] | PEReference | Reference)* "'" 3702 * 3703 * Returns the EntityValue parsed with reference substituted or NULL 3704 */ 3705 3706 xmlChar * 3707 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3708 xmlChar *buf = NULL; 3709 int len = 0; 3710 int size = XML_PARSER_BUFFER_SIZE; 3711 int c, l; 3712 xmlChar stop; 3713 xmlChar *ret = NULL; 3714 const xmlChar *cur = NULL; 3715 xmlParserInputPtr input; 3716 3717 if (RAW == '"') stop = '"'; 3718 else if (RAW == '\'') stop = '\''; 3719 else { 3720 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3721 return(NULL); 3722 } 3723 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3724 if (buf == NULL) { 3725 xmlErrMemory(ctxt, NULL); 3726 return(NULL); 3727 } 3728 3729 /* 3730 * The content of the entity definition is copied in a buffer. 3731 */ 3732 3733 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3734 input = ctxt->input; 3735 GROW; 3736 if (ctxt->instate == XML_PARSER_EOF) 3737 goto error; 3738 NEXT; 3739 c = CUR_CHAR(l); 3740 /* 3741 * NOTE: 4.4.5 Included in Literal 3742 * When a parameter entity reference appears in a literal entity 3743 * value, ... a single or double quote character in the replacement 3744 * text is always treated as a normal data character and will not 3745 * terminate the literal. 
3746 * In practice it means we stop the loop only when back at parsing 3747 * the initial entity and the quote is found 3748 */ 3749 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3750 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3751 if (len + 5 >= size) { 3752 xmlChar *tmp; 3753 3754 size *= 2; 3755 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3756 if (tmp == NULL) { 3757 xmlErrMemory(ctxt, NULL); 3758 goto error; 3759 } 3760 buf = tmp; 3761 } 3762 COPY_BUF(l,buf,len,c); 3763 NEXTL(l); 3764 3765 GROW; 3766 c = CUR_CHAR(l); 3767 if (c == 0) { 3768 GROW; 3769 c = CUR_CHAR(l); 3770 } 3771 } 3772 buf[len] = 0; 3773 if (ctxt->instate == XML_PARSER_EOF) 3774 goto error; 3775 if (c != stop) { 3776 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3777 goto error; 3778 } 3779 NEXT; 3780 3781 /* 3782 * Raise problem w.r.t. '&' and '%' being used in non-entities 3783 * reference constructs. Note Charref will be handled in 3784 * xmlStringDecodeEntities() 3785 */ 3786 cur = buf; 3787 while (*cur != 0) { /* non input consuming */ 3788 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3789 xmlChar *name; 3790 xmlChar tmp = *cur; 3791 int nameOk = 0; 3792 3793 cur++; 3794 name = xmlParseStringName(ctxt, &cur); 3795 if (name != NULL) { 3796 nameOk = 1; 3797 xmlFree(name); 3798 } 3799 if ((nameOk == 0) || (*cur != ';')) { 3800 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3801 "EntityValue: '%c' forbidden except for entities references\n", 3802 tmp); 3803 goto error; 3804 } 3805 if ((tmp == '%') && (ctxt->inSubset == 1) && 3806 (ctxt->inputNr == 1)) { 3807 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3808 goto error; 3809 } 3810 if (*cur == 0) 3811 break; 3812 } 3813 cur++; 3814 } 3815 3816 /* 3817 * Then PEReference entities are substituted. 3818 * 3819 * NOTE: 4.4.7 Bypassed 3820 * When a general entity reference appears in the EntityValue in 3821 * an entity declaration, it is bypassed and left as is. 
3822 * so XML_SUBSTITUTE_REF is not set here. 3823 */ 3824 ++ctxt->depth; 3825 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3826 0, 0, 0); 3827 --ctxt->depth; 3828 if (orig != NULL) { 3829 *orig = buf; 3830 buf = NULL; 3831 } 3832 3833 error: 3834 if (buf != NULL) 3835 xmlFree(buf); 3836 return(ret); 3837 } 3838 3839 /** 3840 * xmlParseAttValueComplex: 3841 * @ctxt: an XML parser context 3842 * @len: the resulting attribute len 3843 * @normalize: wether to apply the inner normalization 3844 * 3845 * parse a value for an attribute, this is the fallback function 3846 * of xmlParseAttValue() when the attribute parsing requires handling 3847 * of non-ASCII characters, or normalization compaction. 3848 * 3849 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3850 */ 3851 static xmlChar * 3852 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3853 xmlChar limit = 0; 3854 xmlChar *buf = NULL; 3855 xmlChar *rep = NULL; 3856 size_t len = 0; 3857 size_t buf_size = 0; 3858 int c, l, in_space = 0; 3859 xmlChar *current = NULL; 3860 xmlEntityPtr ent; 3861 3862 if (NXT(0) == '"') { 3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3864 limit = '"'; 3865 NEXT; 3866 } else if (NXT(0) == '\'') { 3867 limit = '\''; 3868 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3869 NEXT; 3870 } else { 3871 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3872 return(NULL); 3873 } 3874 3875 /* 3876 * allocate a translation buffer. 3877 */ 3878 buf_size = XML_PARSER_BUFFER_SIZE; 3879 buf = (xmlChar *) xmlMallocAtomic(buf_size); 3880 if (buf == NULL) goto mem_error; 3881 3882 /* 3883 * OK loop until we reach one of the ending char or a size limit. 
3884 */ 3885 c = CUR_CHAR(l); 3886 while (((NXT(0) != limit) && /* checked */ 3887 (IS_CHAR(c)) && (c != '<')) && 3888 (ctxt->instate != XML_PARSER_EOF)) { 3889 /* 3890 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 3891 * special option is given 3892 */ 3893 if ((len > XML_MAX_TEXT_LENGTH) && 3894 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3895 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3896 "AttValue length too long\n"); 3897 goto mem_error; 3898 } 3899 if (c == 0) break; 3900 if (c == '&') { 3901 in_space = 0; 3902 if (NXT(1) == '#') { 3903 int val = xmlParseCharRef(ctxt); 3904 3905 if (val == '&') { 3906 if (ctxt->replaceEntities) { 3907 if (len + 10 > buf_size) { 3908 growBuffer(buf, 10); 3909 } 3910 buf[len++] = '&'; 3911 } else { 3912 /* 3913 * The reparsing will be done in xmlStringGetNodeList() 3914 * called by the attribute() function in SAX.c 3915 */ 3916 if (len + 10 > buf_size) { 3917 growBuffer(buf, 10); 3918 } 3919 buf[len++] = '&'; 3920 buf[len++] = '#'; 3921 buf[len++] = '3'; 3922 buf[len++] = '8'; 3923 buf[len++] = ';'; 3924 } 3925 } else if (val != 0) { 3926 if (len + 10 > buf_size) { 3927 growBuffer(buf, 10); 3928 } 3929 len += xmlCopyChar(0, &buf[len], val); 3930 } 3931 } else { 3932 ent = xmlParseEntityRef(ctxt); 3933 ctxt->nbentities++; 3934 if (ent != NULL) 3935 ctxt->nbentities += ent->owner; 3936 if ((ent != NULL) && 3937 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3938 if (len + 10 > buf_size) { 3939 growBuffer(buf, 10); 3940 } 3941 if ((ctxt->replaceEntities == 0) && 3942 (ent->content[0] == '&')) { 3943 buf[len++] = '&'; 3944 buf[len++] = '#'; 3945 buf[len++] = '3'; 3946 buf[len++] = '8'; 3947 buf[len++] = ';'; 3948 } else { 3949 buf[len++] = ent->content[0]; 3950 } 3951 } else if ((ent != NULL) && 3952 (ctxt->replaceEntities != 0)) { 3953 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3954 ++ctxt->depth; 3955 rep = xmlStringDecodeEntities(ctxt, ent->content, 3956 XML_SUBSTITUTE_REF, 3957 0, 0, 
0); 3958 --ctxt->depth; 3959 if (rep != NULL) { 3960 current = rep; 3961 while (*current != 0) { /* non input consuming */ 3962 if ((*current == 0xD) || (*current == 0xA) || 3963 (*current == 0x9)) { 3964 buf[len++] = 0x20; 3965 current++; 3966 } else 3967 buf[len++] = *current++; 3968 if (len + 10 > buf_size) { 3969 growBuffer(buf, 10); 3970 } 3971 } 3972 xmlFree(rep); 3973 rep = NULL; 3974 } 3975 } else { 3976 if (len + 10 > buf_size) { 3977 growBuffer(buf, 10); 3978 } 3979 if (ent->content != NULL) 3980 buf[len++] = ent->content[0]; 3981 } 3982 } else if (ent != NULL) { 3983 int i = xmlStrlen(ent->name); 3984 const xmlChar *cur = ent->name; 3985 3986 /* 3987 * This may look absurd but is needed to detect 3988 * entities problems 3989 */ 3990 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3991 (ent->content != NULL) && (ent->checked == 0)) { 3992 unsigned long oldnbent = ctxt->nbentities; 3993 3994 ++ctxt->depth; 3995 rep = xmlStringDecodeEntities(ctxt, ent->content, 3996 XML_SUBSTITUTE_REF, 0, 0, 0); 3997 --ctxt->depth; 3998 3999 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 4000 if (rep != NULL) { 4001 if (xmlStrchr(rep, '<')) 4002 ent->checked |= 1; 4003 xmlFree(rep); 4004 rep = NULL; 4005 } else { 4006 ent->content[0] = 0; 4007 } 4008 } 4009 4010 /* 4011 * Just output the reference 4012 */ 4013 buf[len++] = '&'; 4014 while (len + i + 10 > buf_size) { 4015 growBuffer(buf, i + 10); 4016 } 4017 for (;i > 0;i--) 4018 buf[len++] = *cur++; 4019 buf[len++] = ';'; 4020 } 4021 } 4022 } else { 4023 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4024 if ((len != 0) || (!normalize)) { 4025 if ((!normalize) || (!in_space)) { 4026 COPY_BUF(l,buf,len,0x20); 4027 while (len + 10 > buf_size) { 4028 growBuffer(buf, 10); 4029 } 4030 } 4031 in_space = 1; 4032 } 4033 } else { 4034 in_space = 0; 4035 COPY_BUF(l,buf,len,c); 4036 if (len + 10 > buf_size) { 4037 growBuffer(buf, 10); 4038 } 4039 } 4040 NEXTL(l); 4041 } 4042 GROW; 4043 c = CUR_CHAR(l); 4044 
} 4045 if (ctxt->instate == XML_PARSER_EOF) 4046 goto error; 4047 4048 if ((in_space) && (normalize)) { 4049 while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4050 } 4051 buf[len] = 0; 4052 if (RAW == '<') { 4053 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4054 } else if (RAW != limit) { 4055 if ((c != 0) && (!IS_CHAR(c))) { 4056 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4057 "invalid character in attribute value\n"); 4058 } else { 4059 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4060 "AttValue: ' expected\n"); 4061 } 4062 } else 4063 NEXT; 4064 4065 /* 4066 * There we potentially risk an overflow, don't allow attribute value of 4067 * length more than INT_MAX it is a very reasonnable assumption ! 4068 */ 4069 if (len >= INT_MAX) { 4070 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4071 "AttValue length too long\n"); 4072 goto mem_error; 4073 } 4074 4075 if (attlen != NULL) *attlen = (int) len; 4076 return(buf); 4077 4078 mem_error: 4079 xmlErrMemory(ctxt, NULL); 4080 error: 4081 if (buf != NULL) 4082 xmlFree(buf); 4083 if (rep != NULL) 4084 xmlFree(rep); 4085 return(NULL); 4086 } 4087 4088 /** 4089 * xmlParseAttValue: 4090 * @ctxt: an XML parser context 4091 * 4092 * parse a value for an attribute 4093 * Note: the parser won't do substitution of entities here, this 4094 * will be handled later in xmlStringGetNodeList 4095 * 4096 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4097 * "'" ([^<&'] | Reference)* "'" 4098 * 4099 * 3.3.3 Attribute-Value Normalization: 4100 * Before the value of an attribute is passed to the application or 4101 * checked for validity, the XML processor must normalize it as follows: 4102 * - a character reference is processed by appending the referenced 4103 * character to the attribute value 4104 * - an entity reference is processed by recursively processing the 4105 * replacement text of the entity 4106 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4107 * appending #x20 to the normalized 
value, except that only a single 4108 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4109 * parsed entity or the literal entity value of an internal parsed entity 4110 * - other characters are processed by appending them to the normalized value 4111 * If the declared value is not CDATA, then the XML processor must further 4112 * process the normalized attribute value by discarding any leading and 4113 * trailing space (#x20) characters, and by replacing sequences of space 4114 * (#x20) characters by a single space (#x20) character. 4115 * All attributes for which no declaration has been read should be treated 4116 * by a non-validating parser as if declared CDATA. 4117 * 4118 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4119 */ 4120 4121 4122 xmlChar * 4123 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4124 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4125 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4126 } 4127 4128 /** 4129 * xmlParseSystemLiteral: 4130 * @ctxt: an XML parser context 4131 * 4132 * parse an XML Literal 4133 * 4134 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4135 * 4136 * Returns the SystemLiteral parsed or NULL 4137 */ 4138 4139 xmlChar * 4140 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4141 xmlChar *buf = NULL; 4142 int len = 0; 4143 int size = XML_PARSER_BUFFER_SIZE; 4144 int cur, l; 4145 xmlChar stop; 4146 int state = ctxt->instate; 4147 int count = 0; 4148 4149 SHRINK; 4150 if (RAW == '"') { 4151 NEXT; 4152 stop = '"'; 4153 } else if (RAW == '\'') { 4154 NEXT; 4155 stop = '\''; 4156 } else { 4157 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4158 return(NULL); 4159 } 4160 4161 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4162 if (buf == NULL) { 4163 xmlErrMemory(ctxt, NULL); 4164 return(NULL); 4165 } 4166 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4167 cur = CUR_CHAR(l); 4168 while ((IS_CHAR(cur)) && (cur != stop)) { /* 
checked */ 4169 if (len + 5 >= size) { 4170 xmlChar *tmp; 4171 4172 if ((size > XML_MAX_NAME_LENGTH) && 4173 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4174 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4175 xmlFree(buf); 4176 ctxt->instate = (xmlParserInputState) state; 4177 return(NULL); 4178 } 4179 size *= 2; 4180 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4181 if (tmp == NULL) { 4182 xmlFree(buf); 4183 xmlErrMemory(ctxt, NULL); 4184 ctxt->instate = (xmlParserInputState) state; 4185 return(NULL); 4186 } 4187 buf = tmp; 4188 } 4189 count++; 4190 if (count > 50) { 4191 GROW; 4192 count = 0; 4193 if (ctxt->instate == XML_PARSER_EOF) { 4194 xmlFree(buf); 4195 return(NULL); 4196 } 4197 } 4198 COPY_BUF(l,buf,len,cur); 4199 NEXTL(l); 4200 cur = CUR_CHAR(l); 4201 if (cur == 0) { 4202 GROW; 4203 SHRINK; 4204 cur = CUR_CHAR(l); 4205 } 4206 } 4207 buf[len] = 0; 4208 ctxt->instate = (xmlParserInputState) state; 4209 if (!IS_CHAR(cur)) { 4210 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4211 } else { 4212 NEXT; 4213 } 4214 return(buf); 4215 } 4216 4217 /** 4218 * xmlParsePubidLiteral: 4219 * @ctxt: an XML parser context 4220 * 4221 * parse an XML public literal 4222 * 4223 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4224 * 4225 * Returns the PubidLiteral parsed or NULL. 
4226 */ 4227 4228 xmlChar * 4229 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4230 xmlChar *buf = NULL; 4231 int len = 0; 4232 int size = XML_PARSER_BUFFER_SIZE; 4233 xmlChar cur; 4234 xmlChar stop; 4235 int count = 0; 4236 xmlParserInputState oldstate = ctxt->instate; 4237 4238 SHRINK; 4239 if (RAW == '"') { 4240 NEXT; 4241 stop = '"'; 4242 } else if (RAW == '\'') { 4243 NEXT; 4244 stop = '\''; 4245 } else { 4246 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4247 return(NULL); 4248 } 4249 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4250 if (buf == NULL) { 4251 xmlErrMemory(ctxt, NULL); 4252 return(NULL); 4253 } 4254 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4255 cur = CUR; 4256 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4257 if (len + 1 >= size) { 4258 xmlChar *tmp; 4259 4260 if ((size > XML_MAX_NAME_LENGTH) && 4261 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4262 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4263 xmlFree(buf); 4264 return(NULL); 4265 } 4266 size *= 2; 4267 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4268 if (tmp == NULL) { 4269 xmlErrMemory(ctxt, NULL); 4270 xmlFree(buf); 4271 return(NULL); 4272 } 4273 buf = tmp; 4274 } 4275 buf[len++] = cur; 4276 count++; 4277 if (count > 50) { 4278 GROW; 4279 count = 0; 4280 if (ctxt->instate == XML_PARSER_EOF) { 4281 xmlFree(buf); 4282 return(NULL); 4283 } 4284 } 4285 NEXT; 4286 cur = CUR; 4287 if (cur == 0) { 4288 GROW; 4289 SHRINK; 4290 cur = CUR; 4291 } 4292 } 4293 buf[len] = 0; 4294 if (cur != stop) { 4295 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4296 } else { 4297 NEXT; 4298 } 4299 ctxt->instate = oldstate; 4300 return(buf); 4301 } 4302 4303 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4304 4305 /* 4306 * used for the test in the inner loop of the char data testing 4307 */ 4308 static const unsigned char test_char_data[256] = { 4309 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4310 0x00, 0x09, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4311 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4312 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4313 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4314 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4315 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4316 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4317 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4318 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4319 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4320 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4321 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4322 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4323 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4324 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4325 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4326 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4327 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4328 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4329 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4331 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4332 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4333 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4334 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4335 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4336 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4337 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4338 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4339 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4340 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4341 }; 4342 4343 /** 4344 * xmlParseCharData: 4345 * @ctxt: an XML parser context 4346 * @cdata: int indicating whether we are within a CDATA section 4347 * 4348 * parse a CharData section. 4349 * if we are within a CDATA section ']]>' marks an end of section. 
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position to restore before falling back to the complex parser */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* first pass: skip a run made only of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* the whole run up to the next tag was spaces/line feeds */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* second pass: skip every byte flagged in test_char_data */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside a CDATA section */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in + 1;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what was scanned so far straight from the input */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /* step over the CR of a CR/LF pair, resume at the LF */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* restore the last recorded position and take the slow path */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 *
xmlParseCharDataComplex:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * Parse a CharData section. This is the fallback function
 * of xmlParseCharData() when the parsing requires handling
 * of non-ASCII characters.
 *
 * Characters are accumulated in a local buffer and handed to the SAX
 * ignorableWhitespace() or characters() callback each time the buffer
 * fills up, and once more when the loop ends.
 */
static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
    /* Slack of 5 bytes beyond the flush threshold checked below. */
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
    int nbchar = 0;
    int cur, l;
    int count = 0;

    SHRINK;
    GROW;
    cur = CUR_CHAR(l);
    while ((cur != '<') && /* checked */
           (cur != '&') &&
           (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
        /*
         * "]]>" ends the data when inside a CDATA section; elsewhere it is
         * reported as an error and the characters are still copied.
         */
        if ((cur == ']') && (NXT(1) == ']') &&
            (NXT(2) == '>')) {
            if (cdata) break;
            else {
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
            }
        }
        COPY_BUF(l,buf,nbchar,cur);
        if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
            buf[nbchar] = 0;

            /*
             * OK the segment is to be consumed as chars.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                if (areBlanks(ctxt, buf, nbchar, 0)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       buf, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData, buf, nbchar);
                    if ((ctxt->sax->characters !=
                         ctxt->sax->ignorableWhitespace) &&
                        (*ctxt->space == -1))
                        *ctxt->space = -2;
                }
            }
            nbchar = 0;
            /* something really bad happened in the SAX callback */
            if (ctxt->instate != XML_PARSER_CONTENT)
                return;
        }
        /* Every 50 characters, call GROW and check for parser EOF. */
        count++;
        if (count > 50) {
            GROW;
            count = 0;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
        }
        NEXTL(l);
        cur = CUR_CHAR(l);
    }
    /* Flush whatever is left in the buffer. */
    if (nbchar != 0) {
        buf[nbchar] = 0;
        /*
         * OK the segment is to be consumed as chars.
         */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
            if (areBlanks(ctxt, buf, nbchar, 0)) {
                if (ctxt->sax->ignorableWhitespace != NULL)
                    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
            } else {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
                if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
                    (*ctxt->space == -1))
                    *ctxt->space = -2;
            }
        }
    }
    /* The loop stopped on something that is neither end of input nor a Char. */
    if ((cur != 0) && (!IS_CHAR(cur))) {
        /* Generate the error and skip the offending character */
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "PCDATA invalid Char value %d\n",
                          cur);
        NEXTL(l);
    }
}

/**
 * xmlParseExternalID:
 * @ctxt:  an XML parser context
 * @publicID:  a xmlChar** receiving PubidLiteral
 * @strict: indicate whether we should restrict parsing to only
 *          production [75], see NOTE below
 *
 * Parse an External ID or a Public ID
 *
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
 *       'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
 *
 * Returns the function returns SystemLiteral and in the second
 *                case publicID receives PubidLiteral, if strict is off
 *                it is possible to return NULL and have publicID set.
4613 */ 4614 4615 xmlChar * 4616 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4617 xmlChar *URI = NULL; 4618 4619 SHRINK; 4620 4621 *publicID = NULL; 4622 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4623 SKIP(6); 4624 if (SKIP_BLANKS == 0) { 4625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4626 "Space required after 'SYSTEM'\n"); 4627 } 4628 URI = xmlParseSystemLiteral(ctxt); 4629 if (URI == NULL) { 4630 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4631 } 4632 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4633 SKIP(6); 4634 if (SKIP_BLANKS == 0) { 4635 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4636 "Space required after 'PUBLIC'\n"); 4637 } 4638 *publicID = xmlParsePubidLiteral(ctxt); 4639 if (*publicID == NULL) { 4640 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4641 } 4642 if (strict) { 4643 /* 4644 * We don't handle [83] so "S SystemLiteral" is required. 4645 */ 4646 if (SKIP_BLANKS == 0) { 4647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4648 "Space required after the Public Identifier\n"); 4649 } 4650 } else { 4651 /* 4652 * We handle [83] so we return immediately, if 4653 * "S SystemLiteral" is not detected. We skip blanks if no 4654 * system literal was found, but this is harmless since we must 4655 * be at the end of a NotationDecl. 4656 */ 4657 if (SKIP_BLANKS == 0) return(NULL); 4658 if ((CUR != '\'') && (CUR != '"')) return(NULL); 4659 } 4660 URI = xmlParseSystemLiteral(ctxt); 4661 if (URI == NULL) { 4662 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4663 } 4664 } 4665 return(URI); 4666 } 4667 4668 /** 4669 * xmlParseCommentComplex: 4670 * @ctxt: an XML parser context 4671 * @buf: the already parsed part of the buffer 4672 * @len: number of bytes filles in the buffer 4673 * @size: allocated size of the buffer 4674 * 4675 * Skip an XML (SGML) comment <!-- .... --> 4676 * The spec says that "For compatibility, the string "--" (double-hyphen) 4677 * must not occur within comments. 
" 4678 * This is the slow routine in case the accelerator for ascii didn't work 4679 * 4680 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4681 */ 4682 static void 4683 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4684 size_t len, size_t size) { 4685 int q, ql; 4686 int r, rl; 4687 int cur, l; 4688 size_t count = 0; 4689 int inputid; 4690 4691 inputid = ctxt->input->id; 4692 4693 if (buf == NULL) { 4694 len = 0; 4695 size = XML_PARSER_BUFFER_SIZE; 4696 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4697 if (buf == NULL) { 4698 xmlErrMemory(ctxt, NULL); 4699 return; 4700 } 4701 } 4702 GROW; /* Assure there's enough input data */ 4703 q = CUR_CHAR(ql); 4704 if (q == 0) 4705 goto not_terminated; 4706 if (!IS_CHAR(q)) { 4707 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4708 "xmlParseComment: invalid xmlChar value %d\n", 4709 q); 4710 xmlFree (buf); 4711 return; 4712 } 4713 NEXTL(ql); 4714 r = CUR_CHAR(rl); 4715 if (r == 0) 4716 goto not_terminated; 4717 if (!IS_CHAR(r)) { 4718 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4719 "xmlParseComment: invalid xmlChar value %d\n", 4720 q); 4721 xmlFree (buf); 4722 return; 4723 } 4724 NEXTL(rl); 4725 cur = CUR_CHAR(l); 4726 if (cur == 0) 4727 goto not_terminated; 4728 while (IS_CHAR(cur) && /* checked */ 4729 ((cur != '>') || 4730 (r != '-') || (q != '-'))) { 4731 if ((r == '-') && (q == '-')) { 4732 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4733 } 4734 if ((len > XML_MAX_TEXT_LENGTH) && 4735 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4736 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4737 "Comment too big found", NULL); 4738 xmlFree (buf); 4739 return; 4740 } 4741 if (len + 5 >= size) { 4742 xmlChar *new_buf; 4743 size_t new_size; 4744 4745 new_size = size * 2; 4746 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4747 if (new_buf == NULL) { 4748 xmlFree (buf); 4749 xmlErrMemory(ctxt, NULL); 4750 return; 4751 } 4752 buf = new_buf; 4753 size = new_size; 4754 } 
4755 COPY_BUF(ql,buf,len,q); 4756 q = r; 4757 ql = rl; 4758 r = cur; 4759 rl = l; 4760 4761 count++; 4762 if (count > 50) { 4763 GROW; 4764 count = 0; 4765 if (ctxt->instate == XML_PARSER_EOF) { 4766 xmlFree(buf); 4767 return; 4768 } 4769 } 4770 NEXTL(l); 4771 cur = CUR_CHAR(l); 4772 if (cur == 0) { 4773 SHRINK; 4774 GROW; 4775 cur = CUR_CHAR(l); 4776 } 4777 } 4778 buf[len] = 0; 4779 if (cur == 0) { 4780 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4781 "Comment not terminated \n<!--%.50s\n", buf); 4782 } else if (!IS_CHAR(cur)) { 4783 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4784 "xmlParseComment: invalid xmlChar value %d\n", 4785 cur); 4786 } else { 4787 if (inputid != ctxt->input->id) { 4788 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4789 "Comment doesn't start and stop in the same" 4790 " entity\n"); 4791 } 4792 NEXT; 4793 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4794 (!ctxt->disableSAX)) 4795 ctxt->sax->comment(ctxt->userData, buf); 4796 } 4797 xmlFree(buf); 4798 return; 4799 not_terminated: 4800 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4801 "Comment not terminated\n", NULL); 4802 xmlFree(buf); 4803 return; 4804 } 4805 4806 /** 4807 * xmlParseComment: 4808 * @ctxt: an XML parser context 4809 * 4810 * Skip an XML (SGML) comment <!-- .... --> 4811 * The spec says that "For compatibility, the string "--" (double-hyphen) 4812 * must not occur within comments. " 4813 * 4814 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4815 */ 4816 void 4817 xmlParseComment(xmlParserCtxtPtr ctxt) { 4818 xmlChar *buf = NULL; 4819 size_t size = XML_PARSER_BUFFER_SIZE; 4820 size_t len = 0; 4821 xmlParserInputState state; 4822 const xmlChar *in; 4823 size_t nbchar = 0; 4824 int ccol; 4825 int inputid; 4826 4827 /* 4828 * Check that there is a comment right here. 
4829 */ 4830 if ((RAW != '<') || (NXT(1) != '!') || 4831 (NXT(2) != '-') || (NXT(3) != '-')) return; 4832 state = ctxt->instate; 4833 ctxt->instate = XML_PARSER_COMMENT; 4834 inputid = ctxt->input->id; 4835 SKIP(4); 4836 SHRINK; 4837 GROW; 4838 4839 /* 4840 * Accelerated common case where input don't need to be 4841 * modified before passing it to the handler. 4842 */ 4843 in = ctxt->input->cur; 4844 do { 4845 if (*in == 0xA) { 4846 do { 4847 ctxt->input->line++; ctxt->input->col = 1; 4848 in++; 4849 } while (*in == 0xA); 4850 } 4851 get_more: 4852 ccol = ctxt->input->col; 4853 while (((*in > '-') && (*in <= 0x7F)) || 4854 ((*in >= 0x20) && (*in < '-')) || 4855 (*in == 0x09)) { 4856 in++; 4857 ccol++; 4858 } 4859 ctxt->input->col = ccol; 4860 if (*in == 0xA) { 4861 do { 4862 ctxt->input->line++; ctxt->input->col = 1; 4863 in++; 4864 } while (*in == 0xA); 4865 goto get_more; 4866 } 4867 nbchar = in - ctxt->input->cur; 4868 /* 4869 * save current set of data 4870 */ 4871 if (nbchar > 0) { 4872 if ((ctxt->sax != NULL) && 4873 (ctxt->sax->comment != NULL)) { 4874 if (buf == NULL) { 4875 if ((*in == '-') && (in[1] == '-')) 4876 size = nbchar + 1; 4877 else 4878 size = XML_PARSER_BUFFER_SIZE + nbchar; 4879 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4880 if (buf == NULL) { 4881 xmlErrMemory(ctxt, NULL); 4882 ctxt->instate = state; 4883 return; 4884 } 4885 len = 0; 4886 } else if (len + nbchar + 1 >= size) { 4887 xmlChar *new_buf; 4888 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4889 new_buf = (xmlChar *) xmlRealloc(buf, 4890 size * sizeof(xmlChar)); 4891 if (new_buf == NULL) { 4892 xmlFree (buf); 4893 xmlErrMemory(ctxt, NULL); 4894 ctxt->instate = state; 4895 return; 4896 } 4897 buf = new_buf; 4898 } 4899 memcpy(&buf[len], ctxt->input->cur, nbchar); 4900 len += nbchar; 4901 buf[len] = 0; 4902 } 4903 } 4904 if ((len > XML_MAX_TEXT_LENGTH) && 4905 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4906 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4907 
"Comment too big found", NULL); 4908 xmlFree (buf); 4909 return; 4910 } 4911 ctxt->input->cur = in; 4912 if (*in == 0xA) { 4913 in++; 4914 ctxt->input->line++; ctxt->input->col = 1; 4915 } 4916 if (*in == 0xD) { 4917 in++; 4918 if (*in == 0xA) { 4919 ctxt->input->cur = in; 4920 in++; 4921 ctxt->input->line++; ctxt->input->col = 1; 4922 continue; /* while */ 4923 } 4924 in--; 4925 } 4926 SHRINK; 4927 GROW; 4928 if (ctxt->instate == XML_PARSER_EOF) { 4929 xmlFree(buf); 4930 return; 4931 } 4932 in = ctxt->input->cur; 4933 if (*in == '-') { 4934 if (in[1] == '-') { 4935 if (in[2] == '>') { 4936 if (ctxt->input->id != inputid) { 4937 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4938 "comment doesn't start and stop in the" 4939 " same entity\n"); 4940 } 4941 SKIP(3); 4942 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4943 (!ctxt->disableSAX)) { 4944 if (buf != NULL) 4945 ctxt->sax->comment(ctxt->userData, buf); 4946 else 4947 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4948 } 4949 if (buf != NULL) 4950 xmlFree(buf); 4951 if (ctxt->instate != XML_PARSER_EOF) 4952 ctxt->instate = state; 4953 return; 4954 } 4955 if (buf != NULL) { 4956 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4957 "Double hyphen within comment: " 4958 "<!--%.50s\n", 4959 buf); 4960 } else 4961 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4962 "Double hyphen within comment\n", NULL); 4963 in++; 4964 ctxt->input->col++; 4965 } 4966 in++; 4967 ctxt->input->col++; 4968 goto get_more; 4969 } 4970 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4971 xmlParseCommentComplex(ctxt, buf, len, size); 4972 ctxt->instate = state; 4973 return; 4974 } 4975 4976 4977 /** 4978 * xmlParsePITarget: 4979 * @ctxt: an XML parser context 4980 * 4981 * parse the name of a PI 4982 * 4983 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4984 * 4985 * Returns the PITarget name or NULL 4986 */ 4987 4988 const xmlChar * 4989 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4990 
const xmlChar *name; 4991 4992 name = xmlParseName(ctxt); 4993 if ((name != NULL) && 4994 ((name[0] == 'x') || (name[0] == 'X')) && 4995 ((name[1] == 'm') || (name[1] == 'M')) && 4996 ((name[2] == 'l') || (name[2] == 'L'))) { 4997 int i; 4998 if ((name[0] == 'x') && (name[1] == 'm') && 4999 (name[2] == 'l') && (name[3] == 0)) { 5000 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5001 "XML declaration allowed only at the start of the document\n"); 5002 return(name); 5003 } else if (name[3] == 0) { 5004 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5005 return(name); 5006 } 5007 for (i = 0;;i++) { 5008 if (xmlW3CPIs[i] == NULL) break; 5009 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5010 return(name); 5011 } 5012 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5013 "xmlParsePITarget: invalid name prefix 'xml'\n", 5014 NULL, NULL); 5015 } 5016 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5017 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5018 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5019 } 5020 return(name); 5021 } 5022 5023 #ifdef LIBXML_CATALOG_ENABLED 5024 /** 5025 * xmlParseCatalogPI: 5026 * @ctxt: an XML parser context 5027 * @catalog: the PI value string 5028 * 5029 * parse an XML Catalog Processing Instruction. 
5030 * 5031 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5032 * 5033 * Occurs only if allowed by the user and if happening in the Misc 5034 * part of the document before any doctype informations 5035 * This will add the given catalog to the parsing context in order 5036 * to be used if there is a resolution need further down in the document 5037 */ 5038 5039 static void 5040 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5041 xmlChar *URL = NULL; 5042 const xmlChar *tmp, *base; 5043 xmlChar marker; 5044 5045 tmp = catalog; 5046 while (IS_BLANK_CH(*tmp)) tmp++; 5047 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5048 goto error; 5049 tmp += 7; 5050 while (IS_BLANK_CH(*tmp)) tmp++; 5051 if (*tmp != '=') { 5052 return; 5053 } 5054 tmp++; 5055 while (IS_BLANK_CH(*tmp)) tmp++; 5056 marker = *tmp; 5057 if ((marker != '\'') && (marker != '"')) 5058 goto error; 5059 tmp++; 5060 base = tmp; 5061 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5062 if (*tmp == 0) 5063 goto error; 5064 URL = xmlStrndup(base, tmp - base); 5065 tmp++; 5066 while (IS_BLANK_CH(*tmp)) tmp++; 5067 if (*tmp != 0) 5068 goto error; 5069 5070 if (URL != NULL) { 5071 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5072 xmlFree(URL); 5073 } 5074 return; 5075 5076 error: 5077 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5078 "Catalog PI syntax error: %s\n", 5079 catalog, NULL); 5080 if (URL != NULL) 5081 xmlFree(URL); 5082 } 5083 #endif 5084 5085 /** 5086 * xmlParsePI: 5087 * @ctxt: an XML parser context 5088 * 5089 * parse an XML Processing Instruction. 5090 * 5091 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5092 * 5093 * The processing is transfered to SAX once parsed. 
*/

void
xmlParsePI(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t len = 0;
    size_t size = XML_PARSER_BUFFER_SIZE;
    int cur, l;
    const xmlChar *target;
    xmlParserInputState state;
    int count = 0;

    /* Nothing is done unless the input is positioned on "<?". */
    if ((RAW == '<') && (NXT(1) == '?')) {
        int inputid = ctxt->input->id;
        /* Saved so the previous state can be put back on exit. */
        state = ctxt->instate;
        ctxt->instate = XML_PARSER_PI;
        /*
         * this is a Processing Instruction.
         */
        SKIP(2);
        SHRINK;

        /*
         * Parse the target name and check for special support like
         * namespace.
         */
        target = xmlParsePITarget(ctxt);
        if (target != NULL) {
            /* PI without content: "<?target?>". */
            if ((RAW == '?') && (NXT(1) == '>')) {
                if (inputid != ctxt->input->id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "PI declaration doesn't start and stop in"
                                   " the same entity\n");
                }
                SKIP(2);

                /*
                 * SAX: PI detected.
                 */
                if ((ctxt->sax) && (!ctxt->disableSAX) &&
                    (ctxt->sax->processingInstruction != NULL))
                    ctxt->sax->processingInstruction(ctxt->userData,
                                                     target, NULL);
                if (ctxt->instate != XML_PARSER_EOF)
                    ctxt->instate = state;
                return;
            }
            buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
            if (buf == NULL) {
                xmlErrMemory(ctxt, NULL);
                ctxt->instate = state;
                return;
            }
            /* Missing blank after the target: reported, parsing goes on. */
            if (SKIP_BLANKS == 0) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
                          "ParsePI: PI %s space expected\n", target);
            }
            cur = CUR_CHAR(l);
            /* Accumulate the content until "?>" or an invalid/zero char. */
            while (IS_CHAR(cur) && /* checked */
                   ((cur != '?') || (NXT(1) != '>'))) {
                /* Keep a 5 byte margin before COPY_BUF appends cur. */
                if (len + 5 >= size) {
                    xmlChar *tmp;
                    size_t new_size = size * 2;
                    tmp = (xmlChar *) xmlRealloc(buf, new_size);
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buf);
                        ctxt->instate = state;
                        return;
                    }
                    buf = tmp;
                    size = new_size;
                }
                /*
                 * Every 50 characters: call GROW, check for parser EOF and
                 * enforce the size limit.
                 */
                count++;
                if (count > 50) {
                    GROW;
                    if (ctxt->instate == XML_PARSER_EOF) {
                        xmlFree(buf);
                        return;
                    }
                    count = 0;
                    if ((len > XML_MAX_TEXT_LENGTH) &&
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
                                          "PI %s too big found", target);
                        xmlFree(buf);
                        ctxt->instate = state;
                        return;
                    }
                }
                COPY_BUF(l,buf,len,cur);
                NEXTL(l);
                cur = CUR_CHAR(l);
                if (cur == 0) {
                    SHRINK;
                    GROW;
                    cur = CUR_CHAR(l);
                }
            }
            /* Same limit again, for data copied since the last check. */
            if ((len > XML_MAX_TEXT_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
                                  "PI %s too big found", target);
                xmlFree(buf);
                ctxt->instate = state;
                return;
            }
            buf[len] = 0;
            if (cur != '?') {
                xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
                      "ParsePI: PI %s never end ...\n", target);
            } else {
                if (inputid != ctxt->input->id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "PI declaration doesn't start and stop in"
                                   " the same entity\n");
                }
                SKIP(2);

#ifdef LIBXML_CATALOG_ENABLED
                /*
                 * A catalog PI is only honoured when the PI was met in the
                 * MISC or START state and the catalog defaults allow it.
                 */
                if (((state == XML_PARSER_MISC) ||
                     (state == XML_PARSER_START)) &&
                    (xmlStrEqual(target, XML_CATALOG_PI))) {
                    xmlCatalogAllow allow = xmlCatalogGetDefaults();
                    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
                        (allow == XML_CATA_ALLOW_ALL))
                        xmlParseCatalogPI(ctxt, buf);
                }
#endif


                /*
                 * SAX: PI detected.
                 */
                if ((ctxt->sax) && (!ctxt->disableSAX) &&
                    (ctxt->sax->processingInstruction != NULL))
                    ctxt->sax->processingInstruction(ctxt->userData,
                                                     target, buf);
            }
            xmlFree(buf);
        } else {
            xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
        }
        if (ctxt->instate != XML_PARSER_EOF)
            ctxt->instate = state;
    }
}

/**
 * xmlParseNotationDecl:
 * @ctxt:  an XML parser context
 *
 * parse a notation declaration
 *
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
 *
 * Hence there is actually 3 choices:
 *     'PUBLIC' S PubidLiteral
 *     'PUBLIC' S PubidLiteral S SystemLiteral
 * and 'SYSTEM' S SystemLiteral
 *
 * See the NOTE on xmlParseExternalID().
 */

void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlChar *Pubid;
    xmlChar *Systemid;

    /* Nothing is done unless the input is positioned on "<!NOTATION". */
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
        int inputid = ctxt->input->id;
        SHRINK;
        SKIP(10);
        if (SKIP_BLANKS == 0) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after '<!NOTATION'\n");
            return;
        }

        name = xmlParseName(ctxt);
        if (name == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
            return;
        }
        /* A colon in the name is reported but does not stop parsing. */
        if (xmlStrchr(name, ':') != NULL) {
            xmlNsErr(ctxt, XML_NS_ERR_COLON,
                     "colons are forbidden from notation names '%s'\n",
                     name, NULL, NULL);
        }
        if (SKIP_BLANKS == 0) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                     "Space required after the NOTATION name'\n");
            return;
        }

        /*
         * Parse the IDs.
         * strict is 0 so that a PublicID alone is accepted.
         */
        Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
        SKIP_BLANKS;

        if (RAW == '>') {
            if (inputid != ctxt->input->id) {
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                               "Notation declaration doesn't start and stop"
                               " in the same entity\n");
            }
            NEXT;
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                (ctxt->sax->notationDecl != NULL))
                ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
        } else {
            xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
        }
        /* Both literals are freed here, after the callback has run. */
        if (Systemid != NULL) xmlFree(Systemid);
        if (Pubid != NULL) xmlFree(Pubid);
    }
}

/**
 * xmlParseEntityDecl:
 * @ctxt:  an XML parser context
 *
 * parse <!ENTITY declarations
 *
 * [70] EntityDecl ::= GEDecl | PEDecl
 *
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
 *
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
 *
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
 *
 * [74] PEDef ::= EntityValue | ExternalID
 *
 * [76] NDataDecl ::= S 'NDATA' S Name
 *
 * [ VC: Notation Declared ]
 * The Name must match the declared name of a notation.
5334 */ 5335 5336 void 5337 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5338 const xmlChar *name = NULL; 5339 xmlChar *value = NULL; 5340 xmlChar *URI = NULL, *literal = NULL; 5341 const xmlChar *ndata = NULL; 5342 int isParameter = 0; 5343 xmlChar *orig = NULL; 5344 5345 /* GROW; done in the caller */ 5346 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5347 int inputid = ctxt->input->id; 5348 SHRINK; 5349 SKIP(8); 5350 if (SKIP_BLANKS == 0) { 5351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5352 "Space required after '<!ENTITY'\n"); 5353 } 5354 5355 if (RAW == '%') { 5356 NEXT; 5357 if (SKIP_BLANKS == 0) { 5358 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5359 "Space required after '%%'\n"); 5360 } 5361 isParameter = 1; 5362 } 5363 5364 name = xmlParseName(ctxt); 5365 if (name == NULL) { 5366 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5367 "xmlParseEntityDecl: no name\n"); 5368 return; 5369 } 5370 if (xmlStrchr(name, ':') != NULL) { 5371 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5372 "colons are forbidden from entities names '%s'\n", 5373 name, NULL, NULL); 5374 } 5375 if (SKIP_BLANKS == 0) { 5376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5377 "Space required after the entity name\n"); 5378 } 5379 5380 ctxt->instate = XML_PARSER_ENTITY_DECL; 5381 /* 5382 * handle the various case of definitions... 
5383 */ 5384 if (isParameter) { 5385 if ((RAW == '"') || (RAW == '\'')) { 5386 value = xmlParseEntityValue(ctxt, &orig); 5387 if (value) { 5388 if ((ctxt->sax != NULL) && 5389 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5390 ctxt->sax->entityDecl(ctxt->userData, name, 5391 XML_INTERNAL_PARAMETER_ENTITY, 5392 NULL, NULL, value); 5393 } 5394 } else { 5395 URI = xmlParseExternalID(ctxt, &literal, 1); 5396 if ((URI == NULL) && (literal == NULL)) { 5397 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5398 } 5399 if (URI) { 5400 xmlURIPtr uri; 5401 5402 uri = xmlParseURI((const char *) URI); 5403 if (uri == NULL) { 5404 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5405 "Invalid URI: %s\n", URI); 5406 /* 5407 * This really ought to be a well formedness error 5408 * but the XML Core WG decided otherwise c.f. issue 5409 * E26 of the XML erratas. 5410 */ 5411 } else { 5412 if (uri->fragment != NULL) { 5413 /* 5414 * Okay this is foolish to block those but not 5415 * invalid URIs. 5416 */ 5417 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5418 } else { 5419 if ((ctxt->sax != NULL) && 5420 (!ctxt->disableSAX) && 5421 (ctxt->sax->entityDecl != NULL)) 5422 ctxt->sax->entityDecl(ctxt->userData, name, 5423 XML_EXTERNAL_PARAMETER_ENTITY, 5424 literal, URI, NULL); 5425 } 5426 xmlFreeURI(uri); 5427 } 5428 } 5429 } 5430 } else { 5431 if ((RAW == '"') || (RAW == '\'')) { 5432 value = xmlParseEntityValue(ctxt, &orig); 5433 if ((ctxt->sax != NULL) && 5434 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5435 ctxt->sax->entityDecl(ctxt->userData, name, 5436 XML_INTERNAL_GENERAL_ENTITY, 5437 NULL, NULL, value); 5438 /* 5439 * For expat compatibility in SAX mode. 
5440 */ 5441 if ((ctxt->myDoc == NULL) || 5442 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5443 if (ctxt->myDoc == NULL) { 5444 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5445 if (ctxt->myDoc == NULL) { 5446 xmlErrMemory(ctxt, "New Doc failed"); 5447 return; 5448 } 5449 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5450 } 5451 if (ctxt->myDoc->intSubset == NULL) 5452 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5453 BAD_CAST "fake", NULL, NULL); 5454 5455 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5456 NULL, NULL, value); 5457 } 5458 } else { 5459 URI = xmlParseExternalID(ctxt, &literal, 1); 5460 if ((URI == NULL) && (literal == NULL)) { 5461 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5462 } 5463 if (URI) { 5464 xmlURIPtr uri; 5465 5466 uri = xmlParseURI((const char *)URI); 5467 if (uri == NULL) { 5468 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5469 "Invalid URI: %s\n", URI); 5470 /* 5471 * This really ought to be a well formedness error 5472 * but the XML Core WG decided otherwise c.f. issue 5473 * E26 of the XML erratas. 5474 */ 5475 } else { 5476 if (uri->fragment != NULL) { 5477 /* 5478 * Okay this is foolish to block those but not 5479 * invalid URIs. 
5480 */ 5481 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5482 } 5483 xmlFreeURI(uri); 5484 } 5485 } 5486 if ((RAW != '>') && (SKIP_BLANKS == 0)) { 5487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5488 "Space required before 'NDATA'\n"); 5489 } 5490 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5491 SKIP(5); 5492 if (SKIP_BLANKS == 0) { 5493 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5494 "Space required after 'NDATA'\n"); 5495 } 5496 ndata = xmlParseName(ctxt); 5497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5498 (ctxt->sax->unparsedEntityDecl != NULL)) 5499 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5500 literal, URI, ndata); 5501 } else { 5502 if ((ctxt->sax != NULL) && 5503 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5504 ctxt->sax->entityDecl(ctxt->userData, name, 5505 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5506 literal, URI, NULL); 5507 /* 5508 * For expat compatibility in SAX mode. 5509 * assuming the entity repalcement was asked for 5510 */ 5511 if ((ctxt->replaceEntities != 0) && 5512 ((ctxt->myDoc == NULL) || 5513 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5514 if (ctxt->myDoc == NULL) { 5515 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5516 if (ctxt->myDoc == NULL) { 5517 xmlErrMemory(ctxt, "New Doc failed"); 5518 return; 5519 } 5520 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5521 } 5522 5523 if (ctxt->myDoc->intSubset == NULL) 5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5525 BAD_CAST "fake", NULL, NULL); 5526 xmlSAX2EntityDecl(ctxt, name, 5527 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5528 literal, URI, NULL); 5529 } 5530 } 5531 } 5532 } 5533 if (ctxt->instate == XML_PARSER_EOF) 5534 goto done; 5535 SKIP_BLANKS; 5536 if (RAW != '>') { 5537 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5538 "xmlParseEntityDecl: entity %s not terminated\n", name); 5539 xmlHaltParser(ctxt); 5540 } else { 5541 if (inputid != ctxt->input->id) { 5542 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5543 "Entity declaration 
doesn't start and stop in" 5544 " the same entity\n"); 5545 } 5546 NEXT; 5547 } 5548 if (orig != NULL) { 5549 /* 5550 * Ugly mechanism to save the raw entity value. 5551 */ 5552 xmlEntityPtr cur = NULL; 5553 5554 if (isParameter) { 5555 if ((ctxt->sax != NULL) && 5556 (ctxt->sax->getParameterEntity != NULL)) 5557 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5558 } else { 5559 if ((ctxt->sax != NULL) && 5560 (ctxt->sax->getEntity != NULL)) 5561 cur = ctxt->sax->getEntity(ctxt->userData, name); 5562 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5563 cur = xmlSAX2GetEntity(ctxt, name); 5564 } 5565 } 5566 if ((cur != NULL) && (cur->orig == NULL)) { 5567 cur->orig = orig; 5568 orig = NULL; 5569 } 5570 } 5571 5572 done: 5573 if (value != NULL) xmlFree(value); 5574 if (URI != NULL) xmlFree(URI); 5575 if (literal != NULL) xmlFree(literal); 5576 if (orig != NULL) xmlFree(orig); 5577 } 5578 } 5579 5580 /** 5581 * xmlParseDefaultDecl: 5582 * @ctxt: an XML parser context 5583 * @value: Receive a possible fixed default value for the attribute 5584 * 5585 * Parse an attribute default declaration 5586 * 5587 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5588 * 5589 * [ VC: Required Attribute ] 5590 * if the default declaration is the keyword #REQUIRED, then the 5591 * attribute must be specified for all elements of the type in the 5592 * attribute-list declaration. 5593 * 5594 * [ VC: Attribute Default Legal ] 5595 * The declared default value must meet the lexical constraints of 5596 * the declared attribute type c.f. xmlValidateAttributeDecl() 5597 * 5598 * [ VC: Fixed Attribute Default ] 5599 * if an attribute has a default value declared with the #FIXED 5600 * keyword, instances of that attribute must match the default value. 5601 * 5602 * [ WFC: No < in Attribute Values ] 5603 * handled in xmlParseAttValue() 5604 * 5605 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5606 * or XML_ATTRIBUTE_FIXED. 
5607 */ 5608 5609 int 5610 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5611 int val; 5612 xmlChar *ret; 5613 5614 *value = NULL; 5615 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5616 SKIP(9); 5617 return(XML_ATTRIBUTE_REQUIRED); 5618 } 5619 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5620 SKIP(8); 5621 return(XML_ATTRIBUTE_IMPLIED); 5622 } 5623 val = XML_ATTRIBUTE_NONE; 5624 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5625 SKIP(6); 5626 val = XML_ATTRIBUTE_FIXED; 5627 if (SKIP_BLANKS == 0) { 5628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5629 "Space required after '#FIXED'\n"); 5630 } 5631 } 5632 ret = xmlParseAttValue(ctxt); 5633 ctxt->instate = XML_PARSER_DTD; 5634 if (ret == NULL) { 5635 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5636 "Attribute default value declaration error\n"); 5637 } else 5638 *value = ret; 5639 return(val); 5640 } 5641 5642 /** 5643 * xmlParseNotationType: 5644 * @ctxt: an XML parser context 5645 * 5646 * parse an Notation attribute type. 5647 * 5648 * Note: the leading 'NOTATION' S part has already being parsed... 5649 * 5650 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5651 * 5652 * [ VC: Notation Attributes ] 5653 * Values of this type must match one of the notation names included 5654 * in the declaration; all notation names in the declaration must be declared. 
5655 * 5656 * Returns: the notation attribute tree built while parsing 5657 */ 5658 5659 xmlEnumerationPtr 5660 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5661 const xmlChar *name; 5662 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5663 5664 if (RAW != '(') { 5665 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5666 return(NULL); 5667 } 5668 SHRINK; 5669 do { 5670 NEXT; 5671 SKIP_BLANKS; 5672 name = xmlParseName(ctxt); 5673 if (name == NULL) { 5674 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5675 "Name expected in NOTATION declaration\n"); 5676 xmlFreeEnumeration(ret); 5677 return(NULL); 5678 } 5679 tmp = ret; 5680 while (tmp != NULL) { 5681 if (xmlStrEqual(name, tmp->name)) { 5682 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5683 "standalone: attribute notation value token %s duplicated\n", 5684 name, NULL); 5685 if (!xmlDictOwns(ctxt->dict, name)) 5686 xmlFree((xmlChar *) name); 5687 break; 5688 } 5689 tmp = tmp->next; 5690 } 5691 if (tmp == NULL) { 5692 cur = xmlCreateEnumeration(name); 5693 if (cur == NULL) { 5694 xmlFreeEnumeration(ret); 5695 return(NULL); 5696 } 5697 if (last == NULL) ret = last = cur; 5698 else { 5699 last->next = cur; 5700 last = cur; 5701 } 5702 } 5703 SKIP_BLANKS; 5704 } while (RAW == '|'); 5705 if (RAW != ')') { 5706 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5707 xmlFreeEnumeration(ret); 5708 return(NULL); 5709 } 5710 NEXT; 5711 return(ret); 5712 } 5713 5714 /** 5715 * xmlParseEnumerationType: 5716 * @ctxt: an XML parser context 5717 * 5718 * parse an Enumeration attribute type. 5719 * 5720 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * Returns: the enumeration attribute tree built while parsing
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlChar *token;
    xmlEnumerationPtr head = NULL, tail = NULL, node, seen;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;
    do {
        /* Step over the '(' or '|' that introduced this token. */
        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            /* The tokens collected so far are handed back to the caller. */
            return(head);
        }

        /* Report a token already present in the list; it is not added twice. */
        for (seen = head; seen != NULL; seen = seen->next) {
            if (xmlStrEqual(token, seen->name)) {
                xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
          "standalone: attribute enumeration value token %s duplicated\n",
                                 token, NULL);
                if (!xmlDictOwns(ctxt->dict, token))
                    xmlFree(token);
                break;
            }
        }

        /* seen == NULL means the scan ran to the end: append a new node. */
        if (seen == NULL) {
            node = xmlCreateEnumeration(token);
            /* Release the token string unless the dictionary owns it. */
            if (!xmlDictOwns(ctxt->dict, token))
                xmlFree(token);
            if (node == NULL) {
                xmlFreeEnumeration(head);
                return(NULL);
            }
            if (tail == NULL)
                head = node;
            else
                tail->next = node;
            tail = node;
        }
        SKIP_BLANKS;
    } while (RAW == '|');

    if (RAW != ')') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
        return(head);
    }
    NEXT;
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')'
 *
 *
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
 */

int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
    /* Without the NOTATION keyword this is a plain enumeration. */
    if (!(CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'))) {
        *tree = xmlParseEnumerationType(ctxt);
        return((*tree == NULL) ? 0 : XML_ATTRIBUTE_ENUMERATION);
    }

    SKIP(8);
    if (SKIP_BLANKS == 0) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'NOTATION'\n");
        return(0);
    }
    *tree = xmlParseNotationType(ctxt);
    return((*tree == NULL) ? 0 : XML_ATTRIBUTE_NOTATION);
}

/**
 * xmlParseAttributeType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse the Attribute list def for an element
 *
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 *
 * [55] StringType ::= 'CDATA'
 *
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 *
 * Validity constraints for attribute values syntax are checked in
 * xmlValidateAttributeValue()
 *
 * [ VC: ID ]
 * Values of type ID must match the Name production. A name must not
 * appear more than once in an XML document as a value of this type;
 * i.e., ID values must uniquely identify the elements which bear them.
 *
 * [ VC: One ID per Element Type ]
 * No element type may have more than one ID attribute specified.
 *
 * [ VC: ID Attribute Default ]
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
 *
 * [ VC: IDREF ]
 * Values of type IDREF must match the Name production, and values
 * of type IDREFS must match Names; each IDREF Name must match the value
 * of an ID attribute on some element in the XML document; i.e. IDREF
 * values must match the value of some ID attribute.
5849 * 5850 * [ VC: Entity Name ] 5851 * Values of type ENTITY must match the Name production, values 5852 * of type ENTITIES must match Names; each Entity Name must match the 5853 * name of an unparsed entity declared in the DTD. 5854 * 5855 * [ VC: Name Token ] 5856 * Values of type NMTOKEN must match the Nmtoken production; values 5857 * of type NMTOKENS must match Nmtokens. 5858 * 5859 * Returns the attribute type 5860 */ 5861 int 5862 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5863 SHRINK; 5864 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5865 SKIP(5); 5866 return(XML_ATTRIBUTE_CDATA); 5867 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5868 SKIP(6); 5869 return(XML_ATTRIBUTE_IDREFS); 5870 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5871 SKIP(5); 5872 return(XML_ATTRIBUTE_IDREF); 5873 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5874 SKIP(2); 5875 return(XML_ATTRIBUTE_ID); 5876 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5877 SKIP(6); 5878 return(XML_ATTRIBUTE_ENTITY); 5879 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5880 SKIP(8); 5881 return(XML_ATTRIBUTE_ENTITIES); 5882 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5883 SKIP(8); 5884 return(XML_ATTRIBUTE_NMTOKENS); 5885 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5886 SKIP(7); 5887 return(XML_ATTRIBUTE_NMTOKEN); 5888 } 5889 return(xmlParseEnumeratedType(ctxt, tree)); 5890 } 5891 5892 /** 5893 * xmlParseAttributeListDecl: 5894 * @ctxt: an XML parser context 5895 * 5896 * : parse the Attribute list def for an element 5897 * 5898 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5899 * 5900 * [53] AttDef ::= S Name S AttType S DefaultDecl 5901 * 5902 */ 5903 void 5904 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5905 const xmlChar *elemName; 5906 const xmlChar *attrName; 5907 xmlEnumerationPtr tree; 5908 5909 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5910 int inputid = ctxt->input->id; 5911 5912 SKIP(9); 5913 if (SKIP_BLANKS == 0) { 5914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5915 "Space required after '<!ATTLIST'\n"); 5916 } 5917 elemName = xmlParseName(ctxt); 5918 if (elemName == NULL) { 5919 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5920 "ATTLIST: no name for Element\n"); 5921 return; 5922 } 5923 SKIP_BLANKS; 5924 GROW; 5925 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 5926 int type; 5927 int def; 5928 xmlChar *defaultValue = NULL; 5929 5930 GROW; 5931 tree = NULL; 5932 attrName = xmlParseName(ctxt); 5933 if (attrName == NULL) { 5934 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5935 "ATTLIST: no name for Attribute\n"); 5936 break; 5937 } 5938 GROW; 5939 if (SKIP_BLANKS == 0) { 5940 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5941 "Space required after the attribute name\n"); 5942 break; 5943 } 5944 5945 type = xmlParseAttributeType(ctxt, &tree); 5946 if (type <= 0) { 5947 break; 5948 } 5949 5950 GROW; 5951 if (SKIP_BLANKS == 0) { 5952 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5953 "Space required after the attribute type\n"); 5954 if (tree != NULL) 5955 xmlFreeEnumeration(tree); 5956 break; 5957 } 5958 5959 def = xmlParseDefaultDecl(ctxt, &defaultValue); 5960 if (def <= 0) { 5961 if (defaultValue != NULL) 5962 xmlFree(defaultValue); 5963 if (tree != NULL) 5964 xmlFreeEnumeration(tree); 5965 break; 5966 } 5967 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 5968 xmlAttrNormalizeSpace(defaultValue, defaultValue); 5969 5970 GROW; 5971 if (RAW != '>') { 5972 if (SKIP_BLANKS == 0) { 5973 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5974 "Space required after the 
attribute default value\n"); 5975 if (defaultValue != NULL) 5976 xmlFree(defaultValue); 5977 if (tree != NULL) 5978 xmlFreeEnumeration(tree); 5979 break; 5980 } 5981 } 5982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5983 (ctxt->sax->attributeDecl != NULL)) 5984 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 5985 type, def, defaultValue, tree); 5986 else if (tree != NULL) 5987 xmlFreeEnumeration(tree); 5988 5989 if ((ctxt->sax2) && (defaultValue != NULL) && 5990 (def != XML_ATTRIBUTE_IMPLIED) && 5991 (def != XML_ATTRIBUTE_REQUIRED)) { 5992 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 5993 } 5994 if (ctxt->sax2) { 5995 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 5996 } 5997 if (defaultValue != NULL) 5998 xmlFree(defaultValue); 5999 GROW; 6000 } 6001 if (RAW == '>') { 6002 if (inputid != ctxt->input->id) { 6003 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6004 "Attribute list declaration doesn't start and" 6005 " stop in the same entity\n"); 6006 } 6007 NEXT; 6008 } 6009 } 6010 } 6011 6012 /** 6013 * xmlParseElementMixedContentDecl: 6014 * @ctxt: an XML parser context 6015 * @inputchk: the input used for the current entity, needed for boundary checks 6016 * 6017 * parse the declaration for a Mixed Element content 6018 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6019 * 6020 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6021 * '(' S? '#PCDATA' S? ')' 6022 * 6023 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6024 * 6025 * [ VC: No Duplicate Types ] 6026 * The same name must not appear more than once in a single 6027 * mixed-content declaration. 
6028 * 6029 * returns: the list of the xmlElementContentPtr describing the element choices 6030 */ 6031 xmlElementContentPtr 6032 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6033 xmlElementContentPtr ret = NULL, cur = NULL, n; 6034 const xmlChar *elem = NULL; 6035 6036 GROW; 6037 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6038 SKIP(7); 6039 SKIP_BLANKS; 6040 SHRINK; 6041 if (RAW == ')') { 6042 if (ctxt->input->id != inputchk) { 6043 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6044 "Element content declaration doesn't start and" 6045 " stop in the same entity\n"); 6046 } 6047 NEXT; 6048 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6049 if (ret == NULL) 6050 return(NULL); 6051 if (RAW == '*') { 6052 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6053 NEXT; 6054 } 6055 return(ret); 6056 } 6057 if ((RAW == '(') || (RAW == '|')) { 6058 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6059 if (ret == NULL) return(NULL); 6060 } 6061 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6062 NEXT; 6063 if (elem == NULL) { 6064 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6065 if (ret == NULL) return(NULL); 6066 ret->c1 = cur; 6067 if (cur != NULL) 6068 cur->parent = ret; 6069 cur = ret; 6070 } else { 6071 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6072 if (n == NULL) return(NULL); 6073 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6074 if (n->c1 != NULL) 6075 n->c1->parent = n; 6076 cur->c2 = n; 6077 if (n != NULL) 6078 n->parent = cur; 6079 cur = n; 6080 } 6081 SKIP_BLANKS; 6082 elem = xmlParseName(ctxt); 6083 if (elem == NULL) { 6084 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6085 "xmlParseElementMixedContentDecl : Name expected\n"); 6086 xmlFreeDocElementContent(ctxt->myDoc, ret); 6087 return(NULL); 6088 } 6089 SKIP_BLANKS; 6090 GROW; 6091 } 6092 if ((RAW == ')') && 
(NXT(1) == '*')) { 6093 if (elem != NULL) { 6094 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6095 XML_ELEMENT_CONTENT_ELEMENT); 6096 if (cur->c2 != NULL) 6097 cur->c2->parent = cur; 6098 } 6099 if (ret != NULL) 6100 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6101 if (ctxt->input->id != inputchk) { 6102 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6103 "Element content declaration doesn't start and" 6104 " stop in the same entity\n"); 6105 } 6106 SKIP(2); 6107 } else { 6108 xmlFreeDocElementContent(ctxt->myDoc, ret); 6109 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6110 return(NULL); 6111 } 6112 6113 } else { 6114 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6115 } 6116 return(ret); 6117 } 6118 6119 /** 6120 * xmlParseElementChildrenContentDeclPriv: 6121 * @ctxt: an XML parser context 6122 * @inputchk: the input used for the current entity, needed for boundary checks 6123 * @depth: the level of recursion 6124 * 6125 * parse the declaration for a Mixed Element content 6126 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6127 * 6128 * 6129 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6130 * 6131 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6132 * 6133 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6134 * 6135 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6136 * 6137 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6138 * TODO Parameter-entity replacement text must be properly nested 6139 * with parenthesized groups. That is to say, if either of the 6140 * opening or closing parentheses in a choice, seq, or Mixed 6141 * construct is contained in the replacement text for a parameter 6142 * entity, both must be contained in the same replacement text. 
For 6143 * interoperability, if a parameter-entity reference appears in a 6144 * choice, seq, or Mixed construct, its replacement text should not 6145 * be empty, and neither the first nor last non-blank character of 6146 * the replacement text should be a connector (| or ,). 6147 * 6148 * Returns the tree of xmlElementContentPtr describing the element 6149 * hierarchy. 6150 */ 6151 static xmlElementContentPtr 6152 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6153 int depth) { 6154 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6155 const xmlChar *elem; 6156 xmlChar type = 0; 6157 6158 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6159 (depth > 2048)) { 6160 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6161 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6162 depth); 6163 return(NULL); 6164 } 6165 SKIP_BLANKS; 6166 GROW; 6167 if (RAW == '(') { 6168 int inputid = ctxt->input->id; 6169 6170 /* Recurse on first child */ 6171 NEXT; 6172 SKIP_BLANKS; 6173 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6174 depth + 1); 6175 SKIP_BLANKS; 6176 GROW; 6177 } else { 6178 elem = xmlParseName(ctxt); 6179 if (elem == NULL) { 6180 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6181 return(NULL); 6182 } 6183 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6184 if (cur == NULL) { 6185 xmlErrMemory(ctxt, NULL); 6186 return(NULL); 6187 } 6188 GROW; 6189 if (RAW == '?') { 6190 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6191 NEXT; 6192 } else if (RAW == '*') { 6193 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6194 NEXT; 6195 } else if (RAW == '+') { 6196 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 6197 NEXT; 6198 } else { 6199 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6200 } 6201 GROW; 6202 } 6203 SKIP_BLANKS; 6204 SHRINK; 6205 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6206 /* 6207 * Each loop we parse one 
separator and one element. 6208 */ 6209 if (RAW == ',') { 6210 if (type == 0) type = CUR; 6211 6212 /* 6213 * Detect "Name | Name , Name" error 6214 */ 6215 else if (type != CUR) { 6216 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6217 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6218 type); 6219 if ((last != NULL) && (last != ret)) 6220 xmlFreeDocElementContent(ctxt->myDoc, last); 6221 if (ret != NULL) 6222 xmlFreeDocElementContent(ctxt->myDoc, ret); 6223 return(NULL); 6224 } 6225 NEXT; 6226 6227 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6228 if (op == NULL) { 6229 if ((last != NULL) && (last != ret)) 6230 xmlFreeDocElementContent(ctxt->myDoc, last); 6231 xmlFreeDocElementContent(ctxt->myDoc, ret); 6232 return(NULL); 6233 } 6234 if (last == NULL) { 6235 op->c1 = ret; 6236 if (ret != NULL) 6237 ret->parent = op; 6238 ret = cur = op; 6239 } else { 6240 cur->c2 = op; 6241 if (op != NULL) 6242 op->parent = cur; 6243 op->c1 = last; 6244 if (last != NULL) 6245 last->parent = op; 6246 cur =op; 6247 last = NULL; 6248 } 6249 } else if (RAW == '|') { 6250 if (type == 0) type = CUR; 6251 6252 /* 6253 * Detect "Name , Name | Name" error 6254 */ 6255 else if (type != CUR) { 6256 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6257 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6258 type); 6259 if ((last != NULL) && (last != ret)) 6260 xmlFreeDocElementContent(ctxt->myDoc, last); 6261 if (ret != NULL) 6262 xmlFreeDocElementContent(ctxt->myDoc, ret); 6263 return(NULL); 6264 } 6265 NEXT; 6266 6267 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6268 if (op == NULL) { 6269 if ((last != NULL) && (last != ret)) 6270 xmlFreeDocElementContent(ctxt->myDoc, last); 6271 if (ret != NULL) 6272 xmlFreeDocElementContent(ctxt->myDoc, ret); 6273 return(NULL); 6274 } 6275 if (last == NULL) { 6276 op->c1 = ret; 6277 if (ret != NULL) 6278 ret->parent = op; 6279 ret = cur = op; 6280 } else { 6281 cur->c2 = op; 
6282 if (op != NULL) 6283 op->parent = cur; 6284 op->c1 = last; 6285 if (last != NULL) 6286 last->parent = op; 6287 cur =op; 6288 last = NULL; 6289 } 6290 } else { 6291 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6292 if ((last != NULL) && (last != ret)) 6293 xmlFreeDocElementContent(ctxt->myDoc, last); 6294 if (ret != NULL) 6295 xmlFreeDocElementContent(ctxt->myDoc, ret); 6296 return(NULL); 6297 } 6298 GROW; 6299 SKIP_BLANKS; 6300 GROW; 6301 if (RAW == '(') { 6302 int inputid = ctxt->input->id; 6303 /* Recurse on second child */ 6304 NEXT; 6305 SKIP_BLANKS; 6306 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6307 depth + 1); 6308 SKIP_BLANKS; 6309 } else { 6310 elem = xmlParseName(ctxt); 6311 if (elem == NULL) { 6312 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6313 if (ret != NULL) 6314 xmlFreeDocElementContent(ctxt->myDoc, ret); 6315 return(NULL); 6316 } 6317 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6318 if (last == NULL) { 6319 if (ret != NULL) 6320 xmlFreeDocElementContent(ctxt->myDoc, ret); 6321 return(NULL); 6322 } 6323 if (RAW == '?') { 6324 last->ocur = XML_ELEMENT_CONTENT_OPT; 6325 NEXT; 6326 } else if (RAW == '*') { 6327 last->ocur = XML_ELEMENT_CONTENT_MULT; 6328 NEXT; 6329 } else if (RAW == '+') { 6330 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6331 NEXT; 6332 } else { 6333 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6334 } 6335 } 6336 SKIP_BLANKS; 6337 GROW; 6338 } 6339 if ((cur != NULL) && (last != NULL)) { 6340 cur->c2 = last; 6341 if (last != NULL) 6342 last->parent = cur; 6343 } 6344 if (ctxt->input->id != inputchk) { 6345 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6346 "Element content declaration doesn't start and stop in" 6347 " the same entity\n"); 6348 } 6349 NEXT; 6350 if (RAW == '?') { 6351 if (ret != NULL) { 6352 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6353 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6354 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6355 else 6356 
ret->ocur = XML_ELEMENT_CONTENT_OPT; 6357 } 6358 NEXT; 6359 } else if (RAW == '*') { 6360 if (ret != NULL) { 6361 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6362 cur = ret; 6363 /* 6364 * Some normalization: 6365 * (a | b* | c?)* == (a | b | c)* 6366 */ 6367 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6368 if ((cur->c1 != NULL) && 6369 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6370 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6371 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6372 if ((cur->c2 != NULL) && 6373 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6374 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6375 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6376 cur = cur->c2; 6377 } 6378 } 6379 NEXT; 6380 } else if (RAW == '+') { 6381 if (ret != NULL) { 6382 int found = 0; 6383 6384 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6385 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6386 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6387 else 6388 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6389 /* 6390 * Some normalization: 6391 * (a | b*)+ == (a | b)* 6392 * (a | b?)+ == (a | b)* 6393 */ 6394 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6395 if ((cur->c1 != NULL) && 6396 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6397 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6398 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6399 found = 1; 6400 } 6401 if ((cur->c2 != NULL) && 6402 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6403 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6404 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6405 found = 1; 6406 } 6407 cur = cur->c2; 6408 } 6409 if (found) 6410 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6411 } 6412 NEXT; 6413 } 6414 return(ret); 6415 } 6416 6417 /** 6418 * xmlParseElementChildrenContentDecl: 6419 * @ctxt: an XML parser context 6420 * @inputchk: the input used for the current entity, needed for boundary checks 6421 * 6422 * parse the declaration for a Mixed Element content 6423 * The leading '(' and spaces have been 
skipped in xmlParseElementContentDecl 6424 * 6425 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6426 * 6427 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6428 * 6429 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6430 * 6431 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6432 * 6433 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6434 * TODO Parameter-entity replacement text must be properly nested 6435 * with parenthesized groups. That is to say, if either of the 6436 * opening or closing parentheses in a choice, seq, or Mixed 6437 * construct is contained in the replacement text for a parameter 6438 * entity, both must be contained in the same replacement text. For 6439 * interoperability, if a parameter-entity reference appears in a 6440 * choice, seq, or Mixed construct, its replacement text should not 6441 * be empty, and neither the first nor last non-blank character of 6442 * the replacement text should be a connector (| or ,). 6443 * 6444 * Returns the tree of xmlElementContentPtr describing the element 6445 * hierarchy. 6446 */ 6447 xmlElementContentPtr 6448 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6449 /* stub left for API/ABI compat */ 6450 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6451 } 6452 6453 /** 6454 * xmlParseElementContentDecl: 6455 * @ctxt: an XML parser context 6456 * @name: the name of the element being defined. 
6457 * @result: the Element Content pointer will be stored here if any 6458 * 6459 * parse the declaration for an Element content either Mixed or Children, 6460 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6461 * 6462 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6463 * 6464 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6465 */ 6466 6467 int 6468 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6469 xmlElementContentPtr *result) { 6470 6471 xmlElementContentPtr tree = NULL; 6472 int inputid = ctxt->input->id; 6473 int res; 6474 6475 *result = NULL; 6476 6477 if (RAW != '(') { 6478 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6479 "xmlParseElementContentDecl : %s '(' expected\n", name); 6480 return(-1); 6481 } 6482 NEXT; 6483 GROW; 6484 if (ctxt->instate == XML_PARSER_EOF) 6485 return(-1); 6486 SKIP_BLANKS; 6487 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6488 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6489 res = XML_ELEMENT_TYPE_MIXED; 6490 } else { 6491 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6492 res = XML_ELEMENT_TYPE_ELEMENT; 6493 } 6494 SKIP_BLANKS; 6495 *result = tree; 6496 return(res); 6497 } 6498 6499 /** 6500 * xmlParseElementDecl: 6501 * @ctxt: an XML parser context 6502 * 6503 * parse an Element declaration. 6504 * 6505 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    int ret = -1;
    xmlElementContentPtr content = NULL;
    int inputid;

    /* GROW; done in the caller */
    if (!(CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')))
        return(ret);

    /* Remembered so the closing '>' can be checked against the same input. */
    inputid = ctxt->input->id;

    SKIP(9);
    if (SKIP_BLANKS == 0) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
        return(-1);
    }
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    if (SKIP_BLANKS == 0) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }

    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /* Element must always be empty. */
        SKIP(5);
        ret = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') && (NXT(2) == 'Y')) {
        /* Element is a generic container. */
        SKIP(3);
        ret = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        ret = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
      "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;

    if (RAW != '>') {
        /* Missing '>': drop the content model; the type found is still
         * returned. */
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (content != NULL)
            xmlFreeDocElementContent(ctxt->myDoc, content);
        return(ret);
    }

    if (inputid != ctxt->input->id) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                       "Element declaration doesn't start and stop in"
                       " the same entity\n");
    }
    NEXT;

    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        (ctxt->sax->elementDecl != NULL)) {
        if (content != NULL)
            content->parent = NULL;
        ctxt->sax->elementDecl(ctxt->userData, name, ret, content);
        if ((content != NULL) && (content->parent == NULL)) {
            /*
             * this is a trick: if xmlAddElementDecl is called,
             * instead of copying the full tree it is plugged directly
             * if called from the parser. Avoid duplicating the
             * interfaces or change the API/ABI
             */
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
    } else if (content != NULL) {
        xmlFreeDocElementContent(ctxt->myDoc, content);
    }
    return(ret);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6613 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6614 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6615 */ 6616 6617 static void 6618 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6619 int id = ctxt->input->id; 6620 6621 SKIP(3); 6622 SKIP_BLANKS; 6623 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6624 SKIP(7); 6625 SKIP_BLANKS; 6626 if (RAW != '[') { 6627 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6628 xmlHaltParser(ctxt); 6629 return; 6630 } else { 6631 if (ctxt->input->id != id) { 6632 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6633 "All markup of the conditional section is not" 6634 " in the same entity\n"); 6635 } 6636 NEXT; 6637 } 6638 if (xmlParserDebugEntities) { 6639 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6640 xmlGenericError(xmlGenericErrorContext, 6641 "%s(%d): ", ctxt->input->filename, 6642 ctxt->input->line); 6643 xmlGenericError(xmlGenericErrorContext, 6644 "Entering INCLUDE Conditional Section\n"); 6645 } 6646 6647 SKIP_BLANKS; 6648 GROW; 6649 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6650 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6651 const xmlChar *check = CUR_PTR; 6652 unsigned int cons = ctxt->input->consumed; 6653 6654 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6655 xmlParseConditionalSections(ctxt); 6656 } else 6657 xmlParseMarkupDecl(ctxt); 6658 6659 SKIP_BLANKS; 6660 GROW; 6661 6662 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6663 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6664 xmlHaltParser(ctxt); 6665 break; 6666 } 6667 } 6668 if (xmlParserDebugEntities) { 6669 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6670 xmlGenericError(xmlGenericErrorContext, 6671 "%s(%d): ", ctxt->input->filename, 6672 ctxt->input->line); 6673 xmlGenericError(xmlGenericErrorContext, 6674 "Leaving INCLUDE Conditional Section\n"); 6675 } 6676 6677 } else if 
(CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6678 int state; 6679 xmlParserInputState instate; 6680 int depth = 0; 6681 6682 SKIP(6); 6683 SKIP_BLANKS; 6684 if (RAW != '[') { 6685 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6686 xmlHaltParser(ctxt); 6687 return; 6688 } else { 6689 if (ctxt->input->id != id) { 6690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6691 "All markup of the conditional section is not" 6692 " in the same entity\n"); 6693 } 6694 NEXT; 6695 } 6696 if (xmlParserDebugEntities) { 6697 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6698 xmlGenericError(xmlGenericErrorContext, 6699 "%s(%d): ", ctxt->input->filename, 6700 ctxt->input->line); 6701 xmlGenericError(xmlGenericErrorContext, 6702 "Entering IGNORE Conditional Section\n"); 6703 } 6704 6705 /* 6706 * Parse up to the end of the conditional section 6707 * But disable SAX event generating DTD building in the meantime 6708 */ 6709 state = ctxt->disableSAX; 6710 instate = ctxt->instate; 6711 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6712 ctxt->instate = XML_PARSER_IGNORE; 6713 6714 while (((depth >= 0) && (RAW != 0)) && 6715 (ctxt->instate != XML_PARSER_EOF)) { 6716 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6717 depth++; 6718 SKIP(3); 6719 continue; 6720 } 6721 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6722 if (--depth >= 0) SKIP(3); 6723 continue; 6724 } 6725 NEXT; 6726 continue; 6727 } 6728 6729 ctxt->disableSAX = state; 6730 ctxt->instate = instate; 6731 6732 if (xmlParserDebugEntities) { 6733 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6734 xmlGenericError(xmlGenericErrorContext, 6735 "%s(%d): ", ctxt->input->filename, 6736 ctxt->input->line); 6737 xmlGenericError(xmlGenericErrorContext, 6738 "Leaving IGNORE Conditional Section\n"); 6739 } 6740 6741 } else { 6742 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6743 xmlHaltParser(ctxt); 6744 return; 6745 } 6746 6747 if (RAW == 0) 6748 SHRINK; 6749 6750 if (RAW == 0) { 
6751 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6752 } else { 6753 if (ctxt->input->id != id) { 6754 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6755 "All markup of the conditional section is not in" 6756 " the same entity\n"); 6757 } 6758 if ((ctxt-> instate != XML_PARSER_EOF) && 6759 ((ctxt->input->cur + 3) <= ctxt->input->end)) 6760 SKIP(3); 6761 } 6762 } 6763 6764 /** 6765 * xmlParseMarkupDecl: 6766 * @ctxt: an XML parser context 6767 * 6768 * parse Markup declarations 6769 * 6770 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6771 * NotationDecl | PI | Comment 6772 * 6773 * [ VC: Proper Declaration/PE Nesting ] 6774 * Parameter-entity replacement text must be properly nested with 6775 * markup declarations. That is to say, if either the first character 6776 * or the last character of a markup declaration (markupdecl above) is 6777 * contained in the replacement text for a parameter-entity reference, 6778 * both must be contained in the same replacement text. 6779 * 6780 * [ WFC: PEs in Internal Subset ] 6781 * In the internal DTD subset, parameter-entity references can occur 6782 * only where markup declarations can occur, not within markup declarations. 6783 * (This does not apply to references that occur in external parameter 6784 * entities or to the external subset.) 
6785 */ 6786 void 6787 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6788 GROW; 6789 if (CUR == '<') { 6790 if (NXT(1) == '!') { 6791 switch (NXT(2)) { 6792 case 'E': 6793 if (NXT(3) == 'L') 6794 xmlParseElementDecl(ctxt); 6795 else if (NXT(3) == 'N') 6796 xmlParseEntityDecl(ctxt); 6797 break; 6798 case 'A': 6799 xmlParseAttributeListDecl(ctxt); 6800 break; 6801 case 'N': 6802 xmlParseNotationDecl(ctxt); 6803 break; 6804 case '-': 6805 xmlParseComment(ctxt); 6806 break; 6807 default: 6808 /* there is an error but it will be detected later */ 6809 break; 6810 } 6811 } else if (NXT(1) == '?') { 6812 xmlParsePI(ctxt); 6813 } 6814 } 6815 6816 /* 6817 * detect requirement to exit there and act accordingly 6818 * and avoid having instate overriden later on 6819 */ 6820 if (ctxt->instate == XML_PARSER_EOF) 6821 return; 6822 6823 /* 6824 * Conditional sections are allowed from entities included 6825 * by PE References in the internal subset. 6826 */ 6827 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6828 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6829 xmlParseConditionalSections(ctxt); 6830 } 6831 } 6832 6833 ctxt->instate = XML_PARSER_DTD; 6834 } 6835 6836 /** 6837 * xmlParseTextDecl: 6838 * @ctxt: an XML parser context 6839 * 6840 * parse an XML declaration header for external entities 6841 * 6842 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6843 */ 6844 6845 void 6846 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6847 xmlChar *version; 6848 const xmlChar *encoding; 6849 6850 /* 6851 * We know that '<?xml' is here. 6852 */ 6853 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6854 SKIP(5); 6855 } else { 6856 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6857 return; 6858 } 6859 6860 if (SKIP_BLANKS == 0) { 6861 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6862 "Space needed after '<?xml'\n"); 6863 } 6864 6865 /* 6866 * We may have the VersionInfo here. 
6867 */ 6868 version = xmlParseVersionInfo(ctxt); 6869 if (version == NULL) 6870 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6871 else { 6872 if (SKIP_BLANKS == 0) { 6873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6874 "Space needed here\n"); 6875 } 6876 } 6877 ctxt->input->version = version; 6878 6879 /* 6880 * We must have the encoding declaration 6881 */ 6882 encoding = xmlParseEncodingDecl(ctxt); 6883 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6884 /* 6885 * The XML REC instructs us to stop parsing right here 6886 */ 6887 return; 6888 } 6889 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6890 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6891 "Missing encoding in text declaration\n"); 6892 } 6893 6894 SKIP_BLANKS; 6895 if ((RAW == '?') && (NXT(1) == '>')) { 6896 SKIP(2); 6897 } else if (RAW == '>') { 6898 /* Deprecated old WD ... */ 6899 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6900 NEXT; 6901 } else { 6902 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6903 MOVETO_ENDTAG(CUR_PTR); 6904 NEXT; 6905 } 6906 } 6907 6908 /** 6909 * xmlParseExternalSubset: 6910 * @ctxt: an XML parser context 6911 * @ExternalID: the external identifier 6912 * @SystemID: the system identifier (or URL) 6913 * 6914 * parse Markup declarations from an external subset 6915 * 6916 * [30] extSubset ::= textDecl? 
extSubsetDecl 6917 * 6918 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6919 */ 6920 void 6921 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6922 const xmlChar *SystemID) { 6923 xmlDetectSAX2(ctxt); 6924 GROW; 6925 6926 if ((ctxt->encoding == NULL) && 6927 (ctxt->input->end - ctxt->input->cur >= 4)) { 6928 xmlChar start[4]; 6929 xmlCharEncoding enc; 6930 6931 start[0] = RAW; 6932 start[1] = NXT(1); 6933 start[2] = NXT(2); 6934 start[3] = NXT(3); 6935 enc = xmlDetectCharEncoding(start, 4); 6936 if (enc != XML_CHAR_ENCODING_NONE) 6937 xmlSwitchEncoding(ctxt, enc); 6938 } 6939 6940 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6941 xmlParseTextDecl(ctxt); 6942 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6943 /* 6944 * The XML REC instructs us to stop parsing right here 6945 */ 6946 xmlHaltParser(ctxt); 6947 return; 6948 } 6949 } 6950 if (ctxt->myDoc == NULL) { 6951 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6952 if (ctxt->myDoc == NULL) { 6953 xmlErrMemory(ctxt, "New Doc failed"); 6954 return; 6955 } 6956 ctxt->myDoc->properties = XML_DOC_INTERNAL; 6957 } 6958 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6959 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6960 6961 ctxt->instate = XML_PARSER_DTD; 6962 ctxt->external = 1; 6963 SKIP_BLANKS; 6964 while (((RAW == '<') && (NXT(1) == '?')) || 6965 ((RAW == '<') && (NXT(1) == '!')) || 6966 (RAW == '%')) { 6967 const xmlChar *check = CUR_PTR; 6968 unsigned int cons = ctxt->input->consumed; 6969 6970 GROW; 6971 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6972 xmlParseConditionalSections(ctxt); 6973 } else 6974 xmlParseMarkupDecl(ctxt); 6975 SKIP_BLANKS; 6976 6977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6978 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6979 break; 6980 } 6981 } 6982 6983 if (RAW != 0) { 6984 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6985 } 6986 6987 } 6988 
6989 /** 6990 * xmlParseReference: 6991 * @ctxt: an XML parser context 6992 * 6993 * parse and handle entity references in content, depending on the SAX 6994 * interface, this may end-up in a call to character() if this is a 6995 * CharRef, a predefined entity, if there is no reference() callback. 6996 * or if the parser was asked to switch to that mode. 6997 * 6998 * [67] Reference ::= EntityRef | CharRef 6999 */ 7000 void 7001 xmlParseReference(xmlParserCtxtPtr ctxt) { 7002 xmlEntityPtr ent; 7003 xmlChar *val; 7004 int was_checked; 7005 xmlNodePtr list = NULL; 7006 xmlParserErrors ret = XML_ERR_OK; 7007 7008 7009 if (RAW != '&') 7010 return; 7011 7012 /* 7013 * Simple case of a CharRef 7014 */ 7015 if (NXT(1) == '#') { 7016 int i = 0; 7017 xmlChar out[10]; 7018 int hex = NXT(2); 7019 int value = xmlParseCharRef(ctxt); 7020 7021 if (value == 0) 7022 return; 7023 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7024 /* 7025 * So we are using non-UTF-8 buffers 7026 * Check that the char fit on 8bits, if not 7027 * generate a CharRef. 
7028 */ 7029 if (value <= 0xFF) { 7030 out[0] = value; 7031 out[1] = 0; 7032 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7033 (!ctxt->disableSAX)) 7034 ctxt->sax->characters(ctxt->userData, out, 1); 7035 } else { 7036 if ((hex == 'x') || (hex == 'X')) 7037 snprintf((char *)out, sizeof(out), "#x%X", value); 7038 else 7039 snprintf((char *)out, sizeof(out), "#%d", value); 7040 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7041 (!ctxt->disableSAX)) 7042 ctxt->sax->reference(ctxt->userData, out); 7043 } 7044 } else { 7045 /* 7046 * Just encode the value in UTF-8 7047 */ 7048 COPY_BUF(0 ,out, i, value); 7049 out[i] = 0; 7050 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7051 (!ctxt->disableSAX)) 7052 ctxt->sax->characters(ctxt->userData, out, i); 7053 } 7054 return; 7055 } 7056 7057 /* 7058 * We are seeing an entity reference 7059 */ 7060 ent = xmlParseEntityRef(ctxt); 7061 if (ent == NULL) return; 7062 if (!ctxt->wellFormed) 7063 return; 7064 was_checked = ent->checked; 7065 7066 /* special case of predefined entities */ 7067 if ((ent->name == NULL) || 7068 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7069 val = ent->content; 7070 if (val == NULL) return; 7071 /* 7072 * inline the entity. 7073 */ 7074 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7075 (!ctxt->disableSAX)) 7076 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7077 return; 7078 } 7079 7080 /* 7081 * The first reference to the entity trigger a parsing phase 7082 * where the ent->children is filled with the result from 7083 * the parsing. 7084 * Note: external parsed entities will not be loaded, it is not 7085 * required for a non-validating parser, unless the parsing option 7086 * of validating, or substituting entities were given. Doing so is 7087 * far more secure as the parser will only process data coming from 7088 * the document entity by default. 
7089 */ 7090 if (((ent->checked == 0) || 7091 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) && 7092 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7093 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7094 unsigned long oldnbent = ctxt->nbentities; 7095 7096 /* 7097 * This is a bit hackish but this seems the best 7098 * way to make sure both SAX and DOM entity support 7099 * behaves okay. 7100 */ 7101 void *user_data; 7102 if (ctxt->userData == ctxt) 7103 user_data = NULL; 7104 else 7105 user_data = ctxt->userData; 7106 7107 /* 7108 * Check that this entity is well formed 7109 * 4.3.2: An internal general parsed entity is well-formed 7110 * if its replacement text matches the production labeled 7111 * content. 7112 */ 7113 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7114 ctxt->depth++; 7115 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7116 user_data, &list); 7117 ctxt->depth--; 7118 7119 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7120 ctxt->depth++; 7121 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7122 user_data, ctxt->depth, ent->URI, 7123 ent->ExternalID, &list); 7124 ctxt->depth--; 7125 } else { 7126 ret = XML_ERR_ENTITY_PE_INTERNAL; 7127 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7128 "invalid entity type found\n", NULL); 7129 } 7130 7131 /* 7132 * Store the number of entities needing parsing for this entity 7133 * content and do checkings 7134 */ 7135 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 7136 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7137 ent->checked |= 1; 7138 if (ret == XML_ERR_ENTITY_LOOP) { 7139 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7140 xmlFreeNodeList(list); 7141 return; 7142 } 7143 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 7144 xmlFreeNodeList(list); 7145 return; 7146 } 7147 7148 if ((ret == XML_ERR_OK) && (list != NULL)) { 7149 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7150 (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7151 (ent->children == NULL)) { 7152 ent->children = list; 7153 if (ctxt->replaceEntities) { 7154 /* 7155 * Prune it directly in the generated document 7156 * except for single text nodes. 7157 */ 7158 if (((list->type == XML_TEXT_NODE) && 7159 (list->next == NULL)) || 7160 (ctxt->parseMode == XML_PARSE_READER)) { 7161 list->parent = (xmlNodePtr) ent; 7162 list = NULL; 7163 ent->owner = 1; 7164 } else { 7165 ent->owner = 0; 7166 while (list != NULL) { 7167 list->parent = (xmlNodePtr) ctxt->node; 7168 list->doc = ctxt->myDoc; 7169 if (list->next == NULL) 7170 ent->last = list; 7171 list = list->next; 7172 } 7173 list = ent->children; 7174 #ifdef LIBXML_LEGACY_ENABLED 7175 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7176 xmlAddEntityReference(ent, list, NULL); 7177 #endif /* LIBXML_LEGACY_ENABLED */ 7178 } 7179 } else { 7180 ent->owner = 1; 7181 while (list != NULL) { 7182 list->parent = (xmlNodePtr) ent; 7183 xmlSetTreeDoc(list, ent->doc); 7184 if (list->next == NULL) 7185 ent->last = list; 7186 list = list->next; 7187 } 7188 } 7189 } else { 7190 xmlFreeNodeList(list); 7191 list = NULL; 7192 } 7193 } else if ((ret != XML_ERR_OK) && 7194 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7195 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7196 "Entity '%s' failed to parse\n", ent->name); 7197 xmlParserEntityCheck(ctxt, 0, ent, 0); 7198 } else if (list != NULL) { 7199 xmlFreeNodeList(list); 7200 list = NULL; 7201 } 7202 if (ent->checked == 0) 7203 ent->checked = 2; 7204 7205 /* Prevent entity from being parsed and expanded twice (Bug 760367). */ 7206 was_checked = 0; 7207 } else if (ent->checked != 1) { 7208 ctxt->nbentities += ent->checked / 2; 7209 } 7210 7211 /* 7212 * Now that the entity content has been gathered 7213 * provide it to the application, this can take different forms based 7214 * on the parsing modes. 
7215 */ 7216 if (ent->children == NULL) { 7217 /* 7218 * Probably running in SAX mode and the callbacks don't 7219 * build the entity content. So unless we already went 7220 * though parsing for first checking go though the entity 7221 * content to generate callbacks associated to the entity 7222 */ 7223 if (was_checked != 0) { 7224 void *user_data; 7225 /* 7226 * This is a bit hackish but this seems the best 7227 * way to make sure both SAX and DOM entity support 7228 * behaves okay. 7229 */ 7230 if (ctxt->userData == ctxt) 7231 user_data = NULL; 7232 else 7233 user_data = ctxt->userData; 7234 7235 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7236 ctxt->depth++; 7237 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7238 ent->content, user_data, NULL); 7239 ctxt->depth--; 7240 } else if (ent->etype == 7241 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7242 ctxt->depth++; 7243 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7244 ctxt->sax, user_data, ctxt->depth, 7245 ent->URI, ent->ExternalID, NULL); 7246 ctxt->depth--; 7247 } else { 7248 ret = XML_ERR_ENTITY_PE_INTERNAL; 7249 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7250 "invalid entity type found\n", NULL); 7251 } 7252 if (ret == XML_ERR_ENTITY_LOOP) { 7253 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7254 return; 7255 } 7256 } 7257 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7258 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7259 /* 7260 * Entity reference callback comes second, it's somewhat 7261 * superfluous but a compatibility to historical behaviour 7262 */ 7263 ctxt->sax->reference(ctxt->userData, ent->name); 7264 } 7265 return; 7266 } 7267 7268 /* 7269 * If we didn't get any children for the entity being built 7270 */ 7271 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7272 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7273 /* 7274 * Create a node. 
7275 */ 7276 ctxt->sax->reference(ctxt->userData, ent->name); 7277 return; 7278 } 7279 7280 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7281 /* 7282 * There is a problem on the handling of _private for entities 7283 * (bug 155816): Should we copy the content of the field from 7284 * the entity (possibly overwriting some value set by the user 7285 * when a copy is created), should we leave it alone, or should 7286 * we try to take care of different situations? The problem 7287 * is exacerbated by the usage of this field by the xmlReader. 7288 * To fix this bug, we look at _private on the created node 7289 * and, if it's NULL, we copy in whatever was in the entity. 7290 * If it's not NULL we leave it alone. This is somewhat of a 7291 * hack - maybe we should have further tests to determine 7292 * what to do. 7293 */ 7294 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7295 /* 7296 * Seems we are generating the DOM content, do 7297 * a simple tree copy for all references except the first 7298 * In the first occurrence list contains the replacement. 7299 */ 7300 if (((list == NULL) && (ent->owner == 0)) || 7301 (ctxt->parseMode == XML_PARSE_READER)) { 7302 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7303 7304 /* 7305 * We are copying here, make sure there is no abuse 7306 */ 7307 ctxt->sizeentcopy += ent->length + 5; 7308 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7309 return; 7310 7311 /* 7312 * when operating on a reader, the entities definitions 7313 * are always owning the entities subtree. 
7314 if (ctxt->parseMode == XML_PARSE_READER) 7315 ent->owner = 1; 7316 */ 7317 7318 cur = ent->children; 7319 while (cur != NULL) { 7320 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7321 if (nw != NULL) { 7322 if (nw->_private == NULL) 7323 nw->_private = cur->_private; 7324 if (firstChild == NULL){ 7325 firstChild = nw; 7326 } 7327 nw = xmlAddChild(ctxt->node, nw); 7328 } 7329 if (cur == ent->last) { 7330 /* 7331 * needed to detect some strange empty 7332 * node cases in the reader tests 7333 */ 7334 if ((ctxt->parseMode == XML_PARSE_READER) && 7335 (nw != NULL) && 7336 (nw->type == XML_ELEMENT_NODE) && 7337 (nw->children == NULL)) 7338 nw->extra = 1; 7339 7340 break; 7341 } 7342 cur = cur->next; 7343 } 7344 #ifdef LIBXML_LEGACY_ENABLED 7345 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7346 xmlAddEntityReference(ent, firstChild, nw); 7347 #endif /* LIBXML_LEGACY_ENABLED */ 7348 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7349 xmlNodePtr nw = NULL, cur, next, last, 7350 firstChild = NULL; 7351 7352 /* 7353 * We are copying here, make sure there is no abuse 7354 */ 7355 ctxt->sizeentcopy += ent->length + 5; 7356 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7357 return; 7358 7359 /* 7360 * Copy the entity child list and make it the new 7361 * entity child list. The goal is to make sure any 7362 * ID or REF referenced will be the one from the 7363 * document content and not the entity copy. 
7364 */ 7365 cur = ent->children; 7366 ent->children = NULL; 7367 last = ent->last; 7368 ent->last = NULL; 7369 while (cur != NULL) { 7370 next = cur->next; 7371 cur->next = NULL; 7372 cur->parent = NULL; 7373 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7374 if (nw != NULL) { 7375 if (nw->_private == NULL) 7376 nw->_private = cur->_private; 7377 if (firstChild == NULL){ 7378 firstChild = cur; 7379 } 7380 xmlAddChild((xmlNodePtr) ent, nw); 7381 xmlAddChild(ctxt->node, cur); 7382 } 7383 if (cur == last) 7384 break; 7385 cur = next; 7386 } 7387 if (ent->owner == 0) 7388 ent->owner = 1; 7389 #ifdef LIBXML_LEGACY_ENABLED 7390 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7391 xmlAddEntityReference(ent, firstChild, nw); 7392 #endif /* LIBXML_LEGACY_ENABLED */ 7393 } else { 7394 const xmlChar *nbktext; 7395 7396 /* 7397 * the name change is to avoid coalescing of the 7398 * node with a possible previous text one which 7399 * would make ent->children a dangling pointer 7400 */ 7401 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7402 -1); 7403 if (ent->children->type == XML_TEXT_NODE) 7404 ent->children->name = nbktext; 7405 if ((ent->last != ent->children) && 7406 (ent->last->type == XML_TEXT_NODE)) 7407 ent->last->name = nbktext; 7408 xmlAddChildList(ctxt->node, ent->children); 7409 } 7410 7411 /* 7412 * This is to avoid a nasty side effect, see 7413 * characters() in SAX.c 7414 */ 7415 ctxt->nodemem = 0; 7416 ctxt->nodelen = 0; 7417 return; 7418 } 7419 } 7420 } 7421 7422 /** 7423 * xmlParseEntityRef: 7424 * @ctxt: an XML parser context 7425 * 7426 * parse ENTITY references declarations 7427 * 7428 * [68] EntityRef ::= '&' Name ';' 7429 * 7430 * [ WFC: Entity Declared ] 7431 * In a document without any DTD, a document with only an internal DTD 7432 * subset which contains no parameter entity references, or a document 7433 * with "standalone='yes'", the Name given in the entity reference 7434 * must match that in an entity declaration, except that 
well-formed 7435 * documents need not declare any of the following entities: amp, lt, 7436 * gt, apos, quot. The declaration of a parameter entity must precede 7437 * any reference to it. Similarly, the declaration of a general entity 7438 * must precede any reference to it which appears in a default value in an 7439 * attribute-list declaration. Note that if entities are declared in the 7440 * external subset or in external parameter entities, a non-validating 7441 * processor is not obligated to read and process their declarations; 7442 * for such documents, the rule that an entity must be declared is a 7443 * well-formedness constraint only if standalone='yes'. 7444 * 7445 * [ WFC: Parsed Entity ] 7446 * An entity reference must not contain the name of an unparsed entity 7447 * 7448 * Returns the xmlEntityPtr if found, or NULL otherwise. 7449 */ 7450 xmlEntityPtr 7451 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7452 const xmlChar *name; 7453 xmlEntityPtr ent = NULL; 7454 7455 GROW; 7456 if (ctxt->instate == XML_PARSER_EOF) 7457 return(NULL); 7458 7459 if (RAW != '&') 7460 return(NULL); 7461 NEXT; 7462 name = xmlParseName(ctxt); 7463 if (name == NULL) { 7464 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7465 "xmlParseEntityRef: no name\n"); 7466 return(NULL); 7467 } 7468 if (RAW != ';') { 7469 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7470 return(NULL); 7471 } 7472 NEXT; 7473 7474 /* 7475 * Predefined entities override any extra definition 7476 */ 7477 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7478 ent = xmlGetPredefinedEntity(name); 7479 if (ent != NULL) 7480 return(ent); 7481 } 7482 7483 /* 7484 * Increase the number of entity references parsed 7485 */ 7486 ctxt->nbentities++; 7487 7488 /* 7489 * Ask first SAX for entity resolution, otherwise try the 7490 * entities which may have stored in the parser context. 
7491 */ 7492 if (ctxt->sax != NULL) { 7493 if (ctxt->sax->getEntity != NULL) 7494 ent = ctxt->sax->getEntity(ctxt->userData, name); 7495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7496 (ctxt->options & XML_PARSE_OLDSAX)) 7497 ent = xmlGetPredefinedEntity(name); 7498 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7499 (ctxt->userData==ctxt)) { 7500 ent = xmlSAX2GetEntity(ctxt, name); 7501 } 7502 } 7503 if (ctxt->instate == XML_PARSER_EOF) 7504 return(NULL); 7505 /* 7506 * [ WFC: Entity Declared ] 7507 * In a document without any DTD, a document with only an 7508 * internal DTD subset which contains no parameter entity 7509 * references, or a document with "standalone='yes'", the 7510 * Name given in the entity reference must match that in an 7511 * entity declaration, except that well-formed documents 7512 * need not declare any of the following entities: amp, lt, 7513 * gt, apos, quot. 7514 * The declaration of a parameter entity must precede any 7515 * reference to it. 7516 * Similarly, the declaration of a general entity must 7517 * precede any reference to it which appears in a default 7518 * value in an attribute-list declaration. Note that if 7519 * entities are declared in the external subset or in 7520 * external parameter entities, a non-validating processor 7521 * is not obligated to read and process their declarations; 7522 * for such documents, the rule that an entity must be 7523 * declared is a well-formedness constraint only if 7524 * standalone='yes'. 
7525 */ 7526 if (ent == NULL) { 7527 if ((ctxt->standalone == 1) || 7528 ((ctxt->hasExternalSubset == 0) && 7529 (ctxt->hasPErefs == 0))) { 7530 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7531 "Entity '%s' not defined\n", name); 7532 } else { 7533 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7534 "Entity '%s' not defined\n", name); 7535 if ((ctxt->inSubset == 0) && 7536 (ctxt->sax != NULL) && 7537 (ctxt->sax->reference != NULL)) { 7538 ctxt->sax->reference(ctxt->userData, name); 7539 } 7540 } 7541 xmlParserEntityCheck(ctxt, 0, ent, 0); 7542 ctxt->valid = 0; 7543 } 7544 7545 /* 7546 * [ WFC: Parsed Entity ] 7547 * An entity reference must not contain the name of an 7548 * unparsed entity 7549 */ 7550 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7551 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7552 "Entity reference to unparsed entity %s\n", name); 7553 } 7554 7555 /* 7556 * [ WFC: No External Entity References ] 7557 * Attribute values cannot contain direct or indirect 7558 * entity references to external entities. 7559 */ 7560 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7561 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7562 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7563 "Attribute references external entity '%s'\n", name); 7564 } 7565 /* 7566 * [ WFC: No < in Attribute Values ] 7567 * The replacement text of any entity referred to directly or 7568 * indirectly in an attribute value (other than "<") must 7569 * not contain a <. 7570 */ 7571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7572 (ent != NULL) && 7573 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7574 if (((ent->checked & 1) || (ent->checked == 0)) && 7575 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7576 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7577 "'<' in entity '%s' is not allowed in attributes values\n", name); 7578 } 7579 } 7580 7581 /* 7582 * Internal check, no parameter entities here ... 
7583 */ 7584 else { 7585 switch (ent->etype) { 7586 case XML_INTERNAL_PARAMETER_ENTITY: 7587 case XML_EXTERNAL_PARAMETER_ENTITY: 7588 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7589 "Attempt to reference the parameter entity '%s'\n", 7590 name); 7591 break; 7592 default: 7593 break; 7594 } 7595 } 7596 7597 /* 7598 * [ WFC: No Recursion ] 7599 * A parsed entity must not contain a recursive reference 7600 * to itself, either directly or indirectly. 7601 * Done somewhere else 7602 */ 7603 return(ent); 7604 } 7605 7606 /** 7607 * xmlParseStringEntityRef: 7608 * @ctxt: an XML parser context 7609 * @str: a pointer to an index in the string 7610 * 7611 * parse ENTITY references declarations, but this version parses it from 7612 * a string value. 7613 * 7614 * [68] EntityRef ::= '&' Name ';' 7615 * 7616 * [ WFC: Entity Declared ] 7617 * In a document without any DTD, a document with only an internal DTD 7618 * subset which contains no parameter entity references, or a document 7619 * with "standalone='yes'", the Name given in the entity reference 7620 * must match that in an entity declaration, except that well-formed 7621 * documents need not declare any of the following entities: amp, lt, 7622 * gt, apos, quot. The declaration of a parameter entity must precede 7623 * any reference to it. Similarly, the declaration of a general entity 7624 * must precede any reference to it which appears in a default value in an 7625 * attribute-list declaration. Note that if entities are declared in the 7626 * external subset or in external parameter entities, a non-validating 7627 * processor is not obligated to read and process their declarations; 7628 * for such documents, the rule that an entity must be declared is a 7629 * well-formedness constraint only if standalone='yes'. 7630 * 7631 * [ WFC: Parsed Entity ] 7632 * An entity reference must not contain the name of an unparsed entity 7633 * 7634 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7635 * is updated to the current location in the string. 7636 */ 7637 static xmlEntityPtr 7638 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7639 xmlChar *name; 7640 const xmlChar *ptr; 7641 xmlChar cur; 7642 xmlEntityPtr ent = NULL; 7643 7644 if ((str == NULL) || (*str == NULL)) 7645 return(NULL); 7646 ptr = *str; 7647 cur = *ptr; 7648 if (cur != '&') 7649 return(NULL); 7650 7651 ptr++; 7652 name = xmlParseStringName(ctxt, &ptr); 7653 if (name == NULL) { 7654 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7655 "xmlParseStringEntityRef: no name\n"); 7656 *str = ptr; 7657 return(NULL); 7658 } 7659 if (*ptr != ';') { 7660 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7661 xmlFree(name); 7662 *str = ptr; 7663 return(NULL); 7664 } 7665 ptr++; 7666 7667 7668 /* 7669 * Predefined entities override any extra definition 7670 */ 7671 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7672 ent = xmlGetPredefinedEntity(name); 7673 if (ent != NULL) { 7674 xmlFree(name); 7675 *str = ptr; 7676 return(ent); 7677 } 7678 } 7679 7680 /* 7681 * Increate the number of entity references parsed 7682 */ 7683 ctxt->nbentities++; 7684 7685 /* 7686 * Ask first SAX for entity resolution, otherwise try the 7687 * entities which may have stored in the parser context. 
7688 */ 7689 if (ctxt->sax != NULL) { 7690 if (ctxt->sax->getEntity != NULL) 7691 ent = ctxt->sax->getEntity(ctxt->userData, name); 7692 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7693 ent = xmlGetPredefinedEntity(name); 7694 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7695 ent = xmlSAX2GetEntity(ctxt, name); 7696 } 7697 } 7698 if (ctxt->instate == XML_PARSER_EOF) { 7699 xmlFree(name); 7700 return(NULL); 7701 } 7702 7703 /* 7704 * [ WFC: Entity Declared ] 7705 * In a document without any DTD, a document with only an 7706 * internal DTD subset which contains no parameter entity 7707 * references, or a document with "standalone='yes'", the 7708 * Name given in the entity reference must match that in an 7709 * entity declaration, except that well-formed documents 7710 * need not declare any of the following entities: amp, lt, 7711 * gt, apos, quot. 7712 * The declaration of a parameter entity must precede any 7713 * reference to it. 7714 * Similarly, the declaration of a general entity must 7715 * precede any reference to it which appears in a default 7716 * value in an attribute-list declaration. Note that if 7717 * entities are declared in the external subset or in 7718 * external parameter entities, a non-validating processor 7719 * is not obligated to read and process their declarations; 7720 * for such documents, the rule that an entity must be 7721 * declared is a well-formedness constraint only if 7722 * standalone='yes'. 7723 */ 7724 if (ent == NULL) { 7725 if ((ctxt->standalone == 1) || 7726 ((ctxt->hasExternalSubset == 0) && 7727 (ctxt->hasPErefs == 0))) { 7728 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7729 "Entity '%s' not defined\n", name); 7730 } else { 7731 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7732 "Entity '%s' not defined\n", 7733 name); 7734 } 7735 xmlParserEntityCheck(ctxt, 0, ent, 0); 7736 /* TODO ? 
check regressions ctxt->valid = 0; */ 7737 } 7738 7739 /* 7740 * [ WFC: Parsed Entity ] 7741 * An entity reference must not contain the name of an 7742 * unparsed entity 7743 */ 7744 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7745 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7746 "Entity reference to unparsed entity %s\n", name); 7747 } 7748 7749 /* 7750 * [ WFC: No External Entity References ] 7751 * Attribute values cannot contain direct or indirect 7752 * entity references to external entities. 7753 */ 7754 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7755 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7756 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7757 "Attribute references external entity '%s'\n", name); 7758 } 7759 /* 7760 * [ WFC: No < in Attribute Values ] 7761 * The replacement text of any entity referred to directly or 7762 * indirectly in an attribute value (other than "<") must 7763 * not contain a <. 7764 */ 7765 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7766 (ent != NULL) && (ent->content != NULL) && 7767 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7768 (xmlStrchr(ent->content, '<'))) { 7769 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7770 "'<' in entity '%s' is not allowed in attributes values\n", 7771 name); 7772 } 7773 7774 /* 7775 * Internal check, no parameter entities here ... 7776 */ 7777 else { 7778 switch (ent->etype) { 7779 case XML_INTERNAL_PARAMETER_ENTITY: 7780 case XML_EXTERNAL_PARAMETER_ENTITY: 7781 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7782 "Attempt to reference the parameter entity '%s'\n", 7783 name); 7784 break; 7785 default: 7786 break; 7787 } 7788 } 7789 7790 /* 7791 * [ WFC: No Recursion ] 7792 * A parsed entity must not contain a recursive reference 7793 * to itself, either directly or indirectly. 
7794 * Done somewhere else 7795 */ 7796 7797 xmlFree(name); 7798 *str = ptr; 7799 return(ent); 7800 } 7801 7802 /** 7803 * xmlParsePEReference: 7804 * @ctxt: an XML parser context 7805 * 7806 * parse PEReference declarations 7807 * The entity content is handled directly by pushing it's content as 7808 * a new input stream. 7809 * 7810 * [69] PEReference ::= '%' Name ';' 7811 * 7812 * [ WFC: No Recursion ] 7813 * A parsed entity must not contain a recursive 7814 * reference to itself, either directly or indirectly. 7815 * 7816 * [ WFC: Entity Declared ] 7817 * In a document without any DTD, a document with only an internal DTD 7818 * subset which contains no parameter entity references, or a document 7819 * with "standalone='yes'", ... ... The declaration of a parameter 7820 * entity must precede any reference to it... 7821 * 7822 * [ VC: Entity Declared ] 7823 * In a document with an external subset or external parameter entities 7824 * with "standalone='no'", ... ... The declaration of a parameter entity 7825 * must precede any reference to it... 7826 * 7827 * [ WFC: In DTD ] 7828 * Parameter-entity references may only appear in the DTD. 7829 * NOTE: misleading but this is handled. 
7830 */ 7831 void 7832 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7833 { 7834 const xmlChar *name; 7835 xmlEntityPtr entity = NULL; 7836 xmlParserInputPtr input; 7837 7838 if (RAW != '%') 7839 return; 7840 NEXT; 7841 name = xmlParseName(ctxt); 7842 if (name == NULL) { 7843 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n"); 7844 return; 7845 } 7846 if (xmlParserDebugEntities) 7847 xmlGenericError(xmlGenericErrorContext, 7848 "PEReference: %s\n", name); 7849 if (RAW != ';') { 7850 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 7851 return; 7852 } 7853 7854 NEXT; 7855 7856 /* 7857 * Increate the number of entity references parsed 7858 */ 7859 ctxt->nbentities++; 7860 7861 /* 7862 * Request the entity from SAX 7863 */ 7864 if ((ctxt->sax != NULL) && 7865 (ctxt->sax->getParameterEntity != NULL)) 7866 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7867 if (ctxt->instate == XML_PARSER_EOF) 7868 return; 7869 if (entity == NULL) { 7870 /* 7871 * [ WFC: Entity Declared ] 7872 * In a document without any DTD, a document with only an 7873 * internal DTD subset which contains no parameter entity 7874 * references, or a document with "standalone='yes'", ... 7875 * ... The declaration of a parameter entity must precede 7876 * any reference to it... 7877 */ 7878 if ((ctxt->standalone == 1) || 7879 ((ctxt->hasExternalSubset == 0) && 7880 (ctxt->hasPErefs == 0))) { 7881 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7882 "PEReference: %%%s; not found\n", 7883 name); 7884 } else { 7885 /* 7886 * [ VC: Entity Declared ] 7887 * In a document with an external subset or external 7888 * parameter entities with "standalone='no'", ... 7889 * ... The declaration of a parameter entity must 7890 * precede any reference to it... 
7891 */ 7892 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 7893 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 7894 "PEReference: %%%s; not found\n", 7895 name, NULL); 7896 } else 7897 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7898 "PEReference: %%%s; not found\n", 7899 name, NULL); 7900 ctxt->valid = 0; 7901 } 7902 xmlParserEntityCheck(ctxt, 0, NULL, 0); 7903 } else { 7904 /* 7905 * Internal checking in case the entity quest barfed 7906 */ 7907 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7908 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7909 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7910 "Internal: %%%s; is not a parameter entity\n", 7911 name, NULL); 7912 } else { 7913 xmlChar start[4]; 7914 xmlCharEncoding enc; 7915 7916 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7917 ((ctxt->options & XML_PARSE_NOENT) == 0) && 7918 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 7919 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 7920 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 7921 (ctxt->replaceEntities == 0) && 7922 (ctxt->validate == 0)) 7923 return; 7924 7925 input = xmlNewEntityInputStream(ctxt, entity); 7926 if (xmlPushInput(ctxt, input) < 0) { 7927 xmlFreeInputStream(input); 7928 return; 7929 } 7930 7931 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { 7932 /* 7933 * Get the 4 first bytes and decode the charset 7934 * if enc != XML_CHAR_ENCODING_NONE 7935 * plug some encoding conversion routines. 7936 * Note that, since we may have some non-UTF8 7937 * encoding (like UTF16, bug 135229), the 'length' 7938 * is not known, but we can calculate based upon 7939 * the amount of data in the buffer. 
7940 */ 7941 GROW 7942 if (ctxt->instate == XML_PARSER_EOF) 7943 return; 7944 if ((ctxt->input->end - ctxt->input->cur)>=4) { 7945 start[0] = RAW; 7946 start[1] = NXT(1); 7947 start[2] = NXT(2); 7948 start[3] = NXT(3); 7949 enc = xmlDetectCharEncoding(start, 4); 7950 if (enc != XML_CHAR_ENCODING_NONE) { 7951 xmlSwitchEncoding(ctxt, enc); 7952 } 7953 } 7954 7955 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7956 (IS_BLANK_CH(NXT(5)))) { 7957 xmlParseTextDecl(ctxt); 7958 } 7959 } 7960 } 7961 } 7962 ctxt->hasPErefs = 1; 7963 } 7964 7965 /** 7966 * xmlLoadEntityContent: 7967 * @ctxt: an XML parser context 7968 * @entity: an unloaded system entity 7969 * 7970 * Load the original content of the given system entity from the 7971 * ExternalID/SystemID given. This is to be used for Included in Literal 7972 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7973 * 7974 * Returns 0 in case of success and -1 in case of failure 7975 */ 7976 static int 7977 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7978 xmlParserInputPtr input; 7979 xmlBufferPtr buf; 7980 int l, c; 7981 int count = 0; 7982 7983 if ((ctxt == NULL) || (entity == NULL) || 7984 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7985 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7986 (entity->content != NULL)) { 7987 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7988 "xmlLoadEntityContent parameter error"); 7989 return(-1); 7990 } 7991 7992 if (xmlParserDebugEntities) 7993 xmlGenericError(xmlGenericErrorContext, 7994 "Reading %s entity content input\n", entity->name); 7995 7996 buf = xmlBufferCreate(); 7997 if (buf == NULL) { 7998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7999 "xmlLoadEntityContent parameter error"); 8000 return(-1); 8001 } 8002 8003 input = xmlNewEntityInputStream(ctxt, entity); 8004 if (input == NULL) { 8005 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8006 "xmlLoadEntityContent input error"); 8007 xmlBufferFree(buf); 8008 return(-1); 8009 
} 8010 8011 /* 8012 * Push the entity as the current input, read char by char 8013 * saving to the buffer until the end of the entity or an error 8014 */ 8015 if (xmlPushInput(ctxt, input) < 0) { 8016 xmlBufferFree(buf); 8017 return(-1); 8018 } 8019 8020 GROW; 8021 c = CUR_CHAR(l); 8022 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8023 (IS_CHAR(c))) { 8024 xmlBufferAdd(buf, ctxt->input->cur, l); 8025 if (count++ > XML_PARSER_CHUNK_SIZE) { 8026 count = 0; 8027 GROW; 8028 if (ctxt->instate == XML_PARSER_EOF) { 8029 xmlBufferFree(buf); 8030 return(-1); 8031 } 8032 } 8033 NEXTL(l); 8034 c = CUR_CHAR(l); 8035 if (c == 0) { 8036 count = 0; 8037 GROW; 8038 if (ctxt->instate == XML_PARSER_EOF) { 8039 xmlBufferFree(buf); 8040 return(-1); 8041 } 8042 c = CUR_CHAR(l); 8043 } 8044 } 8045 8046 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8047 xmlPopInput(ctxt); 8048 } else if (!IS_CHAR(c)) { 8049 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8050 "xmlLoadEntityContent: invalid char value %d\n", 8051 c); 8052 xmlBufferFree(buf); 8053 return(-1); 8054 } 8055 entity->content = buf->content; 8056 buf->content = NULL; 8057 xmlBufferFree(buf); 8058 8059 return(0); 8060 } 8061 8062 /** 8063 * xmlParseStringPEReference: 8064 * @ctxt: an XML parser context 8065 * @str: a pointer to an index in the string 8066 * 8067 * parse PEReference declarations 8068 * 8069 * [69] PEReference ::= '%' Name ';' 8070 * 8071 * [ WFC: No Recursion ] 8072 * A parsed entity must not contain a recursive 8073 * reference to itself, either directly or indirectly. 8074 * 8075 * [ WFC: Entity Declared ] 8076 * In a document without any DTD, a document with only an internal DTD 8077 * subset which contains no parameter entity references, or a document 8078 * with "standalone='yes'", ... ... The declaration of a parameter 8079 * entity must precede any reference to it... 
8080 * 8081 * [ VC: Entity Declared ] 8082 * In a document with an external subset or external parameter entities 8083 * with "standalone='no'", ... ... The declaration of a parameter entity 8084 * must precede any reference to it... 8085 * 8086 * [ WFC: In DTD ] 8087 * Parameter-entity references may only appear in the DTD. 8088 * NOTE: misleading but this is handled. 8089 * 8090 * Returns the string of the entity content. 8091 * str is updated to the current value of the index 8092 */ 8093 static xmlEntityPtr 8094 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8095 const xmlChar *ptr; 8096 xmlChar cur; 8097 xmlChar *name; 8098 xmlEntityPtr entity = NULL; 8099 8100 if ((str == NULL) || (*str == NULL)) return(NULL); 8101 ptr = *str; 8102 cur = *ptr; 8103 if (cur != '%') 8104 return(NULL); 8105 ptr++; 8106 name = xmlParseStringName(ctxt, &ptr); 8107 if (name == NULL) { 8108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8109 "xmlParseStringPEReference: no name\n"); 8110 *str = ptr; 8111 return(NULL); 8112 } 8113 cur = *ptr; 8114 if (cur != ';') { 8115 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8116 xmlFree(name); 8117 *str = ptr; 8118 return(NULL); 8119 } 8120 ptr++; 8121 8122 /* 8123 * Increate the number of entity references parsed 8124 */ 8125 ctxt->nbentities++; 8126 8127 /* 8128 * Request the entity from SAX 8129 */ 8130 if ((ctxt->sax != NULL) && 8131 (ctxt->sax->getParameterEntity != NULL)) 8132 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8133 if (ctxt->instate == XML_PARSER_EOF) { 8134 xmlFree(name); 8135 *str = ptr; 8136 return(NULL); 8137 } 8138 if (entity == NULL) { 8139 /* 8140 * [ WFC: Entity Declared ] 8141 * In a document without any DTD, a document with only an 8142 * internal DTD subset which contains no parameter entity 8143 * references, or a document with "standalone='yes'", ... 8144 * ... The declaration of a parameter entity must precede 8145 * any reference to it... 
8146 */ 8147 if ((ctxt->standalone == 1) || 8148 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8149 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8150 "PEReference: %%%s; not found\n", name); 8151 } else { 8152 /* 8153 * [ VC: Entity Declared ] 8154 * In a document with an external subset or external 8155 * parameter entities with "standalone='no'", ... 8156 * ... The declaration of a parameter entity must 8157 * precede any reference to it... 8158 */ 8159 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8160 "PEReference: %%%s; not found\n", 8161 name, NULL); 8162 ctxt->valid = 0; 8163 } 8164 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8165 } else { 8166 /* 8167 * Internal checking in case the entity quest barfed 8168 */ 8169 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8170 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8171 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8172 "%%%s; is not a parameter entity\n", 8173 name, NULL); 8174 } 8175 } 8176 ctxt->hasPErefs = 1; 8177 xmlFree(name); 8178 *str = ptr; 8179 return(entity); 8180 } 8181 8182 /** 8183 * xmlParseDocTypeDecl: 8184 * @ctxt: an XML parser context 8185 * 8186 * parse a DOCTYPE declaration 8187 * 8188 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8189 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8190 * 8191 * [ VC: Root Element Type ] 8192 * The Name in the document type declaration must match the element 8193 * type of the root element. 8194 */ 8195 8196 void 8197 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8198 const xmlChar *name = NULL; 8199 xmlChar *ExternalID = NULL; 8200 xmlChar *URI = NULL; 8201 8202 /* 8203 * We know that '<!DOCTYPE' has been detected. 8204 */ 8205 SKIP(9); 8206 8207 SKIP_BLANKS; 8208 8209 /* 8210 * Parse the DOCTYPE name. 
8211 */ 8212 name = xmlParseName(ctxt); 8213 if (name == NULL) { 8214 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8215 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8216 } 8217 ctxt->intSubName = name; 8218 8219 SKIP_BLANKS; 8220 8221 /* 8222 * Check for SystemID and ExternalID 8223 */ 8224 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8225 8226 if ((URI != NULL) || (ExternalID != NULL)) { 8227 ctxt->hasExternalSubset = 1; 8228 } 8229 ctxt->extSubURI = URI; 8230 ctxt->extSubSystem = ExternalID; 8231 8232 SKIP_BLANKS; 8233 8234 /* 8235 * Create and update the internal subset. 8236 */ 8237 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8238 (!ctxt->disableSAX)) 8239 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8240 if (ctxt->instate == XML_PARSER_EOF) 8241 return; 8242 8243 /* 8244 * Is there any internal subset declarations ? 8245 * they are handled separately in xmlParseInternalSubset() 8246 */ 8247 if (RAW == '[') 8248 return; 8249 8250 /* 8251 * We should be at the end of the DOCTYPE declaration. 8252 */ 8253 if (RAW != '>') { 8254 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8255 } 8256 NEXT; 8257 } 8258 8259 /** 8260 * xmlParseInternalSubset: 8261 * @ctxt: an XML parser context 8262 * 8263 * parse the internal subset declaration 8264 * 8265 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8266 */ 8267 8268 static void 8269 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8270 /* 8271 * Is there any DTD definition ? 8272 */ 8273 if (RAW == '[') { 8274 int baseInputNr = ctxt->inputNr; 8275 ctxt->instate = XML_PARSER_DTD; 8276 NEXT; 8277 /* 8278 * Parse the succession of Markup declarations and 8279 * PEReferences. 
8280 * Subsequence (markupdecl | PEReference | S)* 8281 */ 8282 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && 8283 (ctxt->instate != XML_PARSER_EOF)) { 8284 const xmlChar *check = CUR_PTR; 8285 unsigned int cons = ctxt->input->consumed; 8286 8287 SKIP_BLANKS; 8288 xmlParseMarkupDecl(ctxt); 8289 xmlParsePEReference(ctxt); 8290 8291 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8292 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8293 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8294 if (ctxt->inputNr > baseInputNr) 8295 xmlPopInput(ctxt); 8296 else 8297 break; 8298 } 8299 } 8300 if (RAW == ']') { 8301 NEXT; 8302 SKIP_BLANKS; 8303 } 8304 } 8305 8306 /* 8307 * We should be at the end of the DOCTYPE declaration. 8308 */ 8309 if (RAW != '>') { 8310 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8311 return; 8312 } 8313 NEXT; 8314 } 8315 8316 #ifdef LIBXML_SAX1_ENABLED 8317 /** 8318 * xmlParseAttribute: 8319 * @ctxt: an XML parser context 8320 * @value: a xmlChar ** used to store the value of the attribute 8321 * 8322 * parse an attribute 8323 * 8324 * [41] Attribute ::= Name Eq AttValue 8325 * 8326 * [ WFC: No External Entity References ] 8327 * Attribute values cannot contain direct or indirect entity references 8328 * to external entities. 8329 * 8330 * [ WFC: No < in Attribute Values ] 8331 * The replacement text of any entity referred to directly or indirectly in 8332 * an attribute value (other than "<") must not contain a <. 8333 * 8334 * [ VC: Attribute Value Type ] 8335 * The attribute must have been declared; the value must be of the type 8336 * declared for it. 8337 * 8338 * [25] Eq ::= S? '=' S? 8339 * 8340 * With namespace: 8341 * 8342 * [NS 11] Attribute ::= QName Eq AttValue 8343 * 8344 * Also the case QName == xmlns:??? is handled independently as a namespace 8345 * definition. 8346 * 8347 * Returns the attribute name, and the value in *value. 
8348 */ 8349 8350 const xmlChar * 8351 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8352 const xmlChar *name; 8353 xmlChar *val; 8354 8355 *value = NULL; 8356 GROW; 8357 name = xmlParseName(ctxt); 8358 if (name == NULL) { 8359 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8360 "error parsing attribute name\n"); 8361 return(NULL); 8362 } 8363 8364 /* 8365 * read the value 8366 */ 8367 SKIP_BLANKS; 8368 if (RAW == '=') { 8369 NEXT; 8370 SKIP_BLANKS; 8371 val = xmlParseAttValue(ctxt); 8372 ctxt->instate = XML_PARSER_CONTENT; 8373 } else { 8374 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8375 "Specification mandates value for attribute %s\n", name); 8376 return(NULL); 8377 } 8378 8379 /* 8380 * Check that xml:lang conforms to the specification 8381 * No more registered as an error, just generate a warning now 8382 * since this was deprecated in XML second edition 8383 */ 8384 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8385 if (!xmlCheckLanguageID(val)) { 8386 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8387 "Malformed value for xml:lang : %s\n", 8388 val, NULL); 8389 } 8390 } 8391 8392 /* 8393 * Check that xml:space conforms to the specification 8394 */ 8395 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8396 if (xmlStrEqual(val, BAD_CAST "default")) 8397 *(ctxt->space) = 0; 8398 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8399 *(ctxt->space) = 1; 8400 else { 8401 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8402 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8403 val, NULL); 8404 } 8405 } 8406 8407 *value = val; 8408 return(name); 8409 } 8410 8411 /** 8412 * xmlParseStartTag: 8413 * @ctxt: an XML parser context 8414 * 8415 * parse a start of tag either for rule element or 8416 * EmptyElement. In both case we don't parse the tag closing chars. 8417 * 8418 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8419 * 8420 * [ WFC: Unique Att Spec ] 8421 * No attribute name may appear more than once in the same start-tag or 8422 * empty-element tag. 8423 * 8424 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8425 * 8426 * [ WFC: Unique Att Spec ] 8427 * No attribute name may appear more than once in the same start-tag or 8428 * empty-element tag. 8429 * 8430 * With namespace: 8431 * 8432 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8433 * 8434 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8435 * 8436 * Returns the element name parsed 8437 */ 8438 8439 const xmlChar * 8440 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8441 const xmlChar *name; 8442 const xmlChar *attname; 8443 xmlChar *attvalue; 8444 const xmlChar **atts = ctxt->atts; 8445 int nbatts = 0; 8446 int maxatts = ctxt->maxatts; 8447 int i; 8448 8449 if (RAW != '<') return(NULL); 8450 NEXT1; 8451 8452 name = xmlParseName(ctxt); 8453 if (name == NULL) { 8454 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8455 "xmlParseStartTag: invalid element name\n"); 8456 return(NULL); 8457 } 8458 8459 /* 8460 * Now parse the attributes, it ends up with the ending 8461 * 8462 * (S Attribute)* S? 8463 */ 8464 SKIP_BLANKS; 8465 GROW; 8466 8467 while (((RAW != '>') && 8468 ((RAW != '/') || (NXT(1) != '>')) && 8469 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8470 const xmlChar *q = CUR_PTR; 8471 unsigned int cons = ctxt->input->consumed; 8472 8473 attname = xmlParseAttribute(ctxt, &attvalue); 8474 if ((attname != NULL) && (attvalue != NULL)) { 8475 /* 8476 * [ WFC: Unique Att Spec ] 8477 * No attribute name may appear more than once in the same 8478 * start-tag or empty-element tag. 
8479 */ 8480 for (i = 0; i < nbatts;i += 2) { 8481 if (xmlStrEqual(atts[i], attname)) { 8482 xmlErrAttributeDup(ctxt, NULL, attname); 8483 xmlFree(attvalue); 8484 goto failed; 8485 } 8486 } 8487 /* 8488 * Add the pair to atts 8489 */ 8490 if (atts == NULL) { 8491 maxatts = 22; /* allow for 10 attrs by default */ 8492 atts = (const xmlChar **) 8493 xmlMalloc(maxatts * sizeof(xmlChar *)); 8494 if (atts == NULL) { 8495 xmlErrMemory(ctxt, NULL); 8496 if (attvalue != NULL) 8497 xmlFree(attvalue); 8498 goto failed; 8499 } 8500 ctxt->atts = atts; 8501 ctxt->maxatts = maxatts; 8502 } else if (nbatts + 4 > maxatts) { 8503 const xmlChar **n; 8504 8505 maxatts *= 2; 8506 n = (const xmlChar **) xmlRealloc((void *) atts, 8507 maxatts * sizeof(const xmlChar *)); 8508 if (n == NULL) { 8509 xmlErrMemory(ctxt, NULL); 8510 if (attvalue != NULL) 8511 xmlFree(attvalue); 8512 goto failed; 8513 } 8514 atts = n; 8515 ctxt->atts = atts; 8516 ctxt->maxatts = maxatts; 8517 } 8518 atts[nbatts++] = attname; 8519 atts[nbatts++] = attvalue; 8520 atts[nbatts] = NULL; 8521 atts[nbatts + 1] = NULL; 8522 } else { 8523 if (attvalue != NULL) 8524 xmlFree(attvalue); 8525 } 8526 8527 failed: 8528 8529 GROW 8530 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8531 break; 8532 if (SKIP_BLANKS == 0) { 8533 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8534 "attributes construct error\n"); 8535 } 8536 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8537 (attname == NULL) && (attvalue == NULL)) { 8538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8539 "xmlParseStartTag: problem parsing attributes\n"); 8540 break; 8541 } 8542 SHRINK; 8543 GROW; 8544 } 8545 8546 /* 8547 * SAX: Start of Element ! 
8548 */ 8549 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8550 (!ctxt->disableSAX)) { 8551 if (nbatts > 0) 8552 ctxt->sax->startElement(ctxt->userData, name, atts); 8553 else 8554 ctxt->sax->startElement(ctxt->userData, name, NULL); 8555 } 8556 8557 if (atts != NULL) { 8558 /* Free only the content strings */ 8559 for (i = 1;i < nbatts;i+=2) 8560 if (atts[i] != NULL) 8561 xmlFree((xmlChar *) atts[i]); 8562 } 8563 return(name); 8564 } 8565 8566 /** 8567 * xmlParseEndTag1: 8568 * @ctxt: an XML parser context 8569 * @line: line of the start tag 8570 * @nsNr: number of namespaces on the start tag 8571 * 8572 * parse an end of tag 8573 * 8574 * [42] ETag ::= '</' Name S? '>' 8575 * 8576 * With namespace 8577 * 8578 * [NS 9] ETag ::= '</' QName S? '>' 8579 */ 8580 8581 static void 8582 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8583 const xmlChar *name; 8584 8585 GROW; 8586 if ((RAW != '<') || (NXT(1) != '/')) { 8587 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8588 "xmlParseEndTag: '</' not found\n"); 8589 return; 8590 } 8591 SKIP(2); 8592 8593 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8594 8595 /* 8596 * We should definitely be at the ending "S? '>'" part 8597 */ 8598 GROW; 8599 SKIP_BLANKS; 8600 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8601 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8602 } else 8603 NEXT1; 8604 8605 /* 8606 * [ WFC: Element Type Match ] 8607 * The Name in an element's end-tag must match the element type in the 8608 * start-tag. 
8609 * 8610 */ 8611 if (name != (xmlChar*)1) { 8612 if (name == NULL) name = BAD_CAST "unparseable"; 8613 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8614 "Opening and ending tag mismatch: %s line %d and %s\n", 8615 ctxt->name, line, name); 8616 } 8617 8618 /* 8619 * SAX: End of Tag 8620 */ 8621 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8622 (!ctxt->disableSAX)) 8623 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8624 8625 namePop(ctxt); 8626 spacePop(ctxt); 8627 return; 8628 } 8629 8630 /** 8631 * xmlParseEndTag: 8632 * @ctxt: an XML parser context 8633 * 8634 * parse an end of tag 8635 * 8636 * [42] ETag ::= '</' Name S? '>' 8637 * 8638 * With namespace 8639 * 8640 * [NS 9] ETag ::= '</' QName S? '>' 8641 */ 8642 8643 void 8644 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8645 xmlParseEndTag1(ctxt, 0); 8646 } 8647 #endif /* LIBXML_SAX1_ENABLED */ 8648 8649 /************************************************************************ 8650 * * 8651 * SAX 2 specific operations * 8652 * * 8653 ************************************************************************/ 8654 8655 /* 8656 * xmlGetNamespace: 8657 * @ctxt: an XML parser context 8658 * @prefix: the prefix to lookup 8659 * 8660 * Lookup the namespace name for the @prefix (which ca be NULL) 8661 * The prefix must come from the @ctxt->dict dictionary 8662 * 8663 * Returns the namespace name or NULL if not bound 8664 */ 8665 static const xmlChar * 8666 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8667 int i; 8668 8669 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8670 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8671 if (ctxt->nsTab[i] == prefix) { 8672 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8673 return(NULL); 8674 return(ctxt->nsTab[i + 1]); 8675 } 8676 return(NULL); 8677 } 8678 8679 /** 8680 * xmlParseQName: 8681 * @ctxt: an XML parser context 8682 * @prefix: pointer to store the prefix part 8683 * 8684 * parse an XML Namespace QName 8685 * 8686 * 
[6] QName ::= (Prefix ':')? LocalPart 8687 * [7] Prefix ::= NCName 8688 * [8] LocalPart ::= NCName 8689 * 8690 * Returns the Name parsed or NULL 8691 */ 8692 8693 static const xmlChar * 8694 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8695 const xmlChar *l, *p; 8696 8697 GROW; 8698 8699 l = xmlParseNCName(ctxt); 8700 if (l == NULL) { 8701 if (CUR == ':') { 8702 l = xmlParseName(ctxt); 8703 if (l != NULL) { 8704 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8705 "Failed to parse QName '%s'\n", l, NULL, NULL); 8706 *prefix = NULL; 8707 return(l); 8708 } 8709 } 8710 return(NULL); 8711 } 8712 if (CUR == ':') { 8713 NEXT; 8714 p = l; 8715 l = xmlParseNCName(ctxt); 8716 if (l == NULL) { 8717 xmlChar *tmp; 8718 8719 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8720 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8721 l = xmlParseNmtoken(ctxt); 8722 if (l == NULL) 8723 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8724 else { 8725 tmp = xmlBuildQName(l, p, NULL, 0); 8726 xmlFree((char *)l); 8727 } 8728 p = xmlDictLookup(ctxt->dict, tmp, -1); 8729 if (tmp != NULL) xmlFree(tmp); 8730 *prefix = NULL; 8731 return(p); 8732 } 8733 if (CUR == ':') { 8734 xmlChar *tmp; 8735 8736 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8737 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8738 NEXT; 8739 tmp = (xmlChar *) xmlParseName(ctxt); 8740 if (tmp != NULL) { 8741 tmp = xmlBuildQName(tmp, l, NULL, 0); 8742 l = xmlDictLookup(ctxt->dict, tmp, -1); 8743 if (tmp != NULL) xmlFree(tmp); 8744 *prefix = p; 8745 return(l); 8746 } 8747 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8748 l = xmlDictLookup(ctxt->dict, tmp, -1); 8749 if (tmp != NULL) xmlFree(tmp); 8750 *prefix = p; 8751 return(l); 8752 } 8753 *prefix = p; 8754 } else 8755 *prefix = NULL; 8756 return(l); 8757 } 8758 8759 /** 8760 * xmlParseQNameAndCompare: 8761 * @ctxt: an XML parser context 8762 * @name: the localname 8763 * @prefix: the prefix, if any. 
8764 * 8765 * parse an XML name and compares for match 8766 * (specialized for endtag parsing) 8767 * 8768 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8769 * and the name for mismatch 8770 */ 8771 8772 static const xmlChar * 8773 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8774 xmlChar const *prefix) { 8775 const xmlChar *cmp; 8776 const xmlChar *in; 8777 const xmlChar *ret; 8778 const xmlChar *prefix2; 8779 8780 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8781 8782 GROW; 8783 in = ctxt->input->cur; 8784 8785 cmp = prefix; 8786 while (*in != 0 && *in == *cmp) { 8787 ++in; 8788 ++cmp; 8789 } 8790 if ((*cmp == 0) && (*in == ':')) { 8791 in++; 8792 cmp = name; 8793 while (*in != 0 && *in == *cmp) { 8794 ++in; 8795 ++cmp; 8796 } 8797 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8798 /* success */ 8799 ctxt->input->cur = in; 8800 return((const xmlChar*) 1); 8801 } 8802 } 8803 /* 8804 * all strings coms from the dictionary, equality can be done directly 8805 */ 8806 ret = xmlParseQName (ctxt, &prefix2); 8807 if ((ret == name) && (prefix == prefix2)) 8808 return((const xmlChar*) 1); 8809 return ret; 8810 } 8811 8812 /** 8813 * xmlParseAttValueInternal: 8814 * @ctxt: an XML parser context 8815 * @len: attribute len result 8816 * @alloc: whether the attribute was reallocated as a new string 8817 * @normalize: if 1 then further non-CDATA normalization must be done 8818 * 8819 * parse a value for an attribute. 8820 * NOTE: if no normalization is needed, the routine will return pointers 8821 * directly from the data buffer. 
8822 * 8823 * 3.3.3 Attribute-Value Normalization: 8824 * Before the value of an attribute is passed to the application or 8825 * checked for validity, the XML processor must normalize it as follows: 8826 * - a character reference is processed by appending the referenced 8827 * character to the attribute value 8828 * - an entity reference is processed by recursively processing the 8829 * replacement text of the entity 8830 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8831 * appending #x20 to the normalized value, except that only a single 8832 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8833 * parsed entity or the literal entity value of an internal parsed entity 8834 * - other characters are processed by appending them to the normalized value 8835 * If the declared value is not CDATA, then the XML processor must further 8836 * process the normalized attribute value by discarding any leading and 8837 * trailing space (#x20) characters, and by replacing sequences of space 8838 * (#x20) characters by a single space (#x20) character. 8839 * All attributes for which no declaration has been read should be treated 8840 * by a non-validating parser as if declared CDATA. 8841 * 8842 * Returns the AttValue parsed or NULL. The value has to be freed by the 8843 * caller if it was copied, this can be detected by val[*len] == 0. 
8844 */ 8845 8846 static xmlChar * 8847 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8848 int normalize) 8849 { 8850 xmlChar limit = 0; 8851 const xmlChar *in = NULL, *start, *end, *last; 8852 xmlChar *ret = NULL; 8853 int line, col; 8854 8855 GROW; 8856 in = (xmlChar *) CUR_PTR; 8857 line = ctxt->input->line; 8858 col = ctxt->input->col; 8859 if (*in != '"' && *in != '\'') { 8860 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8861 return (NULL); 8862 } 8863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8864 8865 /* 8866 * try to handle in this routine the most common case where no 8867 * allocation of a new string is required and where content is 8868 * pure ASCII. 8869 */ 8870 limit = *in++; 8871 col++; 8872 end = ctxt->input->end; 8873 start = in; 8874 if (in >= end) { 8875 const xmlChar *oldbase = ctxt->input->base; 8876 GROW; 8877 if (oldbase != ctxt->input->base) { 8878 long delta = ctxt->input->base - oldbase; 8879 start = start + delta; 8880 in = in + delta; 8881 } 8882 end = ctxt->input->end; 8883 } 8884 if (normalize) { 8885 /* 8886 * Skip any leading spaces 8887 */ 8888 while ((in < end) && (*in != limit) && 8889 ((*in == 0x20) || (*in == 0x9) || 8890 (*in == 0xA) || (*in == 0xD))) { 8891 if (*in == 0xA) { 8892 line++; col = 1; 8893 } else { 8894 col++; 8895 } 8896 in++; 8897 start = in; 8898 if (in >= end) { 8899 const xmlChar *oldbase = ctxt->input->base; 8900 GROW; 8901 if (ctxt->instate == XML_PARSER_EOF) 8902 return(NULL); 8903 if (oldbase != ctxt->input->base) { 8904 long delta = ctxt->input->base - oldbase; 8905 start = start + delta; 8906 in = in + delta; 8907 } 8908 end = ctxt->input->end; 8909 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8910 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8911 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8912 "AttValue length too long\n"); 8913 return(NULL); 8914 } 8915 } 8916 } 8917 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8918 (*in <= 0x7f) && (*in != '&') && 
(*in != '<')) { 8919 col++; 8920 if ((*in++ == 0x20) && (*in == 0x20)) break; 8921 if (in >= end) { 8922 const xmlChar *oldbase = ctxt->input->base; 8923 GROW; 8924 if (ctxt->instate == XML_PARSER_EOF) 8925 return(NULL); 8926 if (oldbase != ctxt->input->base) { 8927 long delta = ctxt->input->base - oldbase; 8928 start = start + delta; 8929 in = in + delta; 8930 } 8931 end = ctxt->input->end; 8932 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8933 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8934 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8935 "AttValue length too long\n"); 8936 return(NULL); 8937 } 8938 } 8939 } 8940 last = in; 8941 /* 8942 * skip the trailing blanks 8943 */ 8944 while ((last[-1] == 0x20) && (last > start)) last--; 8945 while ((in < end) && (*in != limit) && 8946 ((*in == 0x20) || (*in == 0x9) || 8947 (*in == 0xA) || (*in == 0xD))) { 8948 if (*in == 0xA) { 8949 line++, col = 1; 8950 } else { 8951 col++; 8952 } 8953 in++; 8954 if (in >= end) { 8955 const xmlChar *oldbase = ctxt->input->base; 8956 GROW; 8957 if (ctxt->instate == XML_PARSER_EOF) 8958 return(NULL); 8959 if (oldbase != ctxt->input->base) { 8960 long delta = ctxt->input->base - oldbase; 8961 start = start + delta; 8962 in = in + delta; 8963 last = last + delta; 8964 } 8965 end = ctxt->input->end; 8966 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8967 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8968 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8969 "AttValue length too long\n"); 8970 return(NULL); 8971 } 8972 } 8973 } 8974 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8975 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8976 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8977 "AttValue length too long\n"); 8978 return(NULL); 8979 } 8980 if (*in != limit) goto need_complex; 8981 } else { 8982 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8983 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8984 in++; 8985 col++; 8986 if (in >= end) { 8987 const xmlChar *oldbase = 
ctxt->input->base; 8988 GROW; 8989 if (ctxt->instate == XML_PARSER_EOF) 8990 return(NULL); 8991 if (oldbase != ctxt->input->base) { 8992 long delta = ctxt->input->base - oldbase; 8993 start = start + delta; 8994 in = in + delta; 8995 } 8996 end = ctxt->input->end; 8997 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8998 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8999 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9000 "AttValue length too long\n"); 9001 return(NULL); 9002 } 9003 } 9004 } 9005 last = in; 9006 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9007 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9008 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9009 "AttValue length too long\n"); 9010 return(NULL); 9011 } 9012 if (*in != limit) goto need_complex; 9013 } 9014 in++; 9015 col++; 9016 if (len != NULL) { 9017 *len = last - start; 9018 ret = (xmlChar *) start; 9019 } else { 9020 if (alloc) *alloc = 1; 9021 ret = xmlStrndup(start, last - start); 9022 } 9023 CUR_PTR = in; 9024 ctxt->input->line = line; 9025 ctxt->input->col = col; 9026 if (alloc) *alloc = 0; 9027 return ret; 9028 need_complex: 9029 if (alloc) *alloc = 1; 9030 return xmlParseAttValueComplex(ctxt, len, normalize); 9031 } 9032 9033 /** 9034 * xmlParseAttribute2: 9035 * @ctxt: an XML parser context 9036 * @pref: the element prefix 9037 * @elem: the element name 9038 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9039 * @value: a xmlChar ** used to store the value of the attribute 9040 * @len: an int * to save the length of the attribute 9041 * @alloc: an int * to indicate if the attribute was allocated 9042 * 9043 * parse an attribute in the new SAX2 framework. 9044 * 9045 * Returns the attribute name, and the value in *value, . 
9046 */ 9047 9048 static const xmlChar * 9049 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9050 const xmlChar * pref, const xmlChar * elem, 9051 const xmlChar ** prefix, xmlChar ** value, 9052 int *len, int *alloc) 9053 { 9054 const xmlChar *name; 9055 xmlChar *val, *internal_val = NULL; 9056 int normalize = 0; 9057 9058 *value = NULL; 9059 GROW; 9060 name = xmlParseQName(ctxt, prefix); 9061 if (name == NULL) { 9062 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9063 "error parsing attribute name\n"); 9064 return (NULL); 9065 } 9066 9067 /* 9068 * get the type if needed 9069 */ 9070 if (ctxt->attsSpecial != NULL) { 9071 int type; 9072 9073 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial, 9074 pref, elem, *prefix, name); 9075 if (type != 0) 9076 normalize = 1; 9077 } 9078 9079 /* 9080 * read the value 9081 */ 9082 SKIP_BLANKS; 9083 if (RAW == '=') { 9084 NEXT; 9085 SKIP_BLANKS; 9086 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9087 if (normalize) { 9088 /* 9089 * Sometimes a second normalisation pass for spaces is needed 9090 * but that only happens if charrefs or entities refernces 9091 * have been used in the attribute value, i.e. the attribute 9092 * value have been extracted in an allocated string already. 
9093 */ 9094 if (*alloc) { 9095 const xmlChar *val2; 9096 9097 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9098 if ((val2 != NULL) && (val2 != val)) { 9099 xmlFree(val); 9100 val = (xmlChar *) val2; 9101 } 9102 } 9103 } 9104 ctxt->instate = XML_PARSER_CONTENT; 9105 } else { 9106 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9107 "Specification mandates value for attribute %s\n", 9108 name); 9109 return (NULL); 9110 } 9111 9112 if (*prefix == ctxt->str_xml) { 9113 /* 9114 * Check that xml:lang conforms to the specification 9115 * No more registered as an error, just generate a warning now 9116 * since this was deprecated in XML second edition 9117 */ 9118 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9119 internal_val = xmlStrndup(val, *len); 9120 if (!xmlCheckLanguageID(internal_val)) { 9121 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9122 "Malformed value for xml:lang : %s\n", 9123 internal_val, NULL); 9124 } 9125 } 9126 9127 /* 9128 * Check that xml:space conforms to the specification 9129 */ 9130 if (xmlStrEqual(name, BAD_CAST "space")) { 9131 internal_val = xmlStrndup(val, *len); 9132 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9133 *(ctxt->space) = 0; 9134 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9135 *(ctxt->space) = 1; 9136 else { 9137 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9138 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9139 internal_val, NULL); 9140 } 9141 } 9142 if (internal_val) { 9143 xmlFree(internal_val); 9144 } 9145 } 9146 9147 *value = val; 9148 return (name); 9149 } 9150 /** 9151 * xmlParseStartTag2: 9152 * @ctxt: an XML parser context 9153 * 9154 * parse a start of tag either for rule element or 9155 * EmptyElement. In both case we don't parse the tag closing chars. 9156 * This routine is called when running SAX2 parsing 9157 * 9158 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9159 * 9160 * [ WFC: Unique Att Spec ] 9161 * No attribute name may appear more than once in the same start-tag or 9162 * empty-element tag. 9163 * 9164 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9165 * 9166 * [ WFC: Unique Att Spec ] 9167 * No attribute name may appear more than once in the same start-tag or 9168 * empty-element tag. 9169 * 9170 * With namespace: 9171 * 9172 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9173 * 9174 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9175 * 9176 * Returns the element name parsed 9177 */ 9178 9179 static const xmlChar * 9180 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9181 const xmlChar **URI, int *tlen) { 9182 const xmlChar *localname; 9183 const xmlChar *prefix; 9184 const xmlChar *attname; 9185 const xmlChar *aprefix; 9186 const xmlChar *nsname; 9187 xmlChar *attvalue; 9188 const xmlChar **atts = ctxt->atts; 9189 int maxatts = ctxt->maxatts; 9190 int nratts, nbatts, nbdef, inputid; 9191 int i, j, nbNs, attval; 9192 unsigned long cur; 9193 int nsNr = ctxt->nsNr; 9194 9195 if (RAW != '<') return(NULL); 9196 NEXT1; 9197 9198 /* 9199 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9200 * point since the attribute values may be stored as pointers to 9201 * the buffer and calling SHRINK would destroy them ! 9202 * The Shrinking is only possible once the full set of attribute 9203 * callbacks have been done. 9204 */ 9205 SHRINK; 9206 cur = ctxt->input->cur - ctxt->input->base; 9207 inputid = ctxt->input->id; 9208 nbatts = 0; 9209 nratts = 0; 9210 nbdef = 0; 9211 nbNs = 0; 9212 attval = 0; 9213 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9214 ctxt->nsNr = nsNr; 9215 9216 localname = xmlParseQName(ctxt, &prefix); 9217 if (localname == NULL) { 9218 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9219 "StartTag: invalid element name\n"); 9220 return(NULL); 9221 } 9222 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9223 9224 /* 9225 * Now parse the attributes, it ends up with the ending 9226 * 9227 * (S Attribute)* S? 9228 */ 9229 SKIP_BLANKS; 9230 GROW; 9231 9232 while (((RAW != '>') && 9233 ((RAW != '/') || (NXT(1) != '>')) && 9234 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9235 const xmlChar *q = CUR_PTR; 9236 unsigned int cons = ctxt->input->consumed; 9237 int len = -1, alloc = 0; 9238 9239 attname = xmlParseAttribute2(ctxt, prefix, localname, 9240 &aprefix, &attvalue, &len, &alloc); 9241 if ((attname == NULL) || (attvalue == NULL)) 9242 goto next_attr; 9243 if (len < 0) len = xmlStrlen(attvalue); 9244 9245 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9246 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9247 xmlURIPtr uri; 9248 9249 if (URL == NULL) { 9250 xmlErrMemory(ctxt, "dictionary allocation failure"); 9251 if ((attvalue != NULL) && (alloc != 0)) 9252 xmlFree(attvalue); 9253 return(NULL); 9254 } 9255 if (*URL != 0) { 9256 uri = xmlParseURI((const char *) URL); 9257 if (uri == NULL) { 9258 xmlNsErr(ctxt, XML_WAR_NS_URI, 9259 "xmlns: '%s' is not a valid URI\n", 9260 URL, NULL, NULL); 9261 } else { 9262 if (uri->scheme == NULL) { 9263 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9264 "xmlns: URI %s is not absolute\n", 9265 URL, NULL, NULL); 9266 } 9267 xmlFreeURI(uri); 9268 } 9269 if (URL == ctxt->str_xml_ns) { 9270 if (attname != ctxt->str_xml) { 9271 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9272 "xml namespace URI cannot be the default namespace\n", 9273 NULL, NULL, NULL); 9274 } 9275 goto next_attr; 9276 } 9277 if ((len == 29) && 9278 (xmlStrEqual(URL, 9279 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9280 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 
9281 "reuse of the xmlns namespace name is forbidden\n", 9282 NULL, NULL, NULL); 9283 goto next_attr; 9284 } 9285 } 9286 /* 9287 * check that it's not a defined namespace 9288 */ 9289 for (j = 1;j <= nbNs;j++) 9290 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9291 break; 9292 if (j <= nbNs) 9293 xmlErrAttributeDup(ctxt, NULL, attname); 9294 else 9295 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9296 9297 } else if (aprefix == ctxt->str_xmlns) { 9298 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9299 xmlURIPtr uri; 9300 9301 if (attname == ctxt->str_xml) { 9302 if (URL != ctxt->str_xml_ns) { 9303 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9304 "xml namespace prefix mapped to wrong URI\n", 9305 NULL, NULL, NULL); 9306 } 9307 /* 9308 * Do not keep a namespace definition node 9309 */ 9310 goto next_attr; 9311 } 9312 if (URL == ctxt->str_xml_ns) { 9313 if (attname != ctxt->str_xml) { 9314 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9315 "xml namespace URI mapped to wrong prefix\n", 9316 NULL, NULL, NULL); 9317 } 9318 goto next_attr; 9319 } 9320 if (attname == ctxt->str_xmlns) { 9321 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9322 "redefinition of the xmlns prefix is forbidden\n", 9323 NULL, NULL, NULL); 9324 goto next_attr; 9325 } 9326 if ((len == 29) && 9327 (xmlStrEqual(URL, 9328 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9329 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9330 "reuse of the xmlns namespace name is forbidden\n", 9331 NULL, NULL, NULL); 9332 goto next_attr; 9333 } 9334 if ((URL == NULL) || (URL[0] == 0)) { 9335 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9336 "xmlns:%s: Empty XML namespace is not allowed\n", 9337 attname, NULL, NULL); 9338 goto next_attr; 9339 } else { 9340 uri = xmlParseURI((const char *) URL); 9341 if (uri == NULL) { 9342 xmlNsErr(ctxt, XML_WAR_NS_URI, 9343 "xmlns:%s: '%s' is not a valid URI\n", 9344 attname, URL, NULL); 9345 } else { 9346 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9347 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 
9348 "xmlns:%s: URI %s is not absolute\n", 9349 attname, URL, NULL); 9350 } 9351 xmlFreeURI(uri); 9352 } 9353 } 9354 9355 /* 9356 * check that it's not a defined namespace 9357 */ 9358 for (j = 1;j <= nbNs;j++) 9359 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9360 break; 9361 if (j <= nbNs) 9362 xmlErrAttributeDup(ctxt, aprefix, attname); 9363 else 9364 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9365 9366 } else { 9367 /* 9368 * Add the pair to atts 9369 */ 9370 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9371 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9372 goto next_attr; 9373 } 9374 maxatts = ctxt->maxatts; 9375 atts = ctxt->atts; 9376 } 9377 ctxt->attallocs[nratts++] = alloc; 9378 atts[nbatts++] = attname; 9379 atts[nbatts++] = aprefix; 9380 /* 9381 * The namespace URI field is used temporarily to point at the 9382 * base of the current input buffer for non-alloced attributes. 9383 * When the input buffer is reallocated, all the pointers become 9384 * invalid, but they can be reconstructed later. 
9385 */ 9386 if (alloc) 9387 atts[nbatts++] = NULL; 9388 else 9389 atts[nbatts++] = ctxt->input->base; 9390 atts[nbatts++] = attvalue; 9391 attvalue += len; 9392 atts[nbatts++] = attvalue; 9393 /* 9394 * tag if some deallocation is needed 9395 */ 9396 if (alloc != 0) attval = 1; 9397 attvalue = NULL; /* moved into atts */ 9398 } 9399 9400 next_attr: 9401 if ((attvalue != NULL) && (alloc != 0)) { 9402 xmlFree(attvalue); 9403 attvalue = NULL; 9404 } 9405 9406 GROW 9407 if (ctxt->instate == XML_PARSER_EOF) 9408 break; 9409 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9410 break; 9411 if (SKIP_BLANKS == 0) { 9412 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9413 "attributes construct error\n"); 9414 break; 9415 } 9416 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9417 (attname == NULL) && (attvalue == NULL)) { 9418 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9419 "xmlParseStartTag: problem parsing attributes\n"); 9420 break; 9421 } 9422 GROW; 9423 } 9424 9425 if (ctxt->input->id != inputid) { 9426 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9427 "Unexpected change of input\n"); 9428 localname = NULL; 9429 goto done; 9430 } 9431 9432 /* Reconstruct attribute value pointers. */ 9433 for (i = 0, j = 0; j < nratts; i += 5, j++) { 9434 if (atts[i+2] != NULL) { 9435 /* 9436 * Arithmetic on dangling pointers is technically undefined 9437 * behavior, but well... 
9438 */ 9439 ptrdiff_t offset = ctxt->input->base - atts[i+2]; 9440 atts[i+2] = NULL; /* Reset repurposed namespace URI */ 9441 atts[i+3] += offset; /* value */ 9442 atts[i+4] += offset; /* valuend */ 9443 } 9444 } 9445 9446 /* 9447 * The attributes defaulting 9448 */ 9449 if (ctxt->attsDefault != NULL) { 9450 xmlDefAttrsPtr defaults; 9451 9452 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9453 if (defaults != NULL) { 9454 for (i = 0;i < defaults->nbAttrs;i++) { 9455 attname = defaults->values[5 * i]; 9456 aprefix = defaults->values[5 * i + 1]; 9457 9458 /* 9459 * special work for namespaces defaulted defs 9460 */ 9461 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9462 /* 9463 * check that it's not a defined namespace 9464 */ 9465 for (j = 1;j <= nbNs;j++) 9466 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9467 break; 9468 if (j <= nbNs) continue; 9469 9470 nsname = xmlGetNamespace(ctxt, NULL); 9471 if (nsname != defaults->values[5 * i + 2]) { 9472 if (nsPush(ctxt, NULL, 9473 defaults->values[5 * i + 2]) > 0) 9474 nbNs++; 9475 } 9476 } else if (aprefix == ctxt->str_xmlns) { 9477 /* 9478 * check that it's not a defined namespace 9479 */ 9480 for (j = 1;j <= nbNs;j++) 9481 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9482 break; 9483 if (j <= nbNs) continue; 9484 9485 nsname = xmlGetNamespace(ctxt, attname); 9486 if (nsname != defaults->values[2]) { 9487 if (nsPush(ctxt, attname, 9488 defaults->values[5 * i + 2]) > 0) 9489 nbNs++; 9490 } 9491 } else { 9492 /* 9493 * check that it's not a defined attribute 9494 */ 9495 for (j = 0;j < nbatts;j+=5) { 9496 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9497 break; 9498 } 9499 if (j < nbatts) continue; 9500 9501 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9502 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9503 return(NULL); 9504 } 9505 maxatts = ctxt->maxatts; 9506 atts = ctxt->atts; 9507 } 9508 atts[nbatts++] = attname; 9509 atts[nbatts++] = aprefix; 9510 if (aprefix == NULL) 9511 
atts[nbatts++] = NULL; 9512 else 9513 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9514 atts[nbatts++] = defaults->values[5 * i + 2]; 9515 atts[nbatts++] = defaults->values[5 * i + 3]; 9516 if ((ctxt->standalone == 1) && 9517 (defaults->values[5 * i + 4] != NULL)) { 9518 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9519 "standalone: attribute %s on %s defaulted from external subset\n", 9520 attname, localname); 9521 } 9522 nbdef++; 9523 } 9524 } 9525 } 9526 } 9527 9528 /* 9529 * The attributes checkings 9530 */ 9531 for (i = 0; i < nbatts;i += 5) { 9532 /* 9533 * The default namespace does not apply to attribute names. 9534 */ 9535 if (atts[i + 1] != NULL) { 9536 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9537 if (nsname == NULL) { 9538 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9539 "Namespace prefix %s for %s on %s is not defined\n", 9540 atts[i + 1], atts[i], localname); 9541 } 9542 atts[i + 2] = nsname; 9543 } else 9544 nsname = NULL; 9545 /* 9546 * [ WFC: Unique Att Spec ] 9547 * No attribute name may appear more than once in the same 9548 * start-tag or empty-element tag. 9549 * As extended by the Namespace in XML REC. 9550 */ 9551 for (j = 0; j < i;j += 5) { 9552 if (atts[i] == atts[j]) { 9553 if (atts[i+1] == atts[j+1]) { 9554 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9555 break; 9556 } 9557 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9558 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9559 "Namespaced Attribute %s in '%s' redefined\n", 9560 atts[i], nsname, NULL); 9561 break; 9562 } 9563 } 9564 } 9565 } 9566 9567 nsname = xmlGetNamespace(ctxt, prefix); 9568 if ((prefix != NULL) && (nsname == NULL)) { 9569 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9570 "Namespace prefix %s on %s is not defined\n", 9571 prefix, localname, NULL); 9572 } 9573 *pref = prefix; 9574 *URI = nsname; 9575 9576 /* 9577 * SAX: Start of Element ! 
9578 */ 9579 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9580 (!ctxt->disableSAX)) { 9581 if (nbNs > 0) 9582 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9583 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9584 nbatts / 5, nbdef, atts); 9585 else 9586 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9587 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9588 } 9589 9590 done: 9591 /* 9592 * Free up attribute allocated strings if needed 9593 */ 9594 if (attval != 0) { 9595 for (i = 3,j = 0; j < nratts;i += 5,j++) 9596 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9597 xmlFree((xmlChar *) atts[i]); 9598 } 9599 9600 return(localname); 9601 } 9602 9603 /** 9604 * xmlParseEndTag2: 9605 * @ctxt: an XML parser context 9606 * @line: line of the start tag 9607 * @nsNr: number of namespaces on the start tag 9608 * 9609 * parse an end of tag 9610 * 9611 * [42] ETag ::= '</' Name S? '>' 9612 * 9613 * With namespace 9614 * 9615 * [NS 9] ETag ::= '</' QName S? 
'>' 9616 */ 9617 9618 static void 9619 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9620 const xmlChar *URI, int line, int nsNr, int tlen) { 9621 const xmlChar *name; 9622 size_t curLength; 9623 9624 GROW; 9625 if ((RAW != '<') || (NXT(1) != '/')) { 9626 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9627 return; 9628 } 9629 SKIP(2); 9630 9631 curLength = ctxt->input->end - ctxt->input->cur; 9632 if ((tlen > 0) && (curLength >= (size_t)tlen) && 9633 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9634 if ((curLength >= (size_t)(tlen + 1)) && 9635 (ctxt->input->cur[tlen] == '>')) { 9636 ctxt->input->cur += tlen + 1; 9637 ctxt->input->col += tlen + 1; 9638 goto done; 9639 } 9640 ctxt->input->cur += tlen; 9641 ctxt->input->col += tlen; 9642 name = (xmlChar*)1; 9643 } else { 9644 if (prefix == NULL) 9645 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9646 else 9647 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9648 } 9649 9650 /* 9651 * We should definitely be at the ending "S? '>'" part 9652 */ 9653 GROW; 9654 if (ctxt->instate == XML_PARSER_EOF) 9655 return; 9656 SKIP_BLANKS; 9657 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9658 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9659 } else 9660 NEXT1; 9661 9662 /* 9663 * [ WFC: Element Type Match ] 9664 * The Name in an element's end-tag must match the element type in the 9665 * start-tag. 
9666 * 9667 */ 9668 if (name != (xmlChar*)1) { 9669 if (name == NULL) name = BAD_CAST "unparseable"; 9670 if ((line == 0) && (ctxt->node != NULL)) 9671 line = ctxt->node->line; 9672 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9673 "Opening and ending tag mismatch: %s line %d and %s\n", 9674 ctxt->name, line, name); 9675 } 9676 9677 /* 9678 * SAX: End of Tag 9679 */ 9680 done: 9681 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9682 (!ctxt->disableSAX)) 9683 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9684 9685 spacePop(ctxt); 9686 if (nsNr != 0) 9687 nsPop(ctxt, nsNr); 9688 return; 9689 } 9690 9691 /** 9692 * xmlParseCDSect: 9693 * @ctxt: an XML parser context 9694 * 9695 * Parse escaped pure raw content. 9696 * 9697 * [18] CDSect ::= CDStart CData CDEnd 9698 * 9699 * [19] CDStart ::= '<![CDATA[' 9700 * 9701 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9702 * 9703 * [21] CDEnd ::= ']]>' 9704 */ 9705 void 9706 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9707 xmlChar *buf = NULL; 9708 int len = 0; 9709 int size = XML_PARSER_BUFFER_SIZE; 9710 int r, rl; 9711 int s, sl; 9712 int cur, l; 9713 int count = 0; 9714 9715 /* Check 2.6.0 was NXT(0) not RAW */ 9716 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9717 SKIP(9); 9718 } else 9719 return; 9720 9721 ctxt->instate = XML_PARSER_CDATA_SECTION; 9722 r = CUR_CHAR(rl); 9723 if (!IS_CHAR(r)) { 9724 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9725 ctxt->instate = XML_PARSER_CONTENT; 9726 return; 9727 } 9728 NEXTL(rl); 9729 s = CUR_CHAR(sl); 9730 if (!IS_CHAR(s)) { 9731 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9732 ctxt->instate = XML_PARSER_CONTENT; 9733 return; 9734 } 9735 NEXTL(sl); 9736 cur = CUR_CHAR(l); 9737 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9738 if (buf == NULL) { 9739 xmlErrMemory(ctxt, NULL); 9740 return; 9741 } 9742 while (IS_CHAR(cur) && 9743 ((r != ']') || (s != ']') || (cur != '>'))) { 9744 if (len + 
5 >= size) { 9745 xmlChar *tmp; 9746 9747 if ((size > XML_MAX_TEXT_LENGTH) && 9748 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9749 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9750 "CData section too big found", NULL); 9751 xmlFree (buf); 9752 return; 9753 } 9754 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9755 if (tmp == NULL) { 9756 xmlFree(buf); 9757 xmlErrMemory(ctxt, NULL); 9758 return; 9759 } 9760 buf = tmp; 9761 size *= 2; 9762 } 9763 COPY_BUF(rl,buf,len,r); 9764 r = s; 9765 rl = sl; 9766 s = cur; 9767 sl = l; 9768 count++; 9769 if (count > 50) { 9770 GROW; 9771 if (ctxt->instate == XML_PARSER_EOF) { 9772 xmlFree(buf); 9773 return; 9774 } 9775 count = 0; 9776 } 9777 NEXTL(l); 9778 cur = CUR_CHAR(l); 9779 } 9780 buf[len] = 0; 9781 ctxt->instate = XML_PARSER_CONTENT; 9782 if (cur != '>') { 9783 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9784 "CData section not finished\n%.50s\n", buf); 9785 xmlFree(buf); 9786 return; 9787 } 9788 NEXTL(l); 9789 9790 /* 9791 * OK the buffer is to be consumed as cdata. 9792 */ 9793 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9794 if (ctxt->sax->cdataBlock != NULL) 9795 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9796 else if (ctxt->sax->characters != NULL) 9797 ctxt->sax->characters(ctxt->userData, buf, len); 9798 } 9799 xmlFree(buf); 9800 } 9801 9802 /** 9803 * xmlParseContent: 9804 * @ctxt: an XML parser context 9805 * 9806 * Parse a content: 9807 * 9808 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9809 */ 9810 9811 void 9812 xmlParseContent(xmlParserCtxtPtr ctxt) { 9813 GROW; 9814 while ((RAW != 0) && 9815 ((RAW != '<') || (NXT(1) != '/')) && 9816 (ctxt->instate != XML_PARSER_EOF)) { 9817 const xmlChar *test = CUR_PTR; 9818 unsigned int cons = ctxt->input->consumed; 9819 const xmlChar *cur = ctxt->input->cur; 9820 9821 /* 9822 * First case : a Processing Instruction. 
9823 */ 9824 if ((*cur == '<') && (cur[1] == '?')) { 9825 xmlParsePI(ctxt); 9826 } 9827 9828 /* 9829 * Second case : a CDSection 9830 */ 9831 /* 2.6.0 test was *cur not RAW */ 9832 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9833 xmlParseCDSect(ctxt); 9834 } 9835 9836 /* 9837 * Third case : a comment 9838 */ 9839 else if ((*cur == '<') && (NXT(1) == '!') && 9840 (NXT(2) == '-') && (NXT(3) == '-')) { 9841 xmlParseComment(ctxt); 9842 ctxt->instate = XML_PARSER_CONTENT; 9843 } 9844 9845 /* 9846 * Fourth case : a sub-element. 9847 */ 9848 else if (*cur == '<') { 9849 xmlParseElement(ctxt); 9850 } 9851 9852 /* 9853 * Fifth case : a reference. If if has not been resolved, 9854 * parsing returns it's Name, create the node 9855 */ 9856 9857 else if (*cur == '&') { 9858 xmlParseReference(ctxt); 9859 } 9860 9861 /* 9862 * Last case, text. Note that References are handled directly. 9863 */ 9864 else { 9865 xmlParseCharData(ctxt, 0); 9866 } 9867 9868 GROW; 9869 SHRINK; 9870 9871 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9872 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9873 "detected an error in element content\n"); 9874 xmlHaltParser(ctxt); 9875 break; 9876 } 9877 } 9878 } 9879 9880 /** 9881 * xmlParseElement: 9882 * @ctxt: an XML parser context 9883 * 9884 * parse an XML element, this is highly recursive 9885 * 9886 * [39] element ::= EmptyElemTag | STag content ETag 9887 * 9888 * [ WFC: Element Type Match ] 9889 * The Name in an element's end-tag must match the element type in the 9890 * start-tag. 
9891 * 9892 */ 9893 9894 void 9895 xmlParseElement(xmlParserCtxtPtr ctxt) { 9896 const xmlChar *name; 9897 const xmlChar *prefix = NULL; 9898 const xmlChar *URI = NULL; 9899 xmlParserNodeInfo node_info; 9900 int line, tlen = 0; 9901 xmlNodePtr ret; 9902 int nsNr = ctxt->nsNr; 9903 9904 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9905 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9906 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9907 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9908 xmlParserMaxDepth); 9909 xmlHaltParser(ctxt); 9910 return; 9911 } 9912 9913 /* Capture start position */ 9914 if (ctxt->record_info) { 9915 node_info.begin_pos = ctxt->input->consumed + 9916 (CUR_PTR - ctxt->input->base); 9917 node_info.begin_line = ctxt->input->line; 9918 } 9919 9920 if (ctxt->spaceNr == 0) 9921 spacePush(ctxt, -1); 9922 else if (*ctxt->space == -2) 9923 spacePush(ctxt, -1); 9924 else 9925 spacePush(ctxt, *ctxt->space); 9926 9927 line = ctxt->input->line; 9928 #ifdef LIBXML_SAX1_ENABLED 9929 if (ctxt->sax2) 9930 #endif /* LIBXML_SAX1_ENABLED */ 9931 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9932 #ifdef LIBXML_SAX1_ENABLED 9933 else 9934 name = xmlParseStartTag(ctxt); 9935 #endif /* LIBXML_SAX1_ENABLED */ 9936 if (ctxt->instate == XML_PARSER_EOF) 9937 return; 9938 if (name == NULL) { 9939 spacePop(ctxt); 9940 return; 9941 } 9942 namePush(ctxt, name); 9943 ret = ctxt->node; 9944 9945 #ifdef LIBXML_VALID_ENABLED 9946 /* 9947 * [ VC: Root Element Type ] 9948 * The Name in the document type declaration must match the element 9949 * type of the root element. 9950 */ 9951 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9952 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9953 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9954 #endif /* LIBXML_VALID_ENABLED */ 9955 9956 /* 9957 * Check for an Empty Element. 
9958 */ 9959 if ((RAW == '/') && (NXT(1) == '>')) { 9960 SKIP(2); 9961 if (ctxt->sax2) { 9962 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9963 (!ctxt->disableSAX)) 9964 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 9965 #ifdef LIBXML_SAX1_ENABLED 9966 } else { 9967 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 9968 (!ctxt->disableSAX)) 9969 ctxt->sax->endElement(ctxt->userData, name); 9970 #endif /* LIBXML_SAX1_ENABLED */ 9971 } 9972 namePop(ctxt); 9973 spacePop(ctxt); 9974 if (nsNr != ctxt->nsNr) 9975 nsPop(ctxt, ctxt->nsNr - nsNr); 9976 if ( ret != NULL && ctxt->record_info ) { 9977 node_info.end_pos = ctxt->input->consumed + 9978 (CUR_PTR - ctxt->input->base); 9979 node_info.end_line = ctxt->input->line; 9980 node_info.node = ret; 9981 xmlParserAddNodeInfo(ctxt, &node_info); 9982 } 9983 return; 9984 } 9985 if (RAW == '>') { 9986 NEXT1; 9987 } else { 9988 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 9989 "Couldn't find end of Start Tag %s line %d\n", 9990 name, line, NULL); 9991 9992 /* 9993 * end of parsing of this node. 9994 */ 9995 nodePop(ctxt); 9996 namePop(ctxt); 9997 spacePop(ctxt); 9998 if (nsNr != ctxt->nsNr) 9999 nsPop(ctxt, ctxt->nsNr - nsNr); 10000 10001 /* 10002 * Capture end position and add node 10003 */ 10004 if ( ret != NULL && ctxt->record_info ) { 10005 node_info.end_pos = ctxt->input->consumed + 10006 (CUR_PTR - ctxt->input->base); 10007 node_info.end_line = ctxt->input->line; 10008 node_info.node = ret; 10009 xmlParserAddNodeInfo(ctxt, &node_info); 10010 } 10011 return; 10012 } 10013 10014 /* 10015 * Parse the content of the element: 10016 */ 10017 xmlParseContent(ctxt); 10018 if (ctxt->instate == XML_PARSER_EOF) 10019 return; 10020 if (!IS_BYTE_CHAR(RAW)) { 10021 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10022 "Premature end of data in tag %s line %d\n", 10023 name, line, NULL); 10024 10025 /* 10026 * end of parsing of this node. 
10027 */ 10028 nodePop(ctxt); 10029 namePop(ctxt); 10030 spacePop(ctxt); 10031 if (nsNr != ctxt->nsNr) 10032 nsPop(ctxt, ctxt->nsNr - nsNr); 10033 return; 10034 } 10035 10036 /* 10037 * parse the end of tag: '</' should be here. 10038 */ 10039 if (ctxt->sax2) { 10040 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10041 namePop(ctxt); 10042 } 10043 #ifdef LIBXML_SAX1_ENABLED 10044 else 10045 xmlParseEndTag1(ctxt, line); 10046 #endif /* LIBXML_SAX1_ENABLED */ 10047 10048 /* 10049 * Capture end position and add node 10050 */ 10051 if ( ret != NULL && ctxt->record_info ) { 10052 node_info.end_pos = ctxt->input->consumed + 10053 (CUR_PTR - ctxt->input->base); 10054 node_info.end_line = ctxt->input->line; 10055 node_info.node = ret; 10056 xmlParserAddNodeInfo(ctxt, &node_info); 10057 } 10058 } 10059 10060 /** 10061 * xmlParseVersionNum: 10062 * @ctxt: an XML parser context 10063 * 10064 * parse the XML version value. 10065 * 10066 * [26] VersionNum ::= '1.' [0-9]+ 10067 * 10068 * In practice allow [0-9].[0-9]+ at that level 10069 * 10070 * Returns the string giving the XML version number, or NULL 10071 */ 10072 xmlChar * 10073 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10074 xmlChar *buf = NULL; 10075 int len = 0; 10076 int size = 10; 10077 xmlChar cur; 10078 10079 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10080 if (buf == NULL) { 10081 xmlErrMemory(ctxt, NULL); 10082 return(NULL); 10083 } 10084 cur = CUR; 10085 if (!((cur >= '0') && (cur <= '9'))) { 10086 xmlFree(buf); 10087 return(NULL); 10088 } 10089 buf[len++] = cur; 10090 NEXT; 10091 cur=CUR; 10092 if (cur != '.') { 10093 xmlFree(buf); 10094 return(NULL); 10095 } 10096 buf[len++] = cur; 10097 NEXT; 10098 cur=CUR; 10099 while ((cur >= '0') && (cur <= '9')) { 10100 if (len + 1 >= size) { 10101 xmlChar *tmp; 10102 10103 size *= 2; 10104 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10105 if (tmp == NULL) { 10106 xmlFree(buf); 10107 xmlErrMemory(ctxt, NULL); 10108 
return(NULL); 10109 } 10110 buf = tmp; 10111 } 10112 buf[len++] = cur; 10113 NEXT; 10114 cur=CUR; 10115 } 10116 buf[len] = 0; 10117 return(buf); 10118 } 10119 10120 /** 10121 * xmlParseVersionInfo: 10122 * @ctxt: an XML parser context 10123 * 10124 * parse the XML version. 10125 * 10126 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10127 * 10128 * [25] Eq ::= S? '=' S? 10129 * 10130 * Returns the version string, e.g. "1.0" 10131 */ 10132 10133 xmlChar * 10134 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10135 xmlChar *version = NULL; 10136 10137 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10138 SKIP(7); 10139 SKIP_BLANKS; 10140 if (RAW != '=') { 10141 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10142 return(NULL); 10143 } 10144 NEXT; 10145 SKIP_BLANKS; 10146 if (RAW == '"') { 10147 NEXT; 10148 version = xmlParseVersionNum(ctxt); 10149 if (RAW != '"') { 10150 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10151 } else 10152 NEXT; 10153 } else if (RAW == '\''){ 10154 NEXT; 10155 version = xmlParseVersionNum(ctxt); 10156 if (RAW != '\'') { 10157 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10158 } else 10159 NEXT; 10160 } else { 10161 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10162 } 10163 } 10164 return(version); 10165 } 10166 10167 /** 10168 * xmlParseEncName: 10169 * @ctxt: an XML parser context 10170 * 10171 * parse the XML encoding name 10172 * 10173 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10174 * 10175 * Returns the encoding name value or NULL 10176 */ 10177 xmlChar * 10178 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10179 xmlChar *buf = NULL; 10180 int len = 0; 10181 int size = 10; 10182 xmlChar cur; 10183 10184 cur = CUR; 10185 if (((cur >= 'a') && (cur <= 'z')) || 10186 ((cur >= 'A') && (cur <= 'Z'))) { 10187 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10188 if (buf == NULL) { 10189 xmlErrMemory(ctxt, NULL); 10190 return(NULL); 10191 } 10192 10193 buf[len++] = cur; 10194 
NEXT; 10195 cur = CUR; 10196 while (((cur >= 'a') && (cur <= 'z')) || 10197 ((cur >= 'A') && (cur <= 'Z')) || 10198 ((cur >= '0') && (cur <= '9')) || 10199 (cur == '.') || (cur == '_') || 10200 (cur == '-')) { 10201 if (len + 1 >= size) { 10202 xmlChar *tmp; 10203 10204 size *= 2; 10205 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10206 if (tmp == NULL) { 10207 xmlErrMemory(ctxt, NULL); 10208 xmlFree(buf); 10209 return(NULL); 10210 } 10211 buf = tmp; 10212 } 10213 buf[len++] = cur; 10214 NEXT; 10215 cur = CUR; 10216 if (cur == 0) { 10217 SHRINK; 10218 GROW; 10219 cur = CUR; 10220 } 10221 } 10222 buf[len] = 0; 10223 } else { 10224 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10225 } 10226 return(buf); 10227 } 10228 10229 /** 10230 * xmlParseEncodingDecl: 10231 * @ctxt: an XML parser context 10232 * 10233 * parse the XML encoding declaration 10234 * 10235 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10236 * 10237 * this setups the conversion filters. 
10238 * 10239 * Returns the encoding value or NULL 10240 */

/*
 * Implementation notes:
 *  - Nothing but leading blanks is consumed when "encoding" is absent.
 *  - With XML_PARSE_IGNORE_ENC set the parsed name is freed and NULL is
 *    returned.
 *  - "UTF-16"/"UTF16" and "UTF-8"/"UTF8" (case-insensitive) are stored in
 *    ctxt->encoding; any other name is stored in ctxt->input->encoding and
 *    a conversion handler is looked up and installed.
 *  - The returned pointer is the very string stored in the context, so the
 *    caller presumably must not free it.
 */
const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
    xmlChar *encoding = NULL;
    xmlChar quote;

    SKIP_BLANKS;
    if (!(CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')))
        return(NULL);

    SKIP(8);
    SKIP_BLANKS;
    if (RAW != '=') {
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
        return(NULL);
    }
    NEXT;
    SKIP_BLANKS;

    quote = RAW;
    if ((quote == '"') || (quote == '\'')) {
        NEXT;
        encoding = xmlParseEncName(ctxt);
        if (RAW != quote) {
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
            xmlFree((xmlChar *) encoding);
            return(NULL);
        }
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
    }

    /*
     * Non standard parsing, allowing the user to ignore encoding
     */
    if (ctxt->options & XML_PARSE_IGNORE_ENC) {
        xmlFree((xmlChar *) encoding);
        return(NULL);
    }

    if (encoding == NULL)
        return(NULL);

    if ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
        (!xmlStrcasecmp(encoding, BAD_CAST "UTF16"))) {
        /*
         * The UTF-16 switch (including the endianness selection) has
         * already taken place at this stage. If no encoding was passed to
         * the parser and no decoder is installed, the bytes read so far
         * were UTF-8 compatible: the label contradicts the content.
         */
        if ((ctxt->encoding == NULL) &&
            (ctxt->input->buf != NULL) &&
            (ctxt->input->buf->encoder == NULL)) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
              "Document labelled UTF-16 but has UTF-8 content\n");
        }
        if (ctxt->encoding != NULL)
            xmlFree((xmlChar *) ctxt->encoding);
        ctxt->encoding = encoding;
    } else if ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
               (!xmlStrcasecmp(encoding, BAD_CAST "UTF8"))) {
        /* UTF-8 encoding is handled natively */
        if (ctxt->encoding != NULL)
            xmlFree((xmlChar *) ctxt->encoding);
        ctxt->encoding = encoding;
    } else {
        xmlCharEncodingHandlerPtr handler;

        if (ctxt->input->encoding != NULL)
            xmlFree((xmlChar *) ctxt->input->encoding);
        ctxt->input->encoding = encoding;

        handler = xmlFindCharEncodingHandler((const char *) encoding);
        if (handler == NULL) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                    "Unsupported encoding %s\n", encoding);
            return(NULL);
        }
        if (xmlSwitchToEncoding(ctxt, handler) < 0) {
            /* failed to convert */
            ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
            return(NULL);
        }
    }
    return(encoding);
}

/** 10344 * xmlParseSDDecl: 10345 * @ctxt: an XML parser context 10346 * 10347 * parse the XML standalone declaration 10348 * 10349 * [32] SDDecl ::= S 'standalone' Eq 10350 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10351 * 10352 * [ VC: Standalone Document Declaration ] 10353 * TODO The standalone document declaration must have the value "no" 10354 * if any external markup declarations contain declarations of: 10355 * - attributes with default values, if elements to which these 10356 * attributes
apply appear in the document without specifications 10357 * of values for these attributes, or 10358 * - entities (other than amp, lt, gt, apos, quot), if references 10359 * to those entities appear in the document, or 10360 * - attributes with values subject to normalization, where the 10361 * attribute appears in the document with a value which will change 10362 * as a result of normalization, or 10363 * - element types with element content, if white space occurs directly 10364 * within any instance of those types. 10365 * 10366 * Returns: 10367 * 1 if standalone="yes" 10368 * 0 if standalone="no" 10369 * -2 if standalone attribute is missing or invalid 10370 * (A standalone value of -2 means that the XML declaration was found, 10371 * but no value was specified for the standalone attribute). 10372 */ 10373 10374 int 10375 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10376 int standalone = -2; 10377 10378 SKIP_BLANKS; 10379 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10380 SKIP(10); 10381 SKIP_BLANKS; 10382 if (RAW != '=') { 10383 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10384 return(standalone); 10385 } 10386 NEXT; 10387 SKIP_BLANKS; 10388 if (RAW == '\''){ 10389 NEXT; 10390 if ((RAW == 'n') && (NXT(1) == 'o')) { 10391 standalone = 0; 10392 SKIP(2); 10393 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10394 (NXT(2) == 's')) { 10395 standalone = 1; 10396 SKIP(3); 10397 } else { 10398 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10399 } 10400 if (RAW != '\'') { 10401 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10402 } else 10403 NEXT; 10404 } else if (RAW == '"'){ 10405 NEXT; 10406 if ((RAW == 'n') && (NXT(1) == 'o')) { 10407 standalone = 0; 10408 SKIP(2); 10409 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10410 (NXT(2) == 's')) { 10411 standalone = 1; 10412 SKIP(3); 10413 } else { 10414 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10415 } 10416 if (RAW != '"') { 10417 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, 
NULL); 10418 } else 10419 NEXT; 10420 } else { 10421 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10422 } 10423 } 10424 return(standalone); 10425 } 10426 10427 /** 10428 * xmlParseXMLDecl: 10429 * @ctxt: an XML parser context 10430 * 10431 * parse an XML declaration header 10432 * 10433 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10434 */ 10435 10436 void 10437 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10438 xmlChar *version; 10439 10440 /* 10441 * This value for standalone indicates that the document has an 10442 * XML declaration but it does not have a standalone attribute. 10443 * It will be overwritten later if a standalone attribute is found. 10444 */ 10445 ctxt->input->standalone = -2; 10446 10447 /* 10448 * We know that '<?xml' is here. 10449 */ 10450 SKIP(5); 10451 10452 if (!IS_BLANK_CH(RAW)) { 10453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10454 "Blank needed after '<?xml'\n"); 10455 } 10456 SKIP_BLANKS; 10457 10458 /* 10459 * We must have the VersionInfo here. 
10460 */ 10461 version = xmlParseVersionInfo(ctxt); 10462 if (version == NULL) { 10463 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10464 } else { 10465 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10466 /* 10467 * Changed here for XML-1.0 5th edition 10468 */ 10469 if (ctxt->options & XML_PARSE_OLD10) { 10470 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10471 "Unsupported version '%s'\n", 10472 version); 10473 } else { 10474 if ((version[0] == '1') && ((version[1] == '.'))) { 10475 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10476 "Unsupported version '%s'\n", 10477 version, NULL); 10478 } else { 10479 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10480 "Unsupported version '%s'\n", 10481 version); 10482 } 10483 } 10484 } 10485 if (ctxt->version != NULL) 10486 xmlFree((void *) ctxt->version); 10487 ctxt->version = version; 10488 } 10489 10490 /* 10491 * We may have the encoding declaration 10492 */ 10493 if (!IS_BLANK_CH(RAW)) { 10494 if ((RAW == '?') && (NXT(1) == '>')) { 10495 SKIP(2); 10496 return; 10497 } 10498 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10499 } 10500 xmlParseEncodingDecl(ctxt); 10501 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10502 (ctxt->instate == XML_PARSER_EOF)) { 10503 /* 10504 * The XML REC instructs us to stop parsing right here 10505 */ 10506 return; 10507 } 10508 10509 /* 10510 * We may have the standalone status. 
10511 */ 10512 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10513 if ((RAW == '?') && (NXT(1) == '>')) { 10514 SKIP(2); 10515 return; 10516 } 10517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10518 } 10519 10520 /* 10521 * We can grow the input buffer freely at that point 10522 */ 10523 GROW; 10524 10525 SKIP_BLANKS; 10526 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10527 10528 SKIP_BLANKS; 10529 if ((RAW == '?') && (NXT(1) == '>')) { 10530 SKIP(2); 10531 } else if (RAW == '>') { 10532 /* Deprecated old WD ... */ 10533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10534 NEXT; 10535 } else { 10536 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10537 MOVETO_ENDTAG(CUR_PTR); 10538 NEXT; 10539 } 10540 } 10541 10542 /** 10543 * xmlParseMisc: 10544 * @ctxt: an XML parser context 10545 * 10546 * parse an XML Misc* optional field. 10547 * 10548 * [27] Misc ::= Comment | PI | S 10549 */ 10550 10551 void 10552 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10553 while ((ctxt->instate != XML_PARSER_EOF) && 10554 (((RAW == '<') && (NXT(1) == '?')) || 10555 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10556 IS_BLANK_CH(CUR))) { 10557 if ((RAW == '<') && (NXT(1) == '?')) { 10558 xmlParsePI(ctxt); 10559 } else if (IS_BLANK_CH(CUR)) { 10560 NEXT; 10561 } else 10562 xmlParseComment(ctxt); 10563 } 10564 } 10565 10566 /** 10567 * xmlParseDocument: 10568 * @ctxt: an XML parser context 10569 * 10570 * parse an XML document (and build a tree if using the standard SAX 10571 * interface). 10572 * 10573 * [1] document ::= prolog element Misc* 10574 * 10575 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10576 * 10577 * Returns 0, -1 in case of error. the parser context is augmented 10578 * as a result of the parsing. 
10579 */ 10580 10581 int 10582 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10583 xmlChar start[4]; 10584 xmlCharEncoding enc; 10585 10586 xmlInitParser(); 10587 10588 if ((ctxt == NULL) || (ctxt->input == NULL)) 10589 return(-1); 10590 10591 GROW; 10592 10593 /* 10594 * SAX: detecting the level. 10595 */ 10596 xmlDetectSAX2(ctxt); 10597 10598 /* 10599 * SAX: beginning of the document processing. 10600 */ 10601 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10602 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10603 if (ctxt->instate == XML_PARSER_EOF) 10604 return(-1); 10605 10606 if ((ctxt->encoding == NULL) && 10607 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10608 /* 10609 * Get the 4 first bytes and decode the charset 10610 * if enc != XML_CHAR_ENCODING_NONE 10611 * plug some encoding conversion routines. 10612 */ 10613 start[0] = RAW; 10614 start[1] = NXT(1); 10615 start[2] = NXT(2); 10616 start[3] = NXT(3); 10617 enc = xmlDetectCharEncoding(&start[0], 4); 10618 if (enc != XML_CHAR_ENCODING_NONE) { 10619 xmlSwitchEncoding(ctxt, enc); 10620 } 10621 } 10622 10623 10624 if (CUR == 0) { 10625 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10626 return(-1); 10627 } 10628 10629 /* 10630 * Check for the XMLDecl in the Prolog. 10631 * do not GROW here to avoid the detected encoder to decode more 10632 * than just the first line, unless the amount of data is really 10633 * too small to hold "<?xml version="1.0" encoding="foo" 10634 */ 10635 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10636 GROW; 10637 } 10638 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10639 10640 /* 10641 * Note that we will switch encoding on the fly. 
10642 */ 10643 xmlParseXMLDecl(ctxt); 10644 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10645 (ctxt->instate == XML_PARSER_EOF)) { 10646 /* 10647 * The XML REC instructs us to stop parsing right here 10648 */ 10649 return(-1); 10650 } 10651 ctxt->standalone = ctxt->input->standalone; 10652 SKIP_BLANKS; 10653 } else { 10654 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10655 } 10656 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10657 ctxt->sax->startDocument(ctxt->userData); 10658 if (ctxt->instate == XML_PARSER_EOF) 10659 return(-1); 10660 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10661 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10662 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10663 } 10664 10665 /* 10666 * The Misc part of the Prolog 10667 */ 10668 GROW; 10669 xmlParseMisc(ctxt); 10670 10671 /* 10672 * Then possibly doc type declaration(s) and more Misc 10673 * (doctypedecl Misc*)? 10674 */ 10675 GROW; 10676 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10677 10678 ctxt->inSubset = 1; 10679 xmlParseDocTypeDecl(ctxt); 10680 if (RAW == '[') { 10681 ctxt->instate = XML_PARSER_DTD; 10682 xmlParseInternalSubset(ctxt); 10683 if (ctxt->instate == XML_PARSER_EOF) 10684 return(-1); 10685 } 10686 10687 /* 10688 * Create and update the external subset. 
10689 */ 10690 ctxt->inSubset = 2; 10691 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10692 (!ctxt->disableSAX)) 10693 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10694 ctxt->extSubSystem, ctxt->extSubURI); 10695 if (ctxt->instate == XML_PARSER_EOF) 10696 return(-1); 10697 ctxt->inSubset = 0; 10698 10699 xmlCleanSpecialAttr(ctxt); 10700 10701 ctxt->instate = XML_PARSER_PROLOG; 10702 xmlParseMisc(ctxt); 10703 } 10704 10705 /* 10706 * Time to start parsing the tree itself 10707 */ 10708 GROW; 10709 if (RAW != '<') { 10710 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10711 "Start tag expected, '<' not found\n"); 10712 } else { 10713 ctxt->instate = XML_PARSER_CONTENT; 10714 xmlParseElement(ctxt); 10715 ctxt->instate = XML_PARSER_EPILOG; 10716 10717 10718 /* 10719 * The Misc part at the end 10720 */ 10721 xmlParseMisc(ctxt); 10722 10723 if (RAW != 0) { 10724 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10725 } 10726 ctxt->instate = XML_PARSER_EOF; 10727 } 10728 10729 /* 10730 * SAX: end of the document processing. 10731 */ 10732 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10733 ctxt->sax->endDocument(ctxt->userData); 10734 10735 /* 10736 * Remove locally kept entity definitions if the tree was not built 10737 */ 10738 if ((ctxt->myDoc != NULL) && 10739 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10740 xmlFreeDoc(ctxt->myDoc); 10741 ctxt->myDoc = NULL; 10742 } 10743 10744 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10745 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10746 if (ctxt->valid) 10747 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10748 if (ctxt->nsWellFormed) 10749 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10750 if (ctxt->options & XML_PARSE_OLD10) 10751 ctxt->myDoc->properties |= XML_DOC_OLD10; 10752 } 10753 if (! 
ctxt->wellFormed) { 10754 ctxt->valid = 0; 10755 return(-1); 10756 } 10757 return(0); 10758 } 10759 10760 /** 10761 * xmlParseExtParsedEnt: 10762 * @ctxt: an XML parser context 10763 * 10764 * parse a general parsed entity 10765 * An external general parsed entity is well-formed if it matches the 10766 * production labeled extParsedEnt. 10767 * 10768 * [78] extParsedEnt ::= TextDecl? content 10769 * 10770 * Returns 0, -1 in case of error. the parser context is augmented 10771 * as a result of the parsing. 10772 */ 10773 10774 int 10775 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10776 xmlChar start[4]; 10777 xmlCharEncoding enc; 10778 10779 if ((ctxt == NULL) || (ctxt->input == NULL)) 10780 return(-1); 10781 10782 xmlDefaultSAXHandlerInit(); 10783 10784 xmlDetectSAX2(ctxt); 10785 10786 GROW; 10787 10788 /* 10789 * SAX: beginning of the document processing. 10790 */ 10791 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10792 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10793 10794 /* 10795 * Get the 4 first bytes and decode the charset 10796 * if enc != XML_CHAR_ENCODING_NONE 10797 * plug some encoding conversion routines. 10798 */ 10799 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10800 start[0] = RAW; 10801 start[1] = NXT(1); 10802 start[2] = NXT(2); 10803 start[3] = NXT(3); 10804 enc = xmlDetectCharEncoding(start, 4); 10805 if (enc != XML_CHAR_ENCODING_NONE) { 10806 xmlSwitchEncoding(ctxt, enc); 10807 } 10808 } 10809 10810 10811 if (CUR == 0) { 10812 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10813 } 10814 10815 /* 10816 * Check for the XMLDecl in the Prolog. 10817 */ 10818 GROW; 10819 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10820 10821 /* 10822 * Note that we will switch encoding on the fly. 
10823 */ 10824 xmlParseXMLDecl(ctxt); 10825 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10826 /* 10827 * The XML REC instructs us to stop parsing right here 10828 */ 10829 return(-1); 10830 } 10831 SKIP_BLANKS; 10832 } else { 10833 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10834 } 10835 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10836 ctxt->sax->startDocument(ctxt->userData); 10837 if (ctxt->instate == XML_PARSER_EOF) 10838 return(-1); 10839 10840 /* 10841 * Doing validity checking on chunk doesn't make sense 10842 */ 10843 ctxt->instate = XML_PARSER_CONTENT; 10844 ctxt->validate = 0; 10845 ctxt->loadsubset = 0; 10846 ctxt->depth = 0; 10847 10848 xmlParseContent(ctxt); 10849 if (ctxt->instate == XML_PARSER_EOF) 10850 return(-1); 10851 10852 if ((RAW == '<') && (NXT(1) == '/')) { 10853 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10854 } else if (RAW != 0) { 10855 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10856 } 10857 10858 /* 10859 * SAX: end of the document processing. 10860 */ 10861 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10862 ctxt->sax->endDocument(ctxt->userData); 10863 10864 if (! ctxt->wellFormed) return(-1); 10865 return(0); 10866 } 10867 10868 #ifdef LIBXML_PUSH_ENABLED 10869 /************************************************************************ 10870 * * 10871 * Progressive parsing interfaces * 10872 * * 10873 ************************************************************************/ 10874 10875 /** 10876 * xmlParseLookupSequence: 10877 * @ctxt: an XML parser context 10878 * @first: the first char to lookup 10879 * @next: the next char to lookup or zero 10880 * @third: the next char to lookup or zero 10881 * 10882 * Try to find if a sequence (first, next, third) or just (first next) or 10883 * (first) is available in the input stream. 
10884 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10885 * to avoid rescanning sequences of bytes, it DOES change the state of the 10886 * parser, do not use liberally. 10887 * 10888 * Returns the index to the current parsing point if the full sequence 10889 * is available, -1 otherwise. 10890 */ 10891 static int 10892 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10893 xmlChar next, xmlChar third) { 10894 int base, len; 10895 xmlParserInputPtr in; 10896 const xmlChar *buf; 10897 10898 in = ctxt->input; 10899 if (in == NULL) return(-1); 10900 base = in->cur - in->base; 10901 if (base < 0) return(-1); 10902 if (ctxt->checkIndex > base) 10903 base = ctxt->checkIndex; 10904 if (in->buf == NULL) { 10905 buf = in->base; 10906 len = in->length; 10907 } else { 10908 buf = xmlBufContent(in->buf->buffer); 10909 len = xmlBufUse(in->buf->buffer); 10910 } 10911 /* take into account the sequence length */ 10912 if (third) len -= 2; 10913 else if (next) len --; 10914 for (;base < len;base++) { 10915 if (buf[base] == first) { 10916 if (third != 0) { 10917 if ((buf[base + 1] != next) || 10918 (buf[base + 2] != third)) continue; 10919 } else if (next != 0) { 10920 if (buf[base + 1] != next) continue; 10921 } 10922 ctxt->checkIndex = 0; 10923 #ifdef DEBUG_PUSH 10924 if (next == 0) 10925 xmlGenericError(xmlGenericErrorContext, 10926 "PP: lookup '%c' found at %d\n", 10927 first, base); 10928 else if (third == 0) 10929 xmlGenericError(xmlGenericErrorContext, 10930 "PP: lookup '%c%c' found at %d\n", 10931 first, next, base); 10932 else 10933 xmlGenericError(xmlGenericErrorContext, 10934 "PP: lookup '%c%c%c' found at %d\n", 10935 first, next, third, base); 10936 #endif 10937 return(base - (in->cur - in->base)); 10938 } 10939 } 10940 ctxt->checkIndex = base; 10941 #ifdef DEBUG_PUSH 10942 if (next == 0) 10943 xmlGenericError(xmlGenericErrorContext, 10944 "PP: lookup '%c' failed\n", first); 10945 else if (third == 0) 10946 
xmlGenericError(xmlGenericErrorContext, 10947 "PP: lookup '%c%c' failed\n", first, next); 10948 else 10949 xmlGenericError(xmlGenericErrorContext, 10950 "PP: lookup '%c%c%c' failed\n", first, next, third); 10951 #endif 10952 return(-1); 10953 } 10954 10955 /** 10956 * xmlParseGetLasts: 10957 * @ctxt: an XML parser context 10958 * @lastlt: pointer to store the last '<' from the input 10959 * @lastgt: pointer to store the last '>' from the input 10960 * 10961 * Lookup the last < and > in the current chunk 10962 */ 10963 static void 10964 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10965 const xmlChar **lastgt) { 10966 const xmlChar *tmp; 10967 10968 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 10969 xmlGenericError(xmlGenericErrorContext, 10970 "Internal error: xmlParseGetLasts\n"); 10971 return; 10972 } 10973 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 10974 tmp = ctxt->input->end; 10975 tmp--; 10976 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 10977 if (tmp < ctxt->input->base) { 10978 *lastlt = NULL; 10979 *lastgt = NULL; 10980 } else { 10981 *lastlt = tmp; 10982 tmp++; 10983 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 10984 if (*tmp == '\'') { 10985 tmp++; 10986 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 10987 if (tmp < ctxt->input->end) tmp++; 10988 } else if (*tmp == '"') { 10989 tmp++; 10990 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 10991 if (tmp < ctxt->input->end) tmp++; 10992 } else 10993 tmp++; 10994 } 10995 if (tmp < ctxt->input->end) 10996 *lastgt = tmp; 10997 else { 10998 tmp = *lastlt; 10999 tmp--; 11000 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11001 if (tmp >= ctxt->input->base) 11002 *lastgt = tmp; 11003 else 11004 *lastgt = NULL; 11005 } 11006 } 11007 } else { 11008 *lastlt = NULL; 11009 *lastgt = NULL; 11010 } 11011 } 11012 /** 11013 * xmlCheckCdataPush: 11014 * @cur: pointer to the block of characters 11015 * @len: length of the 
block in bytes 11016 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11017 * 11018 * Check that the block of characters is okay as SCdata content [20] 11019 * 11020 * Returns the number of bytes to pass if okay, a negative index where an 11021 * UTF-8 error occurred otherwise 11022 */ 11023 static int 11024 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11025 int ix; 11026 unsigned char c; 11027 int codepoint; 11028 11029 if ((utf == NULL) || (len <= 0)) 11030 return(0); 11031 11032 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11033 c = utf[ix]; 11034 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11035 if (c >= 0x20) 11036 ix++; 11037 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11038 ix++; 11039 else 11040 return(-ix); 11041 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11042 if (ix + 2 > len) return(complete ? -ix : ix); 11043 if ((utf[ix+1] & 0xc0 ) != 0x80) 11044 return(-ix); 11045 codepoint = (utf[ix] & 0x1f) << 6; 11046 codepoint |= utf[ix+1] & 0x3f; 11047 if (!xmlIsCharQ(codepoint)) 11048 return(-ix); 11049 ix += 2; 11050 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11051 if (ix + 3 > len) return(complete ? -ix : ix); 11052 if (((utf[ix+1] & 0xc0) != 0x80) || 11053 ((utf[ix+2] & 0xc0) != 0x80)) 11054 return(-ix); 11055 codepoint = (utf[ix] & 0xf) << 12; 11056 codepoint |= (utf[ix+1] & 0x3f) << 6; 11057 codepoint |= utf[ix+2] & 0x3f; 11058 if (!xmlIsCharQ(codepoint)) 11059 return(-ix); 11060 ix += 3; 11061 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11062 if (ix + 4 > len) return(complete ? 
-ix : ix); 11063 if (((utf[ix+1] & 0xc0) != 0x80) || 11064 ((utf[ix+2] & 0xc0) != 0x80) || 11065 ((utf[ix+3] & 0xc0) != 0x80)) 11066 return(-ix); 11067 codepoint = (utf[ix] & 0x7) << 18; 11068 codepoint |= (utf[ix+1] & 0x3f) << 12; 11069 codepoint |= (utf[ix+2] & 0x3f) << 6; 11070 codepoint |= utf[ix+3] & 0x3f; 11071 if (!xmlIsCharQ(codepoint)) 11072 return(-ix); 11073 ix += 4; 11074 } else /* unknown encoding */ 11075 return(-ix); 11076 } 11077 return(ix); 11078 } 11079 11080 /** 11081 * xmlParseTryOrFinish: 11082 * @ctxt: an XML parser context 11083 * @terminate: last chunk indicator 11084 * 11085 * Try to progress on parsing 11086 * 11087 * Returns zero if no parsing was possible 11088 */ 11089 static int 11090 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11091 int ret = 0; 11092 int avail, tlen; 11093 xmlChar cur, next; 11094 const xmlChar *lastlt, *lastgt; 11095 11096 if (ctxt->input == NULL) 11097 return(0); 11098 11099 #ifdef DEBUG_PUSH 11100 switch (ctxt->instate) { 11101 case XML_PARSER_EOF: 11102 xmlGenericError(xmlGenericErrorContext, 11103 "PP: try EOF\n"); break; 11104 case XML_PARSER_START: 11105 xmlGenericError(xmlGenericErrorContext, 11106 "PP: try START\n"); break; 11107 case XML_PARSER_MISC: 11108 xmlGenericError(xmlGenericErrorContext, 11109 "PP: try MISC\n");break; 11110 case XML_PARSER_COMMENT: 11111 xmlGenericError(xmlGenericErrorContext, 11112 "PP: try COMMENT\n");break; 11113 case XML_PARSER_PROLOG: 11114 xmlGenericError(xmlGenericErrorContext, 11115 "PP: try PROLOG\n");break; 11116 case XML_PARSER_START_TAG: 11117 xmlGenericError(xmlGenericErrorContext, 11118 "PP: try START_TAG\n");break; 11119 case XML_PARSER_CONTENT: 11120 xmlGenericError(xmlGenericErrorContext, 11121 "PP: try CONTENT\n");break; 11122 case XML_PARSER_CDATA_SECTION: 11123 xmlGenericError(xmlGenericErrorContext, 11124 "PP: try CDATA_SECTION\n");break; 11125 case XML_PARSER_END_TAG: 11126 xmlGenericError(xmlGenericErrorContext, 11127 "PP: try 
END_TAG\n");break; 11128 case XML_PARSER_ENTITY_DECL: 11129 xmlGenericError(xmlGenericErrorContext, 11130 "PP: try ENTITY_DECL\n");break; 11131 case XML_PARSER_ENTITY_VALUE: 11132 xmlGenericError(xmlGenericErrorContext, 11133 "PP: try ENTITY_VALUE\n");break; 11134 case XML_PARSER_ATTRIBUTE_VALUE: 11135 xmlGenericError(xmlGenericErrorContext, 11136 "PP: try ATTRIBUTE_VALUE\n");break; 11137 case XML_PARSER_DTD: 11138 xmlGenericError(xmlGenericErrorContext, 11139 "PP: try DTD\n");break; 11140 case XML_PARSER_EPILOG: 11141 xmlGenericError(xmlGenericErrorContext, 11142 "PP: try EPILOG\n");break; 11143 case XML_PARSER_PI: 11144 xmlGenericError(xmlGenericErrorContext, 11145 "PP: try PI\n");break; 11146 case XML_PARSER_IGNORE: 11147 xmlGenericError(xmlGenericErrorContext, 11148 "PP: try IGNORE\n");break; 11149 } 11150 #endif 11151 11152 if ((ctxt->input != NULL) && 11153 (ctxt->input->cur - ctxt->input->base > 4096)) { 11154 xmlSHRINK(ctxt); 11155 ctxt->checkIndex = 0; 11156 } 11157 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11158 11159 while (ctxt->instate != XML_PARSER_EOF) { 11160 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11161 return(0); 11162 11163 if (ctxt->input == NULL) break; 11164 if (ctxt->input->buf == NULL) 11165 avail = ctxt->input->length - 11166 (ctxt->input->cur - ctxt->input->base); 11167 else { 11168 /* 11169 * If we are operating on converted input, try to flush 11170 * remainng chars to avoid them stalling in the non-converted 11171 * buffer. But do not do this in document start where 11172 * encoding="..." may not have been read and we work on a 11173 * guessed encoding. 
11174 */ 11175 if ((ctxt->instate != XML_PARSER_START) && 11176 (ctxt->input->buf->raw != NULL) && 11177 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11178 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11179 ctxt->input); 11180 size_t current = ctxt->input->cur - ctxt->input->base; 11181 11182 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11183 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11184 base, current); 11185 } 11186 avail = xmlBufUse(ctxt->input->buf->buffer) - 11187 (ctxt->input->cur - ctxt->input->base); 11188 } 11189 if (avail < 1) 11190 goto done; 11191 switch (ctxt->instate) { 11192 case XML_PARSER_EOF: 11193 /* 11194 * Document parsing is done ! 11195 */ 11196 goto done; 11197 case XML_PARSER_START: 11198 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11199 xmlChar start[4]; 11200 xmlCharEncoding enc; 11201 11202 /* 11203 * Very first chars read from the document flow. 11204 */ 11205 if (avail < 4) 11206 goto done; 11207 11208 /* 11209 * Get the 4 first bytes and decode the charset 11210 * if enc != XML_CHAR_ENCODING_NONE 11211 * plug some encoding conversion routines, 11212 * else xmlSwitchEncoding will set to (default) 11213 * UTF8. 
11214 */ 11215 start[0] = RAW; 11216 start[1] = NXT(1); 11217 start[2] = NXT(2); 11218 start[3] = NXT(3); 11219 enc = xmlDetectCharEncoding(start, 4); 11220 xmlSwitchEncoding(ctxt, enc); 11221 break; 11222 } 11223 11224 if (avail < 2) 11225 goto done; 11226 cur = ctxt->input->cur[0]; 11227 next = ctxt->input->cur[1]; 11228 if (cur == 0) { 11229 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11230 ctxt->sax->setDocumentLocator(ctxt->userData, 11231 &xmlDefaultSAXLocator); 11232 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11233 xmlHaltParser(ctxt); 11234 #ifdef DEBUG_PUSH 11235 xmlGenericError(xmlGenericErrorContext, 11236 "PP: entering EOF\n"); 11237 #endif 11238 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11239 ctxt->sax->endDocument(ctxt->userData); 11240 goto done; 11241 } 11242 if ((cur == '<') && (next == '?')) { 11243 /* PI or XML decl */ 11244 if (avail < 5) return(ret); 11245 if ((!terminate) && 11246 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11247 return(ret); 11248 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11249 ctxt->sax->setDocumentLocator(ctxt->userData, 11250 &xmlDefaultSAXLocator); 11251 if ((ctxt->input->cur[2] == 'x') && 11252 (ctxt->input->cur[3] == 'm') && 11253 (ctxt->input->cur[4] == 'l') && 11254 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11255 ret += 5; 11256 #ifdef DEBUG_PUSH 11257 xmlGenericError(xmlGenericErrorContext, 11258 "PP: Parsing XML Decl\n"); 11259 #endif 11260 xmlParseXMLDecl(ctxt); 11261 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11262 /* 11263 * The XML REC instructs us to stop parsing right 11264 * here 11265 */ 11266 xmlHaltParser(ctxt); 11267 return(0); 11268 } 11269 ctxt->standalone = ctxt->input->standalone; 11270 if ((ctxt->encoding == NULL) && 11271 (ctxt->input->encoding != NULL)) 11272 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11273 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11274 (!ctxt->disableSAX)) 11275 ctxt->sax->startDocument(ctxt->userData); 11276 
ctxt->instate = XML_PARSER_MISC; 11277 #ifdef DEBUG_PUSH 11278 xmlGenericError(xmlGenericErrorContext, 11279 "PP: entering MISC\n"); 11280 #endif 11281 } else { 11282 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11283 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11284 (!ctxt->disableSAX)) 11285 ctxt->sax->startDocument(ctxt->userData); 11286 ctxt->instate = XML_PARSER_MISC; 11287 #ifdef DEBUG_PUSH 11288 xmlGenericError(xmlGenericErrorContext, 11289 "PP: entering MISC\n"); 11290 #endif 11291 } 11292 } else { 11293 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11294 ctxt->sax->setDocumentLocator(ctxt->userData, 11295 &xmlDefaultSAXLocator); 11296 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11297 if (ctxt->version == NULL) { 11298 xmlErrMemory(ctxt, NULL); 11299 break; 11300 } 11301 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11302 (!ctxt->disableSAX)) 11303 ctxt->sax->startDocument(ctxt->userData); 11304 ctxt->instate = XML_PARSER_MISC; 11305 #ifdef DEBUG_PUSH 11306 xmlGenericError(xmlGenericErrorContext, 11307 "PP: entering MISC\n"); 11308 #endif 11309 } 11310 break; 11311 case XML_PARSER_START_TAG: { 11312 const xmlChar *name; 11313 const xmlChar *prefix = NULL; 11314 const xmlChar *URI = NULL; 11315 int nsNr = ctxt->nsNr; 11316 11317 if ((avail < 2) && (ctxt->inputNr == 1)) 11318 goto done; 11319 cur = ctxt->input->cur[0]; 11320 if (cur != '<') { 11321 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11322 xmlHaltParser(ctxt); 11323 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11324 ctxt->sax->endDocument(ctxt->userData); 11325 goto done; 11326 } 11327 if (!terminate) { 11328 if (ctxt->progressive) { 11329 /* > can be found unescaped in attribute values */ 11330 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11331 goto done; 11332 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11333 goto done; 11334 } 11335 } 11336 if (ctxt->spaceNr == 0) 11337 spacePush(ctxt, -1); 11338 else if (*ctxt->space == -2) 11339 
spacePush(ctxt, -1); 11340 else 11341 spacePush(ctxt, *ctxt->space); 11342 #ifdef LIBXML_SAX1_ENABLED 11343 if (ctxt->sax2) 11344 #endif /* LIBXML_SAX1_ENABLED */ 11345 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11346 #ifdef LIBXML_SAX1_ENABLED 11347 else 11348 name = xmlParseStartTag(ctxt); 11349 #endif /* LIBXML_SAX1_ENABLED */ 11350 if (ctxt->instate == XML_PARSER_EOF) 11351 goto done; 11352 if (name == NULL) { 11353 spacePop(ctxt); 11354 xmlHaltParser(ctxt); 11355 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11356 ctxt->sax->endDocument(ctxt->userData); 11357 goto done; 11358 } 11359 #ifdef LIBXML_VALID_ENABLED 11360 /* 11361 * [ VC: Root Element Type ] 11362 * The Name in the document type declaration must match 11363 * the element type of the root element. 11364 */ 11365 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11366 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11367 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11368 #endif /* LIBXML_VALID_ENABLED */ 11369 11370 /* 11371 * Check for an Empty Element. 
11372 */ 11373 if ((RAW == '/') && (NXT(1) == '>')) { 11374 SKIP(2); 11375 11376 if (ctxt->sax2) { 11377 if ((ctxt->sax != NULL) && 11378 (ctxt->sax->endElementNs != NULL) && 11379 (!ctxt->disableSAX)) 11380 ctxt->sax->endElementNs(ctxt->userData, name, 11381 prefix, URI); 11382 if (ctxt->nsNr - nsNr > 0) 11383 nsPop(ctxt, ctxt->nsNr - nsNr); 11384 #ifdef LIBXML_SAX1_ENABLED 11385 } else { 11386 if ((ctxt->sax != NULL) && 11387 (ctxt->sax->endElement != NULL) && 11388 (!ctxt->disableSAX)) 11389 ctxt->sax->endElement(ctxt->userData, name); 11390 #endif /* LIBXML_SAX1_ENABLED */ 11391 } 11392 if (ctxt->instate == XML_PARSER_EOF) 11393 goto done; 11394 spacePop(ctxt); 11395 if (ctxt->nameNr == 0) { 11396 ctxt->instate = XML_PARSER_EPILOG; 11397 } else { 11398 ctxt->instate = XML_PARSER_CONTENT; 11399 } 11400 ctxt->progressive = 1; 11401 break; 11402 } 11403 if (RAW == '>') { 11404 NEXT; 11405 } else { 11406 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11407 "Couldn't find end of Start Tag %s\n", 11408 name); 11409 nodePop(ctxt); 11410 spacePop(ctxt); 11411 } 11412 if (ctxt->sax2) 11413 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11414 #ifdef LIBXML_SAX1_ENABLED 11415 else 11416 namePush(ctxt, name); 11417 #endif /* LIBXML_SAX1_ENABLED */ 11418 11419 ctxt->instate = XML_PARSER_CONTENT; 11420 ctxt->progressive = 1; 11421 break; 11422 } 11423 case XML_PARSER_CONTENT: { 11424 const xmlChar *test; 11425 unsigned int cons; 11426 if ((avail < 2) && (ctxt->inputNr == 1)) 11427 goto done; 11428 cur = ctxt->input->cur[0]; 11429 next = ctxt->input->cur[1]; 11430 11431 test = CUR_PTR; 11432 cons = ctxt->input->consumed; 11433 if ((cur == '<') && (next == '/')) { 11434 ctxt->instate = XML_PARSER_END_TAG; 11435 break; 11436 } else if ((cur == '<') && (next == '?')) { 11437 if ((!terminate) && 11438 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11439 ctxt->progressive = XML_PARSER_PI; 11440 goto done; 11441 } 11442 xmlParsePI(ctxt); 11443 ctxt->instate = 
XML_PARSER_CONTENT; 11444 ctxt->progressive = 1; 11445 } else if ((cur == '<') && (next != '!')) { 11446 ctxt->instate = XML_PARSER_START_TAG; 11447 break; 11448 } else if ((cur == '<') && (next == '!') && 11449 (ctxt->input->cur[2] == '-') && 11450 (ctxt->input->cur[3] == '-')) { 11451 int term; 11452 11453 if (avail < 4) 11454 goto done; 11455 ctxt->input->cur += 4; 11456 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11457 ctxt->input->cur -= 4; 11458 if ((!terminate) && (term < 0)) { 11459 ctxt->progressive = XML_PARSER_COMMENT; 11460 goto done; 11461 } 11462 xmlParseComment(ctxt); 11463 ctxt->instate = XML_PARSER_CONTENT; 11464 ctxt->progressive = 1; 11465 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11466 (ctxt->input->cur[2] == '[') && 11467 (ctxt->input->cur[3] == 'C') && 11468 (ctxt->input->cur[4] == 'D') && 11469 (ctxt->input->cur[5] == 'A') && 11470 (ctxt->input->cur[6] == 'T') && 11471 (ctxt->input->cur[7] == 'A') && 11472 (ctxt->input->cur[8] == '[')) { 11473 SKIP(9); 11474 ctxt->instate = XML_PARSER_CDATA_SECTION; 11475 break; 11476 } else if ((cur == '<') && (next == '!') && 11477 (avail < 9)) { 11478 goto done; 11479 } else if (cur == '&') { 11480 if ((!terminate) && 11481 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11482 goto done; 11483 xmlParseReference(ctxt); 11484 } else { 11485 /* TODO Avoid the extra copy, handle directly !!! */ 11486 /* 11487 * Goal of the following test is: 11488 * - minimize calls to the SAX 'character' callback 11489 * when they are mergeable 11490 * - handle an problem for isBlank when we only parse 11491 * a sequence of blank chars and the next one is 11492 * not available to check against '<' presence. 11493 * - tries to homogenize the differences in SAX 11494 * callbacks between the push and pull versions 11495 * of the parser. 
11496 */ 11497 if ((ctxt->inputNr == 1) && 11498 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11499 if (!terminate) { 11500 if (ctxt->progressive) { 11501 if ((lastlt == NULL) || 11502 (ctxt->input->cur > lastlt)) 11503 goto done; 11504 } else if (xmlParseLookupSequence(ctxt, 11505 '<', 0, 0) < 0) { 11506 goto done; 11507 } 11508 } 11509 } 11510 ctxt->checkIndex = 0; 11511 xmlParseCharData(ctxt, 0); 11512 } 11513 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11514 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11515 "detected an error in element content\n"); 11516 xmlHaltParser(ctxt); 11517 break; 11518 } 11519 break; 11520 } 11521 case XML_PARSER_END_TAG: 11522 if (avail < 2) 11523 goto done; 11524 if (!terminate) { 11525 if (ctxt->progressive) { 11526 /* > can be found unescaped in attribute values */ 11527 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11528 goto done; 11529 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11530 goto done; 11531 } 11532 } 11533 if (ctxt->sax2) { 11534 xmlParseEndTag2(ctxt, 11535 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11536 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11537 (int) (ptrdiff_t) 11538 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11539 nameNsPop(ctxt); 11540 } 11541 #ifdef LIBXML_SAX1_ENABLED 11542 else 11543 xmlParseEndTag1(ctxt, 0); 11544 #endif /* LIBXML_SAX1_ENABLED */ 11545 if (ctxt->instate == XML_PARSER_EOF) { 11546 /* Nothing */ 11547 } else if (ctxt->nameNr == 0) { 11548 ctxt->instate = XML_PARSER_EPILOG; 11549 } else { 11550 ctxt->instate = XML_PARSER_CONTENT; 11551 } 11552 break; 11553 case XML_PARSER_CDATA_SECTION: { 11554 /* 11555 * The Push mode need to have the SAX callback for 11556 * cdataBlock merge back contiguous callbacks. 
11557 */ 11558 int base; 11559 11560 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11561 if (base < 0) { 11562 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11563 int tmp; 11564 11565 tmp = xmlCheckCdataPush(ctxt->input->cur, 11566 XML_PARSER_BIG_BUFFER_SIZE, 0); 11567 if (tmp < 0) { 11568 tmp = -tmp; 11569 ctxt->input->cur += tmp; 11570 goto encoding_error; 11571 } 11572 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11573 if (ctxt->sax->cdataBlock != NULL) 11574 ctxt->sax->cdataBlock(ctxt->userData, 11575 ctxt->input->cur, tmp); 11576 else if (ctxt->sax->characters != NULL) 11577 ctxt->sax->characters(ctxt->userData, 11578 ctxt->input->cur, tmp); 11579 } 11580 if (ctxt->instate == XML_PARSER_EOF) 11581 goto done; 11582 SKIPL(tmp); 11583 ctxt->checkIndex = 0; 11584 } 11585 goto done; 11586 } else { 11587 int tmp; 11588 11589 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11590 if ((tmp < 0) || (tmp != base)) { 11591 tmp = -tmp; 11592 ctxt->input->cur += tmp; 11593 goto encoding_error; 11594 } 11595 if ((ctxt->sax != NULL) && (base == 0) && 11596 (ctxt->sax->cdataBlock != NULL) && 11597 (!ctxt->disableSAX)) { 11598 /* 11599 * Special case to provide identical behaviour 11600 * between pull and push parsers on enpty CDATA 11601 * sections 11602 */ 11603 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11604 (!strncmp((const char *)&ctxt->input->cur[-9], 11605 "<![CDATA[", 9))) 11606 ctxt->sax->cdataBlock(ctxt->userData, 11607 BAD_CAST "", 0); 11608 } else if ((ctxt->sax != NULL) && (base > 0) && 11609 (!ctxt->disableSAX)) { 11610 if (ctxt->sax->cdataBlock != NULL) 11611 ctxt->sax->cdataBlock(ctxt->userData, 11612 ctxt->input->cur, base); 11613 else if (ctxt->sax->characters != NULL) 11614 ctxt->sax->characters(ctxt->userData, 11615 ctxt->input->cur, base); 11616 } 11617 if (ctxt->instate == XML_PARSER_EOF) 11618 goto done; 11619 SKIPL(base + 3); 11620 ctxt->checkIndex = 0; 11621 ctxt->instate = XML_PARSER_CONTENT; 11622 #ifdef DEBUG_PUSH 11623 
xmlGenericError(xmlGenericErrorContext, 11624 "PP: entering CONTENT\n"); 11625 #endif 11626 } 11627 break; 11628 } 11629 case XML_PARSER_MISC: 11630 SKIP_BLANKS; 11631 if (ctxt->input->buf == NULL) 11632 avail = ctxt->input->length - 11633 (ctxt->input->cur - ctxt->input->base); 11634 else 11635 avail = xmlBufUse(ctxt->input->buf->buffer) - 11636 (ctxt->input->cur - ctxt->input->base); 11637 if (avail < 2) 11638 goto done; 11639 cur = ctxt->input->cur[0]; 11640 next = ctxt->input->cur[1]; 11641 if ((cur == '<') && (next == '?')) { 11642 if ((!terminate) && 11643 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11644 ctxt->progressive = XML_PARSER_PI; 11645 goto done; 11646 } 11647 #ifdef DEBUG_PUSH 11648 xmlGenericError(xmlGenericErrorContext, 11649 "PP: Parsing PI\n"); 11650 #endif 11651 xmlParsePI(ctxt); 11652 if (ctxt->instate == XML_PARSER_EOF) 11653 goto done; 11654 ctxt->instate = XML_PARSER_MISC; 11655 ctxt->progressive = 1; 11656 ctxt->checkIndex = 0; 11657 } else if ((cur == '<') && (next == '!') && 11658 (ctxt->input->cur[2] == '-') && 11659 (ctxt->input->cur[3] == '-')) { 11660 if ((!terminate) && 11661 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11662 ctxt->progressive = XML_PARSER_COMMENT; 11663 goto done; 11664 } 11665 #ifdef DEBUG_PUSH 11666 xmlGenericError(xmlGenericErrorContext, 11667 "PP: Parsing Comment\n"); 11668 #endif 11669 xmlParseComment(ctxt); 11670 if (ctxt->instate == XML_PARSER_EOF) 11671 goto done; 11672 ctxt->instate = XML_PARSER_MISC; 11673 ctxt->progressive = 1; 11674 ctxt->checkIndex = 0; 11675 } else if ((cur == '<') && (next == '!') && 11676 (ctxt->input->cur[2] == 'D') && 11677 (ctxt->input->cur[3] == 'O') && 11678 (ctxt->input->cur[4] == 'C') && 11679 (ctxt->input->cur[5] == 'T') && 11680 (ctxt->input->cur[6] == 'Y') && 11681 (ctxt->input->cur[7] == 'P') && 11682 (ctxt->input->cur[8] == 'E')) { 11683 if ((!terminate) && 11684 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11685 ctxt->progressive = XML_PARSER_DTD; 
11686 goto done; 11687 } 11688 #ifdef DEBUG_PUSH 11689 xmlGenericError(xmlGenericErrorContext, 11690 "PP: Parsing internal subset\n"); 11691 #endif 11692 ctxt->inSubset = 1; 11693 ctxt->progressive = 0; 11694 ctxt->checkIndex = 0; 11695 xmlParseDocTypeDecl(ctxt); 11696 if (ctxt->instate == XML_PARSER_EOF) 11697 goto done; 11698 if (RAW == '[') { 11699 ctxt->instate = XML_PARSER_DTD; 11700 #ifdef DEBUG_PUSH 11701 xmlGenericError(xmlGenericErrorContext, 11702 "PP: entering DTD\n"); 11703 #endif 11704 } else { 11705 /* 11706 * Create and update the external subset. 11707 */ 11708 ctxt->inSubset = 2; 11709 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11710 (ctxt->sax->externalSubset != NULL)) 11711 ctxt->sax->externalSubset(ctxt->userData, 11712 ctxt->intSubName, ctxt->extSubSystem, 11713 ctxt->extSubURI); 11714 ctxt->inSubset = 0; 11715 xmlCleanSpecialAttr(ctxt); 11716 ctxt->instate = XML_PARSER_PROLOG; 11717 #ifdef DEBUG_PUSH 11718 xmlGenericError(xmlGenericErrorContext, 11719 "PP: entering PROLOG\n"); 11720 #endif 11721 } 11722 } else if ((cur == '<') && (next == '!') && 11723 (avail < 9)) { 11724 goto done; 11725 } else { 11726 ctxt->instate = XML_PARSER_START_TAG; 11727 ctxt->progressive = XML_PARSER_START_TAG; 11728 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11729 #ifdef DEBUG_PUSH 11730 xmlGenericError(xmlGenericErrorContext, 11731 "PP: entering START_TAG\n"); 11732 #endif 11733 } 11734 break; 11735 case XML_PARSER_PROLOG: 11736 SKIP_BLANKS; 11737 if (ctxt->input->buf == NULL) 11738 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11739 else 11740 avail = xmlBufUse(ctxt->input->buf->buffer) - 11741 (ctxt->input->cur - ctxt->input->base); 11742 if (avail < 2) 11743 goto done; 11744 cur = ctxt->input->cur[0]; 11745 next = ctxt->input->cur[1]; 11746 if ((cur == '<') && (next == '?')) { 11747 if ((!terminate) && 11748 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11749 ctxt->progressive = XML_PARSER_PI; 11750 goto done; 11751 } 11752 
#ifdef DEBUG_PUSH 11753 xmlGenericError(xmlGenericErrorContext, 11754 "PP: Parsing PI\n"); 11755 #endif 11756 xmlParsePI(ctxt); 11757 if (ctxt->instate == XML_PARSER_EOF) 11758 goto done; 11759 ctxt->instate = XML_PARSER_PROLOG; 11760 ctxt->progressive = 1; 11761 } else if ((cur == '<') && (next == '!') && 11762 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11763 if ((!terminate) && 11764 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11765 ctxt->progressive = XML_PARSER_COMMENT; 11766 goto done; 11767 } 11768 #ifdef DEBUG_PUSH 11769 xmlGenericError(xmlGenericErrorContext, 11770 "PP: Parsing Comment\n"); 11771 #endif 11772 xmlParseComment(ctxt); 11773 if (ctxt->instate == XML_PARSER_EOF) 11774 goto done; 11775 ctxt->instate = XML_PARSER_PROLOG; 11776 ctxt->progressive = 1; 11777 } else if ((cur == '<') && (next == '!') && 11778 (avail < 4)) { 11779 goto done; 11780 } else { 11781 ctxt->instate = XML_PARSER_START_TAG; 11782 if (ctxt->progressive == 0) 11783 ctxt->progressive = XML_PARSER_START_TAG; 11784 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11785 #ifdef DEBUG_PUSH 11786 xmlGenericError(xmlGenericErrorContext, 11787 "PP: entering START_TAG\n"); 11788 #endif 11789 } 11790 break; 11791 case XML_PARSER_EPILOG: 11792 SKIP_BLANKS; 11793 if (ctxt->input->buf == NULL) 11794 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11795 else 11796 avail = xmlBufUse(ctxt->input->buf->buffer) - 11797 (ctxt->input->cur - ctxt->input->base); 11798 if (avail < 2) 11799 goto done; 11800 cur = ctxt->input->cur[0]; 11801 next = ctxt->input->cur[1]; 11802 if ((cur == '<') && (next == '?')) { 11803 if ((!terminate) && 11804 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11805 ctxt->progressive = XML_PARSER_PI; 11806 goto done; 11807 } 11808 #ifdef DEBUG_PUSH 11809 xmlGenericError(xmlGenericErrorContext, 11810 "PP: Parsing PI\n"); 11811 #endif 11812 xmlParsePI(ctxt); 11813 if (ctxt->instate == XML_PARSER_EOF) 11814 goto done; 11815 
ctxt->instate = XML_PARSER_EPILOG; 11816 ctxt->progressive = 1; 11817 } else if ((cur == '<') && (next == '!') && 11818 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11819 if ((!terminate) && 11820 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11821 ctxt->progressive = XML_PARSER_COMMENT; 11822 goto done; 11823 } 11824 #ifdef DEBUG_PUSH 11825 xmlGenericError(xmlGenericErrorContext, 11826 "PP: Parsing Comment\n"); 11827 #endif 11828 xmlParseComment(ctxt); 11829 if (ctxt->instate == XML_PARSER_EOF) 11830 goto done; 11831 ctxt->instate = XML_PARSER_EPILOG; 11832 ctxt->progressive = 1; 11833 } else if ((cur == '<') && (next == '!') && 11834 (avail < 4)) { 11835 goto done; 11836 } else { 11837 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11838 xmlHaltParser(ctxt); 11839 #ifdef DEBUG_PUSH 11840 xmlGenericError(xmlGenericErrorContext, 11841 "PP: entering EOF\n"); 11842 #endif 11843 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11844 ctxt->sax->endDocument(ctxt->userData); 11845 goto done; 11846 } 11847 break; 11848 case XML_PARSER_DTD: { 11849 /* 11850 * Sorry but progressive parsing of the internal subset 11851 * is not expected to be supported. We first check that 11852 * the full content of the internal subset is available and 11853 * the parsing is launched only at that point. 11854 * Internal subset ends up with "']' S? '>'" in an unescaped 11855 * section and not in a ']]>' sequence which are conditional 11856 * sections (whoever argued to keep that crap in XML deserve 11857 * a place in hell !). 
11858 */ 11859 int base, i; 11860 xmlChar *buf; 11861 xmlChar quote = 0; 11862 size_t use; 11863 11864 base = ctxt->input->cur - ctxt->input->base; 11865 if (base < 0) return(0); 11866 if (ctxt->checkIndex > base) 11867 base = ctxt->checkIndex; 11868 buf = xmlBufContent(ctxt->input->buf->buffer); 11869 use = xmlBufUse(ctxt->input->buf->buffer); 11870 for (;(unsigned int) base < use; base++) { 11871 if (quote != 0) { 11872 if (buf[base] == quote) 11873 quote = 0; 11874 continue; 11875 } 11876 if ((quote == 0) && (buf[base] == '<')) { 11877 int found = 0; 11878 /* special handling of comments */ 11879 if (((unsigned int) base + 4 < use) && 11880 (buf[base + 1] == '!') && 11881 (buf[base + 2] == '-') && 11882 (buf[base + 3] == '-')) { 11883 for (;(unsigned int) base + 3 < use; base++) { 11884 if ((buf[base] == '-') && 11885 (buf[base + 1] == '-') && 11886 (buf[base + 2] == '>')) { 11887 found = 1; 11888 base += 2; 11889 break; 11890 } 11891 } 11892 if (!found) { 11893 #if 0 11894 fprintf(stderr, "unfinished comment\n"); 11895 #endif 11896 break; /* for */ 11897 } 11898 continue; 11899 } 11900 } 11901 if (buf[base] == '"') { 11902 quote = '"'; 11903 continue; 11904 } 11905 if (buf[base] == '\'') { 11906 quote = '\''; 11907 continue; 11908 } 11909 if (buf[base] == ']') { 11910 #if 0 11911 fprintf(stderr, "%c%c%c%c: ", buf[base], 11912 buf[base + 1], buf[base + 2], buf[base + 3]); 11913 #endif 11914 if ((unsigned int) base +1 >= use) 11915 break; 11916 if (buf[base + 1] == ']') { 11917 /* conditional crap, skip both ']' ! 
*/ 11918 base++; 11919 continue; 11920 } 11921 for (i = 1; (unsigned int) base + i < use; i++) { 11922 if (buf[base + i] == '>') { 11923 #if 0 11924 fprintf(stderr, "found\n"); 11925 #endif 11926 goto found_end_int_subset; 11927 } 11928 if (!IS_BLANK_CH(buf[base + i])) { 11929 #if 0 11930 fprintf(stderr, "not found\n"); 11931 #endif 11932 goto not_end_of_int_subset; 11933 } 11934 } 11935 #if 0 11936 fprintf(stderr, "end of stream\n"); 11937 #endif 11938 break; 11939 11940 } 11941 not_end_of_int_subset: 11942 continue; /* for */ 11943 } 11944 /* 11945 * We didn't found the end of the Internal subset 11946 */ 11947 if (quote == 0) 11948 ctxt->checkIndex = base; 11949 else 11950 ctxt->checkIndex = 0; 11951 #ifdef DEBUG_PUSH 11952 if (next == 0) 11953 xmlGenericError(xmlGenericErrorContext, 11954 "PP: lookup of int subset end filed\n"); 11955 #endif 11956 goto done; 11957 11958 found_end_int_subset: 11959 ctxt->checkIndex = 0; 11960 xmlParseInternalSubset(ctxt); 11961 if (ctxt->instate == XML_PARSER_EOF) 11962 goto done; 11963 ctxt->inSubset = 2; 11964 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11965 (ctxt->sax->externalSubset != NULL)) 11966 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11967 ctxt->extSubSystem, ctxt->extSubURI); 11968 ctxt->inSubset = 0; 11969 xmlCleanSpecialAttr(ctxt); 11970 if (ctxt->instate == XML_PARSER_EOF) 11971 goto done; 11972 ctxt->instate = XML_PARSER_PROLOG; 11973 ctxt->checkIndex = 0; 11974 #ifdef DEBUG_PUSH 11975 xmlGenericError(xmlGenericErrorContext, 11976 "PP: entering PROLOG\n"); 11977 #endif 11978 break; 11979 } 11980 case XML_PARSER_COMMENT: 11981 xmlGenericError(xmlGenericErrorContext, 11982 "PP: internal error, state == COMMENT\n"); 11983 ctxt->instate = XML_PARSER_CONTENT; 11984 #ifdef DEBUG_PUSH 11985 xmlGenericError(xmlGenericErrorContext, 11986 "PP: entering CONTENT\n"); 11987 #endif 11988 break; 11989 case XML_PARSER_IGNORE: 11990 xmlGenericError(xmlGenericErrorContext, 11991 "PP: internal error, state 
== IGNORE"); 11992 ctxt->instate = XML_PARSER_DTD; 11993 #ifdef DEBUG_PUSH 11994 xmlGenericError(xmlGenericErrorContext, 11995 "PP: entering DTD\n"); 11996 #endif 11997 break; 11998 case XML_PARSER_PI: 11999 xmlGenericError(xmlGenericErrorContext, 12000 "PP: internal error, state == PI\n"); 12001 ctxt->instate = XML_PARSER_CONTENT; 12002 #ifdef DEBUG_PUSH 12003 xmlGenericError(xmlGenericErrorContext, 12004 "PP: entering CONTENT\n"); 12005 #endif 12006 break; 12007 case XML_PARSER_ENTITY_DECL: 12008 xmlGenericError(xmlGenericErrorContext, 12009 "PP: internal error, state == ENTITY_DECL\n"); 12010 ctxt->instate = XML_PARSER_DTD; 12011 #ifdef DEBUG_PUSH 12012 xmlGenericError(xmlGenericErrorContext, 12013 "PP: entering DTD\n"); 12014 #endif 12015 break; 12016 case XML_PARSER_ENTITY_VALUE: 12017 xmlGenericError(xmlGenericErrorContext, 12018 "PP: internal error, state == ENTITY_VALUE\n"); 12019 ctxt->instate = XML_PARSER_CONTENT; 12020 #ifdef DEBUG_PUSH 12021 xmlGenericError(xmlGenericErrorContext, 12022 "PP: entering DTD\n"); 12023 #endif 12024 break; 12025 case XML_PARSER_ATTRIBUTE_VALUE: 12026 xmlGenericError(xmlGenericErrorContext, 12027 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12028 ctxt->instate = XML_PARSER_START_TAG; 12029 #ifdef DEBUG_PUSH 12030 xmlGenericError(xmlGenericErrorContext, 12031 "PP: entering START_TAG\n"); 12032 #endif 12033 break; 12034 case XML_PARSER_SYSTEM_LITERAL: 12035 xmlGenericError(xmlGenericErrorContext, 12036 "PP: internal error, state == SYSTEM_LITERAL\n"); 12037 ctxt->instate = XML_PARSER_START_TAG; 12038 #ifdef DEBUG_PUSH 12039 xmlGenericError(xmlGenericErrorContext, 12040 "PP: entering START_TAG\n"); 12041 #endif 12042 break; 12043 case XML_PARSER_PUBLIC_LITERAL: 12044 xmlGenericError(xmlGenericErrorContext, 12045 "PP: internal error, state == PUBLIC_LITERAL\n"); 12046 ctxt->instate = XML_PARSER_START_TAG; 12047 #ifdef DEBUG_PUSH 12048 xmlGenericError(xmlGenericErrorContext, 12049 "PP: entering START_TAG\n"); 12050 #endif 
12051 break; 12052 } 12053 } 12054 done: 12055 #ifdef DEBUG_PUSH 12056 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12057 #endif 12058 return(ret); 12059 encoding_error: 12060 { 12061 char buffer[150]; 12062 12063 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12064 ctxt->input->cur[0], ctxt->input->cur[1], 12065 ctxt->input->cur[2], ctxt->input->cur[3]); 12066 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12067 "Input is not proper UTF-8, indicate encoding !\n%s", 12068 BAD_CAST buffer, NULL); 12069 } 12070 return(0); 12071 } 12072 12073 /** 12074 * xmlParseCheckTransition: 12075 * @ctxt: an XML parser context 12076 * @chunk: a char array 12077 * @size: the size in byte of the chunk 12078 * 12079 * Check depending on the current parser state if the chunk given must be 12080 * processed immediately or one need more data to advance on parsing. 12081 * 12082 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12083 */ 12084 static int 12085 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12086 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12087 return(-1); 12088 if (ctxt->instate == XML_PARSER_START_TAG) { 12089 if (memchr(chunk, '>', size) != NULL) 12090 return(1); 12091 return(0); 12092 } 12093 if (ctxt->progressive == XML_PARSER_COMMENT) { 12094 if (memchr(chunk, '>', size) != NULL) 12095 return(1); 12096 return(0); 12097 } 12098 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12099 if (memchr(chunk, '>', size) != NULL) 12100 return(1); 12101 return(0); 12102 } 12103 if (ctxt->progressive == XML_PARSER_PI) { 12104 if (memchr(chunk, '>', size) != NULL) 12105 return(1); 12106 return(0); 12107 } 12108 if (ctxt->instate == XML_PARSER_END_TAG) { 12109 if (memchr(chunk, '>', size) != NULL) 12110 return(1); 12111 return(0); 12112 } 12113 if ((ctxt->progressive == XML_PARSER_DTD) || 12114 (ctxt->instate == XML_PARSER_DTD)) { 12115 if (memchr(chunk, '>', size) != NULL) 12116 
return(1); 12117 return(0); 12118 } 12119 return(1); 12120 } 12121 12122 /** 12123 * xmlParseChunk: 12124 * @ctxt: an XML parser context 12125 * @chunk: an char array 12126 * @size: the size in byte of the chunk 12127 * @terminate: last chunk indicator 12128 * 12129 * Parse a Chunk of memory 12130 * 12131 * Returns zero if no error, the xmlParserErrors otherwise. 12132 */ 12133 int 12134 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12135 int terminate) { 12136 int end_in_lf = 0; 12137 int remain = 0; 12138 size_t old_avail = 0; 12139 size_t avail = 0; 12140 12141 if (ctxt == NULL) 12142 return(XML_ERR_INTERNAL_ERROR); 12143 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12144 return(ctxt->errNo); 12145 if (ctxt->instate == XML_PARSER_EOF) 12146 return(-1); 12147 if (ctxt->instate == XML_PARSER_START) 12148 xmlDetectSAX2(ctxt); 12149 if ((size > 0) && (chunk != NULL) && (!terminate) && 12150 (chunk[size - 1] == '\r')) { 12151 end_in_lf = 1; 12152 size--; 12153 } 12154 12155 xmldecl_done: 12156 12157 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12158 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12159 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12160 size_t cur = ctxt->input->cur - ctxt->input->base; 12161 int res; 12162 12163 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12164 /* 12165 * Specific handling if we autodetected an encoding, we should not 12166 * push more than the first line ... 
which depend on the encoding 12167 * And only push the rest once the final encoding was detected 12168 */ 12169 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12170 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12171 unsigned int len = 45; 12172 12173 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12174 BAD_CAST "UTF-16")) || 12175 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12176 BAD_CAST "UTF16"))) 12177 len = 90; 12178 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12179 BAD_CAST "UCS-4")) || 12180 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12181 BAD_CAST "UCS4"))) 12182 len = 180; 12183 12184 if (ctxt->input->buf->rawconsumed < len) 12185 len -= ctxt->input->buf->rawconsumed; 12186 12187 /* 12188 * Change size for reading the initial declaration only 12189 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12190 * will blindly copy extra bytes from memory. 12191 */ 12192 if ((unsigned int) size > len) { 12193 remain = size - len; 12194 size = len; 12195 } else { 12196 remain = 0; 12197 } 12198 } 12199 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12200 if (res < 0) { 12201 ctxt->errNo = XML_PARSER_EOF; 12202 xmlHaltParser(ctxt); 12203 return (XML_PARSER_EOF); 12204 } 12205 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12206 #ifdef DEBUG_PUSH 12207 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12208 #endif 12209 12210 } else if (ctxt->instate != XML_PARSER_EOF) { 12211 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12212 xmlParserInputBufferPtr in = ctxt->input->buf; 12213 if ((in->encoder != NULL) && (in->buffer != NULL) && 12214 (in->raw != NULL)) { 12215 int nbchars; 12216 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12217 size_t current = ctxt->input->cur - ctxt->input->base; 12218 12219 nbchars = xmlCharEncInput(in, terminate); 12220 if (nbchars < 0) { 
12221 /* TODO 2.6.0 */ 12222 xmlGenericError(xmlGenericErrorContext, 12223 "xmlParseChunk: encoder error\n"); 12224 return(XML_ERR_INVALID_ENCODING); 12225 } 12226 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12227 } 12228 } 12229 } 12230 if (remain != 0) { 12231 xmlParseTryOrFinish(ctxt, 0); 12232 } else { 12233 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12234 avail = xmlBufUse(ctxt->input->buf->buffer); 12235 /* 12236 * Depending on the current state it may not be such 12237 * a good idea to try parsing if there is nothing in the chunk 12238 * which would be worth doing a parser state transition and we 12239 * need to wait for more data 12240 */ 12241 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12242 (old_avail == 0) || (avail == 0) || 12243 (xmlParseCheckTransition(ctxt, 12244 (const char *)&ctxt->input->base[old_avail], 12245 avail - old_avail))) 12246 xmlParseTryOrFinish(ctxt, terminate); 12247 } 12248 if (ctxt->instate == XML_PARSER_EOF) 12249 return(ctxt->errNo); 12250 12251 if ((ctxt->input != NULL) && 12252 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12253 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12254 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12255 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12256 xmlHaltParser(ctxt); 12257 } 12258 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12259 return(ctxt->errNo); 12260 12261 if (remain != 0) { 12262 chunk += size; 12263 size = remain; 12264 remain = 0; 12265 goto xmldecl_done; 12266 } 12267 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12268 (ctxt->input->buf != NULL)) { 12269 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12270 ctxt->input); 12271 size_t current = ctxt->input->cur - ctxt->input->base; 12272 12273 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12274 12275 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12276 base, current); 12277 } 12278 if 
(terminate) { 12279 /* 12280 * Check for termination 12281 */ 12282 int cur_avail = 0; 12283 12284 if (ctxt->input != NULL) { 12285 if (ctxt->input->buf == NULL) 12286 cur_avail = ctxt->input->length - 12287 (ctxt->input->cur - ctxt->input->base); 12288 else 12289 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12290 (ctxt->input->cur - ctxt->input->base); 12291 } 12292 12293 if ((ctxt->instate != XML_PARSER_EOF) && 12294 (ctxt->instate != XML_PARSER_EPILOG)) { 12295 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12296 } 12297 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12298 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12299 } 12300 if (ctxt->instate != XML_PARSER_EOF) { 12301 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12302 ctxt->sax->endDocument(ctxt->userData); 12303 } 12304 ctxt->instate = XML_PARSER_EOF; 12305 } 12306 if (ctxt->wellFormed == 0) 12307 return((xmlParserErrors) ctxt->errNo); 12308 else 12309 return(0); 12310 } 12311 12312 /************************************************************************ 12313 * * 12314 * I/O front end functions to the parser * 12315 * * 12316 ************************************************************************/ 12317 12318 /** 12319 * xmlCreatePushParserCtxt: 12320 * @sax: a SAX handler 12321 * @user_data: The user data returned on SAX callbacks 12322 * @chunk: a pointer to an array of chars 12323 * @size: number of chars in the array 12324 * @filename: an optional file name or URI 12325 * 12326 * Create a parser context for using the XML parser in push mode. 12327 * If @buffer and @size are non-NULL, the data is used to detect 12328 * the encoding. The remaining characters will be parsed so they 12329 * don't need to be fed in again through xmlParseChunk. 12330 * To allow content encoding detection, @size should be >= 4 12331 * The value of @filename is used for fetching external entities 12332 * and error/warning reports. 
12333 * 12334 * Returns the new parser context or NULL 12335 */ 12336 12337 xmlParserCtxtPtr 12338 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12339 const char *chunk, int size, const char *filename) { 12340 xmlParserCtxtPtr ctxt; 12341 xmlParserInputPtr inputStream; 12342 xmlParserInputBufferPtr buf; 12343 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12344 12345 /* 12346 * plug some encoding conversion routines 12347 */ 12348 if ((chunk != NULL) && (size >= 4)) 12349 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12350 12351 buf = xmlAllocParserInputBuffer(enc); 12352 if (buf == NULL) return(NULL); 12353 12354 ctxt = xmlNewParserCtxt(); 12355 if (ctxt == NULL) { 12356 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12357 xmlFreeParserInputBuffer(buf); 12358 return(NULL); 12359 } 12360 ctxt->dictNames = 1; 12361 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12362 if (ctxt->pushTab == NULL) { 12363 xmlErrMemory(ctxt, NULL); 12364 xmlFreeParserInputBuffer(buf); 12365 xmlFreeParserCtxt(ctxt); 12366 return(NULL); 12367 } 12368 if (sax != NULL) { 12369 #ifdef LIBXML_SAX1_ENABLED 12370 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12371 #endif /* LIBXML_SAX1_ENABLED */ 12372 xmlFree(ctxt->sax); 12373 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12374 if (ctxt->sax == NULL) { 12375 xmlErrMemory(ctxt, NULL); 12376 xmlFreeParserInputBuffer(buf); 12377 xmlFreeParserCtxt(ctxt); 12378 return(NULL); 12379 } 12380 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12381 if (sax->initialized == XML_SAX2_MAGIC) 12382 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12383 else 12384 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12385 if (user_data != NULL) 12386 ctxt->userData = user_data; 12387 } 12388 if (filename == NULL) { 12389 ctxt->directory = NULL; 12390 } else { 12391 ctxt->directory = xmlParserGetDirectory(filename); 12392 } 12393 12394 inputStream = xmlNewInputStream(ctxt); 
12395 if (inputStream == NULL) { 12396 xmlFreeParserCtxt(ctxt); 12397 xmlFreeParserInputBuffer(buf); 12398 return(NULL); 12399 } 12400 12401 if (filename == NULL) 12402 inputStream->filename = NULL; 12403 else { 12404 inputStream->filename = (char *) 12405 xmlCanonicPath((const xmlChar *) filename); 12406 if (inputStream->filename == NULL) { 12407 xmlFreeParserCtxt(ctxt); 12408 xmlFreeParserInputBuffer(buf); 12409 return(NULL); 12410 } 12411 } 12412 inputStream->buf = buf; 12413 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12414 inputPush(ctxt, inputStream); 12415 12416 /* 12417 * If the caller didn't provide an initial 'chunk' for determining 12418 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12419 * that it can be automatically determined later 12420 */ 12421 if ((size == 0) || (chunk == NULL)) { 12422 ctxt->charset = XML_CHAR_ENCODING_NONE; 12423 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12425 size_t cur = ctxt->input->cur - ctxt->input->base; 12426 12427 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12428 12429 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12430 #ifdef DEBUG_PUSH 12431 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12432 #endif 12433 } 12434 12435 if (enc != XML_CHAR_ENCODING_NONE) { 12436 xmlSwitchEncoding(ctxt, enc); 12437 } 12438 12439 return(ctxt); 12440 } 12441 #endif /* LIBXML_PUSH_ENABLED */ 12442 12443 /** 12444 * xmlHaltParser: 12445 * @ctxt: an XML parser context 12446 * 12447 * Blocks further parser processing don't override error 12448 * for internal use 12449 */ 12450 static void 12451 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12452 if (ctxt == NULL) 12453 return; 12454 ctxt->instate = XML_PARSER_EOF; 12455 ctxt->disableSAX = 1; 12456 while (ctxt->inputNr > 1) 12457 xmlFreeInputStream(inputPop(ctxt)); 12458 if (ctxt->input != NULL) { 12459 /* 
12460 * in case there was a specific allocation deallocate before 12461 * overriding base 12462 */ 12463 if (ctxt->input->free != NULL) { 12464 ctxt->input->free((xmlChar *) ctxt->input->base); 12465 ctxt->input->free = NULL; 12466 } 12467 ctxt->input->cur = BAD_CAST""; 12468 ctxt->input->base = ctxt->input->cur; 12469 ctxt->input->end = ctxt->input->cur; 12470 } 12471 } 12472 12473 /** 12474 * xmlStopParser: 12475 * @ctxt: an XML parser context 12476 * 12477 * Blocks further parser processing 12478 */ 12479 void 12480 xmlStopParser(xmlParserCtxtPtr ctxt) { 12481 if (ctxt == NULL) 12482 return; 12483 xmlHaltParser(ctxt); 12484 ctxt->errNo = XML_ERR_USER_STOP; 12485 } 12486 12487 /** 12488 * xmlCreateIOParserCtxt: 12489 * @sax: a SAX handler 12490 * @user_data: The user data returned on SAX callbacks 12491 * @ioread: an I/O read function 12492 * @ioclose: an I/O close function 12493 * @ioctx: an I/O handler 12494 * @enc: the charset encoding if known 12495 * 12496 * Create a parser context for using the XML parser with an existing 12497 * I/O stream 12498 * 12499 * Returns the new parser context or NULL 12500 */ 12501 xmlParserCtxtPtr 12502 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12503 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12504 void *ioctx, xmlCharEncoding enc) { 12505 xmlParserCtxtPtr ctxt; 12506 xmlParserInputPtr inputStream; 12507 xmlParserInputBufferPtr buf; 12508 12509 if (ioread == NULL) return(NULL); 12510 12511 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12512 if (buf == NULL) { 12513 if (ioclose != NULL) 12514 ioclose(ioctx); 12515 return (NULL); 12516 } 12517 12518 ctxt = xmlNewParserCtxt(); 12519 if (ctxt == NULL) { 12520 xmlFreeParserInputBuffer(buf); 12521 return(NULL); 12522 } 12523 if (sax != NULL) { 12524 #ifdef LIBXML_SAX1_ENABLED 12525 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12526 #endif /* LIBXML_SAX1_ENABLED */ 12527 xmlFree(ctxt->sax); 12528 ctxt->sax = 
(xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12529 if (ctxt->sax == NULL) { 12530 xmlErrMemory(ctxt, NULL); 12531 xmlFreeParserCtxt(ctxt); 12532 return(NULL); 12533 } 12534 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12535 if (sax->initialized == XML_SAX2_MAGIC) 12536 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12537 else 12538 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12539 if (user_data != NULL) 12540 ctxt->userData = user_data; 12541 } 12542 12543 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12544 if (inputStream == NULL) { 12545 xmlFreeParserCtxt(ctxt); 12546 return(NULL); 12547 } 12548 inputPush(ctxt, inputStream); 12549 12550 return(ctxt); 12551 } 12552 12553 #ifdef LIBXML_VALID_ENABLED 12554 /************************************************************************ 12555 * * 12556 * Front ends when parsing a DTD * 12557 * * 12558 ************************************************************************/ 12559 12560 /** 12561 * xmlIOParseDTD: 12562 * @sax: the SAX handler block or NULL 12563 * @input: an Input Buffer 12564 * @enc: the charset encoding if known 12565 * 12566 * Load and parse a DTD 12567 * 12568 * Returns the resulting xmlDtdPtr or NULL in case of error. 12569 * @input will be freed by the function in any case. 
12570 */ 12571 12572 xmlDtdPtr 12573 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12574 xmlCharEncoding enc) { 12575 xmlDtdPtr ret = NULL; 12576 xmlParserCtxtPtr ctxt; 12577 xmlParserInputPtr pinput = NULL; 12578 xmlChar start[4]; 12579 12580 if (input == NULL) 12581 return(NULL); 12582 12583 ctxt = xmlNewParserCtxt(); 12584 if (ctxt == NULL) { 12585 xmlFreeParserInputBuffer(input); 12586 return(NULL); 12587 } 12588 12589 /* We are loading a DTD */ 12590 ctxt->options |= XML_PARSE_DTDLOAD; 12591 12592 /* 12593 * Set-up the SAX context 12594 */ 12595 if (sax != NULL) { 12596 if (ctxt->sax != NULL) 12597 xmlFree(ctxt->sax); 12598 ctxt->sax = sax; 12599 ctxt->userData = ctxt; 12600 } 12601 xmlDetectSAX2(ctxt); 12602 12603 /* 12604 * generate a parser input from the I/O handler 12605 */ 12606 12607 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12608 if (pinput == NULL) { 12609 if (sax != NULL) ctxt->sax = NULL; 12610 xmlFreeParserInputBuffer(input); 12611 xmlFreeParserCtxt(ctxt); 12612 return(NULL); 12613 } 12614 12615 /* 12616 * plug some encoding conversion routines here. 12617 */ 12618 if (xmlPushInput(ctxt, pinput) < 0) { 12619 if (sax != NULL) ctxt->sax = NULL; 12620 xmlFreeParserCtxt(ctxt); 12621 return(NULL); 12622 } 12623 if (enc != XML_CHAR_ENCODING_NONE) { 12624 xmlSwitchEncoding(ctxt, enc); 12625 } 12626 12627 pinput->filename = NULL; 12628 pinput->line = 1; 12629 pinput->col = 1; 12630 pinput->base = ctxt->input->cur; 12631 pinput->cur = ctxt->input->cur; 12632 pinput->free = NULL; 12633 12634 /* 12635 * let's parse that entity knowing it's an external subset. 
12636 */ 12637 ctxt->inSubset = 2; 12638 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12639 if (ctxt->myDoc == NULL) { 12640 xmlErrMemory(ctxt, "New Doc failed"); 12641 return(NULL); 12642 } 12643 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12644 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12645 BAD_CAST "none", BAD_CAST "none"); 12646 12647 if ((enc == XML_CHAR_ENCODING_NONE) && 12648 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12649 /* 12650 * Get the 4 first bytes and decode the charset 12651 * if enc != XML_CHAR_ENCODING_NONE 12652 * plug some encoding conversion routines. 12653 */ 12654 start[0] = RAW; 12655 start[1] = NXT(1); 12656 start[2] = NXT(2); 12657 start[3] = NXT(3); 12658 enc = xmlDetectCharEncoding(start, 4); 12659 if (enc != XML_CHAR_ENCODING_NONE) { 12660 xmlSwitchEncoding(ctxt, enc); 12661 } 12662 } 12663 12664 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12665 12666 if (ctxt->myDoc != NULL) { 12667 if (ctxt->wellFormed) { 12668 ret = ctxt->myDoc->extSubset; 12669 ctxt->myDoc->extSubset = NULL; 12670 if (ret != NULL) { 12671 xmlNodePtr tmp; 12672 12673 ret->doc = NULL; 12674 tmp = ret->children; 12675 while (tmp != NULL) { 12676 tmp->doc = NULL; 12677 tmp = tmp->next; 12678 } 12679 } 12680 } else { 12681 ret = NULL; 12682 } 12683 xmlFreeDoc(ctxt->myDoc); 12684 ctxt->myDoc = NULL; 12685 } 12686 if (sax != NULL) ctxt->sax = NULL; 12687 xmlFreeParserCtxt(ctxt); 12688 12689 return(ret); 12690 } 12691 12692 /** 12693 * xmlSAXParseDTD: 12694 * @sax: the SAX handler block 12695 * @ExternalID: a NAME* containing the External ID of the DTD 12696 * @SystemID: a NAME* containing the URL to the DTD 12697 * 12698 * Load and parse an external subset. 12699 * 12700 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12701 */ 12702 12703 xmlDtdPtr 12704 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12705 const xmlChar *SystemID) { 12706 xmlDtdPtr ret = NULL; 12707 xmlParserCtxtPtr ctxt; 12708 xmlParserInputPtr input = NULL; 12709 xmlCharEncoding enc; 12710 xmlChar* systemIdCanonic; 12711 12712 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12713 12714 ctxt = xmlNewParserCtxt(); 12715 if (ctxt == NULL) { 12716 return(NULL); 12717 } 12718 12719 /* We are loading a DTD */ 12720 ctxt->options |= XML_PARSE_DTDLOAD; 12721 12722 /* 12723 * Set-up the SAX context 12724 */ 12725 if (sax != NULL) { 12726 if (ctxt->sax != NULL) 12727 xmlFree(ctxt->sax); 12728 ctxt->sax = sax; 12729 ctxt->userData = ctxt; 12730 } 12731 12732 /* 12733 * Canonicalise the system ID 12734 */ 12735 systemIdCanonic = xmlCanonicPath(SystemID); 12736 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12737 xmlFreeParserCtxt(ctxt); 12738 return(NULL); 12739 } 12740 12741 /* 12742 * Ask the Entity resolver to load the damn thing 12743 */ 12744 12745 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12746 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12747 systemIdCanonic); 12748 if (input == NULL) { 12749 if (sax != NULL) ctxt->sax = NULL; 12750 xmlFreeParserCtxt(ctxt); 12751 if (systemIdCanonic != NULL) 12752 xmlFree(systemIdCanonic); 12753 return(NULL); 12754 } 12755 12756 /* 12757 * plug some encoding conversion routines here. 
12758 */ 12759 if (xmlPushInput(ctxt, input) < 0) { 12760 if (sax != NULL) ctxt->sax = NULL; 12761 xmlFreeParserCtxt(ctxt); 12762 if (systemIdCanonic != NULL) 12763 xmlFree(systemIdCanonic); 12764 return(NULL); 12765 } 12766 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12767 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12768 xmlSwitchEncoding(ctxt, enc); 12769 } 12770 12771 if (input->filename == NULL) 12772 input->filename = (char *) systemIdCanonic; 12773 else 12774 xmlFree(systemIdCanonic); 12775 input->line = 1; 12776 input->col = 1; 12777 input->base = ctxt->input->cur; 12778 input->cur = ctxt->input->cur; 12779 input->free = NULL; 12780 12781 /* 12782 * let's parse that entity knowing it's an external subset. 12783 */ 12784 ctxt->inSubset = 2; 12785 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12786 if (ctxt->myDoc == NULL) { 12787 xmlErrMemory(ctxt, "New Doc failed"); 12788 if (sax != NULL) ctxt->sax = NULL; 12789 xmlFreeParserCtxt(ctxt); 12790 return(NULL); 12791 } 12792 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12793 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12794 ExternalID, SystemID); 12795 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12796 12797 if (ctxt->myDoc != NULL) { 12798 if (ctxt->wellFormed) { 12799 ret = ctxt->myDoc->extSubset; 12800 ctxt->myDoc->extSubset = NULL; 12801 if (ret != NULL) { 12802 xmlNodePtr tmp; 12803 12804 ret->doc = NULL; 12805 tmp = ret->children; 12806 while (tmp != NULL) { 12807 tmp->doc = NULL; 12808 tmp = tmp->next; 12809 } 12810 } 12811 } else { 12812 ret = NULL; 12813 } 12814 xmlFreeDoc(ctxt->myDoc); 12815 ctxt->myDoc = NULL; 12816 } 12817 if (sax != NULL) ctxt->sax = NULL; 12818 xmlFreeParserCtxt(ctxt); 12819 12820 return(ret); 12821 } 12822 12823 12824 /** 12825 * xmlParseDTD: 12826 * @ExternalID: a NAME* containing the External ID of the DTD 12827 * @SystemID: a NAME* containing the URL to the DTD 12828 * 12829 * Load and parse an external subset. 
12830 * 12831 * Returns the resulting xmlDtdPtr or NULL in case of error. 12832 */ 12833 12834 xmlDtdPtr 12835 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12836 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12837 } 12838 #endif /* LIBXML_VALID_ENABLED */ 12839 12840 /************************************************************************ 12841 * * 12842 * Front ends when parsing an Entity * 12843 * * 12844 ************************************************************************/ 12845 12846 /** 12847 * xmlParseCtxtExternalEntity: 12848 * @ctx: the existing parsing context 12849 * @URL: the URL for the entity to load 12850 * @ID: the System ID for the entity to load 12851 * @lst: the return value for the set of parsed nodes 12852 * 12853 * Parse an external general entity within an existing parsing context 12854 * An external general parsed entity is well-formed if it matches the 12855 * production labeled extParsedEnt. 12856 * 12857 * [78] extParsedEnt ::= TextDecl? 
content 12858 * 12859 * Returns 0 if the entity is well formed, -1 in case of args problem and 12860 * the parser error code otherwise 12861 */ 12862 12863 int 12864 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12865 const xmlChar *ID, xmlNodePtr *lst) { 12866 xmlParserCtxtPtr ctxt; 12867 xmlDocPtr newDoc; 12868 xmlNodePtr newRoot; 12869 xmlSAXHandlerPtr oldsax = NULL; 12870 int ret = 0; 12871 xmlChar start[4]; 12872 xmlCharEncoding enc; 12873 12874 if (ctx == NULL) return(-1); 12875 12876 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12877 (ctx->depth > 1024)) { 12878 return(XML_ERR_ENTITY_LOOP); 12879 } 12880 12881 if (lst != NULL) 12882 *lst = NULL; 12883 if ((URL == NULL) && (ID == NULL)) 12884 return(-1); 12885 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12886 return(-1); 12887 12888 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12889 if (ctxt == NULL) { 12890 return(-1); 12891 } 12892 12893 oldsax = ctxt->sax; 12894 ctxt->sax = ctx->sax; 12895 xmlDetectSAX2(ctxt); 12896 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12897 if (newDoc == NULL) { 12898 xmlFreeParserCtxt(ctxt); 12899 return(-1); 12900 } 12901 newDoc->properties = XML_DOC_INTERNAL; 12902 if (ctx->myDoc->dict) { 12903 newDoc->dict = ctx->myDoc->dict; 12904 xmlDictReference(newDoc->dict); 12905 } 12906 if (ctx->myDoc != NULL) { 12907 newDoc->intSubset = ctx->myDoc->intSubset; 12908 newDoc->extSubset = ctx->myDoc->extSubset; 12909 } 12910 if (ctx->myDoc->URL != NULL) { 12911 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12912 } 12913 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12914 if (newRoot == NULL) { 12915 ctxt->sax = oldsax; 12916 xmlFreeParserCtxt(ctxt); 12917 newDoc->intSubset = NULL; 12918 newDoc->extSubset = NULL; 12919 xmlFreeDoc(newDoc); 12920 return(-1); 12921 } 12922 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12923 nodePush(ctxt, newDoc->children); 12924 if (ctx->myDoc == NULL) { 12925 
ctxt->myDoc = newDoc; 12926 } else { 12927 ctxt->myDoc = ctx->myDoc; 12928 newDoc->children->doc = ctx->myDoc; 12929 } 12930 12931 /* 12932 * Get the 4 first bytes and decode the charset 12933 * if enc != XML_CHAR_ENCODING_NONE 12934 * plug some encoding conversion routines. 12935 */ 12936 GROW 12937 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12938 start[0] = RAW; 12939 start[1] = NXT(1); 12940 start[2] = NXT(2); 12941 start[3] = NXT(3); 12942 enc = xmlDetectCharEncoding(start, 4); 12943 if (enc != XML_CHAR_ENCODING_NONE) { 12944 xmlSwitchEncoding(ctxt, enc); 12945 } 12946 } 12947 12948 /* 12949 * Parse a possible text declaration first 12950 */ 12951 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12952 xmlParseTextDecl(ctxt); 12953 /* 12954 * An XML-1.0 document can't reference an entity not XML-1.0 12955 */ 12956 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12957 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12958 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12959 "Version mismatch between document and entity\n"); 12960 } 12961 } 12962 12963 /* 12964 * If the user provided its own SAX callbacks then reuse the 12965 * useData callback field, otherwise the expected setup in a 12966 * DOM builder is to have userData == ctxt 12967 */ 12968 if (ctx->userData == ctx) 12969 ctxt->userData = ctxt; 12970 else 12971 ctxt->userData = ctx->userData; 12972 12973 /* 12974 * Doing validity checking on chunk doesn't make sense 12975 */ 12976 ctxt->instate = XML_PARSER_CONTENT; 12977 ctxt->validate = ctx->validate; 12978 ctxt->valid = ctx->valid; 12979 ctxt->loadsubset = ctx->loadsubset; 12980 ctxt->depth = ctx->depth + 1; 12981 ctxt->replaceEntities = ctx->replaceEntities; 12982 if (ctxt->validate) { 12983 ctxt->vctxt.error = ctx->vctxt.error; 12984 ctxt->vctxt.warning = ctx->vctxt.warning; 12985 } else { 12986 ctxt->vctxt.error = NULL; 12987 ctxt->vctxt.warning = NULL; 12988 } 12989 ctxt->vctxt.nodeTab = NULL; 12990 
ctxt->vctxt.nodeNr = 0; 12991 ctxt->vctxt.nodeMax = 0; 12992 ctxt->vctxt.node = NULL; 12993 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12994 ctxt->dict = ctx->dict; 12995 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12996 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12997 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12998 ctxt->dictNames = ctx->dictNames; 12999 ctxt->attsDefault = ctx->attsDefault; 13000 ctxt->attsSpecial = ctx->attsSpecial; 13001 ctxt->linenumbers = ctx->linenumbers; 13002 13003 xmlParseContent(ctxt); 13004 13005 ctx->validate = ctxt->validate; 13006 ctx->valid = ctxt->valid; 13007 if ((RAW == '<') && (NXT(1) == '/')) { 13008 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13009 } else if (RAW != 0) { 13010 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13011 } 13012 if (ctxt->node != newDoc->children) { 13013 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13014 } 13015 13016 if (!ctxt->wellFormed) { 13017 if (ctxt->errNo == 0) 13018 ret = 1; 13019 else 13020 ret = ctxt->errNo; 13021 } else { 13022 if (lst != NULL) { 13023 xmlNodePtr cur; 13024 13025 /* 13026 * Return the newly created nodeset after unlinking it from 13027 * they pseudo parent. 
13028 */ 13029 cur = newDoc->children->children; 13030 *lst = cur; 13031 while (cur != NULL) { 13032 cur->parent = NULL; 13033 cur = cur->next; 13034 } 13035 newDoc->children->children = NULL; 13036 } 13037 ret = 0; 13038 } 13039 ctxt->sax = oldsax; 13040 ctxt->dict = NULL; 13041 ctxt->attsDefault = NULL; 13042 ctxt->attsSpecial = NULL; 13043 xmlFreeParserCtxt(ctxt); 13044 newDoc->intSubset = NULL; 13045 newDoc->extSubset = NULL; 13046 xmlFreeDoc(newDoc); 13047 13048 return(ret); 13049 } 13050 13051 /** 13052 * xmlParseExternalEntityPrivate: 13053 * @doc: the document the chunk pertains to 13054 * @oldctxt: the previous parser context if available 13055 * @sax: the SAX handler bloc (possibly NULL) 13056 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13057 * @depth: Used for loop detection, use 0 13058 * @URL: the URL for the entity to load 13059 * @ID: the System ID for the entity to load 13060 * @list: the return value for the set of parsed nodes 13061 * 13062 * Private version of xmlParseExternalEntity() 13063 * 13064 * Returns 0 if the entity is well formed, -1 in case of args problem and 13065 * the parser error code otherwise 13066 */ 13067 13068 static xmlParserErrors 13069 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13070 xmlSAXHandlerPtr sax, 13071 void *user_data, int depth, const xmlChar *URL, 13072 const xmlChar *ID, xmlNodePtr *list) { 13073 xmlParserCtxtPtr ctxt; 13074 xmlDocPtr newDoc; 13075 xmlNodePtr newRoot; 13076 xmlSAXHandlerPtr oldsax = NULL; 13077 xmlParserErrors ret = XML_ERR_OK; 13078 xmlChar start[4]; 13079 xmlCharEncoding enc; 13080 13081 if (((depth > 40) && 13082 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13083 (depth > 1024)) { 13084 return(XML_ERR_ENTITY_LOOP); 13085 } 13086 13087 if (list != NULL) 13088 *list = NULL; 13089 if ((URL == NULL) && (ID == NULL)) 13090 return(XML_ERR_INTERNAL_ERROR); 13091 if (doc == NULL) 13092 return(XML_ERR_INTERNAL_ERROR); 13093 
13094 13095 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13096 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13097 ctxt->userData = ctxt; 13098 if (oldctxt != NULL) { 13099 ctxt->_private = oldctxt->_private; 13100 ctxt->loadsubset = oldctxt->loadsubset; 13101 ctxt->validate = oldctxt->validate; 13102 ctxt->external = oldctxt->external; 13103 ctxt->record_info = oldctxt->record_info; 13104 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13105 ctxt->node_seq.length = oldctxt->node_seq.length; 13106 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13107 } else { 13108 /* 13109 * Doing validity checking on chunk without context 13110 * doesn't make sense 13111 */ 13112 ctxt->_private = NULL; 13113 ctxt->validate = 0; 13114 ctxt->external = 2; 13115 ctxt->loadsubset = 0; 13116 } 13117 if (sax != NULL) { 13118 oldsax = ctxt->sax; 13119 ctxt->sax = sax; 13120 if (user_data != NULL) 13121 ctxt->userData = user_data; 13122 } 13123 xmlDetectSAX2(ctxt); 13124 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13125 if (newDoc == NULL) { 13126 ctxt->node_seq.maximum = 0; 13127 ctxt->node_seq.length = 0; 13128 ctxt->node_seq.buffer = NULL; 13129 xmlFreeParserCtxt(ctxt); 13130 return(XML_ERR_INTERNAL_ERROR); 13131 } 13132 newDoc->properties = XML_DOC_INTERNAL; 13133 newDoc->intSubset = doc->intSubset; 13134 newDoc->extSubset = doc->extSubset; 13135 newDoc->dict = doc->dict; 13136 xmlDictReference(newDoc->dict); 13137 13138 if (doc->URL != NULL) { 13139 newDoc->URL = xmlStrdup(doc->URL); 13140 } 13141 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13142 if (newRoot == NULL) { 13143 if (sax != NULL) 13144 ctxt->sax = oldsax; 13145 ctxt->node_seq.maximum = 0; 13146 ctxt->node_seq.length = 0; 13147 ctxt->node_seq.buffer = NULL; 13148 xmlFreeParserCtxt(ctxt); 13149 newDoc->intSubset = NULL; 13150 newDoc->extSubset = NULL; 13151 xmlFreeDoc(newDoc); 13152 return(XML_ERR_INTERNAL_ERROR); 13153 } 13154 xmlAddChild((xmlNodePtr) newDoc, newRoot); 
13155 nodePush(ctxt, newDoc->children); 13156 ctxt->myDoc = doc; 13157 newRoot->doc = doc; 13158 13159 /* 13160 * Get the 4 first bytes and decode the charset 13161 * if enc != XML_CHAR_ENCODING_NONE 13162 * plug some encoding conversion routines. 13163 */ 13164 GROW; 13165 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13166 start[0] = RAW; 13167 start[1] = NXT(1); 13168 start[2] = NXT(2); 13169 start[3] = NXT(3); 13170 enc = xmlDetectCharEncoding(start, 4); 13171 if (enc != XML_CHAR_ENCODING_NONE) { 13172 xmlSwitchEncoding(ctxt, enc); 13173 } 13174 } 13175 13176 /* 13177 * Parse a possible text declaration first 13178 */ 13179 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13180 xmlParseTextDecl(ctxt); 13181 } 13182 13183 ctxt->instate = XML_PARSER_CONTENT; 13184 ctxt->depth = depth; 13185 13186 xmlParseContent(ctxt); 13187 13188 if ((RAW == '<') && (NXT(1) == '/')) { 13189 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13190 } else if (RAW != 0) { 13191 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13192 } 13193 if (ctxt->node != newDoc->children) { 13194 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13195 } 13196 13197 if (!ctxt->wellFormed) { 13198 if (ctxt->errNo == 0) 13199 ret = XML_ERR_INTERNAL_ERROR; 13200 else 13201 ret = (xmlParserErrors)ctxt->errNo; 13202 } else { 13203 if (list != NULL) { 13204 xmlNodePtr cur; 13205 13206 /* 13207 * Return the newly created nodeset after unlinking it from 13208 * they pseudo parent. 13209 */ 13210 cur = newDoc->children->children; 13211 *list = cur; 13212 while (cur != NULL) { 13213 cur->parent = NULL; 13214 cur = cur->next; 13215 } 13216 newDoc->children->children = NULL; 13217 } 13218 ret = XML_ERR_OK; 13219 } 13220 13221 /* 13222 * Record in the parent context the number of entities replacement 13223 * done when parsing that reference. 
13224 */ 13225 if (oldctxt != NULL) 13226 oldctxt->nbentities += ctxt->nbentities; 13227 13228 /* 13229 * Also record the size of the entity parsed 13230 */ 13231 if (ctxt->input != NULL && oldctxt != NULL) { 13232 oldctxt->sizeentities += ctxt->input->consumed; 13233 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13234 } 13235 /* 13236 * And record the last error if any 13237 */ 13238 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK)) 13239 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13240 13241 if (sax != NULL) 13242 ctxt->sax = oldsax; 13243 if (oldctxt != NULL) { 13244 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13245 oldctxt->node_seq.length = ctxt->node_seq.length; 13246 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13247 } 13248 ctxt->node_seq.maximum = 0; 13249 ctxt->node_seq.length = 0; 13250 ctxt->node_seq.buffer = NULL; 13251 xmlFreeParserCtxt(ctxt); 13252 newDoc->intSubset = NULL; 13253 newDoc->extSubset = NULL; 13254 xmlFreeDoc(newDoc); 13255 13256 return(ret); 13257 } 13258 13259 #ifdef LIBXML_SAX1_ENABLED 13260 /** 13261 * xmlParseExternalEntity: 13262 * @doc: the document the chunk pertains to 13263 * @sax: the SAX handler bloc (possibly NULL) 13264 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13265 * @depth: Used for loop detection, use 0 13266 * @URL: the URL for the entity to load 13267 * @ID: the System ID for the entity to load 13268 * @lst: the return value for the set of parsed nodes 13269 * 13270 * Parse an external general entity 13271 * An external general parsed entity is well-formed if it matches the 13272 * production labeled extParsedEnt. 13273 * 13274 * [78] extParsedEnt ::= TextDecl? 
content 13275 * 13276 * Returns 0 if the entity is well formed, -1 in case of args problem and 13277 * the parser error code otherwise 13278 */ 13279 13280 int 13281 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13282 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13283 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13284 ID, lst)); 13285 } 13286 13287 /** 13288 * xmlParseBalancedChunkMemory: 13289 * @doc: the document the chunk pertains to 13290 * @sax: the SAX handler bloc (possibly NULL) 13291 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13292 * @depth: Used for loop detection, use 0 13293 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13294 * @lst: the return value for the set of parsed nodes 13295 * 13296 * Parse a well-balanced chunk of an XML document 13297 * called by the parser 13298 * The allowed sequence for the Well Balanced Chunk is the one defined by 13299 * the content production in the XML grammar: 13300 * 13301 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13302 * 13303 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13304 * the parser error code otherwise 13305 */ 13306 13307 int 13308 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13309 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13310 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13311 depth, string, lst, 0 ); 13312 } 13313 #endif /* LIBXML_SAX1_ENABLED */ 13314 13315 /** 13316 * xmlParseBalancedChunkMemoryInternal: 13317 * @oldctxt: the existing parsing context 13318 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13319 * @user_data: the user data field for the parser context 13320 * @lst: the return value for the set of parsed nodes 13321 * 13322 * 13323 * Parse a well-balanced chunk of an XML document 13324 * called by the parser 
* The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed in a temporary memory parser context which borrows
 * the dictionary, SAX handler, attribute tables and (when present) the
 * document of @oldctxt. @oldctxt must not be NULL: it is dereferenced
 * without check.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, XML_ERR_ENTITY_LOOP
 *    if the nesting depth is too large, XML_ERR_INTERNAL_ERROR on NULL
 *    @string or allocation failure, XML_WAR_UNDECLARED_ENTITY if the
 *    memory parser context cannot be created, and the parser
 *    error code otherwise
 *
 * NOTE(review): the original text mentioned a "recover" mode; this
 * function has no such argument and only fills @lst when the result is
 * XML_ERR_OK.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /* entity nesting guard: 40 levels, or 1024 with XML_PARSE_HUGE */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
        ctxt->userData = user_data;
    else
        ctxt->userData = ctxt;
    /*
     * Replace the fresh dictionary by the parent's one; ctxt->dict is
     * reset to NULL on every path before ctxt is freed.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity (nsTab holds pairs) */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /* borrow the parent's SAX block, oldsax is restored before freeing */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* no document in the parent: build a temporary one */
        newDoc = xmlNewDoc(BAD_CAST "1.0");
        if (newDoc == NULL) {
            ctxt->sax = oldsax;
            ctxt->dict = NULL;
            xmlFreeParserCtxt(ctxt);
            return(XML_ERR_INTERNAL_ERROR);
        }
        newDoc->properties = XML_DOC_INTERNAL;
        newDoc->dict = ctxt->dict;
        xmlDictReference(newDoc->dict);
        ctxt->myDoc = newDoc;
    } else {
        /*
         * Reuse the parent's document; its children list is saved here
         * and put back once the chunk has been parsed.
         */
        ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
        last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        ctxt->sax = oldsax;
        ctxt->dict = NULL;
        xmlFreeParserCtxt(ctxt);
        if (newDoc != NULL) {
            xmlFreeDoc(newDoc);
        }
        return(XML_ERR_INTERNAL_ERROR);
    }
    /* the pseudo root temporarily becomes the only child of the document */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }
    ctxt->dictNames = oldctxt->dictNames;
    /* borrowed tables, reset to NULL before ctxt is freed */
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    if (ctxt->node != ctxt->myDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * its pseudo parent.
         */
        cur = ctxt->myDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
            /* validate top-level elements against the parent's document */
            if ((oldctxt->validate) && (oldctxt->wellFormed) &&
                (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
                (cur->type == XML_ELEMENT_NODE)) {
                oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
                        oldctxt->myDoc, cur);
            }
#endif /* LIBXML_VALID_ENABLED */
            cur->parent = NULL;
            cur = cur->next;
        }
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /* drop the pseudo root and restore the saved children list */
        xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* give back everything borrowed from the parent before freeing */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
        xmlFreeDoc(newDoc);
    }

    return(ret);
}

/**
 * xmlParseInNodeContext:
 * @node:  the context node
 * @data:  the input string
 * @datalen:  the input string length in bytes
 * @options:  a combination of xmlParserOption
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document
 * within the context (DTD, namespaces, etc ...) of the given node.
 *
 * The allowed sequence for the data is a Well Balanced Chunk defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise
 */
xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
                      int options, xmlNodePtr *lst) {
#ifdef SAX2
    xmlParserCtxtPtr ctxt;
    xmlDocPtr doc = NULL;
    xmlNodePtr fake, cur;
    int nsnr = 0;

    xmlParserErrors ret = XML_ERR_OK;

    /*
     * check all input parameters, grab the document
     */
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
        return(XML_ERR_INTERNAL_ERROR);
    switch (node->type) {
        case XML_ELEMENT_NODE:
        case XML_ATTRIBUTE_NODE:
        case XML_TEXT_NODE:
        case XML_CDATA_SECTION_NODE:
        case XML_ENTITY_REF_NODE:
        case XML_PI_NODE:
        case XML_COMMENT_NODE:
        case
XML_DOCUMENT_NODE: 13553 case XML_HTML_DOCUMENT_NODE: 13554 break; 13555 default: 13556 return(XML_ERR_INTERNAL_ERROR); 13557 13558 } 13559 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13560 (node->type != XML_DOCUMENT_NODE) && 13561 (node->type != XML_HTML_DOCUMENT_NODE)) 13562 node = node->parent; 13563 if (node == NULL) 13564 return(XML_ERR_INTERNAL_ERROR); 13565 if (node->type == XML_ELEMENT_NODE) 13566 doc = node->doc; 13567 else 13568 doc = (xmlDocPtr) node; 13569 if (doc == NULL) 13570 return(XML_ERR_INTERNAL_ERROR); 13571 13572 /* 13573 * allocate a context and set-up everything not related to the 13574 * node position in the tree 13575 */ 13576 if (doc->type == XML_DOCUMENT_NODE) 13577 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13578 #ifdef LIBXML_HTML_ENABLED 13579 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13580 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13581 /* 13582 * When parsing in context, it makes no sense to add implied 13583 * elements like html/body/etc... 13584 */ 13585 options |= HTML_PARSE_NOIMPLIED; 13586 } 13587 #endif 13588 else 13589 return(XML_ERR_INTERNAL_ERROR); 13590 13591 if (ctxt == NULL) 13592 return(XML_ERR_NO_MEMORY); 13593 13594 /* 13595 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13596 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13597 * we must wait until the last moment to free the original one. 
13598 */ 13599 if (doc->dict != NULL) { 13600 if (ctxt->dict != NULL) 13601 xmlDictFree(ctxt->dict); 13602 ctxt->dict = doc->dict; 13603 } else 13604 options |= XML_PARSE_NODICT; 13605 13606 if (doc->encoding != NULL) { 13607 xmlCharEncodingHandlerPtr hdlr; 13608 13609 if (ctxt->encoding != NULL) 13610 xmlFree((xmlChar *) ctxt->encoding); 13611 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13612 13613 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13614 if (hdlr != NULL) { 13615 xmlSwitchToEncoding(ctxt, hdlr); 13616 } else { 13617 return(XML_ERR_UNSUPPORTED_ENCODING); 13618 } 13619 } 13620 13621 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13622 xmlDetectSAX2(ctxt); 13623 ctxt->myDoc = doc; 13624 /* parsing in context, i.e. as within existing content */ 13625 ctxt->instate = XML_PARSER_CONTENT; 13626 13627 fake = xmlNewComment(NULL); 13628 if (fake == NULL) { 13629 xmlFreeParserCtxt(ctxt); 13630 return(XML_ERR_NO_MEMORY); 13631 } 13632 xmlAddChild(node, fake); 13633 13634 if (node->type == XML_ELEMENT_NODE) { 13635 nodePush(ctxt, node); 13636 /* 13637 * initialize the SAX2 namespaces stack 13638 */ 13639 cur = node; 13640 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13641 xmlNsPtr ns = cur->nsDef; 13642 const xmlChar *iprefix, *ihref; 13643 13644 while (ns != NULL) { 13645 if (ctxt->dict) { 13646 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13647 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13648 } else { 13649 iprefix = ns->prefix; 13650 ihref = ns->href; 13651 } 13652 13653 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13654 nsPush(ctxt, iprefix, ihref); 13655 nsnr++; 13656 } 13657 ns = ns->next; 13658 } 13659 cur = cur->parent; 13660 } 13661 } 13662 13663 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13664 /* 13665 * ID/IDREF registration will be done in xmlValidateElement below 13666 */ 13667 ctxt->loadsubset |= XML_SKIP_IDS; 13668 } 13669 13670 #ifdef LIBXML_HTML_ENABLED 13671 if 
(doc->type == XML_HTML_DOCUMENT_NODE) 13672 __htmlParseContent(ctxt); 13673 else 13674 #endif 13675 xmlParseContent(ctxt); 13676 13677 nsPop(ctxt, nsnr); 13678 if ((RAW == '<') && (NXT(1) == '/')) { 13679 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13680 } else if (RAW != 0) { 13681 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13682 } 13683 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13684 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13685 ctxt->wellFormed = 0; 13686 } 13687 13688 if (!ctxt->wellFormed) { 13689 if (ctxt->errNo == 0) 13690 ret = XML_ERR_INTERNAL_ERROR; 13691 else 13692 ret = (xmlParserErrors)ctxt->errNo; 13693 } else { 13694 ret = XML_ERR_OK; 13695 } 13696 13697 /* 13698 * Return the newly created nodeset after unlinking it from 13699 * the pseudo sibling. 13700 */ 13701 13702 cur = fake->next; 13703 fake->next = NULL; 13704 node->last = fake; 13705 13706 if (cur != NULL) { 13707 cur->prev = NULL; 13708 } 13709 13710 *lst = cur; 13711 13712 while (cur != NULL) { 13713 cur->parent = NULL; 13714 cur = cur->next; 13715 } 13716 13717 xmlUnlinkNode(fake); 13718 xmlFreeNode(fake); 13719 13720 13721 if (ret != XML_ERR_OK) { 13722 xmlFreeNodeList(*lst); 13723 *lst = NULL; 13724 } 13725 13726 if (doc->dict != NULL) 13727 ctxt->dict = NULL; 13728 xmlFreeParserCtxt(ctxt); 13729 13730 return(ret); 13731 #else /* !SAX2 */ 13732 return(XML_ERR_INTERNAL_ERROR); 13733 #endif 13734 } 13735 13736 #ifdef LIBXML_SAX1_ENABLED 13737 /** 13738 * xmlParseBalancedChunkMemoryRecover: 13739 * @doc: the document the chunk pertains to 13740 * @sax: the SAX handler bloc (possibly NULL) 13741 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13742 * @depth: Used for loop detection, use 0 13743 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13744 * @lst: the return value for the set of parsed nodes 13745 * @recover: return nodes even if the data is broken (use 0) 13746 * 13747 * 13748 * Parse a well-balanced chunk 
of an XML document 13749 * called by the parser 13750 * The allowed sequence for the Well Balanced Chunk is the one defined by 13751 * the content production in the XML grammar: 13752 * 13753 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13754 * 13755 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13756 * the parser error code otherwise 13757 * 13758 * In case recover is set to 1, the nodelist will not be empty even if 13759 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13760 * some extent. 13761 */ 13762 int 13763 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13764 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13765 int recover) { 13766 xmlParserCtxtPtr ctxt; 13767 xmlDocPtr newDoc; 13768 xmlSAXHandlerPtr oldsax = NULL; 13769 xmlNodePtr content, newRoot; 13770 int size; 13771 int ret = 0; 13772 13773 if (depth > 40) { 13774 return(XML_ERR_ENTITY_LOOP); 13775 } 13776 13777 13778 if (lst != NULL) 13779 *lst = NULL; 13780 if (string == NULL) 13781 return(-1); 13782 13783 size = xmlStrlen(string); 13784 13785 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13786 if (ctxt == NULL) return(-1); 13787 ctxt->userData = ctxt; 13788 if (sax != NULL) { 13789 oldsax = ctxt->sax; 13790 ctxt->sax = sax; 13791 if (user_data != NULL) 13792 ctxt->userData = user_data; 13793 } 13794 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13795 if (newDoc == NULL) { 13796 xmlFreeParserCtxt(ctxt); 13797 return(-1); 13798 } 13799 newDoc->properties = XML_DOC_INTERNAL; 13800 if ((doc != NULL) && (doc->dict != NULL)) { 13801 xmlDictFree(ctxt->dict); 13802 ctxt->dict = doc->dict; 13803 xmlDictReference(ctxt->dict); 13804 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13805 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13806 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13807 ctxt->dictNames = 1; 13808 } else { 
13809 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13810 } 13811 if (doc != NULL) { 13812 newDoc->intSubset = doc->intSubset; 13813 newDoc->extSubset = doc->extSubset; 13814 } 13815 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13816 if (newRoot == NULL) { 13817 if (sax != NULL) 13818 ctxt->sax = oldsax; 13819 xmlFreeParserCtxt(ctxt); 13820 newDoc->intSubset = NULL; 13821 newDoc->extSubset = NULL; 13822 xmlFreeDoc(newDoc); 13823 return(-1); 13824 } 13825 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13826 nodePush(ctxt, newRoot); 13827 if (doc == NULL) { 13828 ctxt->myDoc = newDoc; 13829 } else { 13830 ctxt->myDoc = newDoc; 13831 newDoc->children->doc = doc; 13832 /* Ensure that doc has XML spec namespace */ 13833 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13834 newDoc->oldNs = doc->oldNs; 13835 } 13836 ctxt->instate = XML_PARSER_CONTENT; 13837 ctxt->depth = depth; 13838 13839 /* 13840 * Doing validity checking on chunk doesn't make sense 13841 */ 13842 ctxt->validate = 0; 13843 ctxt->loadsubset = 0; 13844 xmlDetectSAX2(ctxt); 13845 13846 if ( doc != NULL ){ 13847 content = doc->children; 13848 doc->children = NULL; 13849 xmlParseContent(ctxt); 13850 doc->children = content; 13851 } 13852 else { 13853 xmlParseContent(ctxt); 13854 } 13855 if ((RAW == '<') && (NXT(1) == '/')) { 13856 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13857 } else if (RAW != 0) { 13858 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13859 } 13860 if (ctxt->node != newDoc->children) { 13861 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13862 } 13863 13864 if (!ctxt->wellFormed) { 13865 if (ctxt->errNo == 0) 13866 ret = 1; 13867 else 13868 ret = ctxt->errNo; 13869 } else { 13870 ret = 0; 13871 } 13872 13873 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13874 xmlNodePtr cur; 13875 13876 /* 13877 * Return the newly created nodeset after unlinking it from 13878 * they pseudo parent. 
13879 */ 13880 cur = newDoc->children->children; 13881 *lst = cur; 13882 while (cur != NULL) { 13883 xmlSetTreeDoc(cur, doc); 13884 cur->parent = NULL; 13885 cur = cur->next; 13886 } 13887 newDoc->children->children = NULL; 13888 } 13889 13890 if (sax != NULL) 13891 ctxt->sax = oldsax; 13892 xmlFreeParserCtxt(ctxt); 13893 newDoc->intSubset = NULL; 13894 newDoc->extSubset = NULL; 13895 newDoc->oldNs = NULL; 13896 xmlFreeDoc(newDoc); 13897 13898 return(ret); 13899 } 13900 13901 /** 13902 * xmlSAXParseEntity: 13903 * @sax: the SAX handler block 13904 * @filename: the filename 13905 * 13906 * parse an XML external entity out of context and build a tree. 13907 * It use the given SAX function block to handle the parsing callback. 13908 * If sax is NULL, fallback to the default DOM tree building routines. 13909 * 13910 * [78] extParsedEnt ::= TextDecl? content 13911 * 13912 * This correspond to a "Well Balanced" chunk 13913 * 13914 * Returns the resulting document tree 13915 */ 13916 13917 xmlDocPtr 13918 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13919 xmlDocPtr ret; 13920 xmlParserCtxtPtr ctxt; 13921 13922 ctxt = xmlCreateFileParserCtxt(filename); 13923 if (ctxt == NULL) { 13924 return(NULL); 13925 } 13926 if (sax != NULL) { 13927 if (ctxt->sax != NULL) 13928 xmlFree(ctxt->sax); 13929 ctxt->sax = sax; 13930 ctxt->userData = NULL; 13931 } 13932 13933 xmlParseExtParsedEnt(ctxt); 13934 13935 if (ctxt->wellFormed) 13936 ret = ctxt->myDoc; 13937 else { 13938 ret = NULL; 13939 xmlFreeDoc(ctxt->myDoc); 13940 ctxt->myDoc = NULL; 13941 } 13942 if (sax != NULL) 13943 ctxt->sax = NULL; 13944 xmlFreeParserCtxt(ctxt); 13945 13946 return(ret); 13947 } 13948 13949 /** 13950 * xmlParseEntity: 13951 * @filename: the filename 13952 * 13953 * parse an XML external entity out of context and build a tree. 13954 * 13955 * [78] extParsedEnt ::= TextDecl? 
content 13956 * 13957 * This correspond to a "Well Balanced" chunk 13958 * 13959 * Returns the resulting document tree 13960 */ 13961 13962 xmlDocPtr 13963 xmlParseEntity(const char *filename) { 13964 return(xmlSAXParseEntity(NULL, filename)); 13965 } 13966 #endif /* LIBXML_SAX1_ENABLED */ 13967 13968 /** 13969 * xmlCreateEntityParserCtxtInternal: 13970 * @URL: the entity URL 13971 * @ID: the entity PUBLIC ID 13972 * @base: a possible base for the target URI 13973 * @pctx: parser context used to set options on new context 13974 * 13975 * Create a parser context for an external entity 13976 * Automatic support for ZLIB/Compress compressed document is provided 13977 * by default if found at compile-time. 13978 * 13979 * Returns the new parser context or NULL 13980 */ 13981 static xmlParserCtxtPtr 13982 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13983 const xmlChar *base, xmlParserCtxtPtr pctx) { 13984 xmlParserCtxtPtr ctxt; 13985 xmlParserInputPtr inputStream; 13986 char *directory = NULL; 13987 xmlChar *uri; 13988 13989 ctxt = xmlNewParserCtxt(); 13990 if (ctxt == NULL) { 13991 return(NULL); 13992 } 13993 13994 if (pctx != NULL) { 13995 ctxt->options = pctx->options; 13996 ctxt->_private = pctx->_private; 13997 } 13998 13999 uri = xmlBuildURI(URL, base); 14000 14001 if (uri == NULL) { 14002 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14003 if (inputStream == NULL) { 14004 xmlFreeParserCtxt(ctxt); 14005 return(NULL); 14006 } 14007 14008 inputPush(ctxt, inputStream); 14009 14010 if ((ctxt->directory == NULL) && (directory == NULL)) 14011 directory = xmlParserGetDirectory((char *)URL); 14012 if ((ctxt->directory == NULL) && (directory != NULL)) 14013 ctxt->directory = directory; 14014 } else { 14015 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14016 if (inputStream == NULL) { 14017 xmlFree(uri); 14018 xmlFreeParserCtxt(ctxt); 14019 return(NULL); 14020 } 14021 14022 inputPush(ctxt, 
inputStream); 14023 14024 if ((ctxt->directory == NULL) && (directory == NULL)) 14025 directory = xmlParserGetDirectory((char *)uri); 14026 if ((ctxt->directory == NULL) && (directory != NULL)) 14027 ctxt->directory = directory; 14028 xmlFree(uri); 14029 } 14030 return(ctxt); 14031 } 14032 14033 /** 14034 * xmlCreateEntityParserCtxt: 14035 * @URL: the entity URL 14036 * @ID: the entity PUBLIC ID 14037 * @base: a possible base for the target URI 14038 * 14039 * Create a parser context for an external entity 14040 * Automatic support for ZLIB/Compress compressed document is provided 14041 * by default if found at compile-time. 14042 * 14043 * Returns the new parser context or NULL 14044 */ 14045 xmlParserCtxtPtr 14046 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14047 const xmlChar *base) { 14048 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14049 14050 } 14051 14052 /************************************************************************ 14053 * * 14054 * Front ends when parsing from a file * 14055 * * 14056 ************************************************************************/ 14057 14058 /** 14059 * xmlCreateURLParserCtxt: 14060 * @filename: the filename or URL 14061 * @options: a combination of xmlParserOption 14062 * 14063 * Create a parser context for a file or URL content. 
14064 * Automatic support for ZLIB/Compress compressed document is provided 14065 * by default if found at compile-time and for file accesses 14066 * 14067 * Returns the new parser context or NULL 14068 */ 14069 xmlParserCtxtPtr 14070 xmlCreateURLParserCtxt(const char *filename, int options) 14071 { 14072 xmlParserCtxtPtr ctxt; 14073 xmlParserInputPtr inputStream; 14074 char *directory = NULL; 14075 14076 ctxt = xmlNewParserCtxt(); 14077 if (ctxt == NULL) { 14078 xmlErrMemory(NULL, "cannot allocate parser context"); 14079 return(NULL); 14080 } 14081 14082 if (options) 14083 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14084 ctxt->linenumbers = 1; 14085 14086 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14087 if (inputStream == NULL) { 14088 xmlFreeParserCtxt(ctxt); 14089 return(NULL); 14090 } 14091 14092 inputPush(ctxt, inputStream); 14093 if ((ctxt->directory == NULL) && (directory == NULL)) 14094 directory = xmlParserGetDirectory(filename); 14095 if ((ctxt->directory == NULL) && (directory != NULL)) 14096 ctxt->directory = directory; 14097 14098 return(ctxt); 14099 } 14100 14101 /** 14102 * xmlCreateFileParserCtxt: 14103 * @filename: the filename 14104 * 14105 * Create a parser context for a file content. 14106 * Automatic support for ZLIB/Compress compressed document is provided 14107 * by default if found at compile-time. 14108 * 14109 * Returns the new parser context or NULL 14110 */ 14111 xmlParserCtxtPtr 14112 xmlCreateFileParserCtxt(const char *filename) 14113 { 14114 return(xmlCreateURLParserCtxt(filename, 0)); 14115 } 14116 14117 #ifdef LIBXML_SAX1_ENABLED 14118 /** 14119 * xmlSAXParseFileWithData: 14120 * @sax: the SAX handler block 14121 * @filename: the filename 14122 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14123 * documents 14124 * @data: the userdata 14125 * 14126 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14127 * compressed document is provided by default if found at compile-time. 14128 * It use the given SAX function block to handle the parsing callback. 14129 * If sax is NULL, fallback to the default DOM tree building routines. 14130 * 14131 * User data (void *) is stored within the parser context in the 14132 * context's _private member, so it is available nearly everywhere in libxml 14133 * 14134 * Returns the resulting document tree 14135 */ 14136 14137 xmlDocPtr 14138 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14139 int recovery, void *data) { 14140 xmlDocPtr ret; 14141 xmlParserCtxtPtr ctxt; 14142 14143 xmlInitParser(); 14144 14145 ctxt = xmlCreateFileParserCtxt(filename); 14146 if (ctxt == NULL) { 14147 return(NULL); 14148 } 14149 if (sax != NULL) { 14150 if (ctxt->sax != NULL) 14151 xmlFree(ctxt->sax); 14152 ctxt->sax = sax; 14153 } 14154 xmlDetectSAX2(ctxt); 14155 if (data!=NULL) { 14156 ctxt->_private = data; 14157 } 14158 14159 if (ctxt->directory == NULL) 14160 ctxt->directory = xmlParserGetDirectory(filename); 14161 14162 ctxt->recovery = recovery; 14163 14164 xmlParseDocument(ctxt); 14165 14166 if ((ctxt->wellFormed) || recovery) { 14167 ret = ctxt->myDoc; 14168 if (ret != NULL) { 14169 if (ctxt->input->buf->compressed > 0) 14170 ret->compression = 9; 14171 else 14172 ret->compression = ctxt->input->buf->compressed; 14173 } 14174 } 14175 else { 14176 ret = NULL; 14177 xmlFreeDoc(ctxt->myDoc); 14178 ctxt->myDoc = NULL; 14179 } 14180 if (sax != NULL) 14181 ctxt->sax = NULL; 14182 xmlFreeParserCtxt(ctxt); 14183 14184 return(ret); 14185 } 14186 14187 /** 14188 * xmlSAXParseFile: 14189 * @sax: the SAX handler block 14190 * @filename: the filename 14191 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14192 * documents 14193 * 14194 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14195 * compressed document is provided by default if found at compile-time. 14196 * It use the given SAX function block to handle the parsing callback. 14197 * If sax is NULL, fallback to the default DOM tree building routines. 14198 * 14199 * Returns the resulting document tree 14200 */ 14201 14202 xmlDocPtr 14203 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14204 int recovery) { 14205 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14206 } 14207 14208 /** 14209 * xmlRecoverDoc: 14210 * @cur: a pointer to an array of xmlChar 14211 * 14212 * parse an XML in-memory document and build a tree. 14213 * In the case the document is not Well Formed, a attempt to build a 14214 * tree is tried anyway 14215 * 14216 * Returns the resulting document tree or NULL in case of failure 14217 */ 14218 14219 xmlDocPtr 14220 xmlRecoverDoc(const xmlChar *cur) { 14221 return(xmlSAXParseDoc(NULL, cur, 1)); 14222 } 14223 14224 /** 14225 * xmlParseFile: 14226 * @filename: the filename 14227 * 14228 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14229 * compressed document is provided by default if found at compile-time. 14230 * 14231 * Returns the resulting document tree if the file was wellformed, 14232 * NULL otherwise. 14233 */ 14234 14235 xmlDocPtr 14236 xmlParseFile(const char *filename) { 14237 return(xmlSAXParseFile(NULL, filename, 0)); 14238 } 14239 14240 /** 14241 * xmlRecoverFile: 14242 * @filename: the filename 14243 * 14244 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14245 * compressed document is provided by default if found at compile-time. 
14246 * In the case the document is not Well Formed, it attempts to build 14247 * a tree anyway 14248 * 14249 * Returns the resulting document tree or NULL in case of failure 14250 */ 14251 14252 xmlDocPtr 14253 xmlRecoverFile(const char *filename) { 14254 return(xmlSAXParseFile(NULL, filename, 1)); 14255 } 14256 14257 14258 /** 14259 * xmlSetupParserForBuffer: 14260 * @ctxt: an XML parser context 14261 * @buffer: a xmlChar * buffer 14262 * @filename: a file name 14263 * 14264 * Setup the parser context to parse a new buffer; Clears any prior 14265 * contents from the parser context. The buffer parameter must not be 14266 * NULL, but the filename parameter can be 14267 */ 14268 void 14269 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14270 const char* filename) 14271 { 14272 xmlParserInputPtr input; 14273 14274 if ((ctxt == NULL) || (buffer == NULL)) 14275 return; 14276 14277 input = xmlNewInputStream(ctxt); 14278 if (input == NULL) { 14279 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14280 xmlClearParserCtxt(ctxt); 14281 return; 14282 } 14283 14284 xmlClearParserCtxt(ctxt); 14285 if (filename != NULL) 14286 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14287 input->base = buffer; 14288 input->cur = buffer; 14289 input->end = &buffer[xmlStrlen(buffer)]; 14290 inputPush(ctxt, input); 14291 } 14292 14293 /** 14294 * xmlSAXUserParseFile: 14295 * @sax: a SAX handler 14296 * @user_data: The user data returned on SAX callbacks 14297 * @filename: a file name 14298 * 14299 * parse an XML file and call the given SAX handler routines. 
14300 * Automatic support for ZLIB/Compress compressed document is provided 14301 * 14302 * Returns 0 in case of success or a error number otherwise 14303 */ 14304 int 14305 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14306 const char *filename) { 14307 int ret = 0; 14308 xmlParserCtxtPtr ctxt; 14309 14310 ctxt = xmlCreateFileParserCtxt(filename); 14311 if (ctxt == NULL) return -1; 14312 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14313 xmlFree(ctxt->sax); 14314 ctxt->sax = sax; 14315 xmlDetectSAX2(ctxt); 14316 14317 if (user_data != NULL) 14318 ctxt->userData = user_data; 14319 14320 xmlParseDocument(ctxt); 14321 14322 if (ctxt->wellFormed) 14323 ret = 0; 14324 else { 14325 if (ctxt->errNo != 0) 14326 ret = ctxt->errNo; 14327 else 14328 ret = -1; 14329 } 14330 if (sax != NULL) 14331 ctxt->sax = NULL; 14332 if (ctxt->myDoc != NULL) { 14333 xmlFreeDoc(ctxt->myDoc); 14334 ctxt->myDoc = NULL; 14335 } 14336 xmlFreeParserCtxt(ctxt); 14337 14338 return ret; 14339 } 14340 #endif /* LIBXML_SAX1_ENABLED */ 14341 14342 /************************************************************************ 14343 * * 14344 * Front ends when parsing from memory * 14345 * * 14346 ************************************************************************/ 14347 14348 /** 14349 * xmlCreateMemoryParserCtxt: 14350 * @buffer: a pointer to a char array 14351 * @size: the size of the array 14352 * 14353 * Create a parser context for an XML in-memory document. 
14354 * 14355 * Returns the new parser context or NULL 14356 */ 14357 xmlParserCtxtPtr 14358 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14359 xmlParserCtxtPtr ctxt; 14360 xmlParserInputPtr input; 14361 xmlParserInputBufferPtr buf; 14362 14363 if (buffer == NULL) 14364 return(NULL); 14365 if (size <= 0) 14366 return(NULL); 14367 14368 ctxt = xmlNewParserCtxt(); 14369 if (ctxt == NULL) 14370 return(NULL); 14371 14372 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14373 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14374 if (buf == NULL) { 14375 xmlFreeParserCtxt(ctxt); 14376 return(NULL); 14377 } 14378 14379 input = xmlNewInputStream(ctxt); 14380 if (input == NULL) { 14381 xmlFreeParserInputBuffer(buf); 14382 xmlFreeParserCtxt(ctxt); 14383 return(NULL); 14384 } 14385 14386 input->filename = NULL; 14387 input->buf = buf; 14388 xmlBufResetInput(input->buf->buffer, input); 14389 14390 inputPush(ctxt, input); 14391 return(ctxt); 14392 } 14393 14394 #ifdef LIBXML_SAX1_ENABLED 14395 /** 14396 * xmlSAXParseMemoryWithData: 14397 * @sax: the SAX handler block 14398 * @buffer: an pointer to a char array 14399 * @size: the size of the array 14400 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14401 * documents 14402 * @data: the userdata 14403 * 14404 * parse an XML in-memory block and use the given SAX function block 14405 * to handle the parsing callback. If sax is NULL, fallback to the default 14406 * DOM tree building routines. 
14407 * 14408 * User data (void *) is stored within the parser context in the 14409 * context's _private member, so it is available nearly everywhere in libxml 14410 * 14411 * Returns the resulting document tree 14412 */ 14413 14414 xmlDocPtr 14415 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14416 int size, int recovery, void *data) { 14417 xmlDocPtr ret; 14418 xmlParserCtxtPtr ctxt; 14419 14420 xmlInitParser(); 14421 14422 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14423 if (ctxt == NULL) return(NULL); 14424 if (sax != NULL) { 14425 if (ctxt->sax != NULL) 14426 xmlFree(ctxt->sax); 14427 ctxt->sax = sax; 14428 } 14429 xmlDetectSAX2(ctxt); 14430 if (data!=NULL) { 14431 ctxt->_private=data; 14432 } 14433 14434 ctxt->recovery = recovery; 14435 14436 xmlParseDocument(ctxt); 14437 14438 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14439 else { 14440 ret = NULL; 14441 xmlFreeDoc(ctxt->myDoc); 14442 ctxt->myDoc = NULL; 14443 } 14444 if (sax != NULL) 14445 ctxt->sax = NULL; 14446 xmlFreeParserCtxt(ctxt); 14447 14448 return(ret); 14449 } 14450 14451 /** 14452 * xmlSAXParseMemory: 14453 * @sax: the SAX handler block 14454 * @buffer: an pointer to a char array 14455 * @size: the size of the array 14456 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14457 * documents 14458 * 14459 * parse an XML in-memory block and use the given SAX function block 14460 * to handle the parsing callback. If sax is NULL, fallback to the default 14461 * DOM tree building routines. 14462 * 14463 * Returns the resulting document tree 14464 */ 14465 xmlDocPtr 14466 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14467 int size, int recovery) { 14468 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14469 } 14470 14471 /** 14472 * xmlParseMemory: 14473 * @buffer: an pointer to a char array 14474 * @size: the size of the array 14475 * 14476 * parse an XML in-memory block and build a tree. 
14477 * 14478 * Returns the resulting document tree 14479 */ 14480 14481 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14482 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14483 } 14484 14485 /** 14486 * xmlRecoverMemory: 14487 * @buffer: an pointer to a char array 14488 * @size: the size of the array 14489 * 14490 * parse an XML in-memory block and build a tree. 14491 * In the case the document is not Well Formed, an attempt to 14492 * build a tree is tried anyway 14493 * 14494 * Returns the resulting document tree or NULL in case of error 14495 */ 14496 14497 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14498 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14499 } 14500 14501 /** 14502 * xmlSAXUserParseMemory: 14503 * @sax: a SAX handler 14504 * @user_data: The user data returned on SAX callbacks 14505 * @buffer: an in-memory XML document input 14506 * @size: the length of the XML document in bytes 14507 * 14508 * A better SAX parsing routine. 14509 * parse an XML in-memory buffer and call the given SAX handler routines. 
14510 * 14511 * Returns 0 in case of success or a error number otherwise 14512 */ 14513 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14514 const char *buffer, int size) { 14515 int ret = 0; 14516 xmlParserCtxtPtr ctxt; 14517 14518 xmlInitParser(); 14519 14520 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14521 if (ctxt == NULL) return -1; 14522 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14523 xmlFree(ctxt->sax); 14524 ctxt->sax = sax; 14525 xmlDetectSAX2(ctxt); 14526 14527 if (user_data != NULL) 14528 ctxt->userData = user_data; 14529 14530 xmlParseDocument(ctxt); 14531 14532 if (ctxt->wellFormed) 14533 ret = 0; 14534 else { 14535 if (ctxt->errNo != 0) 14536 ret = ctxt->errNo; 14537 else 14538 ret = -1; 14539 } 14540 if (sax != NULL) 14541 ctxt->sax = NULL; 14542 if (ctxt->myDoc != NULL) { 14543 xmlFreeDoc(ctxt->myDoc); 14544 ctxt->myDoc = NULL; 14545 } 14546 xmlFreeParserCtxt(ctxt); 14547 14548 return ret; 14549 } 14550 #endif /* LIBXML_SAX1_ENABLED */ 14551 14552 /** 14553 * xmlCreateDocParserCtxt: 14554 * @cur: a pointer to an array of xmlChar 14555 * 14556 * Creates a parser context for an XML in-memory document. 14557 * 14558 * Returns the new parser context or NULL 14559 */ 14560 xmlParserCtxtPtr 14561 xmlCreateDocParserCtxt(const xmlChar *cur) { 14562 int len; 14563 14564 if (cur == NULL) 14565 return(NULL); 14566 len = xmlStrlen(cur); 14567 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14568 } 14569 14570 #ifdef LIBXML_SAX1_ENABLED 14571 /** 14572 * xmlSAXParseDoc: 14573 * @sax: the SAX handler block 14574 * @cur: a pointer to an array of xmlChar 14575 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14576 * documents 14577 * 14578 * parse an XML in-memory document and build a tree. 14579 * It use the given SAX function block to handle the parsing callback. 14580 * If sax is NULL, fallback to the default DOM tree building routines. 
14581 * 14582 * Returns the resulting document tree 14583 */ 14584 14585 xmlDocPtr 14586 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14587 xmlDocPtr ret; 14588 xmlParserCtxtPtr ctxt; 14589 xmlSAXHandlerPtr oldsax = NULL; 14590 14591 if (cur == NULL) return(NULL); 14592 14593 14594 ctxt = xmlCreateDocParserCtxt(cur); 14595 if (ctxt == NULL) return(NULL); 14596 if (sax != NULL) { 14597 oldsax = ctxt->sax; 14598 ctxt->sax = sax; 14599 ctxt->userData = NULL; 14600 } 14601 xmlDetectSAX2(ctxt); 14602 14603 xmlParseDocument(ctxt); 14604 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14605 else { 14606 ret = NULL; 14607 xmlFreeDoc(ctxt->myDoc); 14608 ctxt->myDoc = NULL; 14609 } 14610 if (sax != NULL) 14611 ctxt->sax = oldsax; 14612 xmlFreeParserCtxt(ctxt); 14613 14614 return(ret); 14615 } 14616 14617 /** 14618 * xmlParseDoc: 14619 * @cur: a pointer to an array of xmlChar 14620 * 14621 * parse an XML in-memory document and build a tree. 14622 * 14623 * Returns the resulting document tree 14624 */ 14625 14626 xmlDocPtr 14627 xmlParseDoc(const xmlChar *cur) { 14628 return(xmlSAXParseDoc(NULL, cur, 0)); 14629 } 14630 #endif /* LIBXML_SAX1_ENABLED */ 14631 14632 #ifdef LIBXML_LEGACY_ENABLED 14633 /************************************************************************ 14634 * * 14635 * Specific function to keep track of entities references * 14636 * and used by the XSLT debugger * 14637 * * 14638 ************************************************************************/ 14639 14640 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14641 14642 /** 14643 * xmlAddEntityReference: 14644 * @ent : A valid entity 14645 * @firstNode : A valid first node for children of entity 14646 * @lastNode : A valid last node of children entity 14647 * 14648 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14649 */ 14650 static void 14651 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14652 xmlNodePtr lastNode) 
14653 { 14654 if (xmlEntityRefFunc != NULL) { 14655 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14656 } 14657 } 14658 14659 14660 /** 14661 * xmlSetEntityReferenceFunc: 14662 * @func: A valid function 14663 * 14664 * Set the function to call call back when a xml reference has been made 14665 */ 14666 void 14667 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14668 { 14669 xmlEntityRefFunc = func; 14670 } 14671 #endif /* LIBXML_LEGACY_ENABLED */ 14672 14673 /************************************************************************ 14674 * * 14675 * Miscellaneous * 14676 * * 14677 ************************************************************************/ 14678 14679 #ifdef LIBXML_XPATH_ENABLED 14680 #include <libxml/xpath.h> 14681 #endif 14682 14683 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14684 static int xmlParserInitialized = 0; 14685 14686 /** 14687 * xmlInitParser: 14688 * 14689 * Initialization function for the XML parser. 14690 * This is not reentrant. Call once before processing in case of 14691 * use in multithreaded programs. 
14692 */ 14693 14694 void 14695 xmlInitParser(void) { 14696 if (xmlParserInitialized != 0) 14697 return; 14698 14699 #ifdef LIBXML_THREAD_ENABLED 14700 __xmlGlobalInitMutexLock(); 14701 if (xmlParserInitialized == 0) { 14702 #endif 14703 xmlInitThreads(); 14704 xmlInitGlobals(); 14705 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14706 (xmlGenericError == NULL)) 14707 initGenericErrorDefaultFunc(NULL); 14708 xmlInitMemory(); 14709 xmlInitializeDict(); 14710 xmlInitCharEncodingHandlers(); 14711 xmlDefaultSAXHandlerInit(); 14712 xmlRegisterDefaultInputCallbacks(); 14713 #ifdef LIBXML_OUTPUT_ENABLED 14714 xmlRegisterDefaultOutputCallbacks(); 14715 #endif /* LIBXML_OUTPUT_ENABLED */ 14716 #ifdef LIBXML_HTML_ENABLED 14717 htmlInitAutoClose(); 14718 htmlDefaultSAXHandlerInit(); 14719 #endif 14720 #ifdef LIBXML_XPATH_ENABLED 14721 xmlXPathInit(); 14722 #endif 14723 xmlParserInitialized = 1; 14724 #ifdef LIBXML_THREAD_ENABLED 14725 } 14726 __xmlGlobalInitMutexUnlock(); 14727 #endif 14728 } 14729 14730 /** 14731 * xmlCleanupParser: 14732 * 14733 * This function name is somewhat misleading. It does not clean up 14734 * parser state, it cleans up memory allocated by the library itself. 14735 * It is a cleanup function for the XML library. It tries to reclaim all 14736 * related global memory allocated for the library processing. 14737 * It doesn't deallocate any document related memory. One should 14738 * call xmlCleanupParser() only when the process has finished using 14739 * the library and all XML/HTML documents built with it. 14740 * See also xmlInitParser() which has the opposite function of preparing 14741 * the library for operations. 14742 * 14743 * WARNING: if your application is multithreaded or has plugin support 14744 * calling this may crash the application if another thread or 14745 * a plugin is still using libxml2. 
It's sometimes very hard to 14746 * guess if libxml2 is in use in the application, some libraries 14747 * or plugins may use it without notice. In case of doubt abstain 14748 * from calling this function or do it just before calling exit() 14749 * to avoid leak reports from valgrind ! 14750 */ 14751 14752 void 14753 xmlCleanupParser(void) { 14754 if (!xmlParserInitialized) 14755 return; 14756 14757 xmlCleanupCharEncodingHandlers(); 14758 #ifdef LIBXML_CATALOG_ENABLED 14759 xmlCatalogCleanup(); 14760 #endif 14761 xmlDictCleanup(); 14762 xmlCleanupInputCallbacks(); 14763 #ifdef LIBXML_OUTPUT_ENABLED 14764 xmlCleanupOutputCallbacks(); 14765 #endif 14766 #ifdef LIBXML_SCHEMAS_ENABLED 14767 xmlSchemaCleanupTypes(); 14768 xmlRelaxNGCleanupTypes(); 14769 #endif 14770 xmlResetLastError(); 14771 xmlCleanupGlobals(); 14772 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14773 xmlCleanupMemory(); 14774 xmlParserInitialized = 0; 14775 } 14776 14777 /************************************************************************ 14778 * * 14779 * New set (2.6.0) of simpler and more flexible APIs * 14780 * * 14781 ************************************************************************/ 14782 14783 /** 14784 * DICT_FREE: 14785 * @str: a string 14786 * 14787 * Free a string if it is not owned by the "dict" dictionary in the 14788 * current scope 14789 */ 14790 #define DICT_FREE(str) \ 14791 if ((str) && ((!dict) || \ 14792 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14793 xmlFree((char *)(str)); 14794 14795 /** 14796 * xmlCtxtReset: 14797 * @ctxt: an XML parser context 14798 * 14799 * Reset a parser context 14800 */ 14801 void 14802 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14803 { 14804 xmlParserInputPtr input; 14805 xmlDictPtr dict; 14806 14807 if (ctxt == NULL) 14808 return; 14809 14810 dict = ctxt->dict; 14811 14812 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14813 xmlFreeInputStream(input); 14814 } 14815 ctxt->inputNr = 0; 14816 
ctxt->input = NULL; 14817 14818 ctxt->spaceNr = 0; 14819 if (ctxt->spaceTab != NULL) { 14820 ctxt->spaceTab[0] = -1; 14821 ctxt->space = &ctxt->spaceTab[0]; 14822 } else { 14823 ctxt->space = NULL; 14824 } 14825 14826 14827 ctxt->nodeNr = 0; 14828 ctxt->node = NULL; 14829 14830 ctxt->nameNr = 0; 14831 ctxt->name = NULL; 14832 14833 DICT_FREE(ctxt->version); 14834 ctxt->version = NULL; 14835 DICT_FREE(ctxt->encoding); 14836 ctxt->encoding = NULL; 14837 DICT_FREE(ctxt->directory); 14838 ctxt->directory = NULL; 14839 DICT_FREE(ctxt->extSubURI); 14840 ctxt->extSubURI = NULL; 14841 DICT_FREE(ctxt->extSubSystem); 14842 ctxt->extSubSystem = NULL; 14843 if (ctxt->myDoc != NULL) 14844 xmlFreeDoc(ctxt->myDoc); 14845 ctxt->myDoc = NULL; 14846 14847 ctxt->standalone = -1; 14848 ctxt->hasExternalSubset = 0; 14849 ctxt->hasPErefs = 0; 14850 ctxt->html = 0; 14851 ctxt->external = 0; 14852 ctxt->instate = XML_PARSER_START; 14853 ctxt->token = 0; 14854 14855 ctxt->wellFormed = 1; 14856 ctxt->nsWellFormed = 1; 14857 ctxt->disableSAX = 0; 14858 ctxt->valid = 1; 14859 #if 0 14860 ctxt->vctxt.userData = ctxt; 14861 ctxt->vctxt.error = xmlParserValidityError; 14862 ctxt->vctxt.warning = xmlParserValidityWarning; 14863 #endif 14864 ctxt->record_info = 0; 14865 ctxt->nbChars = 0; 14866 ctxt->checkIndex = 0; 14867 ctxt->inSubset = 0; 14868 ctxt->errNo = XML_ERR_OK; 14869 ctxt->depth = 0; 14870 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14871 ctxt->catalogs = NULL; 14872 ctxt->nbentities = 0; 14873 ctxt->sizeentities = 0; 14874 ctxt->sizeentcopy = 0; 14875 xmlInitNodeInfoSeq(&ctxt->node_seq); 14876 14877 if (ctxt->attsDefault != NULL) { 14878 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 14879 ctxt->attsDefault = NULL; 14880 } 14881 if (ctxt->attsSpecial != NULL) { 14882 xmlHashFree(ctxt->attsSpecial, NULL); 14883 ctxt->attsSpecial = NULL; 14884 } 14885 14886 #ifdef LIBXML_CATALOG_ENABLED 14887 if (ctxt->catalogs != NULL) 14888 xmlCatalogFreeLocal(ctxt->catalogs); 14889 #endif 
14890 if (ctxt->lastError.code != XML_ERR_OK) 14891 xmlResetError(&ctxt->lastError); 14892 } 14893 14894 /** 14895 * xmlCtxtResetPush: 14896 * @ctxt: an XML parser context 14897 * @chunk: a pointer to an array of chars 14898 * @size: number of chars in the array 14899 * @filename: an optional file name or URI 14900 * @encoding: the document encoding, or NULL 14901 * 14902 * Reset a push parser context 14903 * 14904 * Returns 0 in case of success and 1 in case of error 14905 */ 14906 int 14907 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14908 int size, const char *filename, const char *encoding) 14909 { 14910 xmlParserInputPtr inputStream; 14911 xmlParserInputBufferPtr buf; 14912 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14913 14914 if (ctxt == NULL) 14915 return(1); 14916 14917 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14918 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14919 14920 buf = xmlAllocParserInputBuffer(enc); 14921 if (buf == NULL) 14922 return(1); 14923 14924 if (ctxt == NULL) { 14925 xmlFreeParserInputBuffer(buf); 14926 return(1); 14927 } 14928 14929 xmlCtxtReset(ctxt); 14930 14931 if (ctxt->pushTab == NULL) { 14932 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14933 sizeof(xmlChar *)); 14934 if (ctxt->pushTab == NULL) { 14935 xmlErrMemory(ctxt, NULL); 14936 xmlFreeParserInputBuffer(buf); 14937 return(1); 14938 } 14939 } 14940 14941 if (filename == NULL) { 14942 ctxt->directory = NULL; 14943 } else { 14944 ctxt->directory = xmlParserGetDirectory(filename); 14945 } 14946 14947 inputStream = xmlNewInputStream(ctxt); 14948 if (inputStream == NULL) { 14949 xmlFreeParserInputBuffer(buf); 14950 return(1); 14951 } 14952 14953 if (filename == NULL) 14954 inputStream->filename = NULL; 14955 else 14956 inputStream->filename = (char *) 14957 xmlCanonicPath((const xmlChar *) filename); 14958 inputStream->buf = buf; 14959 xmlBufResetInput(buf->buffer, inputStream); 14960 14961 inputPush(ctxt, inputStream); 
14962 14963 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14964 (ctxt->input->buf != NULL)) { 14965 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14966 size_t cur = ctxt->input->cur - ctxt->input->base; 14967 14968 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14969 14970 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14971 #ifdef DEBUG_PUSH 14972 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14973 #endif 14974 } 14975 14976 if (encoding != NULL) { 14977 xmlCharEncodingHandlerPtr hdlr; 14978 14979 if (ctxt->encoding != NULL) 14980 xmlFree((xmlChar *) ctxt->encoding); 14981 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14982 14983 hdlr = xmlFindCharEncodingHandler(encoding); 14984 if (hdlr != NULL) { 14985 xmlSwitchToEncoding(ctxt, hdlr); 14986 } else { 14987 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14988 "Unsupported encoding %s\n", BAD_CAST encoding); 14989 } 14990 } else if (enc != XML_CHAR_ENCODING_NONE) { 14991 xmlSwitchEncoding(ctxt, enc); 14992 } 14993 14994 return(0); 14995 } 14996 14997 14998 /** 14999 * xmlCtxtUseOptionsInternal: 15000 * @ctxt: an XML parser context 15001 * @options: a combination of xmlParserOption 15002 * @encoding: the user provided encoding to use 15003 * 15004 * Applies the options to the parser context 15005 * 15006 * Returns 0 in case of success, the set of unknown or unimplemented options 15007 * in case of error. 
15008 */ 15009 static int 15010 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15011 { 15012 if (ctxt == NULL) 15013 return(-1); 15014 if (encoding != NULL) { 15015 if (ctxt->encoding != NULL) 15016 xmlFree((xmlChar *) ctxt->encoding); 15017 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15018 } 15019 if (options & XML_PARSE_RECOVER) { 15020 ctxt->recovery = 1; 15021 options -= XML_PARSE_RECOVER; 15022 ctxt->options |= XML_PARSE_RECOVER; 15023 } else 15024 ctxt->recovery = 0; 15025 if (options & XML_PARSE_DTDLOAD) { 15026 ctxt->loadsubset = XML_DETECT_IDS; 15027 options -= XML_PARSE_DTDLOAD; 15028 ctxt->options |= XML_PARSE_DTDLOAD; 15029 } else 15030 ctxt->loadsubset = 0; 15031 if (options & XML_PARSE_DTDATTR) { 15032 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15033 options -= XML_PARSE_DTDATTR; 15034 ctxt->options |= XML_PARSE_DTDATTR; 15035 } 15036 if (options & XML_PARSE_NOENT) { 15037 ctxt->replaceEntities = 1; 15038 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15039 options -= XML_PARSE_NOENT; 15040 ctxt->options |= XML_PARSE_NOENT; 15041 } else 15042 ctxt->replaceEntities = 0; 15043 if (options & XML_PARSE_PEDANTIC) { 15044 ctxt->pedantic = 1; 15045 options -= XML_PARSE_PEDANTIC; 15046 ctxt->options |= XML_PARSE_PEDANTIC; 15047 } else 15048 ctxt->pedantic = 0; 15049 if (options & XML_PARSE_NOBLANKS) { 15050 ctxt->keepBlanks = 0; 15051 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15052 options -= XML_PARSE_NOBLANKS; 15053 ctxt->options |= XML_PARSE_NOBLANKS; 15054 } else 15055 ctxt->keepBlanks = 1; 15056 if (options & XML_PARSE_DTDVALID) { 15057 ctxt->validate = 1; 15058 if (options & XML_PARSE_NOWARNING) 15059 ctxt->vctxt.warning = NULL; 15060 if (options & XML_PARSE_NOERROR) 15061 ctxt->vctxt.error = NULL; 15062 options -= XML_PARSE_DTDVALID; 15063 ctxt->options |= XML_PARSE_DTDVALID; 15064 } else 15065 ctxt->validate = 0; 15066 if (options & XML_PARSE_NOWARNING) { 15067 ctxt->sax->warning = NULL; 
15068 options -= XML_PARSE_NOWARNING; 15069 } 15070 if (options & XML_PARSE_NOERROR) { 15071 ctxt->sax->error = NULL; 15072 ctxt->sax->fatalError = NULL; 15073 options -= XML_PARSE_NOERROR; 15074 } 15075 #ifdef LIBXML_SAX1_ENABLED 15076 if (options & XML_PARSE_SAX1) { 15077 ctxt->sax->startElement = xmlSAX2StartElement; 15078 ctxt->sax->endElement = xmlSAX2EndElement; 15079 ctxt->sax->startElementNs = NULL; 15080 ctxt->sax->endElementNs = NULL; 15081 ctxt->sax->initialized = 1; 15082 options -= XML_PARSE_SAX1; 15083 ctxt->options |= XML_PARSE_SAX1; 15084 } 15085 #endif /* LIBXML_SAX1_ENABLED */ 15086 if (options & XML_PARSE_NODICT) { 15087 ctxt->dictNames = 0; 15088 options -= XML_PARSE_NODICT; 15089 ctxt->options |= XML_PARSE_NODICT; 15090 } else { 15091 ctxt->dictNames = 1; 15092 } 15093 if (options & XML_PARSE_NOCDATA) { 15094 ctxt->sax->cdataBlock = NULL; 15095 options -= XML_PARSE_NOCDATA; 15096 ctxt->options |= XML_PARSE_NOCDATA; 15097 } 15098 if (options & XML_PARSE_NSCLEAN) { 15099 ctxt->options |= XML_PARSE_NSCLEAN; 15100 options -= XML_PARSE_NSCLEAN; 15101 } 15102 if (options & XML_PARSE_NONET) { 15103 ctxt->options |= XML_PARSE_NONET; 15104 options -= XML_PARSE_NONET; 15105 } 15106 if (options & XML_PARSE_COMPACT) { 15107 ctxt->options |= XML_PARSE_COMPACT; 15108 options -= XML_PARSE_COMPACT; 15109 } 15110 if (options & XML_PARSE_OLD10) { 15111 ctxt->options |= XML_PARSE_OLD10; 15112 options -= XML_PARSE_OLD10; 15113 } 15114 if (options & XML_PARSE_NOBASEFIX) { 15115 ctxt->options |= XML_PARSE_NOBASEFIX; 15116 options -= XML_PARSE_NOBASEFIX; 15117 } 15118 if (options & XML_PARSE_HUGE) { 15119 ctxt->options |= XML_PARSE_HUGE; 15120 options -= XML_PARSE_HUGE; 15121 if (ctxt->dict != NULL) 15122 xmlDictSetLimit(ctxt->dict, 0); 15123 } 15124 if (options & XML_PARSE_OLDSAX) { 15125 ctxt->options |= XML_PARSE_OLDSAX; 15126 options -= XML_PARSE_OLDSAX; 15127 } 15128 if (options & XML_PARSE_IGNORE_ENC) { 15129 ctxt->options |= XML_PARSE_IGNORE_ENC; 15130 options 
-= XML_PARSE_IGNORE_ENC; 15131 } 15132 if (options & XML_PARSE_BIG_LINES) { 15133 ctxt->options |= XML_PARSE_BIG_LINES; 15134 options -= XML_PARSE_BIG_LINES; 15135 } 15136 ctxt->linenumbers = 1; 15137 return (options); 15138 } 15139 15140 /** 15141 * xmlCtxtUseOptions: 15142 * @ctxt: an XML parser context 15143 * @options: a combination of xmlParserOption 15144 * 15145 * Applies the options to the parser context 15146 * 15147 * Returns 0 in case of success, the set of unknown or unimplemented options 15148 * in case of error. 15149 */ 15150 int 15151 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15152 { 15153 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15154 } 15155 15156 /** 15157 * xmlDoRead: 15158 * @ctxt: an XML parser context 15159 * @URL: the base URL to use for the document 15160 * @encoding: the document encoding, or NULL 15161 * @options: a combination of xmlParserOption 15162 * @reuse: keep the context for reuse 15163 * 15164 * Common front-end for the xmlRead functions 15165 * 15166 * Returns the resulting document tree or NULL 15167 */ 15168 static xmlDocPtr 15169 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15170 int options, int reuse) 15171 { 15172 xmlDocPtr ret; 15173 15174 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15175 if (encoding != NULL) { 15176 xmlCharEncodingHandlerPtr hdlr; 15177 15178 hdlr = xmlFindCharEncodingHandler(encoding); 15179 if (hdlr != NULL) 15180 xmlSwitchToEncoding(ctxt, hdlr); 15181 } 15182 if ((URL != NULL) && (ctxt->input != NULL) && 15183 (ctxt->input->filename == NULL)) 15184 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15185 xmlParseDocument(ctxt); 15186 if ((ctxt->wellFormed) || ctxt->recovery) 15187 ret = ctxt->myDoc; 15188 else { 15189 ret = NULL; 15190 if (ctxt->myDoc != NULL) { 15191 xmlFreeDoc(ctxt->myDoc); 15192 } 15193 } 15194 ctxt->myDoc = NULL; 15195 if (!reuse) { 15196 xmlFreeParserCtxt(ctxt); 15197 } 15198 15199 return (ret); 15200 
} 15201 15202 /** 15203 * xmlReadDoc: 15204 * @cur: a pointer to a zero terminated string 15205 * @URL: the base URL to use for the document 15206 * @encoding: the document encoding, or NULL 15207 * @options: a combination of xmlParserOption 15208 * 15209 * parse an XML in-memory document and build a tree. 15210 * 15211 * Returns the resulting document tree 15212 */ 15213 xmlDocPtr 15214 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15215 { 15216 xmlParserCtxtPtr ctxt; 15217 15218 if (cur == NULL) 15219 return (NULL); 15220 xmlInitParser(); 15221 15222 ctxt = xmlCreateDocParserCtxt(cur); 15223 if (ctxt == NULL) 15224 return (NULL); 15225 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15226 } 15227 15228 /** 15229 * xmlReadFile: 15230 * @filename: a file or URL 15231 * @encoding: the document encoding, or NULL 15232 * @options: a combination of xmlParserOption 15233 * 15234 * parse an XML file from the filesystem or the network. 15235 * 15236 * Returns the resulting document tree 15237 */ 15238 xmlDocPtr 15239 xmlReadFile(const char *filename, const char *encoding, int options) 15240 { 15241 xmlParserCtxtPtr ctxt; 15242 15243 xmlInitParser(); 15244 ctxt = xmlCreateURLParserCtxt(filename, options); 15245 if (ctxt == NULL) 15246 return (NULL); 15247 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15248 } 15249 15250 /** 15251 * xmlReadMemory: 15252 * @buffer: a pointer to a char array 15253 * @size: the size of the array 15254 * @URL: the base URL to use for the document 15255 * @encoding: the document encoding, or NULL 15256 * @options: a combination of xmlParserOption 15257 * 15258 * parse an XML in-memory document and build a tree. 
15259 * 15260 * Returns the resulting document tree 15261 */ 15262 xmlDocPtr 15263 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15264 { 15265 xmlParserCtxtPtr ctxt; 15266 15267 xmlInitParser(); 15268 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15269 if (ctxt == NULL) 15270 return (NULL); 15271 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15272 } 15273 15274 /** 15275 * xmlReadFd: 15276 * @fd: an open file descriptor 15277 * @URL: the base URL to use for the document 15278 * @encoding: the document encoding, or NULL 15279 * @options: a combination of xmlParserOption 15280 * 15281 * parse an XML from a file descriptor and build a tree. 15282 * NOTE that the file descriptor will not be closed when the 15283 * reader is closed or reset. 15284 * 15285 * Returns the resulting document tree 15286 */ 15287 xmlDocPtr 15288 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15289 { 15290 xmlParserCtxtPtr ctxt; 15291 xmlParserInputBufferPtr input; 15292 xmlParserInputPtr stream; 15293 15294 if (fd < 0) 15295 return (NULL); 15296 xmlInitParser(); 15297 15298 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15299 if (input == NULL) 15300 return (NULL); 15301 input->closecallback = NULL; 15302 ctxt = xmlNewParserCtxt(); 15303 if (ctxt == NULL) { 15304 xmlFreeParserInputBuffer(input); 15305 return (NULL); 15306 } 15307 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15308 if (stream == NULL) { 15309 xmlFreeParserInputBuffer(input); 15310 xmlFreeParserCtxt(ctxt); 15311 return (NULL); 15312 } 15313 inputPush(ctxt, stream); 15314 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15315 } 15316 15317 /** 15318 * xmlReadIO: 15319 * @ioread: an I/O read function 15320 * @ioclose: an I/O close function 15321 * @ioctx: an I/O handler 15322 * @URL: the base URL to use for the document 15323 * @encoding: the document encoding, or NULL 15324 * @options: a combination of 
xmlParserOption 15325 * 15326 * parse an XML document from I/O functions and source and build a tree. 15327 * 15328 * Returns the resulting document tree 15329 */ 15330 xmlDocPtr 15331 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15332 void *ioctx, const char *URL, const char *encoding, int options) 15333 { 15334 xmlParserCtxtPtr ctxt; 15335 xmlParserInputBufferPtr input; 15336 xmlParserInputPtr stream; 15337 15338 if (ioread == NULL) 15339 return (NULL); 15340 xmlInitParser(); 15341 15342 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15343 XML_CHAR_ENCODING_NONE); 15344 if (input == NULL) { 15345 if (ioclose != NULL) 15346 ioclose(ioctx); 15347 return (NULL); 15348 } 15349 ctxt = xmlNewParserCtxt(); 15350 if (ctxt == NULL) { 15351 xmlFreeParserInputBuffer(input); 15352 return (NULL); 15353 } 15354 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15355 if (stream == NULL) { 15356 xmlFreeParserInputBuffer(input); 15357 xmlFreeParserCtxt(ctxt); 15358 return (NULL); 15359 } 15360 inputPush(ctxt, stream); 15361 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15362 } 15363 15364 /** 15365 * xmlCtxtReadDoc: 15366 * @ctxt: an XML parser context 15367 * @cur: a pointer to a zero terminated string 15368 * @URL: the base URL to use for the document 15369 * @encoding: the document encoding, or NULL 15370 * @options: a combination of xmlParserOption 15371 * 15372 * parse an XML in-memory document and build a tree. 
15373 * This reuses the existing @ctxt parser context 15374 * 15375 * Returns the resulting document tree 15376 */ 15377 xmlDocPtr 15378 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15379 const char *URL, const char *encoding, int options) 15380 { 15381 xmlParserInputPtr stream; 15382 15383 if (cur == NULL) 15384 return (NULL); 15385 if (ctxt == NULL) 15386 return (NULL); 15387 xmlInitParser(); 15388 15389 xmlCtxtReset(ctxt); 15390 15391 stream = xmlNewStringInputStream(ctxt, cur); 15392 if (stream == NULL) { 15393 return (NULL); 15394 } 15395 inputPush(ctxt, stream); 15396 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15397 } 15398 15399 /** 15400 * xmlCtxtReadFile: 15401 * @ctxt: an XML parser context 15402 * @filename: a file or URL 15403 * @encoding: the document encoding, or NULL 15404 * @options: a combination of xmlParserOption 15405 * 15406 * parse an XML file from the filesystem or the network. 15407 * This reuses the existing @ctxt parser context 15408 * 15409 * Returns the resulting document tree 15410 */ 15411 xmlDocPtr 15412 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15413 const char *encoding, int options) 15414 { 15415 xmlParserInputPtr stream; 15416 15417 if (filename == NULL) 15418 return (NULL); 15419 if (ctxt == NULL) 15420 return (NULL); 15421 xmlInitParser(); 15422 15423 xmlCtxtReset(ctxt); 15424 15425 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15426 if (stream == NULL) { 15427 return (NULL); 15428 } 15429 inputPush(ctxt, stream); 15430 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15431 } 15432 15433 /** 15434 * xmlCtxtReadMemory: 15435 * @ctxt: an XML parser context 15436 * @buffer: a pointer to a char array 15437 * @size: the size of the array 15438 * @URL: the base URL to use for the document 15439 * @encoding: the document encoding, or NULL 15440 * @options: a combination of xmlParserOption 15441 * 15442 * parse an XML in-memory document and build a tree. 
15443 * This reuses the existing @ctxt parser context 15444 * 15445 * Returns the resulting document tree 15446 */ 15447 xmlDocPtr 15448 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15449 const char *URL, const char *encoding, int options) 15450 { 15451 xmlParserInputBufferPtr input; 15452 xmlParserInputPtr stream; 15453 15454 if (ctxt == NULL) 15455 return (NULL); 15456 if (buffer == NULL) 15457 return (NULL); 15458 xmlInitParser(); 15459 15460 xmlCtxtReset(ctxt); 15461 15462 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15463 if (input == NULL) { 15464 return(NULL); 15465 } 15466 15467 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15468 if (stream == NULL) { 15469 xmlFreeParserInputBuffer(input); 15470 return(NULL); 15471 } 15472 15473 inputPush(ctxt, stream); 15474 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15475 } 15476 15477 /** 15478 * xmlCtxtReadFd: 15479 * @ctxt: an XML parser context 15480 * @fd: an open file descriptor 15481 * @URL: the base URL to use for the document 15482 * @encoding: the document encoding, or NULL 15483 * @options: a combination of xmlParserOption 15484 * 15485 * parse an XML from a file descriptor and build a tree. 15486 * This reuses the existing @ctxt parser context 15487 * NOTE that the file descriptor will not be closed when the 15488 * reader is closed or reset. 
15489 * 15490 * Returns the resulting document tree 15491 */ 15492 xmlDocPtr 15493 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15494 const char *URL, const char *encoding, int options) 15495 { 15496 xmlParserInputBufferPtr input; 15497 xmlParserInputPtr stream; 15498 15499 if (fd < 0) 15500 return (NULL); 15501 if (ctxt == NULL) 15502 return (NULL); 15503 xmlInitParser(); 15504 15505 xmlCtxtReset(ctxt); 15506 15507 15508 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15509 if (input == NULL) 15510 return (NULL); 15511 input->closecallback = NULL; 15512 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15513 if (stream == NULL) { 15514 xmlFreeParserInputBuffer(input); 15515 return (NULL); 15516 } 15517 inputPush(ctxt, stream); 15518 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15519 } 15520 15521 /** 15522 * xmlCtxtReadIO: 15523 * @ctxt: an XML parser context 15524 * @ioread: an I/O read function 15525 * @ioclose: an I/O close function 15526 * @ioctx: an I/O handler 15527 * @URL: the base URL to use for the document 15528 * @encoding: the document encoding, or NULL 15529 * @options: a combination of xmlParserOption 15530 * 15531 * parse an XML document from I/O functions and source and build a tree. 
15532 * This reuses the existing @ctxt parser context 15533 * 15534 * Returns the resulting document tree 15535 */ 15536 xmlDocPtr 15537 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15538 xmlInputCloseCallback ioclose, void *ioctx, 15539 const char *URL, 15540 const char *encoding, int options) 15541 { 15542 xmlParserInputBufferPtr input; 15543 xmlParserInputPtr stream; 15544 15545 if (ioread == NULL) 15546 return (NULL); 15547 if (ctxt == NULL) 15548 return (NULL); 15549 xmlInitParser(); 15550 15551 xmlCtxtReset(ctxt); 15552 15553 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15554 XML_CHAR_ENCODING_NONE); 15555 if (input == NULL) { 15556 if (ioclose != NULL) 15557 ioclose(ioctx); 15558 return (NULL); 15559 } 15560 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15561 if (stream == NULL) { 15562 xmlFreeParserInputBuffer(input); 15563 return (NULL); 15564 } 15565 inputPush(ctxt, stream); 15566 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15567 } 15568 15569 #define bottom_parser 15570 #include "elfgcchack.h" 15571