/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel@veillard.com 31 */ 32 33 /* To avoid EBCDIC trouble when parsing on zOS */ 34 #if defined(__MVS__) 35 #pragma convert("ISO8859-1") 36 #endif 37 38 #define IN_LIBXML 39 #include "libxml.h" 40 41 #if defined(_WIN32) && !defined (__CYGWIN__) 42 #define XML_DIR_SEP '\\' 43 #else 44 #define XML_DIR_SEP '/' 45 #endif 46 47 #include <stdlib.h> 48 #include <limits.h> 49 #include <string.h> 50 #include <stdarg.h> 51 #include <stddef.h> 52 #include <libxml/xmlmemory.h> 53 #include <libxml/threads.h> 54 #include <libxml/globals.h> 55 #include <libxml/tree.h> 56 #include <libxml/parser.h> 57 #include <libxml/parserInternals.h> 58 #include <libxml/valid.h> 59 #include <libxml/entities.h> 60 #include <libxml/xmlerror.h> 61 #include <libxml/encoding.h> 62 #include <libxml/xmlIO.h> 63 #include <libxml/uri.h> 64 #ifdef LIBXML_CATALOG_ENABLED 65 #include <libxml/catalog.h> 66 #endif 67 #ifdef LIBXML_SCHEMAS_ENABLED 68 #include <libxml/xmlschemastypes.h> 69 #include <libxml/relaxng.h> 70 #endif 71 #ifdef HAVE_CTYPE_H 72 #include <ctype.h> 73 #endif 74 #ifdef HAVE_STDLIB_H 75 #include <stdlib.h> 76 #endif 77 #ifdef HAVE_SYS_STAT_H 78 #include <sys/stat.h> 79 #endif 80 #ifdef HAVE_FCNTL_H 81 #include <fcntl.h> 82 #endif 83 #ifdef HAVE_UNISTD_H 84 #include <unistd.h> 85 #endif 86 87 #include "buf.h" 88 #include "enc.h" 89 90 static void 91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92 93 static xmlParserCtxtPtr 94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95 const xmlChar *base, xmlParserCtxtPtr pctx); 96 97 static void xmlHaltParser(xmlParserCtxtPtr ctxt); 98 99 /************************************************************************ 100 * * 101 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE                                                      *
 *                                                                      *
 ************************************************************************/

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in byte of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacement per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10

/*
 * xmlParserEntityCheck
 * @ctxt:  the parser context; a NULL context is never flagged
 * @size:  a size, only looked at when @replacement is 0; values below
 *         XML_PARSER_BIG_ENTITY are always accepted
 * @ent:  the entity being looked at, may be NULL
 * @replacement:  a replacement volume; when non-zero it is the only
 *         criterion used, and values below XML_MAX_TEXT_LENGTH are always
 *         accepted
 *
 * Function to check non-linear entity expansion behaviour.
 * This is here to detect and stop exponential entity expansion.
 * This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * The first time a non-predefined entity with content is seen
 * (ent->checked == 0) its content is decoded once and ent->checked is
 * filled in: the value divided by 2 is the number of entities counted
 * while decoding plus one, and bit 0 is set when the decoded text
 * contains a '<'.
 *
 * Returns 1 if the expansion is considered excessive (an
 * XML_ERR_ENTITY_LOOP fatal error is raised) or if the last recorded
 * error already was XML_ERR_ENTITY_LOOP, 0 otherwise.
 */
static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
                     xmlEntityPtr ent, size_t replacement)
{
    size_t consumed = 0;

    /* No context, or the safety limits were explicitly switched off. */
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
        return (0);
    /* A loop was already reported: keep answering "excessive". */
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
        return (1);

    /*
     * This may look absurd but is needed to detect
     * entities problems
     */
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
        (ent->content != NULL) && (ent->checked == 0) &&
        (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
        unsigned long oldnbent = ctxt->nbentities;
        xmlChar *rep;

        /*
         * A non-zero value keeps this branch from running again for the
         * same entity; presumably this matters if the decoding below
         * re-enters this function -- TODO confirm.
         */
        ent->checked = 1;

        ++ctxt->depth;
        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                      XML_SUBSTITUTE_REF, 0, 0, 0);
        --ctxt->depth;
        if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
            /* Decoding failed or looped: empty the entity content. */
            ent->content[0] = 0;
        }

        /*
         * Twice (entities counted during the decoding + 1), which leaves
         * bit 0 free for the '<' flag set just below.
         */
        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
        if (rep != NULL) {
            if (xmlStrchr(rep, '<'))
                ent->checked |= 1;
            xmlFree(rep);
            rep = NULL;
        }
    }
    if (replacement != 0) {
        if (replacement < XML_MAX_TEXT_LENGTH)
            return(0);

        /*
         * If the volume of entity copy reaches 10 times the
         * amount of parsed data and over the large text threshold
         * then that's very likely to be an abuse.
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                       (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        if (replacement < XML_PARSER_NON_LINEAR * consumed)
            return(0);
    } else if (size != 0) {
        /*
         * Do the check based on the replacement size of the entity
         */
        if (size < XML_PARSER_BIG_ENTITY)
            return(0);

        /*
         * A limit on the amount of text data reasonably used
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                       (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
            (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
            return (0);
    } else if (ent != NULL) {
        /*
         * use the number of parsed entities in the replacement
         */
        size = ent->checked / 2;

        /*
         * The amount of data parsed counting entities size only once
         */
        if (ctxt->input != NULL) {
            consumed = ctxt->input->consumed +
                       (ctxt->input->cur - ctxt->input->base);
        }
        consumed += ctxt->sizeentities;

        /*
         * Check the density of entities for the amount of data
         * knowing an entity reference will take at least 3 bytes
         */
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
            return (0);
    } else {
        /*
         * strange we got no data for checking: only complain when the
         * last error is an undeclared-entity error or warning and more
         * than 10000 entities have been counted
         */
        if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
             (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
            (ctxt->nbentities <= 10000))
            return (0);
    }
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
    return (1);
}

/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process.
This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;



#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100

/*
 * List of XML prefixed PI allowed by W3C specs
 * (NULL-terminated array)
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};


/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
              const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);

/************************************************************************
 *                                                                      *
 *              Some
factorized error routines * 302 * * 303 ************************************************************************/ 304 305 /** 306 * xmlErrAttributeDup: 307 * @ctxt: an XML parser context 308 * @prefix: the attribute prefix 309 * @localname: the attribute localname 310 * 311 * Handle a redefinition of attribute error 312 */ 313 static void 314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 315 const xmlChar * localname) 316 { 317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 318 (ctxt->instate == XML_PARSER_EOF)) 319 return; 320 if (ctxt != NULL) 321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 322 323 if (prefix == NULL) 324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 326 (const char *) localname, NULL, NULL, 0, 0, 327 "Attribute %s redefined\n", localname); 328 else 329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 331 (const char *) prefix, (const char *) localname, 332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 333 localname); 334 if (ctxt != NULL) { 335 ctxt->wellFormed = 0; 336 if (ctxt->recovery == 0) 337 ctxt->disableSAX = 1; 338 } 339 } 340 341 /** 342 * xmlFatalErr: 343 * @ctxt: an XML parser context 344 * @error: the error number 345 * @extra: extra information string 346 * 347 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 348 */ 349 static void 350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 351 { 352 const char *errmsg; 353 354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 355 (ctxt->instate == XML_PARSER_EOF)) 356 return; 357 switch (error) { 358 case XML_ERR_INVALID_HEX_CHARREF: 359 errmsg = "CharRef: invalid hexadecimal value"; 360 break; 361 case XML_ERR_INVALID_DEC_CHARREF: 362 errmsg = "CharRef: invalid decimal value"; 363 break; 364 case XML_ERR_INVALID_CHARREF: 365 errmsg = "CharRef: invalid value"; 366 break; 367 case XML_ERR_INTERNAL_ERROR: 368 errmsg = "internal error"; 369 break; 370 case XML_ERR_PEREF_AT_EOF: 371 errmsg = "PEReference at end of document"; 372 break; 373 case XML_ERR_PEREF_IN_PROLOG: 374 errmsg = "PEReference in prolog"; 375 break; 376 case XML_ERR_PEREF_IN_EPILOG: 377 errmsg = "PEReference in epilog"; 378 break; 379 case XML_ERR_PEREF_NO_NAME: 380 errmsg = "PEReference: no name"; 381 break; 382 case XML_ERR_PEREF_SEMICOL_MISSING: 383 errmsg = "PEReference: expecting ';'"; 384 break; 385 case XML_ERR_ENTITY_LOOP: 386 errmsg = "Detected an entity reference loop"; 387 break; 388 case XML_ERR_ENTITY_NOT_STARTED: 389 errmsg = "EntityValue: \" or ' expected"; 390 break; 391 case XML_ERR_ENTITY_PE_INTERNAL: 392 errmsg = "PEReferences forbidden in internal subset"; 393 break; 394 case XML_ERR_ENTITY_NOT_FINISHED: 395 errmsg = "EntityValue: \" or ' expected"; 396 break; 397 case XML_ERR_ATTRIBUTE_NOT_STARTED: 398 errmsg = "AttValue: \" or ' expected"; 399 break; 400 case XML_ERR_LT_IN_ATTRIBUTE: 401 errmsg = "Unescaped '<' not allowed in attributes values"; 402 break; 403 case XML_ERR_LITERAL_NOT_STARTED: 404 errmsg = "SystemLiteral \" or ' expected"; 405 break; 406 case XML_ERR_LITERAL_NOT_FINISHED: 407 errmsg = "Unfinished System or Public ID \" or ' expected"; 408 break; 409 case XML_ERR_MISPLACED_CDATA_END: 410 errmsg = "Sequence ']]>' not allowed in content"; 411 break; 412 case 
XML_ERR_URI_REQUIRED: 413 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 414 break; 415 case XML_ERR_PUBID_REQUIRED: 416 errmsg = "PUBLIC, the Public Identifier is missing"; 417 break; 418 case XML_ERR_HYPHEN_IN_COMMENT: 419 errmsg = "Comment must not contain '--' (double-hyphen)"; 420 break; 421 case XML_ERR_PI_NOT_STARTED: 422 errmsg = "xmlParsePI : no target name"; 423 break; 424 case XML_ERR_RESERVED_XML_NAME: 425 errmsg = "Invalid PI name"; 426 break; 427 case XML_ERR_NOTATION_NOT_STARTED: 428 errmsg = "NOTATION: Name expected here"; 429 break; 430 case XML_ERR_NOTATION_NOT_FINISHED: 431 errmsg = "'>' required to close NOTATION declaration"; 432 break; 433 case XML_ERR_VALUE_REQUIRED: 434 errmsg = "Entity value required"; 435 break; 436 case XML_ERR_URI_FRAGMENT: 437 errmsg = "Fragment not allowed"; 438 break; 439 case XML_ERR_ATTLIST_NOT_STARTED: 440 errmsg = "'(' required to start ATTLIST enumeration"; 441 break; 442 case XML_ERR_NMTOKEN_REQUIRED: 443 errmsg = "NmToken expected in ATTLIST enumeration"; 444 break; 445 case XML_ERR_ATTLIST_NOT_FINISHED: 446 errmsg = "')' required to finish ATTLIST enumeration"; 447 break; 448 case XML_ERR_MIXED_NOT_STARTED: 449 errmsg = "MixedContentDecl : '|' or ')*' expected"; 450 break; 451 case XML_ERR_PCDATA_REQUIRED: 452 errmsg = "MixedContentDecl : '#PCDATA' expected"; 453 break; 454 case XML_ERR_ELEMCONTENT_NOT_STARTED: 455 errmsg = "ContentDecl : Name or '(' expected"; 456 break; 457 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 458 errmsg = "ContentDecl : ',' '|' or ')' expected"; 459 break; 460 case XML_ERR_PEREF_IN_INT_SUBSET: 461 errmsg = 462 "PEReference: forbidden within markup decl in internal subset"; 463 break; 464 case XML_ERR_GT_REQUIRED: 465 errmsg = "expected '>'"; 466 break; 467 case XML_ERR_CONDSEC_INVALID: 468 errmsg = "XML conditional section '[' expected"; 469 break; 470 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 471 errmsg = "Content error in the external subset"; 472 break; 473 case 
XML_ERR_CONDSEC_INVALID_KEYWORD: 474 errmsg = 475 "conditional section INCLUDE or IGNORE keyword expected"; 476 break; 477 case XML_ERR_CONDSEC_NOT_FINISHED: 478 errmsg = "XML conditional section not closed"; 479 break; 480 case XML_ERR_XMLDECL_NOT_STARTED: 481 errmsg = "Text declaration '<?xml' required"; 482 break; 483 case XML_ERR_XMLDECL_NOT_FINISHED: 484 errmsg = "parsing XML declaration: '?>' expected"; 485 break; 486 case XML_ERR_EXT_ENTITY_STANDALONE: 487 errmsg = "external parsed entities cannot be standalone"; 488 break; 489 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 490 errmsg = "EntityRef: expecting ';'"; 491 break; 492 case XML_ERR_DOCTYPE_NOT_FINISHED: 493 errmsg = "DOCTYPE improperly terminated"; 494 break; 495 case XML_ERR_LTSLASH_REQUIRED: 496 errmsg = "EndTag: '</' not found"; 497 break; 498 case XML_ERR_EQUAL_REQUIRED: 499 errmsg = "expected '='"; 500 break; 501 case XML_ERR_STRING_NOT_CLOSED: 502 errmsg = "String not closed expecting \" or '"; 503 break; 504 case XML_ERR_STRING_NOT_STARTED: 505 errmsg = "String not started expecting ' or \""; 506 break; 507 case XML_ERR_ENCODING_NAME: 508 errmsg = "Invalid XML encoding name"; 509 break; 510 case XML_ERR_STANDALONE_VALUE: 511 errmsg = "standalone accepts only 'yes' or 'no'"; 512 break; 513 case XML_ERR_DOCUMENT_EMPTY: 514 errmsg = "Document is empty"; 515 break; 516 case XML_ERR_DOCUMENT_END: 517 errmsg = "Extra content at the end of the document"; 518 break; 519 case XML_ERR_NOT_WELL_BALANCED: 520 errmsg = "chunk is not well balanced"; 521 break; 522 case XML_ERR_EXTRA_CONTENT: 523 errmsg = "extra content at the end of well balanced chunk"; 524 break; 525 case XML_ERR_VERSION_MISSING: 526 errmsg = "Malformed declaration expecting version"; 527 break; 528 case XML_ERR_NAME_TOO_LONG: 529 errmsg = "Name too long use XML_PARSE_HUGE option"; 530 break; 531 #if 0 532 case: 533 errmsg = ""; 534 break; 535 #endif 536 default: 537 errmsg = "Unregistered error message"; 538 } 539 if (ctxt != NULL) 540 
ctxt->errNo = error; 541 if (info == NULL) { 542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 544 errmsg); 545 } else { 546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 548 errmsg, info); 549 } 550 if (ctxt != NULL) { 551 ctxt->wellFormed = 0; 552 if (ctxt->recovery == 0) 553 ctxt->disableSAX = 1; 554 } 555 } 556 557 /** 558 * xmlFatalErrMsg: 559 * @ctxt: an XML parser context 560 * @error: the error number 561 * @msg: the error message 562 * 563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 564 */ 565 static void LIBXML_ATTR_FORMAT(3,0) 566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 567 const char *msg) 568 { 569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 570 (ctxt->instate == XML_PARSER_EOF)) 571 return; 572 if (ctxt != NULL) 573 ctxt->errNo = error; 574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 576 if (ctxt != NULL) { 577 ctxt->wellFormed = 0; 578 if (ctxt->recovery == 0) 579 ctxt->disableSAX = 1; 580 } 581 } 582 583 /** 584 * xmlWarningMsg: 585 * @ctxt: an XML parser context 586 * @error: the error number 587 * @msg: the error message 588 * @str1: extra data 589 * @str2: extra data 590 * 591 * Handle a warning. 592 */ 593 static void LIBXML_ATTR_FORMAT(3,0) 594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 595 const char *msg, const xmlChar *str1, const xmlChar *str2) 596 { 597 xmlStructuredErrorFunc schannel = NULL; 598 599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 600 (ctxt->instate == XML_PARSER_EOF)) 601 return; 602 if ((ctxt != NULL) && (ctxt->sax != NULL) && 603 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 604 schannel = ctxt->sax->serror; 605 if (ctxt != NULL) { 606 __xmlRaiseError(schannel, 607 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 608 ctxt->userData, 609 ctxt, NULL, XML_FROM_PARSER, error, 610 XML_ERR_WARNING, NULL, 0, 611 (const char *) str1, (const char *) str2, NULL, 0, 0, 612 msg, (const char *) str1, (const char *) str2); 613 } else { 614 __xmlRaiseError(schannel, NULL, NULL, 615 ctxt, NULL, XML_FROM_PARSER, error, 616 XML_ERR_WARNING, NULL, 0, 617 (const char *) str1, (const char *) str2, NULL, 0, 0, 618 msg, (const char *) str1, (const char *) str2); 619 } 620 } 621 622 /** 623 * xmlValidityError: 624 * @ctxt: an XML parser context 625 * @error: the error number 626 * @msg: the error message 627 * @str1: extra data 628 * 629 * Handle a validity error. 630 */ 631 static void LIBXML_ATTR_FORMAT(3,0) 632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 633 const char *msg, const xmlChar *str1, const xmlChar *str2) 634 { 635 xmlStructuredErrorFunc schannel = NULL; 636 637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 638 (ctxt->instate == XML_PARSER_EOF)) 639 return; 640 if (ctxt != NULL) { 641 ctxt->errNo = error; 642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 643 schannel = ctxt->sax->serror; 644 } 645 if (ctxt != NULL) { 646 __xmlRaiseError(schannel, 647 ctxt->vctxt.error, ctxt->vctxt.userData, 648 ctxt, NULL, XML_FROM_DTD, error, 649 XML_ERR_ERROR, NULL, 0, (const char *) str1, 650 (const char *) str2, NULL, 0, 0, 651 msg, (const char *) str1, (const char *) str2); 652 ctxt->valid = 0; 653 } else { 654 __xmlRaiseError(schannel, NULL, NULL, 655 ctxt, NULL, XML_FROM_DTD, error, 656 XML_ERR_ERROR, NULL, 0, (const char *) str1, 657 (const char *) str2, NULL, 0, 0, 658 msg, (const char *) str1, (const char *) str2); 659 } 660 } 661 662 /** 663 * xmlFatalErrMsgInt: 664 * @ctxt: an XML parser context 665 * @error: the error number 666 * @msg: the error message 667 * @val: an integer value 668 * 669 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 670 */ 671 static void LIBXML_ATTR_FORMAT(3,0) 672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 673 const char *msg, int val) 674 { 675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 676 (ctxt->instate == XML_PARSER_EOF)) 677 return; 678 if (ctxt != NULL) 679 ctxt->errNo = error; 680 __xmlRaiseError(NULL, NULL, NULL, 681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 683 if (ctxt != NULL) { 684 ctxt->wellFormed = 0; 685 if (ctxt->recovery == 0) 686 ctxt->disableSAX = 1; 687 } 688 } 689 690 /** 691 * xmlFatalErrMsgStrIntStr: 692 * @ctxt: an XML parser context 693 * @error: the error number 694 * @msg: the error message 695 * @str1: an string info 696 * @val: an integer value 697 * @str2: an string info 698 * 699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 700 */ 701 static void LIBXML_ATTR_FORMAT(3,0) 702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 703 const char *msg, const xmlChar *str1, int val, 704 const xmlChar *str2) 705 { 706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 707 (ctxt->instate == XML_PARSER_EOF)) 708 return; 709 if (ctxt != NULL) 710 ctxt->errNo = error; 711 __xmlRaiseError(NULL, NULL, NULL, 712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 713 NULL, 0, (const char *) str1, (const char *) str2, 714 NULL, val, 0, msg, str1, val, str2); 715 if (ctxt != NULL) { 716 ctxt->wellFormed = 0; 717 if (ctxt->recovery == 0) 718 ctxt->disableSAX = 1; 719 } 720 } 721 722 /** 723 * xmlFatalErrMsgStr: 724 * @ctxt: an XML parser context 725 * @error: the error number 726 * @msg: the error message 727 * @val: a string value 728 * 729 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 730 */ 731 static void LIBXML_ATTR_FORMAT(3,0) 732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 733 const char *msg, const xmlChar * val) 734 { 735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 736 (ctxt->instate == XML_PARSER_EOF)) 737 return; 738 if (ctxt != NULL) 739 ctxt->errNo = error; 740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 741 XML_FROM_PARSER, error, XML_ERR_FATAL, 742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 743 val); 744 if (ctxt != NULL) { 745 ctxt->wellFormed = 0; 746 if (ctxt->recovery == 0) 747 ctxt->disableSAX = 1; 748 } 749 } 750 751 /** 752 * xmlErrMsgStr: 753 * @ctxt: an XML parser context 754 * @error: the error number 755 * @msg: the error message 756 * @val: a string value 757 * 758 * Handle a non fatal parser error 759 */ 760 static void LIBXML_ATTR_FORMAT(3,0) 761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 762 const char *msg, const xmlChar * val) 763 { 764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 765 (ctxt->instate == XML_PARSER_EOF)) 766 return; 767 if (ctxt != NULL) 768 ctxt->errNo = error; 769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 770 XML_FROM_PARSER, error, XML_ERR_ERROR, 771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 772 val); 773 } 774 775 /** 776 * xmlNsErr: 777 * @ctxt: an XML parser context 778 * @error: the error number 779 * @msg: the message 780 * @info1: extra information string 781 * @info2: extra information string 782 * 783 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 784 */ 785 static void LIBXML_ATTR_FORMAT(3,0) 786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 787 const char *msg, 788 const xmlChar * info1, const xmlChar * info2, 789 const xmlChar * info3) 790 { 791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 792 (ctxt->instate == XML_PARSER_EOF)) 793 return; 794 if (ctxt != NULL) 795 ctxt->errNo = error; 796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 797 XML_ERR_ERROR, NULL, 0, (const char *) info1, 798 (const char *) info2, (const char *) info3, 0, 0, msg, 799 info1, info2, info3); 800 if (ctxt != NULL) 801 ctxt->nsWellFormed = 0; 802 } 803 804 /** 805 * xmlNsWarn 806 * @ctxt: an XML parser context 807 * @error: the error number 808 * @msg: the message 809 * @info1: extra information string 810 * @info2: extra information string 811 * 812 * Handle a namespace warning error 813 */ 814 static void LIBXML_ATTR_FORMAT(3,0) 815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 816 const char *msg, 817 const xmlChar * info1, const xmlChar * info2, 818 const xmlChar * info3) 819 { 820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 821 (ctxt->instate == XML_PARSER_EOF)) 822 return; 823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 824 XML_ERR_WARNING, NULL, 0, (const char *) info1, 825 (const char *) info2, (const char *) info3, 0, 0, msg, 826 info1, info2, info3); 827 } 828 829 /************************************************************************ 830 * * 831 * Library wide options * 832 * * 833 ************************************************************************/ 834 835 /** 836 * xmlHasFeature: 837 * @feature: the feature to be examined 838 * 839 * Examines if the library has been compiled with a given feature. 840 * 841 * Returns a non-zero value if the feature exist, otherwise zero. 842 * Returns zero (0) if the feature does not exist or an unknown 843 * unknown feature is requested, non-zero otherwise. 
844 */ 845 int 846 xmlHasFeature(xmlFeature feature) 847 { 848 switch (feature) { 849 case XML_WITH_THREAD: 850 #ifdef LIBXML_THREAD_ENABLED 851 return(1); 852 #else 853 return(0); 854 #endif 855 case XML_WITH_TREE: 856 #ifdef LIBXML_TREE_ENABLED 857 return(1); 858 #else 859 return(0); 860 #endif 861 case XML_WITH_OUTPUT: 862 #ifdef LIBXML_OUTPUT_ENABLED 863 return(1); 864 #else 865 return(0); 866 #endif 867 case XML_WITH_PUSH: 868 #ifdef LIBXML_PUSH_ENABLED 869 return(1); 870 #else 871 return(0); 872 #endif 873 case XML_WITH_READER: 874 #ifdef LIBXML_READER_ENABLED 875 return(1); 876 #else 877 return(0); 878 #endif 879 case XML_WITH_PATTERN: 880 #ifdef LIBXML_PATTERN_ENABLED 881 return(1); 882 #else 883 return(0); 884 #endif 885 case XML_WITH_WRITER: 886 #ifdef LIBXML_WRITER_ENABLED 887 return(1); 888 #else 889 return(0); 890 #endif 891 case XML_WITH_SAX1: 892 #ifdef LIBXML_SAX1_ENABLED 893 return(1); 894 #else 895 return(0); 896 #endif 897 case XML_WITH_FTP: 898 #ifdef LIBXML_FTP_ENABLED 899 return(1); 900 #else 901 return(0); 902 #endif 903 case XML_WITH_HTTP: 904 #ifdef LIBXML_HTTP_ENABLED 905 return(1); 906 #else 907 return(0); 908 #endif 909 case XML_WITH_VALID: 910 #ifdef LIBXML_VALID_ENABLED 911 return(1); 912 #else 913 return(0); 914 #endif 915 case XML_WITH_HTML: 916 #ifdef LIBXML_HTML_ENABLED 917 return(1); 918 #else 919 return(0); 920 #endif 921 case XML_WITH_LEGACY: 922 #ifdef LIBXML_LEGACY_ENABLED 923 return(1); 924 #else 925 return(0); 926 #endif 927 case XML_WITH_C14N: 928 #ifdef LIBXML_C14N_ENABLED 929 return(1); 930 #else 931 return(0); 932 #endif 933 case XML_WITH_CATALOG: 934 #ifdef LIBXML_CATALOG_ENABLED 935 return(1); 936 #else 937 return(0); 938 #endif 939 case XML_WITH_XPATH: 940 #ifdef LIBXML_XPATH_ENABLED 941 return(1); 942 #else 943 return(0); 944 #endif 945 case XML_WITH_XPTR: 946 #ifdef LIBXML_XPTR_ENABLED 947 return(1); 948 #else 949 return(0); 950 #endif 951 case XML_WITH_XINCLUDE: 952 #ifdef LIBXML_XINCLUDE_ENABLED 953 return(1); 954 
#else 955 return(0); 956 #endif 957 case XML_WITH_ICONV: 958 #ifdef LIBXML_ICONV_ENABLED 959 return(1); 960 #else 961 return(0); 962 #endif 963 case XML_WITH_ISO8859X: 964 #ifdef LIBXML_ISO8859X_ENABLED 965 return(1); 966 #else 967 return(0); 968 #endif 969 case XML_WITH_UNICODE: 970 #ifdef LIBXML_UNICODE_ENABLED 971 return(1); 972 #else 973 return(0); 974 #endif 975 case XML_WITH_REGEXP: 976 #ifdef LIBXML_REGEXP_ENABLED 977 return(1); 978 #else 979 return(0); 980 #endif 981 case XML_WITH_AUTOMATA: 982 #ifdef LIBXML_AUTOMATA_ENABLED 983 return(1); 984 #else 985 return(0); 986 #endif 987 case XML_WITH_EXPR: 988 #ifdef LIBXML_EXPR_ENABLED 989 return(1); 990 #else 991 return(0); 992 #endif 993 case XML_WITH_SCHEMAS: 994 #ifdef LIBXML_SCHEMAS_ENABLED 995 return(1); 996 #else 997 return(0); 998 #endif 999 case XML_WITH_SCHEMATRON: 1000 #ifdef LIBXML_SCHEMATRON_ENABLED 1001 return(1); 1002 #else 1003 return(0); 1004 #endif 1005 case XML_WITH_MODULES: 1006 #ifdef LIBXML_MODULES_ENABLED 1007 return(1); 1008 #else 1009 return(0); 1010 #endif 1011 case XML_WITH_DEBUG: 1012 #ifdef LIBXML_DEBUG_ENABLED 1013 return(1); 1014 #else 1015 return(0); 1016 #endif 1017 case XML_WITH_DEBUG_MEM: 1018 #ifdef DEBUG_MEMORY_LOCATION 1019 return(1); 1020 #else 1021 return(0); 1022 #endif 1023 case XML_WITH_DEBUG_RUN: 1024 #ifdef LIBXML_DEBUG_RUNTIME 1025 return(1); 1026 #else 1027 return(0); 1028 #endif 1029 case XML_WITH_ZLIB: 1030 #ifdef LIBXML_ZLIB_ENABLED 1031 return(1); 1032 #else 1033 return(0); 1034 #endif 1035 case XML_WITH_LZMA: 1036 #ifdef LIBXML_LZMA_ENABLED 1037 return(1); 1038 #else 1039 return(0); 1040 #endif 1041 case XML_WITH_ICU: 1042 #ifdef LIBXML_ICU_ENABLED 1043 return(1); 1044 #else 1045 return(0); 1046 #endif 1047 default: 1048 break; 1049 } 1050 return(0); 1051 } 1052 1053 /************************************************************************ 1054 * * 1055 * SAX2 defaulted attributes handling * 1056 * * 1057 
************************************************************************/ 1058 1059 /** 1060 * xmlDetectSAX2: 1061 * @ctxt: an XML parser context 1062 * 1063 * Do the SAX2 detection and specific intialization 1064 */ 1065 static void 1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1067 if (ctxt == NULL) return; 1068 #ifdef LIBXML_SAX1_ENABLED 1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1070 ((ctxt->sax->startElementNs != NULL) || 1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1072 #else 1073 ctxt->sax2 = 1; 1074 #endif /* LIBXML_SAX1_ENABLED */ 1075 1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1080 (ctxt->str_xml_ns == NULL)) { 1081 xmlErrMemory(ctxt, NULL); 1082 } 1083 } 1084 1085 typedef struct _xmlDefAttrs xmlDefAttrs; 1086 typedef xmlDefAttrs *xmlDefAttrsPtr; 1087 struct _xmlDefAttrs { 1088 int nbAttrs; /* number of defaulted attributes on that element */ 1089 int maxAttrs; /* the size of the array */ 1090 #if __STDC_VERSION__ >= 199901L 1091 /* Using a C99 flexible array member avoids UBSan errors. */ 1092 const xmlChar *values[]; /* array of localname/prefix/values/external */ 1093 #else 1094 const xmlChar *values[5]; 1095 #endif 1096 }; 1097 1098 /** 1099 * xmlAttrNormalizeSpace: 1100 * @src: the source string 1101 * @dst: the target string 1102 * 1103 * Normalize the space in non CDATA attribute values: 1104 * If the attribute type is not CDATA, then the XML processor MUST further 1105 * process the normalized attribute value by discarding any leading and 1106 * trailing space (#x20) characters, and by replacing sequences of space 1107 * (#x20) characters by a single space (#x20) character. 
1108 * Note that the size of dst need to be at least src, and if one doesn't need 1109 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1110 * passing src as dst is just fine. 1111 * 1112 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1113 * is needed. 1114 */ 1115 static xmlChar * 1116 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1117 { 1118 if ((src == NULL) || (dst == NULL)) 1119 return(NULL); 1120 1121 while (*src == 0x20) src++; 1122 while (*src != 0) { 1123 if (*src == 0x20) { 1124 while (*src == 0x20) src++; 1125 if (*src != 0) 1126 *dst++ = 0x20; 1127 } else { 1128 *dst++ = *src++; 1129 } 1130 } 1131 *dst = 0; 1132 if (dst == src) 1133 return(NULL); 1134 return(dst); 1135 } 1136 1137 /** 1138 * xmlAttrNormalizeSpace2: 1139 * @src: the source string 1140 * 1141 * Normalize the space in non CDATA attribute values, a slightly more complex 1142 * front end to avoid allocation problems when running on attribute values 1143 * coming from the input. 1144 * 1145 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1146 * is needed. 
1147 */ 1148 static const xmlChar * 1149 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1150 { 1151 int i; 1152 int remove_head = 0; 1153 int need_realloc = 0; 1154 const xmlChar *cur; 1155 1156 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1157 return(NULL); 1158 i = *len; 1159 if (i <= 0) 1160 return(NULL); 1161 1162 cur = src; 1163 while (*cur == 0x20) { 1164 cur++; 1165 remove_head++; 1166 } 1167 while (*cur != 0) { 1168 if (*cur == 0x20) { 1169 cur++; 1170 if ((*cur == 0x20) || (*cur == 0)) { 1171 need_realloc = 1; 1172 break; 1173 } 1174 } else 1175 cur++; 1176 } 1177 if (need_realloc) { 1178 xmlChar *ret; 1179 1180 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1181 if (ret == NULL) { 1182 xmlErrMemory(ctxt, NULL); 1183 return(NULL); 1184 } 1185 xmlAttrNormalizeSpace(ret, ret); 1186 *len = (int) strlen((const char *)ret); 1187 return(ret); 1188 } else if (remove_head) { 1189 *len -= remove_head; 1190 memmove(src, src + remove_head, 1 + *len); 1191 return(src); 1192 } 1193 return(NULL); 1194 } 1195 1196 /** 1197 * xmlAddDefAttrs: 1198 * @ctxt: an XML parser context 1199 * @fullname: the element fullname 1200 * @fullattr: the attribute fullname 1201 * @value: the attribute value 1202 * 1203 * Add a defaulted attribute for an element 1204 */ 1205 static void 1206 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1207 const xmlChar *fullname, 1208 const xmlChar *fullattr, 1209 const xmlChar *value) { 1210 xmlDefAttrsPtr defaults; 1211 int len; 1212 const xmlChar *name; 1213 const xmlChar *prefix; 1214 1215 /* 1216 * Allows to detect attribute redefinitions 1217 */ 1218 if (ctxt->attsSpecial != NULL) { 1219 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1220 return; 1221 } 1222 1223 if (ctxt->attsDefault == NULL) { 1224 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1225 if (ctxt->attsDefault == NULL) 1226 goto mem_error; 1227 } 1228 1229 /* 1230 * split the element name into prefix:localname , the string 
found 1231 * are within the DTD and then not associated to namespace names. 1232 */ 1233 name = xmlSplitQName3(fullname, &len); 1234 if (name == NULL) { 1235 name = xmlDictLookup(ctxt->dict, fullname, -1); 1236 prefix = NULL; 1237 } else { 1238 name = xmlDictLookup(ctxt->dict, name, -1); 1239 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1240 } 1241 1242 /* 1243 * make sure there is some storage 1244 */ 1245 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1246 if (defaults == NULL) { 1247 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1248 (4 * 5) * sizeof(const xmlChar *)); 1249 if (defaults == NULL) 1250 goto mem_error; 1251 defaults->nbAttrs = 0; 1252 defaults->maxAttrs = 4; 1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1254 defaults, NULL) < 0) { 1255 xmlFree(defaults); 1256 goto mem_error; 1257 } 1258 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1259 xmlDefAttrsPtr temp; 1260 1261 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1262 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1263 if (temp == NULL) 1264 goto mem_error; 1265 defaults = temp; 1266 defaults->maxAttrs *= 2; 1267 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1268 defaults, NULL) < 0) { 1269 xmlFree(defaults); 1270 goto mem_error; 1271 } 1272 } 1273 1274 /* 1275 * Split the element name into prefix:localname , the string found 1276 * are within the DTD and hen not associated to namespace names. 
1277 */ 1278 name = xmlSplitQName3(fullattr, &len); 1279 if (name == NULL) { 1280 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1281 prefix = NULL; 1282 } else { 1283 name = xmlDictLookup(ctxt->dict, name, -1); 1284 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1285 } 1286 1287 defaults->values[5 * defaults->nbAttrs] = name; 1288 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1289 /* intern the string and precompute the end */ 1290 len = xmlStrlen(value); 1291 value = xmlDictLookup(ctxt->dict, value, len); 1292 defaults->values[5 * defaults->nbAttrs + 2] = value; 1293 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1294 if (ctxt->external) 1295 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1296 else 1297 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1298 defaults->nbAttrs++; 1299 1300 return; 1301 1302 mem_error: 1303 xmlErrMemory(ctxt, NULL); 1304 return; 1305 } 1306 1307 /** 1308 * xmlAddSpecialAttr: 1309 * @ctxt: an XML parser context 1310 * @fullname: the element fullname 1311 * @fullattr: the attribute fullname 1312 * @type: the attribute type 1313 * 1314 * Register this attribute type 1315 */ 1316 static void 1317 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1318 const xmlChar *fullname, 1319 const xmlChar *fullattr, 1320 int type) 1321 { 1322 if (ctxt->attsSpecial == NULL) { 1323 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1324 if (ctxt->attsSpecial == NULL) 1325 goto mem_error; 1326 } 1327 1328 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1329 return; 1330 1331 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1332 (void *) (ptrdiff_t) type); 1333 return; 1334 1335 mem_error: 1336 xmlErrMemory(ctxt, NULL); 1337 return; 1338 } 1339 1340 /** 1341 * xmlCleanSpecialAttrCallback: 1342 * 1343 * Removes CDATA attributes from the special attribute table 1344 */ 1345 static void 1346 xmlCleanSpecialAttrCallback(void *payload, void *data, 1347 const xmlChar *fullname, const xmlChar 
*fullattr, 1348 const xmlChar *unused ATTRIBUTE_UNUSED) { 1349 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1350 1351 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { 1352 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1353 } 1354 } 1355 1356 /** 1357 * xmlCleanSpecialAttr: 1358 * @ctxt: an XML parser context 1359 * 1360 * Trim the list of attributes defined to remove all those of type 1361 * CDATA as they are not special. This call should be done when finishing 1362 * to parse the DTD and before starting to parse the document root. 1363 */ 1364 static void 1365 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1366 { 1367 if (ctxt->attsSpecial == NULL) 1368 return; 1369 1370 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1371 1372 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1373 xmlHashFree(ctxt->attsSpecial, NULL); 1374 ctxt->attsSpecial = NULL; 1375 } 1376 return; 1377 } 1378 1379 /** 1380 * xmlCheckLanguageID: 1381 * @lang: pointer to the string value 1382 * 1383 * Checks that the value conforms to the LanguageID production: 1384 * 1385 * NOTE: this is somewhat deprecated, those productions were removed from 1386 * the XML Second edition. 
1387 * 1388 * [33] LanguageID ::= Langcode ('-' Subcode)* 1389 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1390 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1391 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1392 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1393 * [38] Subcode ::= ([a-z] | [A-Z])+ 1394 * 1395 * The current REC reference the sucessors of RFC 1766, currently 5646 1396 * 1397 * http://www.rfc-editor.org/rfc/rfc5646.txt 1398 * langtag = language 1399 * ["-" script] 1400 * ["-" region] 1401 * *("-" variant) 1402 * *("-" extension) 1403 * ["-" privateuse] 1404 * language = 2*3ALPHA ; shortest ISO 639 code 1405 * ["-" extlang] ; sometimes followed by 1406 * ; extended language subtags 1407 * / 4ALPHA ; or reserved for future use 1408 * / 5*8ALPHA ; or registered language subtag 1409 * 1410 * extlang = 3ALPHA ; selected ISO 639 codes 1411 * *2("-" 3ALPHA) ; permanently reserved 1412 * 1413 * script = 4ALPHA ; ISO 15924 code 1414 * 1415 * region = 2ALPHA ; ISO 3166-1 code 1416 * / 3DIGIT ; UN M.49 code 1417 * 1418 * variant = 5*8alphanum ; registered variants 1419 * / (DIGIT 3alphanum) 1420 * 1421 * extension = singleton 1*("-" (2*8alphanum)) 1422 * 1423 * ; Single alphanumerics 1424 * ; "x" reserved for private use 1425 * singleton = DIGIT ; 0 - 9 1426 * / %x41-57 ; A - W 1427 * / %x59-5A ; Y - Z 1428 * / %x61-77 ; a - w 1429 * / %x79-7A ; y - z 1430 * 1431 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1432 * The parser below doesn't try to cope with extension or privateuse 1433 * that could be added but that's not interoperable anyway 1434 * 1435 * Returns 1 if correct 0 otherwise 1436 **/ 1437 int 1438 xmlCheckLanguageID(const xmlChar * lang) 1439 { 1440 const xmlChar *cur = lang, *nxt; 1441 1442 if (cur == NULL) 1443 return (0); 1444 if (((cur[0] == 'i') && (cur[1] == '-')) || 1445 ((cur[0] == 'I') && (cur[1] == '-')) || 1446 ((cur[0] == 'x') && (cur[1] == '-')) || 1447 ((cur[0] == 'X') && 
(cur[1] == '-'))) { 1448 /* 1449 * Still allow IANA code and user code which were coming 1450 * from the previous version of the XML-1.0 specification 1451 * it's deprecated but we should not fail 1452 */ 1453 cur += 2; 1454 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1455 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1456 cur++; 1457 return(cur[0] == 0); 1458 } 1459 nxt = cur; 1460 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1461 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1462 nxt++; 1463 if (nxt - cur >= 4) { 1464 /* 1465 * Reserved 1466 */ 1467 if ((nxt - cur > 8) || (nxt[0] != 0)) 1468 return(0); 1469 return(1); 1470 } 1471 if (nxt - cur < 2) 1472 return(0); 1473 /* we got an ISO 639 code */ 1474 if (nxt[0] == 0) 1475 return(1); 1476 if (nxt[0] != '-') 1477 return(0); 1478 1479 nxt++; 1480 cur = nxt; 1481 /* now we can have extlang or script or region or variant */ 1482 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1483 goto region_m49; 1484 1485 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1486 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1487 nxt++; 1488 if (nxt - cur == 4) 1489 goto script; 1490 if (nxt - cur == 2) 1491 goto region; 1492 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1493 goto variant; 1494 if (nxt - cur != 3) 1495 return(0); 1496 /* we parsed an extlang */ 1497 if (nxt[0] == 0) 1498 return(1); 1499 if (nxt[0] != '-') 1500 return(0); 1501 1502 nxt++; 1503 cur = nxt; 1504 /* now we can have script or region or variant */ 1505 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1506 goto region_m49; 1507 1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1510 nxt++; 1511 if (nxt - cur == 2) 1512 goto region; 1513 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1514 goto variant; 1515 if (nxt - cur != 4) 1516 return(0); 1517 /* we parsed a script */ 1518 script: 1519 if (nxt[0] == 0) 1520 return(1); 1521 if (nxt[0] != '-') 1522 return(0); 1523 1524 nxt++; 1525 cur = nxt; 1526 /* now we can have region or variant */ 1527 if ((nxt[0] >= '0') 
&& (nxt[0] <= '9')) 1528 goto region_m49; 1529 1530 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1531 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1532 nxt++; 1533 1534 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1535 goto variant; 1536 if (nxt - cur != 2) 1537 return(0); 1538 /* we parsed a region */ 1539 region: 1540 if (nxt[0] == 0) 1541 return(1); 1542 if (nxt[0] != '-') 1543 return(0); 1544 1545 nxt++; 1546 cur = nxt; 1547 /* now we can just have a variant */ 1548 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1549 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1550 nxt++; 1551 1552 if ((nxt - cur < 5) || (nxt - cur > 8)) 1553 return(0); 1554 1555 /* we parsed a variant */ 1556 variant: 1557 if (nxt[0] == 0) 1558 return(1); 1559 if (nxt[0] != '-') 1560 return(0); 1561 /* extensions and private use subtags not checked */ 1562 return (1); 1563 1564 region_m49: 1565 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1566 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1567 nxt += 3; 1568 goto region; 1569 } 1570 return(0); 1571 } 1572 1573 /************************************************************************ 1574 * * 1575 * Parser stacks related functions and macros * 1576 * * 1577 ************************************************************************/ 1578 1579 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1580 const xmlChar ** str); 1581 1582 #ifdef SAX2 1583 /** 1584 * nsPush: 1585 * @ctxt: an XML parser context 1586 * @prefix: the namespace prefix or NULL 1587 * @URL: the namespace name 1588 * 1589 * Pushes a new parser namespace on top of the ns stack 1590 * 1591 * Returns -1 in case of error, -2 if the namespace should be discarded 1592 * and the index in the stack otherwise. 
1593 */ 1594 static int 1595 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1596 { 1597 if (ctxt->options & XML_PARSE_NSCLEAN) { 1598 int i; 1599 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1600 if (ctxt->nsTab[i] == prefix) { 1601 /* in scope */ 1602 if (ctxt->nsTab[i + 1] == URL) 1603 return(-2); 1604 /* out of scope keep it */ 1605 break; 1606 } 1607 } 1608 } 1609 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1610 ctxt->nsMax = 10; 1611 ctxt->nsNr = 0; 1612 ctxt->nsTab = (const xmlChar **) 1613 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1614 if (ctxt->nsTab == NULL) { 1615 xmlErrMemory(ctxt, NULL); 1616 ctxt->nsMax = 0; 1617 return (-1); 1618 } 1619 } else if (ctxt->nsNr >= ctxt->nsMax) { 1620 const xmlChar ** tmp; 1621 ctxt->nsMax *= 2; 1622 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1623 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1624 if (tmp == NULL) { 1625 xmlErrMemory(ctxt, NULL); 1626 ctxt->nsMax /= 2; 1627 return (-1); 1628 } 1629 ctxt->nsTab = tmp; 1630 } 1631 ctxt->nsTab[ctxt->nsNr++] = prefix; 1632 ctxt->nsTab[ctxt->nsNr++] = URL; 1633 return (ctxt->nsNr); 1634 } 1635 /** 1636 * nsPop: 1637 * @ctxt: an XML parser context 1638 * @nr: the number to pop 1639 * 1640 * Pops the top @nr parser prefix/namespace from the ns stack 1641 * 1642 * Returns the number of namespaces removed 1643 */ 1644 static int 1645 nsPop(xmlParserCtxtPtr ctxt, int nr) 1646 { 1647 int i; 1648 1649 if (ctxt->nsTab == NULL) return(0); 1650 if (ctxt->nsNr < nr) { 1651 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1652 nr = ctxt->nsNr; 1653 } 1654 if (ctxt->nsNr <= 0) 1655 return (0); 1656 1657 for (i = 0;i < nr;i++) { 1658 ctxt->nsNr--; 1659 ctxt->nsTab[ctxt->nsNr] = NULL; 1660 } 1661 return(nr); 1662 } 1663 #endif 1664 1665 static int 1666 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1667 const xmlChar **atts; 1668 int *attallocs; 1669 int maxatts; 1670 1671 if (ctxt->atts == NULL) { 1672 maxatts = 55; /* allow for 
10 attrs by default */ 1673 atts = (const xmlChar **) 1674 xmlMalloc(maxatts * sizeof(xmlChar *)); 1675 if (atts == NULL) goto mem_error; 1676 ctxt->atts = atts; 1677 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1678 if (attallocs == NULL) goto mem_error; 1679 ctxt->attallocs = attallocs; 1680 ctxt->maxatts = maxatts; 1681 } else if (nr + 5 > ctxt->maxatts) { 1682 maxatts = (nr + 5) * 2; 1683 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1684 maxatts * sizeof(const xmlChar *)); 1685 if (atts == NULL) goto mem_error; 1686 ctxt->atts = atts; 1687 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1688 (maxatts / 5) * sizeof(int)); 1689 if (attallocs == NULL) goto mem_error; 1690 ctxt->attallocs = attallocs; 1691 ctxt->maxatts = maxatts; 1692 } 1693 return(ctxt->maxatts); 1694 mem_error: 1695 xmlErrMemory(ctxt, NULL); 1696 return(-1); 1697 } 1698 1699 /** 1700 * inputPush: 1701 * @ctxt: an XML parser context 1702 * @value: the parser input 1703 * 1704 * Pushes a new parser input on top of the input stack 1705 * 1706 * Returns -1 in case of error, the index in the stack otherwise 1707 */ 1708 int 1709 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1710 { 1711 if ((ctxt == NULL) || (value == NULL)) 1712 return(-1); 1713 if (ctxt->inputNr >= ctxt->inputMax) { 1714 ctxt->inputMax *= 2; 1715 ctxt->inputTab = 1716 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1717 ctxt->inputMax * 1718 sizeof(ctxt->inputTab[0])); 1719 if (ctxt->inputTab == NULL) { 1720 xmlErrMemory(ctxt, NULL); 1721 xmlFreeInputStream(value); 1722 ctxt->inputMax /= 2; 1723 value = NULL; 1724 return (-1); 1725 } 1726 } 1727 ctxt->inputTab[ctxt->inputNr] = value; 1728 ctxt->input = value; 1729 return (ctxt->inputNr++); 1730 } 1731 /** 1732 * inputPop: 1733 * @ctxt: an XML parser context 1734 * 1735 * Pops the top parser input from the input stack 1736 * 1737 * Returns the input just removed 1738 */ 1739 xmlParserInputPtr 1740 inputPop(xmlParserCtxtPtr ctxt) 1741 
{ 1742 xmlParserInputPtr ret; 1743 1744 if (ctxt == NULL) 1745 return(NULL); 1746 if (ctxt->inputNr <= 0) 1747 return (NULL); 1748 ctxt->inputNr--; 1749 if (ctxt->inputNr > 0) 1750 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1751 else 1752 ctxt->input = NULL; 1753 ret = ctxt->inputTab[ctxt->inputNr]; 1754 ctxt->inputTab[ctxt->inputNr] = NULL; 1755 return (ret); 1756 } 1757 /** 1758 * nodePush: 1759 * @ctxt: an XML parser context 1760 * @value: the element node 1761 * 1762 * Pushes a new element node on top of the node stack 1763 * 1764 * Returns -1 in case of error, the index in the stack otherwise 1765 */ 1766 int 1767 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1768 { 1769 if (ctxt == NULL) return(0); 1770 if (ctxt->nodeNr >= ctxt->nodeMax) { 1771 xmlNodePtr *tmp; 1772 1773 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1774 ctxt->nodeMax * 2 * 1775 sizeof(ctxt->nodeTab[0])); 1776 if (tmp == NULL) { 1777 xmlErrMemory(ctxt, NULL); 1778 return (-1); 1779 } 1780 ctxt->nodeTab = tmp; 1781 ctxt->nodeMax *= 2; 1782 } 1783 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1784 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1785 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1786 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1787 xmlParserMaxDepth); 1788 xmlHaltParser(ctxt); 1789 return(-1); 1790 } 1791 ctxt->nodeTab[ctxt->nodeNr] = value; 1792 ctxt->node = value; 1793 return (ctxt->nodeNr++); 1794 } 1795 1796 /** 1797 * nodePop: 1798 * @ctxt: an XML parser context 1799 * 1800 * Pops the top element node from the node stack 1801 * 1802 * Returns the node just removed 1803 */ 1804 xmlNodePtr 1805 nodePop(xmlParserCtxtPtr ctxt) 1806 { 1807 xmlNodePtr ret; 1808 1809 if (ctxt == NULL) return(NULL); 1810 if (ctxt->nodeNr <= 0) 1811 return (NULL); 1812 ctxt->nodeNr--; 1813 if (ctxt->nodeNr > 0) 1814 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1815 else 1816 ctxt->node = NULL; 1817 ret = ctxt->nodeTab[ctxt->nodeNr]; 1818 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1819 return (ret); 1820 } 1821 1822 #ifdef LIBXML_PUSH_ENABLED 1823 /** 1824 * nameNsPush: 1825 * @ctxt: an XML parser context 1826 * @value: the element name 1827 * @prefix: the element prefix 1828 * @URI: the element namespace name 1829 * 1830 * Pushes a new element name/prefix/URL on top of the name stack 1831 * 1832 * Returns -1 in case of error, the index in the stack otherwise 1833 */ 1834 static int 1835 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1836 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1837 { 1838 if (ctxt->nameNr >= ctxt->nameMax) { 1839 const xmlChar * *tmp; 1840 void **tmp2; 1841 ctxt->nameMax *= 2; 1842 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1843 ctxt->nameMax * 1844 sizeof(ctxt->nameTab[0])); 1845 if (tmp == NULL) { 1846 ctxt->nameMax /= 2; 1847 goto mem_error; 1848 } 1849 ctxt->nameTab = tmp; 1850 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1851 ctxt->nameMax * 3 * 1852 sizeof(ctxt->pushTab[0])); 1853 if (tmp2 == NULL) { 1854 ctxt->nameMax /= 2; 1855 goto mem_error; 1856 } 1857 ctxt->pushTab = tmp2; 1858 } 1859 ctxt->nameTab[ctxt->nameNr] = value; 1860 ctxt->name = value; 1861 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1862 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1863 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr; 1864 return (ctxt->nameNr++); 1865 mem_error: 1866 xmlErrMemory(ctxt, NULL); 1867 return (-1); 1868 } 1869 /** 1870 * nameNsPop: 1871 * @ctxt: an XML parser context 1872 * 1873 * Pops the top element/prefix/URI name from the name stack 1874 * 1875 * Returns the name just removed 1876 */ 1877 static const xmlChar * 1878 nameNsPop(xmlParserCtxtPtr ctxt) 1879 { 1880 const xmlChar *ret; 1881 1882 if (ctxt->nameNr <= 0) 1883 return (NULL); 1884 ctxt->nameNr--; 1885 if (ctxt->nameNr > 0) 1886 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1887 else 1888 ctxt->name = NULL; 1889 ret = ctxt->nameTab[ctxt->nameNr]; 
1890 ctxt->nameTab[ctxt->nameNr] = NULL; 1891 return (ret); 1892 } 1893 #endif /* LIBXML_PUSH_ENABLED */ 1894 1895 /** 1896 * namePush: 1897 * @ctxt: an XML parser context 1898 * @value: the element name 1899 * 1900 * Pushes a new element name on top of the name stack 1901 * 1902 * Returns -1 in case of error, the index in the stack otherwise 1903 */ 1904 int 1905 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1906 { 1907 if (ctxt == NULL) return (-1); 1908 1909 if (ctxt->nameNr >= ctxt->nameMax) { 1910 const xmlChar * *tmp; 1911 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1912 ctxt->nameMax * 2 * 1913 sizeof(ctxt->nameTab[0])); 1914 if (tmp == NULL) { 1915 goto mem_error; 1916 } 1917 ctxt->nameTab = tmp; 1918 ctxt->nameMax *= 2; 1919 } 1920 ctxt->nameTab[ctxt->nameNr] = value; 1921 ctxt->name = value; 1922 return (ctxt->nameNr++); 1923 mem_error: 1924 xmlErrMemory(ctxt, NULL); 1925 return (-1); 1926 } 1927 /** 1928 * namePop: 1929 * @ctxt: an XML parser context 1930 * 1931 * Pops the top element name from the name stack 1932 * 1933 * Returns the name just removed 1934 */ 1935 const xmlChar * 1936 namePop(xmlParserCtxtPtr ctxt) 1937 { 1938 const xmlChar *ret; 1939 1940 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1941 return (NULL); 1942 ctxt->nameNr--; 1943 if (ctxt->nameNr > 0) 1944 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1945 else 1946 ctxt->name = NULL; 1947 ret = ctxt->nameTab[ctxt->nameNr]; 1948 ctxt->nameTab[ctxt->nameNr] = NULL; 1949 return (ret); 1950 } 1951 1952 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1953 if (ctxt->spaceNr >= ctxt->spaceMax) { 1954 int *tmp; 1955 1956 ctxt->spaceMax *= 2; 1957 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1958 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1959 if (tmp == NULL) { 1960 xmlErrMemory(ctxt, NULL); 1961 ctxt->spaceMax /=2; 1962 return(-1); 1963 } 1964 ctxt->spaceTab = tmp; 1965 } 1966 ctxt->spaceTab[ctxt->spaceNr] = val; 1967 ctxt->space = 
&ctxt->spaceTab[ctxt->spaceNr]; 1968 return(ctxt->spaceNr++); 1969 } 1970 1971 static int spacePop(xmlParserCtxtPtr ctxt) { 1972 int ret; 1973 if (ctxt->spaceNr <= 0) return(0); 1974 ctxt->spaceNr--; 1975 if (ctxt->spaceNr > 0) 1976 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1977 else 1978 ctxt->space = &ctxt->spaceTab[0]; 1979 ret = ctxt->spaceTab[ctxt->spaceNr]; 1980 ctxt->spaceTab[ctxt->spaceNr] = -1; 1981 return(ret); 1982 } 1983 1984 /* 1985 * Macros for accessing the content. Those should be used only by the parser, 1986 * and not exported. 1987 * 1988 * Dirty macros, i.e. one often need to make assumption on the context to 1989 * use them 1990 * 1991 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1992 * To be used with extreme caution since operations consuming 1993 * characters may move the input buffer to a different location ! 1994 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1995 * This should be used internally by the parser 1996 * only to compare to ASCII values otherwise it would break when 1997 * running with UTF-8 encoding. 1998 * RAW same as CUR but in the input buffer, bypass any token 1999 * extraction that may have been done 2000 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 2001 * to compare on ASCII based substring. 2002 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 2003 * strings without newlines within the parser. 2004 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2005 * defined char within the parser. 2006 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2007 * 2008 * NEXT Skip to the next character, this does the proper decoding 2009 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2010 * NEXTL(l) Skip the current unicode character of l xmlChars long. 
2011 * CUR_CHAR(l) returns the current unicode character (int), set l 2012 * to the number of xmlChars used for the encoding [0-5]. 2013 * CUR_SCHAR same but operate on a string instead of the context 2014 * COPY_BUF copy the current unicode char to the target buffer, increment 2015 * the index 2016 * GROW, SHRINK handling of input buffers 2017 */ 2018 2019 #define RAW (*ctxt->input->cur) 2020 #define CUR (*ctxt->input->cur) 2021 #define NXT(val) ctxt->input->cur[(val)] 2022 #define CUR_PTR ctxt->input->cur 2023 #define BASE_PTR ctxt->input->base 2024 2025 #define CMP4( s, c1, c2, c3, c4 ) \ 2026 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2027 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2028 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 2029 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2030 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2031 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2032 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2033 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2034 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2035 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2036 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2037 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2038 ((unsigned char *) s)[ 8 ] == c9 ) 2039 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2040 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2041 ((unsigned char *) s)[ 9 ] == c10 ) 2042 2043 #define SKIP(val) do { \ 2044 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2045 if (*ctxt->input->cur == 0) \ 2046 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2047 } while (0) 2048 2049 #define SKIPL(val) do { \ 2050 int skipl; \ 2051 for(skipl=0; skipl<val; skipl++) { \ 2052 if (*(ctxt->input->cur) == '\n') { \ 2053 ctxt->input->line++; ctxt->input->col = 1; \ 
2054 } else ctxt->input->col++; \ 2055 ctxt->nbChars++; \ 2056 ctxt->input->cur++; \ 2057 } \ 2058 if (*ctxt->input->cur == 0) \ 2059 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2060 } while (0) 2061 2062 #define SHRINK if ((ctxt->progressive == 0) && \ 2063 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2064 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2065 xmlSHRINK (ctxt); 2066 2067 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2068 xmlParserInputShrink(ctxt->input); 2069 if (*ctxt->input->cur == 0) 2070 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2071 } 2072 2073 #define GROW if ((ctxt->progressive == 0) && \ 2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2075 xmlGROW (ctxt); 2076 2077 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur; 2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base; 2080 2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) || 2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) && 2083 ((ctxt->input->buf) && 2084 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) && 2085 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2086 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2087 xmlHaltParser(ctxt); 2088 return; 2089 } 2090 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2091 if ((ctxt->input->cur > ctxt->input->end) || 2092 (ctxt->input->cur < ctxt->input->base)) { 2093 xmlHaltParser(ctxt); 2094 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); 2095 return; 2096 } 2097 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0)) 2098 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2099 } 2100 2101 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2102 2103 #define NEXT xmlNextChar(ctxt) 2104 2105 #define NEXT1 { \ 2106 ctxt->input->col++; \ 2107 ctxt->input->cur++; \ 2108 ctxt->nbChars++; \ 2109 if (*ctxt->input->cur == 0) \ 2110 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2111 } 

/*
 * NEXTL(l): advance the cursor by l bytes (the encoded length of the
 * current character). A '\n' under the cursor bumps the line counter and
 * resets the column to 1; anything else advances the column by one.
 * nbChars is not updated and no refill is attempted.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += l; \
  } while (0)

/* Decode the character at the cursor / at s; its byte length goes to l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single byte, otherwise
 * it goes through xmlCopyCharMultiByte.
 * NOTE(review): expands to an unbraced if/else, so it is only safe as a
 * complete statement.
 */
#define COPY_BUF(l,b,i,v) \
    if (l == 1) b[i++] = (xmlChar) v; \
    else i += xmlCopyCharMultiByte(&b[i],v)

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
        const xmlChar *cur;
        /*
         * if we are in the document content, go really fast
         */
        cur = ctxt->input->cur;
        while (IS_BLANK_CH(*cur)) {
            if (*cur == '\n') {
                ctxt->input->line++; ctxt->input->col = 1;
            } else {
                ctxt->input->col++;
            }
            cur++;
            res++;
            if (*cur == 0) {
                /*
                 * Publish the local cursor before growing and reload it
                 * afterwards, since growing works on ctxt->input.
                 */
                ctxt->input->cur = cur;
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
        }
        ctxt->input->cur = cur;
    } else {
        /* PE references are followed only in an external or nested input */
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));

        while (1) {
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
                NEXT;
            } else if (CUR == '%') {
                /*
                 * Need to handle support of entities branching here
                 */
                if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
                    break;
                xmlParsePEReference(ctxt);
            } else if (CUR == 0) {
                /* end of a nested input: return to the enclosing one */
                if (ctxt->inputNr <= 1)
                    break;
                xmlPopInput(ctxt);
            } else {
                break;
            }

            /*
             * Also increase the counter when entering or exiting a PERef.
             * The spec says: "When a parameter-entity reference is recognized
             * in the DTD and included, its replacement text MUST be enlarged
             * by the attachment of one leading and one following space (#x20)
             * character."
             */
            res++;
        }
    }
    return(res);
}

/************************************************************************
 *                                                                      *
 *              Commodity functions to handle entities                  *
 *                                                                      *
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
 *
 * Nothing is popped when @ctxt is NULL or when only one input is left;
 * 0 is returned in that case. Popping while outside the DTD subset
 * (inSubset == 0) and not at EOF raises an internal error first.
 *
 * Returns the current xmlChar in the parser context
 */
xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt) {
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "Popping input %d\n", ctxt->inputNr);
    /* inputNr > 1 always holds here because of the guard above */
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
        (ctxt->instate != XML_PARSER_EOF))
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "Unfinished entity outside the DTD");
    xmlFreeInputStream(inputPop(ctxt));
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    return(CUR);
}

/**
 * xmlPushInput:
 * @ctxt:  an XML parser context
 * @input:  an XML parser input fragment (entity, XML fragment ...).
 *
 * xmlPushInput: switch to a new input stream which is stacked on top
 *               of the previous one(s).
2238 * Returns -1 in case of error or the index in the input stack 2239 */ 2240 int 2241 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2242 int ret; 2243 if (input == NULL) return(-1); 2244 2245 if (xmlParserDebugEntities) { 2246 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2247 xmlGenericError(xmlGenericErrorContext, 2248 "%s(%d): ", ctxt->input->filename, 2249 ctxt->input->line); 2250 xmlGenericError(xmlGenericErrorContext, 2251 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2252 } 2253 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2254 (ctxt->inputNr > 1024)) { 2255 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2256 while (ctxt->inputNr > 1) 2257 xmlFreeInputStream(inputPop(ctxt)); 2258 return(-1); 2259 } 2260 ret = inputPush(ctxt, input); 2261 if (ctxt->instate == XML_PARSER_EOF) 2262 return(-1); 2263 GROW; 2264 return(ret); 2265 } 2266 2267 /** 2268 * xmlParseCharRef: 2269 * @ctxt: an XML parser context 2270 * 2271 * parse Reference declarations 2272 * 2273 * [66] CharRef ::= '&#' [0-9]+ ';' | 2274 * '&#x' [0-9a-fA-F]+ ';' 2275 * 2276 * [ WFC: Legal Character ] 2277 * Characters referred to using character references must match the 2278 * production for Char. 
2279 * 2280 * Returns the value parsed (as an int), 0 in case of error 2281 */ 2282 int 2283 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2284 unsigned int val = 0; 2285 int count = 0; 2286 unsigned int outofrange = 0; 2287 2288 /* 2289 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2290 */ 2291 if ((RAW == '&') && (NXT(1) == '#') && 2292 (NXT(2) == 'x')) { 2293 SKIP(3); 2294 GROW; 2295 while (RAW != ';') { /* loop blocked by count */ 2296 if (count++ > 20) { 2297 count = 0; 2298 GROW; 2299 if (ctxt->instate == XML_PARSER_EOF) 2300 return(0); 2301 } 2302 if ((RAW >= '0') && (RAW <= '9')) 2303 val = val * 16 + (CUR - '0'); 2304 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2305 val = val * 16 + (CUR - 'a') + 10; 2306 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2307 val = val * 16 + (CUR - 'A') + 10; 2308 else { 2309 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2310 val = 0; 2311 break; 2312 } 2313 if (val > 0x10FFFF) 2314 outofrange = val; 2315 2316 NEXT; 2317 count++; 2318 } 2319 if (RAW == ';') { 2320 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2321 ctxt->input->col++; 2322 ctxt->nbChars ++; 2323 ctxt->input->cur++; 2324 } 2325 } else if ((RAW == '&') && (NXT(1) == '#')) { 2326 SKIP(2); 2327 GROW; 2328 while (RAW != ';') { /* loop blocked by count */ 2329 if (count++ > 20) { 2330 count = 0; 2331 GROW; 2332 if (ctxt->instate == XML_PARSER_EOF) 2333 return(0); 2334 } 2335 if ((RAW >= '0') && (RAW <= '9')) 2336 val = val * 10 + (CUR - '0'); 2337 else { 2338 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2339 val = 0; 2340 break; 2341 } 2342 if (val > 0x10FFFF) 2343 outofrange = val; 2344 2345 NEXT; 2346 count++; 2347 } 2348 if (RAW == ';') { 2349 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2350 ctxt->input->col++; 2351 ctxt->nbChars ++; 2352 ctxt->input->cur++; 2353 } 2354 } else { 2355 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2356 } 2357 2358 /* 2359 * [ WFC: 
Legal Character ] 2360 * Characters referred to using character references must match the 2361 * production for Char. 2362 */ 2363 if ((IS_CHAR(val) && (outofrange == 0))) { 2364 return(val); 2365 } else { 2366 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2367 "xmlParseCharRef: invalid xmlChar value %d\n", 2368 val); 2369 } 2370 return(0); 2371 } 2372 2373 /** 2374 * xmlParseStringCharRef: 2375 * @ctxt: an XML parser context 2376 * @str: a pointer to an index in the string 2377 * 2378 * parse Reference declarations, variant parsing from a string rather 2379 * than an an input flow. 2380 * 2381 * [66] CharRef ::= '&#' [0-9]+ ';' | 2382 * '&#x' [0-9a-fA-F]+ ';' 2383 * 2384 * [ WFC: Legal Character ] 2385 * Characters referred to using character references must match the 2386 * production for Char. 2387 * 2388 * Returns the value parsed (as an int), 0 in case of error, str will be 2389 * updated to the current value of the index 2390 */ 2391 static int 2392 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2393 const xmlChar *ptr; 2394 xmlChar cur; 2395 unsigned int val = 0; 2396 unsigned int outofrange = 0; 2397 2398 if ((str == NULL) || (*str == NULL)) return(0); 2399 ptr = *str; 2400 cur = *ptr; 2401 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2402 ptr += 3; 2403 cur = *ptr; 2404 while (cur != ';') { /* Non input consuming loop */ 2405 if ((cur >= '0') && (cur <= '9')) 2406 val = val * 16 + (cur - '0'); 2407 else if ((cur >= 'a') && (cur <= 'f')) 2408 val = val * 16 + (cur - 'a') + 10; 2409 else if ((cur >= 'A') && (cur <= 'F')) 2410 val = val * 16 + (cur - 'A') + 10; 2411 else { 2412 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2413 val = 0; 2414 break; 2415 } 2416 if (val > 0x10FFFF) 2417 outofrange = val; 2418 2419 ptr++; 2420 cur = *ptr; 2421 } 2422 if (cur == ';') 2423 ptr++; 2424 } else if ((cur == '&') && (ptr[1] == '#')){ 2425 ptr += 2; 2426 cur = *ptr; 2427 while (cur != ';') { /* Non input consuming loops */ 2428 
if ((cur >= '0') && (cur <= '9')) 2429 val = val * 10 + (cur - '0'); 2430 else { 2431 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2432 val = 0; 2433 break; 2434 } 2435 if (val > 0x10FFFF) 2436 outofrange = val; 2437 2438 ptr++; 2439 cur = *ptr; 2440 } 2441 if (cur == ';') 2442 ptr++; 2443 } else { 2444 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2445 return(0); 2446 } 2447 *str = ptr; 2448 2449 /* 2450 * [ WFC: Legal Character ] 2451 * Characters referred to using character references must match the 2452 * production for Char. 2453 */ 2454 if ((IS_CHAR(val) && (outofrange == 0))) { 2455 return(val); 2456 } else { 2457 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2458 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2459 val); 2460 } 2461 return(0); 2462 } 2463 2464 /** 2465 * xmlParserHandlePEReference: 2466 * @ctxt: the parser context 2467 * 2468 * [69] PEReference ::= '%' Name ';' 2469 * 2470 * [ WFC: No Recursion ] 2471 * A parsed entity must not contain a recursive 2472 * reference to itself, either directly or indirectly. 2473 * 2474 * [ WFC: Entity Declared ] 2475 * In a document without any DTD, a document with only an internal DTD 2476 * subset which contains no parameter entity references, or a document 2477 * with "standalone='yes'", ... ... The declaration of a parameter 2478 * entity must precede any reference to it... 2479 * 2480 * [ VC: Entity Declared ] 2481 * In a document with an external subset or external parameter entities 2482 * with "standalone='no'", ... ... The declaration of a parameter entity 2483 * must precede any reference to it... 2484 * 2485 * [ WFC: In DTD ] 2486 * Parameter-entity references may only appear in the DTD. 2487 * NOTE: misleading but this is handled. 2488 * 2489 * A PEReference may have been detected in the current input stream 2490 * the handling is done accordingly to 2491 * http://www.w3.org/TR/REC-xml#entproc 2492 * i.e. 
2493 * - Included in literal in entity values 2494 * - Included as Parameter Entity reference within DTDs 2495 */ 2496 void 2497 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2498 switch(ctxt->instate) { 2499 case XML_PARSER_CDATA_SECTION: 2500 return; 2501 case XML_PARSER_COMMENT: 2502 return; 2503 case XML_PARSER_START_TAG: 2504 return; 2505 case XML_PARSER_END_TAG: 2506 return; 2507 case XML_PARSER_EOF: 2508 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2509 return; 2510 case XML_PARSER_PROLOG: 2511 case XML_PARSER_START: 2512 case XML_PARSER_MISC: 2513 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2514 return; 2515 case XML_PARSER_ENTITY_DECL: 2516 case XML_PARSER_CONTENT: 2517 case XML_PARSER_ATTRIBUTE_VALUE: 2518 case XML_PARSER_PI: 2519 case XML_PARSER_SYSTEM_LITERAL: 2520 case XML_PARSER_PUBLIC_LITERAL: 2521 /* we just ignore it there */ 2522 return; 2523 case XML_PARSER_EPILOG: 2524 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2525 return; 2526 case XML_PARSER_ENTITY_VALUE: 2527 /* 2528 * NOTE: in the case of entity values, we don't do the 2529 * substitution here since we need the literal 2530 * entity value to be able to save the internal 2531 * subset of the document. 2532 * This will be handled by xmlStringDecodeEntities 2533 */ 2534 return; 2535 case XML_PARSER_DTD: 2536 /* 2537 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2538 * In the internal DTD subset, parameter-entity references 2539 * can occur only where markup declarations can occur, not 2540 * within markup declarations. 2541 * In that case this is handled in xmlParseMarkupDecl 2542 */ 2543 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2544 return; 2545 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2546 return; 2547 break; 2548 case XML_PARSER_IGNORE: 2549 return; 2550 } 2551 2552 xmlParsePEReference(ctxt); 2553 } 2554 2555 /* 2556 * Macro used to grow the current buffer. 
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the old one plus n. A wrapped size computation
 * and a failed reallocation both jump to mem_error, leaving the old
 * buffer pointer untouched.
 */
#define growBuffer(buffer, n) { \
    xmlChar *tmp; \
    size_t new_size = buffer##_size * 2 + n; \
    if (new_size < buffer##_size) goto mem_error; \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
    if (tmp == NULL) goto mem_error; \
    buffer = tmp; \
    buffer##_size = new_size; \
}

/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Character references are always expanded; general entity references
 * only when @what has XML_SUBSTITUTE_REF and parameter entity references
 * only when it has XML_SUBSTITUTE_PEREF. Replacement text is decoded
 * recursively, with ctxt->depth limited to 40 (1024 with XML_PARSE_HUGE).
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments, on
 *      allocation failure and on any decoding error.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /* recursion guard against entity loops */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     *
     * Every branch below grows the buffer as soon as fewer than
     * XML_PARSER_BUFFER_SIZE bytes are left, so the next write always
     * has room.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        if (c == 0) break;
        /* NOTE(review): str[1] is read without comparing against last */
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: str is advanced by the callee */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            COPY_BUF(0,buffer,nbchars,val);
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: only content[0] is copied */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* decode the replacement text one level deeper */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL)
                    goto int_error;

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* entity without content: the reference is copied as is */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEreferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                  "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }
                /*
                 * If the content is still NULL here,
                 * xmlStringDecodeEntities returns NULL and the whole
                 * decoding fails through int_error.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL)
                    goto int_error;
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* ordinary character: copy it and step over its l bytes */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* mem_error reports the failure, then shares the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 * Same as xmlStringLenDecodeEntities with the length taken from
 * xmlStrlen(@str).
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL if @ctxt or @str is NULL.
 */
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
                        xmlChar end, xmlChar  end2, xmlChar end3) {
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
           end, end2, end3));
}

/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/

/**
 * areBlanks:
 * @ctxt:  an XML parser context
 * @str:  a xmlChar *
 * @len:  the size of @str
 * @blank_chars: we know the chars are blanks
 *
 * Is this a sequence of blank chars that one can ignore ?
 *
 * Returns 1 if ignorable 0 otherwise.
2803 */ 2804 2805 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2806 int blank_chars) { 2807 int i, ret; 2808 xmlNodePtr lastChild; 2809 2810 /* 2811 * Don't spend time trying to differentiate them, the same callback is 2812 * used ! 2813 */ 2814 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2815 return(0); 2816 2817 /* 2818 * Check for xml:space value. 2819 */ 2820 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2821 (*(ctxt->space) == -2)) 2822 return(0); 2823 2824 /* 2825 * Check that the string is made of blanks 2826 */ 2827 if (blank_chars == 0) { 2828 for (i = 0;i < len;i++) 2829 if (!(IS_BLANK_CH(str[i]))) return(0); 2830 } 2831 2832 /* 2833 * Look if the element is mixed content in the DTD if available 2834 */ 2835 if (ctxt->node == NULL) return(0); 2836 if (ctxt->myDoc != NULL) { 2837 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2838 if (ret == 0) return(1); 2839 if (ret == 1) return(0); 2840 } 2841 2842 /* 2843 * Otherwise, heuristic :-\ 2844 */ 2845 if ((RAW != '<') && (RAW != 0xD)) return(0); 2846 if ((ctxt->node->children == NULL) && 2847 (RAW == '<') && (NXT(1) == '/')) return(0); 2848 2849 lastChild = xmlGetLastChild(ctxt->node); 2850 if (lastChild == NULL) { 2851 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2852 (ctxt->node->content != NULL)) return(0); 2853 } else if (xmlNodeIsText(lastChild)) 2854 return(0); 2855 else if ((ctxt->node->children != NULL) && 2856 (xmlNodeIsText(ctxt->node->children))) 2857 return(0); 2858 return(1); 2859 } 2860 2861 /************************************************************************ 2862 * * 2863 * Extra stuff for namespace support * 2864 * Relates to http://www.w3.org/TR/WD-xml-names * 2865 * * 2866 ************************************************************************/ 2867 2868 /** 2869 * xmlSplitQName: 2870 * @ctxt: an XML parser context 2871 * @name: an XML parser context 2872 * @prefix: a xmlChar ** 2873 * 2874 * parse an UTF8 encoded XML 
 * qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * The name is returned whole, with *@prefix left NULL, when it starts
 * with ':', ends with ':', contains no ':' at all, or (unless
 * XML_XML_NAMESPACE is defined) starts with "xml:". Both returned
 * strings are freshly allocated.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. NULL is returned if @prefix or @name is
 *   NULL or if an allocation fails.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    /* stack buffer for short names, heap `buffer` for longer ones */
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well-formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /* first part: collect everything up to the first ':' or the end */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                                max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* cur already points past the ':' here: the name ends with ':' */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        /* hand the heap buffer over and reset state for the second part */
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* what was collected so far is the prefix; now the local part */
        c = *cur;
        *prefix = ret;
        /*
         * NOTE(review): this looks unreachable, a name ending with ':'
         * already returned above.
         */
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* reported only: the split goes on regardless */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                /* NOTE(review): *prefix stays set on this NULL return */
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                    max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}

/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/

/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/
#ifdef DEBUG
/* call counters, compiled in only when DEBUG is defined */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif

/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in the Revision 5 of XML-1.0
 * They correspond to the modified production [4] and the new production [4a]
 * changes in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
3067 * We still keep compatibility to pre-revision5 parsing semantic if the 3068 * new XML_PARSE_OLD10 option is given to the parser. 3069 */ 3070 static int 3071 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3072 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3073 /* 3074 * Use the new checks of production [4] [4a] amd [5] of the 3075 * Update 5 of XML-1.0 3076 */ 3077 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3078 (((c >= 'a') && (c <= 'z')) || 3079 ((c >= 'A') && (c <= 'Z')) || 3080 (c == '_') || (c == ':') || 3081 ((c >= 0xC0) && (c <= 0xD6)) || 3082 ((c >= 0xD8) && (c <= 0xF6)) || 3083 ((c >= 0xF8) && (c <= 0x2FF)) || 3084 ((c >= 0x370) && (c <= 0x37D)) || 3085 ((c >= 0x37F) && (c <= 0x1FFF)) || 3086 ((c >= 0x200C) && (c <= 0x200D)) || 3087 ((c >= 0x2070) && (c <= 0x218F)) || 3088 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3089 ((c >= 0x3001) && (c <= 0xD7FF)) || 3090 ((c >= 0xF900) && (c <= 0xFDCF)) || 3091 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3092 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3093 return(1); 3094 } else { 3095 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3096 return(1); 3097 } 3098 return(0); 3099 } 3100 3101 static int 3102 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3103 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3104 /* 3105 * Use the new checks of production [4] [4a] amd [5] of the 3106 * Update 5 of XML-1.0 3107 */ 3108 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3109 (((c >= 'a') && (c <= 'z')) || 3110 ((c >= 'A') && (c <= 'Z')) || 3111 ((c >= '0') && (c <= '9')) || /* !start */ 3112 (c == '_') || (c == ':') || 3113 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3114 ((c >= 0xC0) && (c <= 0xD6)) || 3115 ((c >= 0xD8) && (c <= 0xF6)) || 3116 ((c >= 0xF8) && (c <= 0x2FF)) || 3117 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3118 ((c >= 0x370) && (c <= 0x37D)) || 3119 ((c >= 0x37F) && (c <= 0x1FFF)) || 3120 ((c >= 0x200C) && (c <= 0x200D)) || 3121 ((c >= 0x203F) && (c <= 0x2040)) || /* !start 
*/
             ((c >= 0x2070) && (c <= 0x218F)) ||
             ((c >= 0x2C00) && (c <= 0x2FEF)) ||
             ((c >= 0x3001) && (c <= 0xD7FF)) ||
             ((c >= 0xF900) && (c <= 0xFDCF)) ||
             ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
             ((c >= 0x10000) && (c <= 0xEFFFF))))
            return(1);
    } else {
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
            (c == '.') || (c == '-') ||
            (c == '_') || (c == ':') ||
            (IS_COMBINING(c)) ||
            (IS_EXTENDER(c)))
            return(1);
    }
    return(0);
}

static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
          int *len, int *alloc, int normalize);

/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * Slow path of xmlParseName(): scans a Name one decoded character at a
 * time starting at the current input position. Which character classes
 * are accepted depends on XML_PARSE_OLD10 in ctxt->options.
 *
 * Returns the result of xmlDictLookup() on the consumed bytes, or NULL
 * when the first character cannot start a name, when the parser is in
 * the XML_PARSER_EOF state, when the name is longer than
 * XML_MAX_NAME_LENGTH without XML_PARSE_HUGE, or when fewer than len
 * bytes precede the cursor in the input buffer.
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far, l: width of current char */
    int c;
    int count = 0;      /* characters seen since the last GROW */

#ifdef DEBUG
    nbParseNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return(NULL);
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':') ||
               ((c >= 0xC0) && (c <= 0xD6)) ||
               ((c >= 0xD8) && (c <= 0xF6)) ||
               ((c >= 0xF8) && (c <= 0x2FF)) ||
               ((c >= 0x370) && (c <= 0x37D)) ||
               ((c >= 0x37F) && (c <= 0x1FFF)) ||
               ((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
               ((c >= 0x10000) && (c <= 0xEFFFF))))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        /* ranges tagged "!start" are allowed here but not as first char */
        while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
               (((c >= 'a') && (c <= 'z')) ||
                ((c >= 'A') && (c <= 'Z')) ||
                ((c >= '0') && (c <= '9')) || /* !start */
                (c == '_') || (c == ':') ||
                (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
                ((c >= 0xC0) && (c <= 0xD6)) ||
                ((c >= 0xD8) && (c <= 0xF6)) ||
                ((c >= 0xF8) && (c <= 0x2FF)) ||
                ((c >= 0x300) && (c <= 0x36F)) || /* !start */
                ((c >= 0x370) && (c <= 0x37D)) ||
                ((c >= 0x37F) && (c <= 0x1FFF)) ||
                ((c >= 0x200C) && (c <= 0x200D)) ||
                ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
                ((c >= 0x2070) && (c <= 0x218F)) ||
                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
                ((c >= 0x3001) && (c <= 0xD7FF)) ||
                ((c >= 0xF900) && (c <= 0xFDCF)) ||
                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
                ((c >= 0x10000) && (c <= 0xEFFFF))
                )) {
            if (count++ > XML_PARSER_CHUNK_SIZE) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    } else {
        /* legacy character classes, selected by XML_PARSE_OLD10 */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!IS_LETTER(c) && (c != '_') &&
             (c != ':'))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);

        while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
               ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                (c == '.') || (c == '-') ||
                (c == '_') || (c == ':') ||
                (IS_COMBINING(c)) ||
                (IS_EXTENDER(c)))) {
            if (count++ > XML_PARSER_CHUNK_SIZE) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    }
    /* the length limit is only enforced once the whole name was scanned */
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
        return(NULL);
    }
    if (ctxt->input->cur - ctxt->input->base < len) {
        /*
         * There were a couple of bugs where PERefs lead to a change
         * of the buffer. Check the buffer size to avoid passing an invalid
         * pointer to xmlDictLookup.
         */
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "unexpected change of input buffer");
        return (NULL);
    }
    /*
     * NOTE(review): when the cursor sits on '\n' right after a '\r' the
     * name is taken one byte further back; presumably this compensates
     * for how the cursor macros treat a CR/LF pair - confirm against
     * CUR_CHAR/NEXTL.
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}

/**
 * xmlParseName:
 * @ctxt:  an XML parser context
 *
 * parse an XML name.
 *
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
 *                  CombiningChar | Extender
 *
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 *
 * [6] Names ::= Name (#x20 Name)*
 *
 * A fast path handles names made only of ASCII letters, digits,
 * '_', '-', ':' and '.' that are followed by another non-NUL ASCII
 * byte; everything else is delegated to xmlParseNameComplex().
 *
 * Returns the Name parsed or NULL
 */

const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt) {
    const xmlChar *in;
    const xmlChar *ret;
    int count = 0;

    GROW;

#ifdef DEBUG
    nbParseName++;
#endif

    /*
     * Accelerator for simple ASCII names
     */
    in = ctxt->input->cur;
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
        ((*in >= 0x41) && (*in <= 0x5A)) ||
        (*in == '_') || (*in == ':')) {
        in++;
        while (((*in >= 0x61) && (*in <= 0x7A)) ||
               ((*in >= 0x41) && (*in <= 0x5A)) ||
               ((*in >= 0x30) && (*in <= 0x39)) ||
               (*in == '_') || (*in == '-') ||
               (*in == ':') || (*in == '.'))
            in++;
        /* only trust the fast path when an ASCII byte terminates the run */
        if ((*in > 0) && (*in < 0x80)) {
            count = in - ctxt->input->cur;
            if ((count > XML_MAX_NAME_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
                return(NULL);
            }
            ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
            ctxt->input->cur = in;
            ctxt->nbChars += count;
            ctxt->input->col += count;
            if (ret == NULL)
                xmlErrMemory(ctxt, NULL);
            return(ret);
        }
    }
    /* accelerator for special cases */
    return(xmlParseNameComplex(ctxt));
}

static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far, l: width of current char */
    int c;
    int count = 0;      /* characters seen since the last GROW */
    size_t startPosition = 0;   /* offset of the name from BASE_PTR */

#ifdef DEBUG
    nbParseNCNameComplex++;
#endif

    /*
     * Handler for more complex cases: slow path of xmlParseNCName().
     * Scans an NCName (a Name without ':') character by character and
     * returns the xmlDictLookup() result for it, or NULL on an invalid
     * first character, on XML_PARSER_EOF, or when the name exceeds
     * XML_MAX_NAME_LENGTH without XML_PARSE_HUGE.
     * The start is remembered as an offset from BASE_PTR rather than as
     * a pointer, and that offset is what the final lookup uses.
     */
    GROW;
    startPosition = CUR_PTR - BASE_PTR;
    c = CUR_CHAR(l);
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
        (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
        return(NULL);
    }

    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
           (xmlIsNameChar(ctxt, c) && (c != ':'))) {
        if (count++ > XML_PARSER_CHUNK_SIZE) {
            /* the length limit is checked once per chunk, not per char */
            if ((len > XML_MAX_NAME_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
                return(NULL);
            }
            count = 0;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        if (c == 0) {
            count = 0;
            /*
             * when shrinking to extend the buffer we really need to preserve
             * the part of the name we already parsed. Hence rolling back
             * by current length.
             */
            ctxt->input->cur -= l;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            ctxt->input->cur += l;
            c = CUR_CHAR(l);
        }
    }
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
        return(NULL);
    }
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
}

/**
 * xmlParseNCName:
 * @ctxt:  an XML parser context
 *
 * parse an XML name.
 *
 * [4NS] NCNameChar ::= Letter | Digit | '.'
| '-' | '_' | 3395 * CombiningChar | Extender 3396 * 3397 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3398 * 3399 * Returns the Name parsed or NULL 3400 */ 3401 3402 static const xmlChar * 3403 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3404 const xmlChar *in, *e; 3405 const xmlChar *ret; 3406 int count = 0; 3407 3408 #ifdef DEBUG 3409 nbParseNCName++; 3410 #endif 3411 3412 /* 3413 * Accelerator for simple ASCII names 3414 */ 3415 in = ctxt->input->cur; 3416 e = ctxt->input->end; 3417 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3418 ((*in >= 0x41) && (*in <= 0x5A)) || 3419 (*in == '_')) && (in < e)) { 3420 in++; 3421 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3422 ((*in >= 0x41) && (*in <= 0x5A)) || 3423 ((*in >= 0x30) && (*in <= 0x39)) || 3424 (*in == '_') || (*in == '-') || 3425 (*in == '.')) && (in < e)) 3426 in++; 3427 if (in >= e) 3428 goto complex; 3429 if ((*in > 0) && (*in < 0x80)) { 3430 count = in - ctxt->input->cur; 3431 if ((count > XML_MAX_NAME_LENGTH) && 3432 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3433 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3434 return(NULL); 3435 } 3436 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3437 ctxt->input->cur = in; 3438 ctxt->nbChars += count; 3439 ctxt->input->col += count; 3440 if (ret == NULL) { 3441 xmlErrMemory(ctxt, NULL); 3442 } 3443 return(ret); 3444 } 3445 } 3446 complex: 3447 return(xmlParseNCNameComplex(ctxt)); 3448 } 3449 3450 /** 3451 * xmlParseNameAndCompare: 3452 * @ctxt: an XML parser context 3453 * 3454 * parse an XML name and compares for match 3455 * (specialized for endtag parsing) 3456 * 3457 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3458 * and the name for mismatch 3459 */ 3460 3461 static const xmlChar * 3462 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3463 register const xmlChar *cmp = other; 3464 register const xmlChar *in; 3465 const xmlChar *ret; 3466 3467 GROW; 3468 if (ctxt->instate == XML_PARSER_EOF) 3469 return(NULL); 
3470 3471 in = ctxt->input->cur; 3472 while (*in != 0 && *in == *cmp) { 3473 ++in; 3474 ++cmp; 3475 ctxt->input->col++; 3476 } 3477 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3478 /* success */ 3479 ctxt->input->cur = in; 3480 return (const xmlChar*) 1; 3481 } 3482 /* failure (or end of input buffer), check with full function */ 3483 ret = xmlParseName (ctxt); 3484 /* strings coming from the dictionary direct compare possible */ 3485 if (ret == other) { 3486 return (const xmlChar*) 1; 3487 } 3488 return ret; 3489 } 3490 3491 /** 3492 * xmlParseStringName: 3493 * @ctxt: an XML parser context 3494 * @str: a pointer to the string pointer (IN/OUT) 3495 * 3496 * parse an XML name. 3497 * 3498 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3499 * CombiningChar | Extender 3500 * 3501 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3502 * 3503 * [6] Names ::= Name (#x20 Name)* 3504 * 3505 * Returns the Name parsed or NULL. The @str pointer 3506 * is updated to the current location in the string. 3507 */ 3508 3509 static xmlChar * 3510 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3511 xmlChar buf[XML_MAX_NAMELEN + 5]; 3512 const xmlChar *cur = *str; 3513 int len = 0, l; 3514 int c; 3515 3516 #ifdef DEBUG 3517 nbParseStringName++; 3518 #endif 3519 3520 c = CUR_SCHAR(cur, l); 3521 if (!xmlIsNameStartChar(ctxt, c)) { 3522 return(NULL); 3523 } 3524 3525 COPY_BUF(l,buf,len,c); 3526 cur += l; 3527 c = CUR_SCHAR(cur, l); 3528 while (xmlIsNameChar(ctxt, c)) { 3529 COPY_BUF(l,buf,len,c); 3530 cur += l; 3531 c = CUR_SCHAR(cur, l); 3532 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3533 /* 3534 * Okay someone managed to make a huge name, so he's ready to pay 3535 * for the processing speed. 
3536 */ 3537 xmlChar *buffer; 3538 int max = len * 2; 3539 3540 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3541 if (buffer == NULL) { 3542 xmlErrMemory(ctxt, NULL); 3543 return(NULL); 3544 } 3545 memcpy(buffer, buf, len); 3546 while (xmlIsNameChar(ctxt, c)) { 3547 if (len + 10 > max) { 3548 xmlChar *tmp; 3549 3550 if ((len > XML_MAX_NAME_LENGTH) && 3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3553 xmlFree(buffer); 3554 return(NULL); 3555 } 3556 max *= 2; 3557 tmp = (xmlChar *) xmlRealloc(buffer, 3558 max * sizeof(xmlChar)); 3559 if (tmp == NULL) { 3560 xmlErrMemory(ctxt, NULL); 3561 xmlFree(buffer); 3562 return(NULL); 3563 } 3564 buffer = tmp; 3565 } 3566 COPY_BUF(l,buffer,len,c); 3567 cur += l; 3568 c = CUR_SCHAR(cur, l); 3569 } 3570 buffer[len] = 0; 3571 *str = cur; 3572 return(buffer); 3573 } 3574 } 3575 if ((len > XML_MAX_NAME_LENGTH) && 3576 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3577 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3578 return(NULL); 3579 } 3580 *str = cur; 3581 return(xmlStrndup(buf, len)); 3582 } 3583 3584 /** 3585 * xmlParseNmtoken: 3586 * @ctxt: an XML parser context 3587 * 3588 * parse an XML Nmtoken. 
3589 * 3590 * [7] Nmtoken ::= (NameChar)+ 3591 * 3592 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3593 * 3594 * Returns the Nmtoken parsed or NULL 3595 */ 3596 3597 xmlChar * 3598 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3599 xmlChar buf[XML_MAX_NAMELEN + 5]; 3600 int len = 0, l; 3601 int c; 3602 int count = 0; 3603 3604 #ifdef DEBUG 3605 nbParseNmToken++; 3606 #endif 3607 3608 GROW; 3609 if (ctxt->instate == XML_PARSER_EOF) 3610 return(NULL); 3611 c = CUR_CHAR(l); 3612 3613 while (xmlIsNameChar(ctxt, c)) { 3614 if (count++ > XML_PARSER_CHUNK_SIZE) { 3615 count = 0; 3616 GROW; 3617 } 3618 COPY_BUF(l,buf,len,c); 3619 NEXTL(l); 3620 c = CUR_CHAR(l); 3621 if (c == 0) { 3622 count = 0; 3623 GROW; 3624 if (ctxt->instate == XML_PARSER_EOF) 3625 return(NULL); 3626 c = CUR_CHAR(l); 3627 } 3628 if (len >= XML_MAX_NAMELEN) { 3629 /* 3630 * Okay someone managed to make a huge token, so he's ready to pay 3631 * for the processing speed. 3632 */ 3633 xmlChar *buffer; 3634 int max = len * 2; 3635 3636 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3637 if (buffer == NULL) { 3638 xmlErrMemory(ctxt, NULL); 3639 return(NULL); 3640 } 3641 memcpy(buffer, buf, len); 3642 while (xmlIsNameChar(ctxt, c)) { 3643 if (count++ > XML_PARSER_CHUNK_SIZE) { 3644 count = 0; 3645 GROW; 3646 if (ctxt->instate == XML_PARSER_EOF) { 3647 xmlFree(buffer); 3648 return(NULL); 3649 } 3650 } 3651 if (len + 10 > max) { 3652 xmlChar *tmp; 3653 3654 if ((max > XML_MAX_NAME_LENGTH) && 3655 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3656 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3657 xmlFree(buffer); 3658 return(NULL); 3659 } 3660 max *= 2; 3661 tmp = (xmlChar *) xmlRealloc(buffer, 3662 max * sizeof(xmlChar)); 3663 if (tmp == NULL) { 3664 xmlErrMemory(ctxt, NULL); 3665 xmlFree(buffer); 3666 return(NULL); 3667 } 3668 buffer = tmp; 3669 } 3670 COPY_BUF(l,buffer,len,c); 3671 NEXTL(l); 3672 c = CUR_CHAR(l); 3673 } 3674 buffer[len] = 0; 3675 return(buffer); 3676 } 3677 } 3678 if (len == 0) 
3679 return(NULL); 3680 if ((len > XML_MAX_NAME_LENGTH) && 3681 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3682 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3683 return(NULL); 3684 } 3685 return(xmlStrndup(buf, len)); 3686 } 3687 3688 /** 3689 * xmlParseEntityValue: 3690 * @ctxt: an XML parser context 3691 * @orig: if non-NULL store a copy of the original entity value 3692 * 3693 * parse a value for ENTITY declarations 3694 * 3695 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3696 * "'" ([^%&'] | PEReference | Reference)* "'" 3697 * 3698 * Returns the EntityValue parsed with reference substituted or NULL 3699 */ 3700 3701 xmlChar * 3702 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3703 xmlChar *buf = NULL; 3704 int len = 0; 3705 int size = XML_PARSER_BUFFER_SIZE; 3706 int c, l; 3707 xmlChar stop; 3708 xmlChar *ret = NULL; 3709 const xmlChar *cur = NULL; 3710 xmlParserInputPtr input; 3711 3712 if (RAW == '"') stop = '"'; 3713 else if (RAW == '\'') stop = '\''; 3714 else { 3715 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3716 return(NULL); 3717 } 3718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3719 if (buf == NULL) { 3720 xmlErrMemory(ctxt, NULL); 3721 return(NULL); 3722 } 3723 3724 /* 3725 * The content of the entity definition is copied in a buffer. 3726 */ 3727 3728 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3729 input = ctxt->input; 3730 GROW; 3731 if (ctxt->instate == XML_PARSER_EOF) 3732 goto error; 3733 NEXT; 3734 c = CUR_CHAR(l); 3735 /* 3736 * NOTE: 4.4.5 Included in Literal 3737 * When a parameter entity reference appears in a literal entity 3738 * value, ... a single or double quote character in the replacement 3739 * text is always treated as a normal data character and will not 3740 * terminate the literal. 
3741 * In practice it means we stop the loop only when back at parsing 3742 * the initial entity and the quote is found 3743 */ 3744 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3745 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3746 if (len + 5 >= size) { 3747 xmlChar *tmp; 3748 3749 size *= 2; 3750 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3751 if (tmp == NULL) { 3752 xmlErrMemory(ctxt, NULL); 3753 goto error; 3754 } 3755 buf = tmp; 3756 } 3757 COPY_BUF(l,buf,len,c); 3758 NEXTL(l); 3759 3760 GROW; 3761 c = CUR_CHAR(l); 3762 if (c == 0) { 3763 GROW; 3764 c = CUR_CHAR(l); 3765 } 3766 } 3767 buf[len] = 0; 3768 if (ctxt->instate == XML_PARSER_EOF) 3769 goto error; 3770 if (c != stop) { 3771 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3772 goto error; 3773 } 3774 NEXT; 3775 3776 /* 3777 * Raise problem w.r.t. '&' and '%' being used in non-entities 3778 * reference constructs. Note Charref will be handled in 3779 * xmlStringDecodeEntities() 3780 */ 3781 cur = buf; 3782 while (*cur != 0) { /* non input consuming */ 3783 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3784 xmlChar *name; 3785 xmlChar tmp = *cur; 3786 int nameOk = 0; 3787 3788 cur++; 3789 name = xmlParseStringName(ctxt, &cur); 3790 if (name != NULL) { 3791 nameOk = 1; 3792 xmlFree(name); 3793 } 3794 if ((nameOk == 0) || (*cur != ';')) { 3795 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3796 "EntityValue: '%c' forbidden except for entities references\n", 3797 tmp); 3798 goto error; 3799 } 3800 if ((tmp == '%') && (ctxt->inSubset == 1) && 3801 (ctxt->inputNr == 1)) { 3802 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3803 goto error; 3804 } 3805 if (*cur == 0) 3806 break; 3807 } 3808 cur++; 3809 } 3810 3811 /* 3812 * Then PEReference entities are substituted. 3813 * 3814 * NOTE: 4.4.7 Bypassed 3815 * When a general entity reference appears in the EntityValue in 3816 * an entity declaration, it is bypassed and left as is. 
3817 * so XML_SUBSTITUTE_REF is not set here. 3818 */ 3819 ++ctxt->depth; 3820 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3821 0, 0, 0); 3822 --ctxt->depth; 3823 if (orig != NULL) { 3824 *orig = buf; 3825 buf = NULL; 3826 } 3827 3828 error: 3829 if (buf != NULL) 3830 xmlFree(buf); 3831 return(ret); 3832 } 3833 3834 /** 3835 * xmlParseAttValueComplex: 3836 * @ctxt: an XML parser context 3837 * @len: the resulting attribute len 3838 * @normalize: wether to apply the inner normalization 3839 * 3840 * parse a value for an attribute, this is the fallback function 3841 * of xmlParseAttValue() when the attribute parsing requires handling 3842 * of non-ASCII characters, or normalization compaction. 3843 * 3844 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3845 */ 3846 static xmlChar * 3847 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3848 xmlChar limit = 0; 3849 xmlChar *buf = NULL; 3850 xmlChar *rep = NULL; 3851 size_t len = 0; 3852 size_t buf_size = 0; 3853 int c, l, in_space = 0; 3854 xmlChar *current = NULL; 3855 xmlEntityPtr ent; 3856 3857 if (NXT(0) == '"') { 3858 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3859 limit = '"'; 3860 NEXT; 3861 } else if (NXT(0) == '\'') { 3862 limit = '\''; 3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3864 NEXT; 3865 } else { 3866 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3867 return(NULL); 3868 } 3869 3870 /* 3871 * allocate a translation buffer. 3872 */ 3873 buf_size = XML_PARSER_BUFFER_SIZE; 3874 buf = (xmlChar *) xmlMallocAtomic(buf_size); 3875 if (buf == NULL) goto mem_error; 3876 3877 /* 3878 * OK loop until we reach one of the ending char or a size limit. 
3879 */ 3880 c = CUR_CHAR(l); 3881 while (((NXT(0) != limit) && /* checked */ 3882 (IS_CHAR(c)) && (c != '<')) && 3883 (ctxt->instate != XML_PARSER_EOF)) { 3884 /* 3885 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 3886 * special option is given 3887 */ 3888 if ((len > XML_MAX_TEXT_LENGTH) && 3889 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3890 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3891 "AttValue length too long\n"); 3892 goto mem_error; 3893 } 3894 if (c == 0) break; 3895 if (c == '&') { 3896 in_space = 0; 3897 if (NXT(1) == '#') { 3898 int val = xmlParseCharRef(ctxt); 3899 3900 if (val == '&') { 3901 if (ctxt->replaceEntities) { 3902 if (len + 10 > buf_size) { 3903 growBuffer(buf, 10); 3904 } 3905 buf[len++] = '&'; 3906 } else { 3907 /* 3908 * The reparsing will be done in xmlStringGetNodeList() 3909 * called by the attribute() function in SAX.c 3910 */ 3911 if (len + 10 > buf_size) { 3912 growBuffer(buf, 10); 3913 } 3914 buf[len++] = '&'; 3915 buf[len++] = '#'; 3916 buf[len++] = '3'; 3917 buf[len++] = '8'; 3918 buf[len++] = ';'; 3919 } 3920 } else if (val != 0) { 3921 if (len + 10 > buf_size) { 3922 growBuffer(buf, 10); 3923 } 3924 len += xmlCopyChar(0, &buf[len], val); 3925 } 3926 } else { 3927 ent = xmlParseEntityRef(ctxt); 3928 ctxt->nbentities++; 3929 if (ent != NULL) 3930 ctxt->nbentities += ent->owner; 3931 if ((ent != NULL) && 3932 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3933 if (len + 10 > buf_size) { 3934 growBuffer(buf, 10); 3935 } 3936 if ((ctxt->replaceEntities == 0) && 3937 (ent->content[0] == '&')) { 3938 buf[len++] = '&'; 3939 buf[len++] = '#'; 3940 buf[len++] = '3'; 3941 buf[len++] = '8'; 3942 buf[len++] = ';'; 3943 } else { 3944 buf[len++] = ent->content[0]; 3945 } 3946 } else if ((ent != NULL) && 3947 (ctxt->replaceEntities != 0)) { 3948 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3949 ++ctxt->depth; 3950 rep = xmlStringDecodeEntities(ctxt, ent->content, 3951 XML_SUBSTITUTE_REF, 3952 0, 0, 
0); 3953 --ctxt->depth; 3954 if (rep != NULL) { 3955 current = rep; 3956 while (*current != 0) { /* non input consuming */ 3957 if ((*current == 0xD) || (*current == 0xA) || 3958 (*current == 0x9)) { 3959 buf[len++] = 0x20; 3960 current++; 3961 } else 3962 buf[len++] = *current++; 3963 if (len + 10 > buf_size) { 3964 growBuffer(buf, 10); 3965 } 3966 } 3967 xmlFree(rep); 3968 rep = NULL; 3969 } 3970 } else { 3971 if (len + 10 > buf_size) { 3972 growBuffer(buf, 10); 3973 } 3974 if (ent->content != NULL) 3975 buf[len++] = ent->content[0]; 3976 } 3977 } else if (ent != NULL) { 3978 int i = xmlStrlen(ent->name); 3979 const xmlChar *cur = ent->name; 3980 3981 /* 3982 * This may look absurd but is needed to detect 3983 * entities problems 3984 */ 3985 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3986 (ent->content != NULL) && (ent->checked == 0)) { 3987 unsigned long oldnbent = ctxt->nbentities; 3988 3989 ++ctxt->depth; 3990 rep = xmlStringDecodeEntities(ctxt, ent->content, 3991 XML_SUBSTITUTE_REF, 0, 0, 0); 3992 --ctxt->depth; 3993 3994 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 3995 if (rep != NULL) { 3996 if (xmlStrchr(rep, '<')) 3997 ent->checked |= 1; 3998 xmlFree(rep); 3999 rep = NULL; 4000 } else { 4001 ent->content[0] = 0; 4002 } 4003 } 4004 4005 /* 4006 * Just output the reference 4007 */ 4008 buf[len++] = '&'; 4009 while (len + i + 10 > buf_size) { 4010 growBuffer(buf, i + 10); 4011 } 4012 for (;i > 0;i--) 4013 buf[len++] = *cur++; 4014 buf[len++] = ';'; 4015 } 4016 } 4017 } else { 4018 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4019 if ((len != 0) || (!normalize)) { 4020 if ((!normalize) || (!in_space)) { 4021 COPY_BUF(l,buf,len,0x20); 4022 while (len + 10 > buf_size) { 4023 growBuffer(buf, 10); 4024 } 4025 } 4026 in_space = 1; 4027 } 4028 } else { 4029 in_space = 0; 4030 COPY_BUF(l,buf,len,c); 4031 if (len + 10 > buf_size) { 4032 growBuffer(buf, 10); 4033 } 4034 } 4035 NEXTL(l); 4036 } 4037 GROW; 4038 c = CUR_CHAR(l); 4039 
} 4040 if (ctxt->instate == XML_PARSER_EOF) 4041 goto error; 4042 4043 if ((in_space) && (normalize)) { 4044 while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4045 } 4046 buf[len] = 0; 4047 if (RAW == '<') { 4048 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4049 } else if (RAW != limit) { 4050 if ((c != 0) && (!IS_CHAR(c))) { 4051 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4052 "invalid character in attribute value\n"); 4053 } else { 4054 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4055 "AttValue: ' expected\n"); 4056 } 4057 } else 4058 NEXT; 4059 4060 /* 4061 * There we potentially risk an overflow, don't allow attribute value of 4062 * length more than INT_MAX it is a very reasonnable assumption ! 4063 */ 4064 if (len >= INT_MAX) { 4065 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4066 "AttValue length too long\n"); 4067 goto mem_error; 4068 } 4069 4070 if (attlen != NULL) *attlen = (int) len; 4071 return(buf); 4072 4073 mem_error: 4074 xmlErrMemory(ctxt, NULL); 4075 error: 4076 if (buf != NULL) 4077 xmlFree(buf); 4078 if (rep != NULL) 4079 xmlFree(rep); 4080 return(NULL); 4081 } 4082 4083 /** 4084 * xmlParseAttValue: 4085 * @ctxt: an XML parser context 4086 * 4087 * parse a value for an attribute 4088 * Note: the parser won't do substitution of entities here, this 4089 * will be handled later in xmlStringGetNodeList 4090 * 4091 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4092 * "'" ([^<&'] | Reference)* "'" 4093 * 4094 * 3.3.3 Attribute-Value Normalization: 4095 * Before the value of an attribute is passed to the application or 4096 * checked for validity, the XML processor must normalize it as follows: 4097 * - a character reference is processed by appending the referenced 4098 * character to the attribute value 4099 * - an entity reference is processed by recursively processing the 4100 * replacement text of the entity 4101 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4102 * appending #x20 to the normalized 
value, except that only a single
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
 *   parsed entity or the literal entity value of an internal parsed entity
 * - other characters are processed by appending them to the normalized value
 * If the declared value is not CDATA, then the XML processor must further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * All attributes for which no declaration has been read should be treated
 * by a non-validating parser as if declared CDATA.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */


xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
    /* no length/alloc out-parameters and no normalization requested */
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
}

/**
 * xmlParseSystemLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML Literal
 *
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 *
 * The parser state is switched to XML_PARSER_SYSTEM_LITERAL while
 * scanning. The previous state is put back on every return except the
 * XML_PARSER_EOF one. The result is a newly allocated buffer. An
 * unterminated literal raises XML_ERR_LITERAL_NOT_FINISHED but the
 * text read so far is still returned.
 *
 * Returns the SystemLiteral parsed or NULL
 */

xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int cur, l;
    xmlChar stop;
    int state = ctxt->instate;  /* saved so it can be restored on exit */
    int count = 0;

    SHRINK;
    if (RAW == '"') {
        NEXT;
        stop = '"';
    } else if (RAW == '\'') {
        NEXT;
        stop = '\'';
    } else {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
        return(NULL);
    }

    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
    cur = CUR_CHAR(l);
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
        if (len + 5 >= size) {
            xmlChar *tmp;

            /* the limit is tested against the buffer size, not len */
            if ((size > XML_MAX_NAME_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
                xmlFree(buf);
                ctxt->instate = (xmlParserInputState) state;
                return(NULL);
            }
            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlFree(buf);
                xmlErrMemory(ctxt, NULL);
                ctxt->instate = (xmlParserInputState) state;
                return(NULL);
            }
            buf = tmp;
        }
        count++;
        if (count > 50) {
            GROW;
            count = 0;
            if (ctxt->instate == XML_PARSER_EOF) {
                xmlFree(buf);
                return(NULL);
            }
        }
        COPY_BUF(l,buf,len,cur);
        NEXTL(l);
        cur = CUR_CHAR(l);
        if (cur == 0) {
            GROW;
            SHRINK;
            cur = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    ctxt->instate = (xmlParserInputState) state;
    if (!IS_CHAR(cur)) {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
    } else {
        /* skip the closing quote */
        NEXT;
    }
    return(buf);
}

/**
 * xmlParsePubidLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML public literal
 *
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
 *
 * Returns the PubidLiteral parsed or NULL.
4221 */ 4222 4223 xmlChar * 4224 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4225 xmlChar *buf = NULL; 4226 int len = 0; 4227 int size = XML_PARSER_BUFFER_SIZE; 4228 xmlChar cur; 4229 xmlChar stop; 4230 int count = 0; 4231 xmlParserInputState oldstate = ctxt->instate; 4232 4233 SHRINK; 4234 if (RAW == '"') { 4235 NEXT; 4236 stop = '"'; 4237 } else if (RAW == '\'') { 4238 NEXT; 4239 stop = '\''; 4240 } else { 4241 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4242 return(NULL); 4243 } 4244 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4245 if (buf == NULL) { 4246 xmlErrMemory(ctxt, NULL); 4247 return(NULL); 4248 } 4249 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4250 cur = CUR; 4251 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4252 if (len + 1 >= size) { 4253 xmlChar *tmp; 4254 4255 if ((size > XML_MAX_NAME_LENGTH) && 4256 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4257 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4258 xmlFree(buf); 4259 return(NULL); 4260 } 4261 size *= 2; 4262 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4263 if (tmp == NULL) { 4264 xmlErrMemory(ctxt, NULL); 4265 xmlFree(buf); 4266 return(NULL); 4267 } 4268 buf = tmp; 4269 } 4270 buf[len++] = cur; 4271 count++; 4272 if (count > 50) { 4273 GROW; 4274 count = 0; 4275 if (ctxt->instate == XML_PARSER_EOF) { 4276 xmlFree(buf); 4277 return(NULL); 4278 } 4279 } 4280 NEXT; 4281 cur = CUR; 4282 if (cur == 0) { 4283 GROW; 4284 SHRINK; 4285 cur = CUR; 4286 } 4287 } 4288 buf[len] = 0; 4289 if (cur != stop) { 4290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4291 } else { 4292 NEXT; 4293 } 4294 ctxt->instate = oldstate; 4295 return(buf); 4296 } 4297 4298 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4299 4300 /* 4301 * used for the test in the inner loop of the char data testing 4302 */ 4303 static const unsigned char test_char_data[256] = { 4304 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4305 0x00, 0x09, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4306 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4307 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4308 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4309 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4310 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4311 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4312 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4313 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4314 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4315 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4316 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4317 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4318 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4319 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4320 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4321 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4322 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4323 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4324 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4325 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4326 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4327 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4328 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4329 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4331 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4332 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4333 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4334 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4335 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4336 }; 4337 4338 /** 4339 * xmlParseCharData: 4340 * @ctxt: an XML parser context 4341 * @cdata: int indicating whether we are within a CDATA section 4342 * 4343 * parse a CharData section. 4344 * if we are within a CDATA section ']]>' marks an end of section. 
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path hands runs of plain ASCII text from
 * the input buffer directly to the SAX callbacks. Anything else (or
 * @cdata non-zero) is delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position restored before falling back to the complex parser */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* only spaces and line feeds were seen before the markup */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* advance over every byte test_char_data[] marks as plain */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in + 1;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /* CR LF: leave the cursor on the LF and keep scanning */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* rewind line/col to the last position accounted for by the fast path */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 *
xmlParseCharDataComplex: 4498 * @ctxt: an XML parser context 4499 * @cdata: int indicating whether we are within a CDATA section 4500 * 4501 * parse a CharData section.this is the fallback function 4502 * of xmlParseCharData() when the parsing requires handling 4503 * of non-ASCII characters. 4504 */ 4505 static void 4506 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4507 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4508 int nbchar = 0; 4509 int cur, l; 4510 int count = 0; 4511 4512 SHRINK; 4513 GROW; 4514 cur = CUR_CHAR(l); 4515 while ((cur != '<') && /* checked */ 4516 (cur != '&') && 4517 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4518 if ((cur == ']') && (NXT(1) == ']') && 4519 (NXT(2) == '>')) { 4520 if (cdata) break; 4521 else { 4522 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4523 } 4524 } 4525 COPY_BUF(l,buf,nbchar,cur); 4526 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4527 buf[nbchar] = 0; 4528 4529 /* 4530 * OK the segment is to be consumed as chars. 4531 */ 4532 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4533 if (areBlanks(ctxt, buf, nbchar, 0)) { 4534 if (ctxt->sax->ignorableWhitespace != NULL) 4535 ctxt->sax->ignorableWhitespace(ctxt->userData, 4536 buf, nbchar); 4537 } else { 4538 if (ctxt->sax->characters != NULL) 4539 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4540 if ((ctxt->sax->characters != 4541 ctxt->sax->ignorableWhitespace) && 4542 (*ctxt->space == -1)) 4543 *ctxt->space = -2; 4544 } 4545 } 4546 nbchar = 0; 4547 /* something really bad happened in the SAX callback */ 4548 if (ctxt->instate != XML_PARSER_CONTENT) 4549 return; 4550 } 4551 count++; 4552 if (count > 50) { 4553 GROW; 4554 count = 0; 4555 if (ctxt->instate == XML_PARSER_EOF) 4556 return; 4557 } 4558 NEXTL(l); 4559 cur = CUR_CHAR(l); 4560 } 4561 if (nbchar != 0) { 4562 buf[nbchar] = 0; 4563 /* 4564 * OK the segment is to be consumed as chars. 
4565 */ 4566 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4567 if (areBlanks(ctxt, buf, nbchar, 0)) { 4568 if (ctxt->sax->ignorableWhitespace != NULL) 4569 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4570 } else { 4571 if (ctxt->sax->characters != NULL) 4572 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4573 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4574 (*ctxt->space == -1)) 4575 *ctxt->space = -2; 4576 } 4577 } 4578 } 4579 if ((cur != 0) && (!IS_CHAR(cur))) { 4580 /* Generate the error and skip the offending character */ 4581 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4582 "PCDATA invalid Char value %d\n", 4583 cur); 4584 NEXTL(l); 4585 } 4586 } 4587 4588 /** 4589 * xmlParseExternalID: 4590 * @ctxt: an XML parser context 4591 * @publicID: a xmlChar** receiving PubidLiteral 4592 * @strict: indicate whether we should restrict parsing to only 4593 * production [75], see NOTE below 4594 * 4595 * Parse an External ID or a Public ID 4596 * 4597 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4598 * 'PUBLIC' S PubidLiteral S SystemLiteral 4599 * 4600 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4601 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4602 * 4603 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4604 * 4605 * Returns the function returns SystemLiteral and in the second 4606 * case publicID receives PubidLiteral, is strict is off 4607 * it is possible to return NULL and have publicID set. 
4608 */ 4609 4610 xmlChar * 4611 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4612 xmlChar *URI = NULL; 4613 4614 SHRINK; 4615 4616 *publicID = NULL; 4617 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4618 SKIP(6); 4619 if (SKIP_BLANKS == 0) { 4620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4621 "Space required after 'SYSTEM'\n"); 4622 } 4623 URI = xmlParseSystemLiteral(ctxt); 4624 if (URI == NULL) { 4625 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4626 } 4627 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4628 SKIP(6); 4629 if (SKIP_BLANKS == 0) { 4630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4631 "Space required after 'PUBLIC'\n"); 4632 } 4633 *publicID = xmlParsePubidLiteral(ctxt); 4634 if (*publicID == NULL) { 4635 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4636 } 4637 if (strict) { 4638 /* 4639 * We don't handle [83] so "S SystemLiteral" is required. 4640 */ 4641 if (SKIP_BLANKS == 0) { 4642 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4643 "Space required after the Public Identifier\n"); 4644 } 4645 } else { 4646 /* 4647 * We handle [83] so we return immediately, if 4648 * "S SystemLiteral" is not detected. We skip blanks if no 4649 * system literal was found, but this is harmless since we must 4650 * be at the end of a NotationDecl. 4651 */ 4652 if (SKIP_BLANKS == 0) return(NULL); 4653 if ((CUR != '\'') && (CUR != '"')) return(NULL); 4654 } 4655 URI = xmlParseSystemLiteral(ctxt); 4656 if (URI == NULL) { 4657 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4658 } 4659 } 4660 return(URI); 4661 } 4662 4663 /** 4664 * xmlParseCommentComplex: 4665 * @ctxt: an XML parser context 4666 * @buf: the already parsed part of the buffer 4667 * @len: number of bytes filles in the buffer 4668 * @size: allocated size of the buffer 4669 * 4670 * Skip an XML (SGML) comment <!-- .... --> 4671 * The spec says that "For compatibility, the string "--" (double-hyphen) 4672 * must not occur within comments. 
" 4673 * This is the slow routine in case the accelerator for ascii didn't work 4674 * 4675 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4676 */ 4677 static void 4678 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4679 size_t len, size_t size) { 4680 int q, ql; 4681 int r, rl; 4682 int cur, l; 4683 size_t count = 0; 4684 int inputid; 4685 4686 inputid = ctxt->input->id; 4687 4688 if (buf == NULL) { 4689 len = 0; 4690 size = XML_PARSER_BUFFER_SIZE; 4691 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4692 if (buf == NULL) { 4693 xmlErrMemory(ctxt, NULL); 4694 return; 4695 } 4696 } 4697 GROW; /* Assure there's enough input data */ 4698 q = CUR_CHAR(ql); 4699 if (q == 0) 4700 goto not_terminated; 4701 if (!IS_CHAR(q)) { 4702 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4703 "xmlParseComment: invalid xmlChar value %d\n", 4704 q); 4705 xmlFree (buf); 4706 return; 4707 } 4708 NEXTL(ql); 4709 r = CUR_CHAR(rl); 4710 if (r == 0) 4711 goto not_terminated; 4712 if (!IS_CHAR(r)) { 4713 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4714 "xmlParseComment: invalid xmlChar value %d\n", 4715 q); 4716 xmlFree (buf); 4717 return; 4718 } 4719 NEXTL(rl); 4720 cur = CUR_CHAR(l); 4721 if (cur == 0) 4722 goto not_terminated; 4723 while (IS_CHAR(cur) && /* checked */ 4724 ((cur != '>') || 4725 (r != '-') || (q != '-'))) { 4726 if ((r == '-') && (q == '-')) { 4727 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4728 } 4729 if ((len > XML_MAX_TEXT_LENGTH) && 4730 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4731 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4732 "Comment too big found", NULL); 4733 xmlFree (buf); 4734 return; 4735 } 4736 if (len + 5 >= size) { 4737 xmlChar *new_buf; 4738 size_t new_size; 4739 4740 new_size = size * 2; 4741 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4742 if (new_buf == NULL) { 4743 xmlFree (buf); 4744 xmlErrMemory(ctxt, NULL); 4745 return; 4746 } 4747 buf = new_buf; 4748 size = new_size; 4749 } 
4750 COPY_BUF(ql,buf,len,q); 4751 q = r; 4752 ql = rl; 4753 r = cur; 4754 rl = l; 4755 4756 count++; 4757 if (count > 50) { 4758 GROW; 4759 count = 0; 4760 if (ctxt->instate == XML_PARSER_EOF) { 4761 xmlFree(buf); 4762 return; 4763 } 4764 } 4765 NEXTL(l); 4766 cur = CUR_CHAR(l); 4767 if (cur == 0) { 4768 SHRINK; 4769 GROW; 4770 cur = CUR_CHAR(l); 4771 } 4772 } 4773 buf[len] = 0; 4774 if (cur == 0) { 4775 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4776 "Comment not terminated \n<!--%.50s\n", buf); 4777 } else if (!IS_CHAR(cur)) { 4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4779 "xmlParseComment: invalid xmlChar value %d\n", 4780 cur); 4781 } else { 4782 if (inputid != ctxt->input->id) { 4783 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4784 "Comment doesn't start and stop in the same" 4785 " entity\n"); 4786 } 4787 NEXT; 4788 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4789 (!ctxt->disableSAX)) 4790 ctxt->sax->comment(ctxt->userData, buf); 4791 } 4792 xmlFree(buf); 4793 return; 4794 not_terminated: 4795 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4796 "Comment not terminated\n", NULL); 4797 xmlFree(buf); 4798 return; 4799 } 4800 4801 /** 4802 * xmlParseComment: 4803 * @ctxt: an XML parser context 4804 * 4805 * Skip an XML (SGML) comment <!-- .... --> 4806 * The spec says that "For compatibility, the string "--" (double-hyphen) 4807 * must not occur within comments. " 4808 * 4809 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4810 */ 4811 void 4812 xmlParseComment(xmlParserCtxtPtr ctxt) { 4813 xmlChar *buf = NULL; 4814 size_t size = XML_PARSER_BUFFER_SIZE; 4815 size_t len = 0; 4816 xmlParserInputState state; 4817 const xmlChar *in; 4818 size_t nbchar = 0; 4819 int ccol; 4820 int inputid; 4821 4822 /* 4823 * Check that there is a comment right here. 
4824 */ 4825 if ((RAW != '<') || (NXT(1) != '!') || 4826 (NXT(2) != '-') || (NXT(3) != '-')) return; 4827 state = ctxt->instate; 4828 ctxt->instate = XML_PARSER_COMMENT; 4829 inputid = ctxt->input->id; 4830 SKIP(4); 4831 SHRINK; 4832 GROW; 4833 4834 /* 4835 * Accelerated common case where input don't need to be 4836 * modified before passing it to the handler. 4837 */ 4838 in = ctxt->input->cur; 4839 do { 4840 if (*in == 0xA) { 4841 do { 4842 ctxt->input->line++; ctxt->input->col = 1; 4843 in++; 4844 } while (*in == 0xA); 4845 } 4846 get_more: 4847 ccol = ctxt->input->col; 4848 while (((*in > '-') && (*in <= 0x7F)) || 4849 ((*in >= 0x20) && (*in < '-')) || 4850 (*in == 0x09)) { 4851 in++; 4852 ccol++; 4853 } 4854 ctxt->input->col = ccol; 4855 if (*in == 0xA) { 4856 do { 4857 ctxt->input->line++; ctxt->input->col = 1; 4858 in++; 4859 } while (*in == 0xA); 4860 goto get_more; 4861 } 4862 nbchar = in - ctxt->input->cur; 4863 /* 4864 * save current set of data 4865 */ 4866 if (nbchar > 0) { 4867 if ((ctxt->sax != NULL) && 4868 (ctxt->sax->comment != NULL)) { 4869 if (buf == NULL) { 4870 if ((*in == '-') && (in[1] == '-')) 4871 size = nbchar + 1; 4872 else 4873 size = XML_PARSER_BUFFER_SIZE + nbchar; 4874 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4875 if (buf == NULL) { 4876 xmlErrMemory(ctxt, NULL); 4877 ctxt->instate = state; 4878 return; 4879 } 4880 len = 0; 4881 } else if (len + nbchar + 1 >= size) { 4882 xmlChar *new_buf; 4883 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4884 new_buf = (xmlChar *) xmlRealloc(buf, 4885 size * sizeof(xmlChar)); 4886 if (new_buf == NULL) { 4887 xmlFree (buf); 4888 xmlErrMemory(ctxt, NULL); 4889 ctxt->instate = state; 4890 return; 4891 } 4892 buf = new_buf; 4893 } 4894 memcpy(&buf[len], ctxt->input->cur, nbchar); 4895 len += nbchar; 4896 buf[len] = 0; 4897 } 4898 } 4899 if ((len > XML_MAX_TEXT_LENGTH) && 4900 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4901 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4902 
"Comment too big found", NULL); 4903 xmlFree (buf); 4904 return; 4905 } 4906 ctxt->input->cur = in; 4907 if (*in == 0xA) { 4908 in++; 4909 ctxt->input->line++; ctxt->input->col = 1; 4910 } 4911 if (*in == 0xD) { 4912 in++; 4913 if (*in == 0xA) { 4914 ctxt->input->cur = in; 4915 in++; 4916 ctxt->input->line++; ctxt->input->col = 1; 4917 continue; /* while */ 4918 } 4919 in--; 4920 } 4921 SHRINK; 4922 GROW; 4923 if (ctxt->instate == XML_PARSER_EOF) { 4924 xmlFree(buf); 4925 return; 4926 } 4927 in = ctxt->input->cur; 4928 if (*in == '-') { 4929 if (in[1] == '-') { 4930 if (in[2] == '>') { 4931 if (ctxt->input->id != inputid) { 4932 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4933 "comment doesn't start and stop in the" 4934 " same entity\n"); 4935 } 4936 SKIP(3); 4937 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4938 (!ctxt->disableSAX)) { 4939 if (buf != NULL) 4940 ctxt->sax->comment(ctxt->userData, buf); 4941 else 4942 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4943 } 4944 if (buf != NULL) 4945 xmlFree(buf); 4946 if (ctxt->instate != XML_PARSER_EOF) 4947 ctxt->instate = state; 4948 return; 4949 } 4950 if (buf != NULL) { 4951 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4952 "Double hyphen within comment: " 4953 "<!--%.50s\n", 4954 buf); 4955 } else 4956 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4957 "Double hyphen within comment\n", NULL); 4958 in++; 4959 ctxt->input->col++; 4960 } 4961 in++; 4962 ctxt->input->col++; 4963 goto get_more; 4964 } 4965 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4966 xmlParseCommentComplex(ctxt, buf, len, size); 4967 ctxt->instate = state; 4968 return; 4969 } 4970 4971 4972 /** 4973 * xmlParsePITarget: 4974 * @ctxt: an XML parser context 4975 * 4976 * parse the name of a PI 4977 * 4978 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4979 * 4980 * Returns the PITarget name or NULL 4981 */ 4982 4983 const xmlChar * 4984 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4985 
const xmlChar *name; 4986 4987 name = xmlParseName(ctxt); 4988 if ((name != NULL) && 4989 ((name[0] == 'x') || (name[0] == 'X')) && 4990 ((name[1] == 'm') || (name[1] == 'M')) && 4991 ((name[2] == 'l') || (name[2] == 'L'))) { 4992 int i; 4993 if ((name[0] == 'x') && (name[1] == 'm') && 4994 (name[2] == 'l') && (name[3] == 0)) { 4995 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4996 "XML declaration allowed only at the start of the document\n"); 4997 return(name); 4998 } else if (name[3] == 0) { 4999 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5000 return(name); 5001 } 5002 for (i = 0;;i++) { 5003 if (xmlW3CPIs[i] == NULL) break; 5004 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5005 return(name); 5006 } 5007 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5008 "xmlParsePITarget: invalid name prefix 'xml'\n", 5009 NULL, NULL); 5010 } 5011 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5012 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5013 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5014 } 5015 return(name); 5016 } 5017 5018 #ifdef LIBXML_CATALOG_ENABLED 5019 /** 5020 * xmlParseCatalogPI: 5021 * @ctxt: an XML parser context 5022 * @catalog: the PI value string 5023 * 5024 * parse an XML Catalog Processing Instruction. 
5025 * 5026 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5027 * 5028 * Occurs only if allowed by the user and if happening in the Misc 5029 * part of the document before any doctype informations 5030 * This will add the given catalog to the parsing context in order 5031 * to be used if there is a resolution need further down in the document 5032 */ 5033 5034 static void 5035 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5036 xmlChar *URL = NULL; 5037 const xmlChar *tmp, *base; 5038 xmlChar marker; 5039 5040 tmp = catalog; 5041 while (IS_BLANK_CH(*tmp)) tmp++; 5042 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5043 goto error; 5044 tmp += 7; 5045 while (IS_BLANK_CH(*tmp)) tmp++; 5046 if (*tmp != '=') { 5047 return; 5048 } 5049 tmp++; 5050 while (IS_BLANK_CH(*tmp)) tmp++; 5051 marker = *tmp; 5052 if ((marker != '\'') && (marker != '"')) 5053 goto error; 5054 tmp++; 5055 base = tmp; 5056 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5057 if (*tmp == 0) 5058 goto error; 5059 URL = xmlStrndup(base, tmp - base); 5060 tmp++; 5061 while (IS_BLANK_CH(*tmp)) tmp++; 5062 if (*tmp != 0) 5063 goto error; 5064 5065 if (URL != NULL) { 5066 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5067 xmlFree(URL); 5068 } 5069 return; 5070 5071 error: 5072 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5073 "Catalog PI syntax error: %s\n", 5074 catalog, NULL); 5075 if (URL != NULL) 5076 xmlFree(URL); 5077 } 5078 #endif 5079 5080 /** 5081 * xmlParsePI: 5082 * @ctxt: an XML parser context 5083 * 5084 * parse an XML Processing Instruction. 5085 * 5086 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5087 * 5088 * The processing is transfered to SAX once parsed. 
5089 */ 5090 5091 void 5092 xmlParsePI(xmlParserCtxtPtr ctxt) { 5093 xmlChar *buf = NULL; 5094 size_t len = 0; 5095 size_t size = XML_PARSER_BUFFER_SIZE; 5096 int cur, l; 5097 const xmlChar *target; 5098 xmlParserInputState state; 5099 int count = 0; 5100 5101 if ((RAW == '<') && (NXT(1) == '?')) { 5102 int inputid = ctxt->input->id; 5103 state = ctxt->instate; 5104 ctxt->instate = XML_PARSER_PI; 5105 /* 5106 * this is a Processing Instruction. 5107 */ 5108 SKIP(2); 5109 SHRINK; 5110 5111 /* 5112 * Parse the target name and check for special support like 5113 * namespace. 5114 */ 5115 target = xmlParsePITarget(ctxt); 5116 if (target != NULL) { 5117 if ((RAW == '?') && (NXT(1) == '>')) { 5118 if (inputid != ctxt->input->id) { 5119 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5120 "PI declaration doesn't start and stop in" 5121 " the same entity\n"); 5122 } 5123 SKIP(2); 5124 5125 /* 5126 * SAX: PI detected. 5127 */ 5128 if ((ctxt->sax) && (!ctxt->disableSAX) && 5129 (ctxt->sax->processingInstruction != NULL)) 5130 ctxt->sax->processingInstruction(ctxt->userData, 5131 target, NULL); 5132 if (ctxt->instate != XML_PARSER_EOF) 5133 ctxt->instate = state; 5134 return; 5135 } 5136 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5137 if (buf == NULL) { 5138 xmlErrMemory(ctxt, NULL); 5139 ctxt->instate = state; 5140 return; 5141 } 5142 if (SKIP_BLANKS == 0) { 5143 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5144 "ParsePI: PI %s space expected\n", target); 5145 } 5146 cur = CUR_CHAR(l); 5147 while (IS_CHAR(cur) && /* checked */ 5148 ((cur != '?') || (NXT(1) != '>'))) { 5149 if (len + 5 >= size) { 5150 xmlChar *tmp; 5151 size_t new_size = size * 2; 5152 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5153 if (tmp == NULL) { 5154 xmlErrMemory(ctxt, NULL); 5155 xmlFree(buf); 5156 ctxt->instate = state; 5157 return; 5158 } 5159 buf = tmp; 5160 size = new_size; 5161 } 5162 count++; 5163 if (count > 50) { 5164 GROW; 5165 if (ctxt->instate == XML_PARSER_EOF) { 5166 
xmlFree(buf); 5167 return; 5168 } 5169 count = 0; 5170 if ((len > XML_MAX_TEXT_LENGTH) && 5171 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5172 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5173 "PI %s too big found", target); 5174 xmlFree(buf); 5175 ctxt->instate = state; 5176 return; 5177 } 5178 } 5179 COPY_BUF(l,buf,len,cur); 5180 NEXTL(l); 5181 cur = CUR_CHAR(l); 5182 if (cur == 0) { 5183 SHRINK; 5184 GROW; 5185 cur = CUR_CHAR(l); 5186 } 5187 } 5188 if ((len > XML_MAX_TEXT_LENGTH) && 5189 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5190 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5191 "PI %s too big found", target); 5192 xmlFree(buf); 5193 ctxt->instate = state; 5194 return; 5195 } 5196 buf[len] = 0; 5197 if (cur != '?') { 5198 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5199 "ParsePI: PI %s never end ...\n", target); 5200 } else { 5201 if (inputid != ctxt->input->id) { 5202 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5203 "PI declaration doesn't start and stop in" 5204 " the same entity\n"); 5205 } 5206 SKIP(2); 5207 5208 #ifdef LIBXML_CATALOG_ENABLED 5209 if (((state == XML_PARSER_MISC) || 5210 (state == XML_PARSER_START)) && 5211 (xmlStrEqual(target, XML_CATALOG_PI))) { 5212 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5213 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5214 (allow == XML_CATA_ALLOW_ALL)) 5215 xmlParseCatalogPI(ctxt, buf); 5216 } 5217 #endif 5218 5219 5220 /* 5221 * SAX: PI detected. 
5222 */ 5223 if ((ctxt->sax) && (!ctxt->disableSAX) && 5224 (ctxt->sax->processingInstruction != NULL)) 5225 ctxt->sax->processingInstruction(ctxt->userData, 5226 target, buf); 5227 } 5228 xmlFree(buf); 5229 } else { 5230 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5231 } 5232 if (ctxt->instate != XML_PARSER_EOF) 5233 ctxt->instate = state; 5234 } 5235 } 5236 5237 /** 5238 * xmlParseNotationDecl: 5239 * @ctxt: an XML parser context 5240 * 5241 * parse a notation declaration 5242 * 5243 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5244 * 5245 * Hence there is actually 3 choices: 5246 * 'PUBLIC' S PubidLiteral 5247 * 'PUBLIC' S PubidLiteral S SystemLiteral 5248 * and 'SYSTEM' S SystemLiteral 5249 * 5250 * See the NOTE on xmlParseExternalID(). 5251 */ 5252 5253 void 5254 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5255 const xmlChar *name; 5256 xmlChar *Pubid; 5257 xmlChar *Systemid; 5258 5259 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5260 int inputid = ctxt->input->id; 5261 SHRINK; 5262 SKIP(10); 5263 if (SKIP_BLANKS == 0) { 5264 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5265 "Space required after '<!NOTATION'\n"); 5266 return; 5267 } 5268 5269 name = xmlParseName(ctxt); 5270 if (name == NULL) { 5271 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5272 return; 5273 } 5274 if (xmlStrchr(name, ':') != NULL) { 5275 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5276 "colons are forbidden from notation names '%s'\n", 5277 name, NULL, NULL); 5278 } 5279 if (SKIP_BLANKS == 0) { 5280 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5281 "Space required after the NOTATION name'\n"); 5282 return; 5283 } 5284 5285 /* 5286 * Parse the IDs. 
5287 */ 5288 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5289 SKIP_BLANKS; 5290 5291 if (RAW == '>') { 5292 if (inputid != ctxt->input->id) { 5293 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5294 "Notation declaration doesn't start and stop" 5295 " in the same entity\n"); 5296 } 5297 NEXT; 5298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5299 (ctxt->sax->notationDecl != NULL)) 5300 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5301 } else { 5302 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5303 } 5304 if (Systemid != NULL) xmlFree(Systemid); 5305 if (Pubid != NULL) xmlFree(Pubid); 5306 } 5307 } 5308 5309 /** 5310 * xmlParseEntityDecl: 5311 * @ctxt: an XML parser context 5312 * 5313 * parse <!ENTITY declarations 5314 * 5315 * [70] EntityDecl ::= GEDecl | PEDecl 5316 * 5317 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5318 * 5319 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5320 * 5321 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5322 * 5323 * [74] PEDef ::= EntityValue | ExternalID 5324 * 5325 * [76] NDataDecl ::= S 'NDATA' S Name 5326 * 5327 * [ VC: Notation Declared ] 5328 * The Name must match the declared name of a notation. 
5329 */ 5330 5331 void 5332 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5333 const xmlChar *name = NULL; 5334 xmlChar *value = NULL; 5335 xmlChar *URI = NULL, *literal = NULL; 5336 const xmlChar *ndata = NULL; 5337 int isParameter = 0; 5338 xmlChar *orig = NULL; 5339 5340 /* GROW; done in the caller */ 5341 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5342 int inputid = ctxt->input->id; 5343 SHRINK; 5344 SKIP(8); 5345 if (SKIP_BLANKS == 0) { 5346 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5347 "Space required after '<!ENTITY'\n"); 5348 } 5349 5350 if (RAW == '%') { 5351 NEXT; 5352 if (SKIP_BLANKS == 0) { 5353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5354 "Space required after '%%'\n"); 5355 } 5356 isParameter = 1; 5357 } 5358 5359 name = xmlParseName(ctxt); 5360 if (name == NULL) { 5361 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5362 "xmlParseEntityDecl: no name\n"); 5363 return; 5364 } 5365 if (xmlStrchr(name, ':') != NULL) { 5366 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5367 "colons are forbidden from entities names '%s'\n", 5368 name, NULL, NULL); 5369 } 5370 if (SKIP_BLANKS == 0) { 5371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5372 "Space required after the entity name\n"); 5373 } 5374 5375 ctxt->instate = XML_PARSER_ENTITY_DECL; 5376 /* 5377 * handle the various case of definitions... 
5378 */ 5379 if (isParameter) { 5380 if ((RAW == '"') || (RAW == '\'')) { 5381 value = xmlParseEntityValue(ctxt, &orig); 5382 if (value) { 5383 if ((ctxt->sax != NULL) && 5384 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5385 ctxt->sax->entityDecl(ctxt->userData, name, 5386 XML_INTERNAL_PARAMETER_ENTITY, 5387 NULL, NULL, value); 5388 } 5389 } else { 5390 URI = xmlParseExternalID(ctxt, &literal, 1); 5391 if ((URI == NULL) && (literal == NULL)) { 5392 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5393 } 5394 if (URI) { 5395 xmlURIPtr uri; 5396 5397 uri = xmlParseURI((const char *) URI); 5398 if (uri == NULL) { 5399 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5400 "Invalid URI: %s\n", URI); 5401 /* 5402 * This really ought to be a well formedness error 5403 * but the XML Core WG decided otherwise c.f. issue 5404 * E26 of the XML erratas. 5405 */ 5406 } else { 5407 if (uri->fragment != NULL) { 5408 /* 5409 * Okay this is foolish to block those but not 5410 * invalid URIs. 5411 */ 5412 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5413 } else { 5414 if ((ctxt->sax != NULL) && 5415 (!ctxt->disableSAX) && 5416 (ctxt->sax->entityDecl != NULL)) 5417 ctxt->sax->entityDecl(ctxt->userData, name, 5418 XML_EXTERNAL_PARAMETER_ENTITY, 5419 literal, URI, NULL); 5420 } 5421 xmlFreeURI(uri); 5422 } 5423 } 5424 } 5425 } else { 5426 if ((RAW == '"') || (RAW == '\'')) { 5427 value = xmlParseEntityValue(ctxt, &orig); 5428 if ((ctxt->sax != NULL) && 5429 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5430 ctxt->sax->entityDecl(ctxt->userData, name, 5431 XML_INTERNAL_GENERAL_ENTITY, 5432 NULL, NULL, value); 5433 /* 5434 * For expat compatibility in SAX mode. 
5435 */ 5436 if ((ctxt->myDoc == NULL) || 5437 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5438 if (ctxt->myDoc == NULL) { 5439 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5440 if (ctxt->myDoc == NULL) { 5441 xmlErrMemory(ctxt, "New Doc failed"); 5442 return; 5443 } 5444 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5445 } 5446 if (ctxt->myDoc->intSubset == NULL) 5447 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5448 BAD_CAST "fake", NULL, NULL); 5449 5450 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5451 NULL, NULL, value); 5452 } 5453 } else { 5454 URI = xmlParseExternalID(ctxt, &literal, 1); 5455 if ((URI == NULL) && (literal == NULL)) { 5456 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5457 } 5458 if (URI) { 5459 xmlURIPtr uri; 5460 5461 uri = xmlParseURI((const char *)URI); 5462 if (uri == NULL) { 5463 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5464 "Invalid URI: %s\n", URI); 5465 /* 5466 * This really ought to be a well formedness error 5467 * but the XML Core WG decided otherwise c.f. issue 5468 * E26 of the XML erratas. 5469 */ 5470 } else { 5471 if (uri->fragment != NULL) { 5472 /* 5473 * Okay this is foolish to block those but not 5474 * invalid URIs. 
5475 */ 5476 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5477 } 5478 xmlFreeURI(uri); 5479 } 5480 } 5481 if ((RAW != '>') && (SKIP_BLANKS == 0)) { 5482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5483 "Space required before 'NDATA'\n"); 5484 } 5485 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5486 SKIP(5); 5487 if (SKIP_BLANKS == 0) { 5488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5489 "Space required after 'NDATA'\n"); 5490 } 5491 ndata = xmlParseName(ctxt); 5492 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5493 (ctxt->sax->unparsedEntityDecl != NULL)) 5494 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5495 literal, URI, ndata); 5496 } else { 5497 if ((ctxt->sax != NULL) && 5498 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5499 ctxt->sax->entityDecl(ctxt->userData, name, 5500 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5501 literal, URI, NULL); 5502 /* 5503 * For expat compatibility in SAX mode. 5504 * assuming the entity repalcement was asked for 5505 */ 5506 if ((ctxt->replaceEntities != 0) && 5507 ((ctxt->myDoc == NULL) || 5508 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5509 if (ctxt->myDoc == NULL) { 5510 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5511 if (ctxt->myDoc == NULL) { 5512 xmlErrMemory(ctxt, "New Doc failed"); 5513 return; 5514 } 5515 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5516 } 5517 5518 if (ctxt->myDoc->intSubset == NULL) 5519 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5520 BAD_CAST "fake", NULL, NULL); 5521 xmlSAX2EntityDecl(ctxt, name, 5522 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5523 literal, URI, NULL); 5524 } 5525 } 5526 } 5527 } 5528 if (ctxt->instate == XML_PARSER_EOF) 5529 goto done; 5530 SKIP_BLANKS; 5531 if (RAW != '>') { 5532 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5533 "xmlParseEntityDecl: entity %s not terminated\n", name); 5534 xmlHaltParser(ctxt); 5535 } else { 5536 if (inputid != ctxt->input->id) { 5537 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5538 "Entity declaration 
doesn't start and stop in" 5539 " the same entity\n"); 5540 } 5541 NEXT; 5542 } 5543 if (orig != NULL) { 5544 /* 5545 * Ugly mechanism to save the raw entity value. 5546 */ 5547 xmlEntityPtr cur = NULL; 5548 5549 if (isParameter) { 5550 if ((ctxt->sax != NULL) && 5551 (ctxt->sax->getParameterEntity != NULL)) 5552 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5553 } else { 5554 if ((ctxt->sax != NULL) && 5555 (ctxt->sax->getEntity != NULL)) 5556 cur = ctxt->sax->getEntity(ctxt->userData, name); 5557 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5558 cur = xmlSAX2GetEntity(ctxt, name); 5559 } 5560 } 5561 if ((cur != NULL) && (cur->orig == NULL)) { 5562 cur->orig = orig; 5563 orig = NULL; 5564 } 5565 } 5566 5567 done: 5568 if (value != NULL) xmlFree(value); 5569 if (URI != NULL) xmlFree(URI); 5570 if (literal != NULL) xmlFree(literal); 5571 if (orig != NULL) xmlFree(orig); 5572 } 5573 } 5574 5575 /** 5576 * xmlParseDefaultDecl: 5577 * @ctxt: an XML parser context 5578 * @value: Receive a possible fixed default value for the attribute 5579 * 5580 * Parse an attribute default declaration 5581 * 5582 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5583 * 5584 * [ VC: Required Attribute ] 5585 * if the default declaration is the keyword #REQUIRED, then the 5586 * attribute must be specified for all elements of the type in the 5587 * attribute-list declaration. 5588 * 5589 * [ VC: Attribute Default Legal ] 5590 * The declared default value must meet the lexical constraints of 5591 * the declared attribute type c.f. xmlValidateAttributeDecl() 5592 * 5593 * [ VC: Fixed Attribute Default ] 5594 * if an attribute has a default value declared with the #FIXED 5595 * keyword, instances of that attribute must match the default value. 5596 * 5597 * [ WFC: No < in Attribute Values ] 5598 * handled in xmlParseAttValue() 5599 * 5600 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5601 * or XML_ATTRIBUTE_FIXED. 
5602 */ 5603 5604 int 5605 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5606 int val; 5607 xmlChar *ret; 5608 5609 *value = NULL; 5610 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5611 SKIP(9); 5612 return(XML_ATTRIBUTE_REQUIRED); 5613 } 5614 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5615 SKIP(8); 5616 return(XML_ATTRIBUTE_IMPLIED); 5617 } 5618 val = XML_ATTRIBUTE_NONE; 5619 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5620 SKIP(6); 5621 val = XML_ATTRIBUTE_FIXED; 5622 if (SKIP_BLANKS == 0) { 5623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5624 "Space required after '#FIXED'\n"); 5625 } 5626 } 5627 ret = xmlParseAttValue(ctxt); 5628 ctxt->instate = XML_PARSER_DTD; 5629 if (ret == NULL) { 5630 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5631 "Attribute default value declaration error\n"); 5632 } else 5633 *value = ret; 5634 return(val); 5635 } 5636 5637 /** 5638 * xmlParseNotationType: 5639 * @ctxt: an XML parser context 5640 * 5641 * parse an Notation attribute type. 5642 * 5643 * Note: the leading 'NOTATION' S part has already being parsed... 5644 * 5645 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5646 * 5647 * [ VC: Notation Attributes ] 5648 * Values of this type must match one of the notation names included 5649 * in the declaration; all notation names in the declaration must be declared. 
5650 * 5651 * Returns: the notation attribute tree built while parsing 5652 */ 5653 5654 xmlEnumerationPtr 5655 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5656 const xmlChar *name; 5657 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5658 5659 if (RAW != '(') { 5660 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5661 return(NULL); 5662 } 5663 SHRINK; 5664 do { 5665 NEXT; 5666 SKIP_BLANKS; 5667 name = xmlParseName(ctxt); 5668 if (name == NULL) { 5669 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5670 "Name expected in NOTATION declaration\n"); 5671 xmlFreeEnumeration(ret); 5672 return(NULL); 5673 } 5674 tmp = ret; 5675 while (tmp != NULL) { 5676 if (xmlStrEqual(name, tmp->name)) { 5677 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5678 "standalone: attribute notation value token %s duplicated\n", 5679 name, NULL); 5680 if (!xmlDictOwns(ctxt->dict, name)) 5681 xmlFree((xmlChar *) name); 5682 break; 5683 } 5684 tmp = tmp->next; 5685 } 5686 if (tmp == NULL) { 5687 cur = xmlCreateEnumeration(name); 5688 if (cur == NULL) { 5689 xmlFreeEnumeration(ret); 5690 return(NULL); 5691 } 5692 if (last == NULL) ret = last = cur; 5693 else { 5694 last->next = cur; 5695 last = cur; 5696 } 5697 } 5698 SKIP_BLANKS; 5699 } while (RAW == '|'); 5700 if (RAW != ')') { 5701 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5702 xmlFreeEnumeration(ret); 5703 return(NULL); 5704 } 5705 NEXT; 5706 return(ret); 5707 } 5708 5709 /** 5710 * xmlParseEnumerationType: 5711 * @ctxt: an XML parser context 5712 * 5713 * parse an Enumeration attribute type. 5714 * 5715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5716 * 5717 * [ VC: Enumeration ] 5718 * Values of this type must match one of the Nmtoken tokens in 5719 * the declaration 5720 * 5721 * Returns: the enumeration attribute tree built while parsing 5722 */ 5723 5724 xmlEnumerationPtr 5725 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5726 xmlChar *name; 5727 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5728 5729 if (RAW != '(') { 5730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5731 return(NULL); 5732 } 5733 SHRINK; 5734 do { 5735 NEXT; 5736 SKIP_BLANKS; 5737 name = xmlParseNmtoken(ctxt); 5738 if (name == NULL) { 5739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5740 return(ret); 5741 } 5742 tmp = ret; 5743 while (tmp != NULL) { 5744 if (xmlStrEqual(name, tmp->name)) { 5745 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5746 "standalone: attribute enumeration value token %s duplicated\n", 5747 name, NULL); 5748 if (!xmlDictOwns(ctxt->dict, name)) 5749 xmlFree(name); 5750 break; 5751 } 5752 tmp = tmp->next; 5753 } 5754 if (tmp == NULL) { 5755 cur = xmlCreateEnumeration(name); 5756 if (!xmlDictOwns(ctxt->dict, name)) 5757 xmlFree(name); 5758 if (cur == NULL) { 5759 xmlFreeEnumeration(ret); 5760 return(NULL); 5761 } 5762 if (last == NULL) ret = last = cur; 5763 else { 5764 last->next = cur; 5765 last = cur; 5766 } 5767 } 5768 SKIP_BLANKS; 5769 } while (RAW == '|'); 5770 if (RAW != ')') { 5771 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5772 return(ret); 5773 } 5774 NEXT; 5775 return(ret); 5776 } 5777 5778 /** 5779 * xmlParseEnumeratedType: 5780 * @ctxt: an XML parser context 5781 * @tree: the enumeration tree built while parsing 5782 * 5783 * parse an Enumerated attribute type. 5784 * 5785 * [57] EnumeratedType ::= NotationType | Enumeration 5786 * 5787 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5788 * 5789 * 5790 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5791 */ 5792 5793 int 5794 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5795 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5796 SKIP(8); 5797 if (SKIP_BLANKS == 0) { 5798 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5799 "Space required after 'NOTATION'\n"); 5800 return(0); 5801 } 5802 *tree = xmlParseNotationType(ctxt); 5803 if (*tree == NULL) return(0); 5804 return(XML_ATTRIBUTE_NOTATION); 5805 } 5806 *tree = xmlParseEnumerationType(ctxt); 5807 if (*tree == NULL) return(0); 5808 return(XML_ATTRIBUTE_ENUMERATION); 5809 } 5810 5811 /** 5812 * xmlParseAttributeType: 5813 * @ctxt: an XML parser context 5814 * @tree: the enumeration tree built while parsing 5815 * 5816 * parse the Attribute list def for an element 5817 * 5818 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5819 * 5820 * [55] StringType ::= 'CDATA' 5821 * 5822 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5823 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5824 * 5825 * Validity constraints for attribute values syntax are checked in 5826 * xmlValidateAttributeValue() 5827 * 5828 * [ VC: ID ] 5829 * Values of type ID must match the Name production. A name must not 5830 * appear more than once in an XML document as a value of this type; 5831 * i.e., ID values must uniquely identify the elements which bear them. 5832 * 5833 * [ VC: One ID per Element Type ] 5834 * No element type may have more than one ID attribute specified. 5835 * 5836 * [ VC: ID Attribute Default ] 5837 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5838 * 5839 * [ VC: IDREF ] 5840 * Values of type IDREF must match the Name production, and values 5841 * of type IDREFS must match Names; each IDREF Name must match the value 5842 * of an ID attribute on some element in the XML document; i.e. IDREF 5843 * values must match the value of some ID attribute. 
5844 * 5845 * [ VC: Entity Name ] 5846 * Values of type ENTITY must match the Name production, values 5847 * of type ENTITIES must match Names; each Entity Name must match the 5848 * name of an unparsed entity declared in the DTD. 5849 * 5850 * [ VC: Name Token ] 5851 * Values of type NMTOKEN must match the Nmtoken production; values 5852 * of type NMTOKENS must match Nmtokens. 5853 * 5854 * Returns the attribute type 5855 */ 5856 int 5857 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5858 SHRINK; 5859 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5860 SKIP(5); 5861 return(XML_ATTRIBUTE_CDATA); 5862 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5863 SKIP(6); 5864 return(XML_ATTRIBUTE_IDREFS); 5865 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5866 SKIP(5); 5867 return(XML_ATTRIBUTE_IDREF); 5868 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5869 SKIP(2); 5870 return(XML_ATTRIBUTE_ID); 5871 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5872 SKIP(6); 5873 return(XML_ATTRIBUTE_ENTITY); 5874 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5875 SKIP(8); 5876 return(XML_ATTRIBUTE_ENTITIES); 5877 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5878 SKIP(8); 5879 return(XML_ATTRIBUTE_NMTOKENS); 5880 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5881 SKIP(7); 5882 return(XML_ATTRIBUTE_NMTOKEN); 5883 } 5884 return(xmlParseEnumeratedType(ctxt, tree)); 5885 } 5886 5887 /** 5888 * xmlParseAttributeListDecl: 5889 * @ctxt: an XML parser context 5890 * 5891 * : parse the Attribute list def for an element 5892 * 5893 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5894 * 5895 * [53] AttDef ::= S Name S AttType S DefaultDecl 5896 * 5897 */ 5898 void 5899 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5900 const xmlChar *elemName; 5901 const xmlChar *attrName; 5902 xmlEnumerationPtr tree; 5903 5904 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5905 int inputid = ctxt->input->id; 5906 5907 SKIP(9); 5908 if (SKIP_BLANKS == 0) { 5909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5910 "Space required after '<!ATTLIST'\n"); 5911 } 5912 elemName = xmlParseName(ctxt); 5913 if (elemName == NULL) { 5914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5915 "ATTLIST: no name for Element\n"); 5916 return; 5917 } 5918 SKIP_BLANKS; 5919 GROW; 5920 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 5921 int type; 5922 int def; 5923 xmlChar *defaultValue = NULL; 5924 5925 GROW; 5926 tree = NULL; 5927 attrName = xmlParseName(ctxt); 5928 if (attrName == NULL) { 5929 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5930 "ATTLIST: no name for Attribute\n"); 5931 break; 5932 } 5933 GROW; 5934 if (SKIP_BLANKS == 0) { 5935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5936 "Space required after the attribute name\n"); 5937 break; 5938 } 5939 5940 type = xmlParseAttributeType(ctxt, &tree); 5941 if (type <= 0) { 5942 break; 5943 } 5944 5945 GROW; 5946 if (SKIP_BLANKS == 0) { 5947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5948 "Space required after the attribute type\n"); 5949 if (tree != NULL) 5950 xmlFreeEnumeration(tree); 5951 break; 5952 } 5953 5954 def = xmlParseDefaultDecl(ctxt, &defaultValue); 5955 if (def <= 0) { 5956 if (defaultValue != NULL) 5957 xmlFree(defaultValue); 5958 if (tree != NULL) 5959 xmlFreeEnumeration(tree); 5960 break; 5961 } 5962 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 5963 xmlAttrNormalizeSpace(defaultValue, defaultValue); 5964 5965 GROW; 5966 if (RAW != '>') { 5967 if (SKIP_BLANKS == 0) { 5968 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5969 "Space required after the 
attribute default value\n"); 5970 if (defaultValue != NULL) 5971 xmlFree(defaultValue); 5972 if (tree != NULL) 5973 xmlFreeEnumeration(tree); 5974 break; 5975 } 5976 } 5977 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5978 (ctxt->sax->attributeDecl != NULL)) 5979 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 5980 type, def, defaultValue, tree); 5981 else if (tree != NULL) 5982 xmlFreeEnumeration(tree); 5983 5984 if ((ctxt->sax2) && (defaultValue != NULL) && 5985 (def != XML_ATTRIBUTE_IMPLIED) && 5986 (def != XML_ATTRIBUTE_REQUIRED)) { 5987 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 5988 } 5989 if (ctxt->sax2) { 5990 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 5991 } 5992 if (defaultValue != NULL) 5993 xmlFree(defaultValue); 5994 GROW; 5995 } 5996 if (RAW == '>') { 5997 if (inputid != ctxt->input->id) { 5998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5999 "Attribute list declaration doesn't start and" 6000 " stop in the same entity\n"); 6001 } 6002 NEXT; 6003 } 6004 } 6005 } 6006 6007 /** 6008 * xmlParseElementMixedContentDecl: 6009 * @ctxt: an XML parser context 6010 * @inputchk: the input used for the current entity, needed for boundary checks 6011 * 6012 * parse the declaration for a Mixed Element content 6013 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6014 * 6015 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6016 * '(' S? '#PCDATA' S? ')' 6017 * 6018 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6019 * 6020 * [ VC: No Duplicate Types ] 6021 * The same name must not appear more than once in a single 6022 * mixed-content declaration. 
6023 * 6024 * returns: the list of the xmlElementContentPtr describing the element choices 6025 */ 6026 xmlElementContentPtr 6027 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6028 xmlElementContentPtr ret = NULL, cur = NULL, n; 6029 const xmlChar *elem = NULL; 6030 6031 GROW; 6032 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6033 SKIP(7); 6034 SKIP_BLANKS; 6035 SHRINK; 6036 if (RAW == ')') { 6037 if (ctxt->input->id != inputchk) { 6038 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6039 "Element content declaration doesn't start and" 6040 " stop in the same entity\n"); 6041 } 6042 NEXT; 6043 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6044 if (ret == NULL) 6045 return(NULL); 6046 if (RAW == '*') { 6047 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6048 NEXT; 6049 } 6050 return(ret); 6051 } 6052 if ((RAW == '(') || (RAW == '|')) { 6053 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6054 if (ret == NULL) return(NULL); 6055 } 6056 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6057 NEXT; 6058 if (elem == NULL) { 6059 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6060 if (ret == NULL) return(NULL); 6061 ret->c1 = cur; 6062 if (cur != NULL) 6063 cur->parent = ret; 6064 cur = ret; 6065 } else { 6066 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6067 if (n == NULL) return(NULL); 6068 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6069 if (n->c1 != NULL) 6070 n->c1->parent = n; 6071 cur->c2 = n; 6072 if (n != NULL) 6073 n->parent = cur; 6074 cur = n; 6075 } 6076 SKIP_BLANKS; 6077 elem = xmlParseName(ctxt); 6078 if (elem == NULL) { 6079 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6080 "xmlParseElementMixedContentDecl : Name expected\n"); 6081 xmlFreeDocElementContent(ctxt->myDoc, ret); 6082 return(NULL); 6083 } 6084 SKIP_BLANKS; 6085 GROW; 6086 } 6087 if ((RAW == ')') && 
(NXT(1) == '*')) { 6088 if (elem != NULL) { 6089 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6090 XML_ELEMENT_CONTENT_ELEMENT); 6091 if (cur->c2 != NULL) 6092 cur->c2->parent = cur; 6093 } 6094 if (ret != NULL) 6095 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6096 if (ctxt->input->id != inputchk) { 6097 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6098 "Element content declaration doesn't start and" 6099 " stop in the same entity\n"); 6100 } 6101 SKIP(2); 6102 } else { 6103 xmlFreeDocElementContent(ctxt->myDoc, ret); 6104 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6105 return(NULL); 6106 } 6107 6108 } else { 6109 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6110 } 6111 return(ret); 6112 } 6113 6114 /** 6115 * xmlParseElementChildrenContentDeclPriv: 6116 * @ctxt: an XML parser context 6117 * @inputchk: the input used for the current entity, needed for boundary checks 6118 * @depth: the level of recursion 6119 * 6120 * parse the declaration for a Mixed Element content 6121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6122 * 6123 * 6124 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6125 * 6126 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6127 * 6128 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6129 * 6130 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6131 * 6132 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6133 * TODO Parameter-entity replacement text must be properly nested 6134 * with parenthesized groups. That is to say, if either of the 6135 * opening or closing parentheses in a choice, seq, or Mixed 6136 * construct is contained in the replacement text for a parameter 6137 * entity, both must be contained in the same replacement text. 
For 6138 * interoperability, if a parameter-entity reference appears in a 6139 * choice, seq, or Mixed construct, its replacement text should not 6140 * be empty, and neither the first nor last non-blank character of 6141 * the replacement text should be a connector (| or ,). 6142 * 6143 * Returns the tree of xmlElementContentPtr describing the element 6144 * hierarchy. 6145 */ 6146 static xmlElementContentPtr 6147 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6148 int depth) { 6149 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6150 const xmlChar *elem; 6151 xmlChar type = 0; 6152 6153 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6154 (depth > 2048)) { 6155 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6156 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6157 depth); 6158 return(NULL); 6159 } 6160 SKIP_BLANKS; 6161 GROW; 6162 if (RAW == '(') { 6163 int inputid = ctxt->input->id; 6164 6165 /* Recurse on first child */ 6166 NEXT; 6167 SKIP_BLANKS; 6168 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6169 depth + 1); 6170 SKIP_BLANKS; 6171 GROW; 6172 } else { 6173 elem = xmlParseName(ctxt); 6174 if (elem == NULL) { 6175 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6176 return(NULL); 6177 } 6178 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6179 if (cur == NULL) { 6180 xmlErrMemory(ctxt, NULL); 6181 return(NULL); 6182 } 6183 GROW; 6184 if (RAW == '?') { 6185 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6186 NEXT; 6187 } else if (RAW == '*') { 6188 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6189 NEXT; 6190 } else if (RAW == '+') { 6191 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 6192 NEXT; 6193 } else { 6194 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6195 } 6196 GROW; 6197 } 6198 SKIP_BLANKS; 6199 SHRINK; 6200 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6201 /* 6202 * Each loop we parse one 
separator and one element. 6203 */ 6204 if (RAW == ',') { 6205 if (type == 0) type = CUR; 6206 6207 /* 6208 * Detect "Name | Name , Name" error 6209 */ 6210 else if (type != CUR) { 6211 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6212 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6213 type); 6214 if ((last != NULL) && (last != ret)) 6215 xmlFreeDocElementContent(ctxt->myDoc, last); 6216 if (ret != NULL) 6217 xmlFreeDocElementContent(ctxt->myDoc, ret); 6218 return(NULL); 6219 } 6220 NEXT; 6221 6222 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6223 if (op == NULL) { 6224 if ((last != NULL) && (last != ret)) 6225 xmlFreeDocElementContent(ctxt->myDoc, last); 6226 xmlFreeDocElementContent(ctxt->myDoc, ret); 6227 return(NULL); 6228 } 6229 if (last == NULL) { 6230 op->c1 = ret; 6231 if (ret != NULL) 6232 ret->parent = op; 6233 ret = cur = op; 6234 } else { 6235 cur->c2 = op; 6236 if (op != NULL) 6237 op->parent = cur; 6238 op->c1 = last; 6239 if (last != NULL) 6240 last->parent = op; 6241 cur =op; 6242 last = NULL; 6243 } 6244 } else if (RAW == '|') { 6245 if (type == 0) type = CUR; 6246 6247 /* 6248 * Detect "Name , Name | Name" error 6249 */ 6250 else if (type != CUR) { 6251 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6252 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6253 type); 6254 if ((last != NULL) && (last != ret)) 6255 xmlFreeDocElementContent(ctxt->myDoc, last); 6256 if (ret != NULL) 6257 xmlFreeDocElementContent(ctxt->myDoc, ret); 6258 return(NULL); 6259 } 6260 NEXT; 6261 6262 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6263 if (op == NULL) { 6264 if ((last != NULL) && (last != ret)) 6265 xmlFreeDocElementContent(ctxt->myDoc, last); 6266 if (ret != NULL) 6267 xmlFreeDocElementContent(ctxt->myDoc, ret); 6268 return(NULL); 6269 } 6270 if (last == NULL) { 6271 op->c1 = ret; 6272 if (ret != NULL) 6273 ret->parent = op; 6274 ret = cur = op; 6275 } else { 6276 cur->c2 = op; 
6277 if (op != NULL) 6278 op->parent = cur; 6279 op->c1 = last; 6280 if (last != NULL) 6281 last->parent = op; 6282 cur =op; 6283 last = NULL; 6284 } 6285 } else { 6286 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6287 if ((last != NULL) && (last != ret)) 6288 xmlFreeDocElementContent(ctxt->myDoc, last); 6289 if (ret != NULL) 6290 xmlFreeDocElementContent(ctxt->myDoc, ret); 6291 return(NULL); 6292 } 6293 GROW; 6294 SKIP_BLANKS; 6295 GROW; 6296 if (RAW == '(') { 6297 int inputid = ctxt->input->id; 6298 /* Recurse on second child */ 6299 NEXT; 6300 SKIP_BLANKS; 6301 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6302 depth + 1); 6303 SKIP_BLANKS; 6304 } else { 6305 elem = xmlParseName(ctxt); 6306 if (elem == NULL) { 6307 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6308 if (ret != NULL) 6309 xmlFreeDocElementContent(ctxt->myDoc, ret); 6310 return(NULL); 6311 } 6312 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6313 if (last == NULL) { 6314 if (ret != NULL) 6315 xmlFreeDocElementContent(ctxt->myDoc, ret); 6316 return(NULL); 6317 } 6318 if (RAW == '?') { 6319 last->ocur = XML_ELEMENT_CONTENT_OPT; 6320 NEXT; 6321 } else if (RAW == '*') { 6322 last->ocur = XML_ELEMENT_CONTENT_MULT; 6323 NEXT; 6324 } else if (RAW == '+') { 6325 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6326 NEXT; 6327 } else { 6328 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6329 } 6330 } 6331 SKIP_BLANKS; 6332 GROW; 6333 } 6334 if ((cur != NULL) && (last != NULL)) { 6335 cur->c2 = last; 6336 if (last != NULL) 6337 last->parent = cur; 6338 } 6339 if (ctxt->input->id != inputchk) { 6340 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6341 "Element content declaration doesn't start and stop in" 6342 " the same entity\n"); 6343 } 6344 NEXT; 6345 if (RAW == '?') { 6346 if (ret != NULL) { 6347 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6348 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6349 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6350 else 6351 
ret->ocur = XML_ELEMENT_CONTENT_OPT; 6352 } 6353 NEXT; 6354 } else if (RAW == '*') { 6355 if (ret != NULL) { 6356 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6357 cur = ret; 6358 /* 6359 * Some normalization: 6360 * (a | b* | c?)* == (a | b | c)* 6361 */ 6362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6363 if ((cur->c1 != NULL) && 6364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6367 if ((cur->c2 != NULL) && 6368 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6369 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6370 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6371 cur = cur->c2; 6372 } 6373 } 6374 NEXT; 6375 } else if (RAW == '+') { 6376 if (ret != NULL) { 6377 int found = 0; 6378 6379 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6380 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6381 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6382 else 6383 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6384 /* 6385 * Some normalization: 6386 * (a | b*)+ == (a | b)* 6387 * (a | b?)+ == (a | b)* 6388 */ 6389 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6390 if ((cur->c1 != NULL) && 6391 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6392 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6393 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6394 found = 1; 6395 } 6396 if ((cur->c2 != NULL) && 6397 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6398 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6399 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6400 found = 1; 6401 } 6402 cur = cur->c2; 6403 } 6404 if (found) 6405 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6406 } 6407 NEXT; 6408 } 6409 return(ret); 6410 } 6411 6412 /** 6413 * xmlParseElementChildrenContentDecl: 6414 * @ctxt: an XML parser context 6415 * @inputchk: the input used for the current entity, needed for boundary checks 6416 * 6417 * parse the declaration for a Mixed Element content 6418 * The leading '(' and spaces have been 
skipped in xmlParseElementContentDecl 6419 * 6420 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6421 * 6422 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6423 * 6424 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6425 * 6426 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6427 * 6428 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6429 * TODO Parameter-entity replacement text must be properly nested 6430 * with parenthesized groups. That is to say, if either of the 6431 * opening or closing parentheses in a choice, seq, or Mixed 6432 * construct is contained in the replacement text for a parameter 6433 * entity, both must be contained in the same replacement text. For 6434 * interoperability, if a parameter-entity reference appears in a 6435 * choice, seq, or Mixed construct, its replacement text should not 6436 * be empty, and neither the first nor last non-blank character of 6437 * the replacement text should be a connector (| or ,). 6438 * 6439 * Returns the tree of xmlElementContentPtr describing the element 6440 * hierarchy. 6441 */ 6442 xmlElementContentPtr 6443 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6444 /* stub left for API/ABI compat */ 6445 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6446 } 6447 6448 /** 6449 * xmlParseElementContentDecl: 6450 * @ctxt: an XML parser context 6451 * @name: the name of the element being defined. 
6452 * @result: the Element Content pointer will be stored here if any 6453 * 6454 * parse the declaration for an Element content either Mixed or Children, 6455 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6456 * 6457 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6458 * 6459 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6460 */ 6461 6462 int 6463 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6464 xmlElementContentPtr *result) { 6465 6466 xmlElementContentPtr tree = NULL; 6467 int inputid = ctxt->input->id; 6468 int res; 6469 6470 *result = NULL; 6471 6472 if (RAW != '(') { 6473 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6474 "xmlParseElementContentDecl : %s '(' expected\n", name); 6475 return(-1); 6476 } 6477 NEXT; 6478 GROW; 6479 if (ctxt->instate == XML_PARSER_EOF) 6480 return(-1); 6481 SKIP_BLANKS; 6482 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6483 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6484 res = XML_ELEMENT_TYPE_MIXED; 6485 } else { 6486 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6487 res = XML_ELEMENT_TYPE_ELEMENT; 6488 } 6489 SKIP_BLANKS; 6490 *result = tree; 6491 return(res); 6492 } 6493 6494 /** 6495 * xmlParseElementDecl: 6496 * @ctxt: an XML parser context 6497 * 6498 * parse an Element declaration. 6499 * 6500 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6501 * 6502 * [ VC: Unique Element Type Declaration ] 6503 * No element type may be declared more than once 6504 * 6505 * Returns the type of the element, or -1 in case of error 6506 */ 6507 int 6508 xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6509 const xmlChar *name; 6510 int ret = -1; 6511 xmlElementContentPtr content = NULL; 6512 6513 /* GROW; done in the caller */ 6514 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6515 int inputid = ctxt->input->id; 6516 6517 SKIP(9); 6518 if (SKIP_BLANKS == 0) { 6519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6520 "Space required after 'ELEMENT'\n"); 6521 return(-1); 6522 } 6523 name = xmlParseName(ctxt); 6524 if (name == NULL) { 6525 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6526 "xmlParseElementDecl: no name for Element\n"); 6527 return(-1); 6528 } 6529 if (SKIP_BLANKS == 0) { 6530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6531 "Space required after the element name\n"); 6532 } 6533 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6534 SKIP(5); 6535 /* 6536 * Element must always be empty. 6537 */ 6538 ret = XML_ELEMENT_TYPE_EMPTY; 6539 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6540 (NXT(2) == 'Y')) { 6541 SKIP(3); 6542 /* 6543 * Element is a generic container. 6544 */ 6545 ret = XML_ELEMENT_TYPE_ANY; 6546 } else if (RAW == '(') { 6547 ret = xmlParseElementContentDecl(ctxt, name, &content); 6548 } else { 6549 /* 6550 * [ WFC: PEs in Internal Subset ] error handling. 
6551 */ 6552 if ((RAW == '%') && (ctxt->external == 0) && 6553 (ctxt->inputNr == 1)) { 6554 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6555 "PEReference: forbidden within markup decl in internal subset\n"); 6556 } else { 6557 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6558 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6559 } 6560 return(-1); 6561 } 6562 6563 SKIP_BLANKS; 6564 6565 if (RAW != '>') { 6566 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6567 if (content != NULL) { 6568 xmlFreeDocElementContent(ctxt->myDoc, content); 6569 } 6570 } else { 6571 if (inputid != ctxt->input->id) { 6572 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6573 "Element declaration doesn't start and stop in" 6574 " the same entity\n"); 6575 } 6576 6577 NEXT; 6578 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6579 (ctxt->sax->elementDecl != NULL)) { 6580 if (content != NULL) 6581 content->parent = NULL; 6582 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6583 content); 6584 if ((content != NULL) && (content->parent == NULL)) { 6585 /* 6586 * this is a trick: if xmlAddElementDecl is called, 6587 * instead of copying the full tree it is plugged directly 6588 * if called from the parser. Avoid duplicating the 6589 * interfaces or change the API/ABI 6590 */ 6591 xmlFreeDocElementContent(ctxt->myDoc, content); 6592 } 6593 } else if (content != NULL) { 6594 xmlFreeDocElementContent(ctxt->myDoc, content); 6595 } 6596 } 6597 } 6598 return(ret); 6599 } 6600 6601 /** 6602 * xmlParseConditionalSections 6603 * @ctxt: an XML parser context 6604 * 6605 * [61] conditionalSect ::= includeSect | ignoreSect 6606 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6607 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6608 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6609 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6610 */ 6611 6612 static void 6613 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6614 int id = ctxt->input->id; 6615 6616 SKIP(3); 6617 SKIP_BLANKS; 6618 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6619 SKIP(7); 6620 SKIP_BLANKS; 6621 if (RAW != '[') { 6622 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6623 xmlHaltParser(ctxt); 6624 return; 6625 } else { 6626 if (ctxt->input->id != id) { 6627 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6628 "All markup of the conditional section is not" 6629 " in the same entity\n"); 6630 } 6631 NEXT; 6632 } 6633 if (xmlParserDebugEntities) { 6634 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6635 xmlGenericError(xmlGenericErrorContext, 6636 "%s(%d): ", ctxt->input->filename, 6637 ctxt->input->line); 6638 xmlGenericError(xmlGenericErrorContext, 6639 "Entering INCLUDE Conditional Section\n"); 6640 } 6641 6642 SKIP_BLANKS; 6643 GROW; 6644 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6645 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6646 const xmlChar *check = CUR_PTR; 6647 unsigned int cons = ctxt->input->consumed; 6648 6649 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6650 xmlParseConditionalSections(ctxt); 6651 } else 6652 xmlParseMarkupDecl(ctxt); 6653 6654 SKIP_BLANKS; 6655 GROW; 6656 6657 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6658 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6659 xmlHaltParser(ctxt); 6660 break; 6661 } 6662 } 6663 if (xmlParserDebugEntities) { 6664 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6665 xmlGenericError(xmlGenericErrorContext, 6666 "%s(%d): ", ctxt->input->filename, 6667 ctxt->input->line); 6668 xmlGenericError(xmlGenericErrorContext, 6669 "Leaving INCLUDE Conditional Section\n"); 6670 } 6671 6672 } else if 
(CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6673 int state; 6674 xmlParserInputState instate; 6675 int depth = 0; 6676 6677 SKIP(6); 6678 SKIP_BLANKS; 6679 if (RAW != '[') { 6680 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6681 xmlHaltParser(ctxt); 6682 return; 6683 } else { 6684 if (ctxt->input->id != id) { 6685 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6686 "All markup of the conditional section is not" 6687 " in the same entity\n"); 6688 } 6689 NEXT; 6690 } 6691 if (xmlParserDebugEntities) { 6692 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6693 xmlGenericError(xmlGenericErrorContext, 6694 "%s(%d): ", ctxt->input->filename, 6695 ctxt->input->line); 6696 xmlGenericError(xmlGenericErrorContext, 6697 "Entering IGNORE Conditional Section\n"); 6698 } 6699 6700 /* 6701 * Parse up to the end of the conditional section 6702 * But disable SAX event generating DTD building in the meantime 6703 */ 6704 state = ctxt->disableSAX; 6705 instate = ctxt->instate; 6706 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6707 ctxt->instate = XML_PARSER_IGNORE; 6708 6709 while (((depth >= 0) && (RAW != 0)) && 6710 (ctxt->instate != XML_PARSER_EOF)) { 6711 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6712 depth++; 6713 SKIP(3); 6714 continue; 6715 } 6716 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6717 if (--depth >= 0) SKIP(3); 6718 continue; 6719 } 6720 NEXT; 6721 continue; 6722 } 6723 6724 ctxt->disableSAX = state; 6725 ctxt->instate = instate; 6726 6727 if (xmlParserDebugEntities) { 6728 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6729 xmlGenericError(xmlGenericErrorContext, 6730 "%s(%d): ", ctxt->input->filename, 6731 ctxt->input->line); 6732 xmlGenericError(xmlGenericErrorContext, 6733 "Leaving IGNORE Conditional Section\n"); 6734 } 6735 6736 } else { 6737 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6738 xmlHaltParser(ctxt); 6739 return; 6740 } 6741 6742 if (RAW == 0) 6743 SHRINK; 6744 6745 if (RAW == 0) { 
6746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6747 } else { 6748 if (ctxt->input->id != id) { 6749 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6750 "All markup of the conditional section is not in" 6751 " the same entity\n"); 6752 } 6753 if ((ctxt-> instate != XML_PARSER_EOF) && 6754 ((ctxt->input->cur + 3) <= ctxt->input->end)) 6755 SKIP(3); 6756 } 6757 } 6758 6759 /** 6760 * xmlParseMarkupDecl: 6761 * @ctxt: an XML parser context 6762 * 6763 * parse Markup declarations 6764 * 6765 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6766 * NotationDecl | PI | Comment 6767 * 6768 * [ VC: Proper Declaration/PE Nesting ] 6769 * Parameter-entity replacement text must be properly nested with 6770 * markup declarations. That is to say, if either the first character 6771 * or the last character of a markup declaration (markupdecl above) is 6772 * contained in the replacement text for a parameter-entity reference, 6773 * both must be contained in the same replacement text. 6774 * 6775 * [ WFC: PEs in Internal Subset ] 6776 * In the internal DTD subset, parameter-entity references can occur 6777 * only where markup declarations can occur, not within markup declarations. 6778 * (This does not apply to references that occur in external parameter 6779 * entities or to the external subset.) 
6780 */ 6781 void 6782 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6783 GROW; 6784 if (CUR == '<') { 6785 if (NXT(1) == '!') { 6786 switch (NXT(2)) { 6787 case 'E': 6788 if (NXT(3) == 'L') 6789 xmlParseElementDecl(ctxt); 6790 else if (NXT(3) == 'N') 6791 xmlParseEntityDecl(ctxt); 6792 break; 6793 case 'A': 6794 xmlParseAttributeListDecl(ctxt); 6795 break; 6796 case 'N': 6797 xmlParseNotationDecl(ctxt); 6798 break; 6799 case '-': 6800 xmlParseComment(ctxt); 6801 break; 6802 default: 6803 /* there is an error but it will be detected later */ 6804 break; 6805 } 6806 } else if (NXT(1) == '?') { 6807 xmlParsePI(ctxt); 6808 } 6809 } 6810 6811 /* 6812 * detect requirement to exit there and act accordingly 6813 * and avoid having instate overriden later on 6814 */ 6815 if (ctxt->instate == XML_PARSER_EOF) 6816 return; 6817 6818 /* 6819 * Conditional sections are allowed from entities included 6820 * by PE References in the internal subset. 6821 */ 6822 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6824 xmlParseConditionalSections(ctxt); 6825 } 6826 } 6827 6828 ctxt->instate = XML_PARSER_DTD; 6829 } 6830 6831 /** 6832 * xmlParseTextDecl: 6833 * @ctxt: an XML parser context 6834 * 6835 * parse an XML declaration header for external entities 6836 * 6837 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6838 */ 6839 6840 void 6841 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6842 xmlChar *version; 6843 const xmlChar *encoding; 6844 6845 /* 6846 * We know that '<?xml' is here. 6847 */ 6848 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6849 SKIP(5); 6850 } else { 6851 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6852 return; 6853 } 6854 6855 if (SKIP_BLANKS == 0) { 6856 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6857 "Space needed after '<?xml'\n"); 6858 } 6859 6860 /* 6861 * We may have the VersionInfo here. 
6862 */ 6863 version = xmlParseVersionInfo(ctxt); 6864 if (version == NULL) 6865 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6866 else { 6867 if (SKIP_BLANKS == 0) { 6868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6869 "Space needed here\n"); 6870 } 6871 } 6872 ctxt->input->version = version; 6873 6874 /* 6875 * We must have the encoding declaration 6876 */ 6877 encoding = xmlParseEncodingDecl(ctxt); 6878 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6879 /* 6880 * The XML REC instructs us to stop parsing right here 6881 */ 6882 return; 6883 } 6884 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6885 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6886 "Missing encoding in text declaration\n"); 6887 } 6888 6889 SKIP_BLANKS; 6890 if ((RAW == '?') && (NXT(1) == '>')) { 6891 SKIP(2); 6892 } else if (RAW == '>') { 6893 /* Deprecated old WD ... */ 6894 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6895 NEXT; 6896 } else { 6897 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6898 MOVETO_ENDTAG(CUR_PTR); 6899 NEXT; 6900 } 6901 } 6902 6903 /** 6904 * xmlParseExternalSubset: 6905 * @ctxt: an XML parser context 6906 * @ExternalID: the external identifier 6907 * @SystemID: the system identifier (or URL) 6908 * 6909 * parse Markup declarations from an external subset 6910 * 6911 * [30] extSubset ::= textDecl? 
extSubsetDecl 6912 * 6913 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6914 */ 6915 void 6916 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6917 const xmlChar *SystemID) { 6918 xmlDetectSAX2(ctxt); 6919 GROW; 6920 6921 if ((ctxt->encoding == NULL) && 6922 (ctxt->input->end - ctxt->input->cur >= 4)) { 6923 xmlChar start[4]; 6924 xmlCharEncoding enc; 6925 6926 start[0] = RAW; 6927 start[1] = NXT(1); 6928 start[2] = NXT(2); 6929 start[3] = NXT(3); 6930 enc = xmlDetectCharEncoding(start, 4); 6931 if (enc != XML_CHAR_ENCODING_NONE) 6932 xmlSwitchEncoding(ctxt, enc); 6933 } 6934 6935 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6936 xmlParseTextDecl(ctxt); 6937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6938 /* 6939 * The XML REC instructs us to stop parsing right here 6940 */ 6941 xmlHaltParser(ctxt); 6942 return; 6943 } 6944 } 6945 if (ctxt->myDoc == NULL) { 6946 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6947 if (ctxt->myDoc == NULL) { 6948 xmlErrMemory(ctxt, "New Doc failed"); 6949 return; 6950 } 6951 ctxt->myDoc->properties = XML_DOC_INTERNAL; 6952 } 6953 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6954 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6955 6956 ctxt->instate = XML_PARSER_DTD; 6957 ctxt->external = 1; 6958 SKIP_BLANKS; 6959 while (((RAW == '<') && (NXT(1) == '?')) || 6960 ((RAW == '<') && (NXT(1) == '!')) || 6961 (RAW == '%')) { 6962 const xmlChar *check = CUR_PTR; 6963 unsigned int cons = ctxt->input->consumed; 6964 6965 GROW; 6966 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6967 xmlParseConditionalSections(ctxt); 6968 } else 6969 xmlParseMarkupDecl(ctxt); 6970 SKIP_BLANKS; 6971 6972 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6973 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6974 break; 6975 } 6976 } 6977 6978 if (RAW != 0) { 6979 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6980 } 6981 6982 } 6983 
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content, depending on the SAX
 * interface, this may end-up in a call to character() if this is a
 * CharRef, a predefined entity, if there is no reference() callback.
 * or if the parser was asked to switch to that mode.
 *
 * Does nothing when the current character is not '&'.
 *
 * Outline of the processing done below:
 *  - a CharRef is delivered through the characters() callback, or through
 *    reference() when the context charset is not UTF-8 and the value does
 *    not fit on 8 bits;
 *  - the content of a predefined entity is passed to characters();
 *  - for other entities the content is parsed on first use, the resulting
 *    node list is attached to the entity, and then either reference() is
 *    called or the entity children are copied/moved under ctxt->node.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;            /* ent->checked as found before any parsing */
    xmlNodePtr list = NULL;     /* nodes produced by parsing the entity */
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
	return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
	int i = 0;
	xmlChar out[10];
	/* read before xmlParseCharRef() runs, tells hex from decimal below */
	int hex = NXT(2);
	int value = xmlParseCharRef(ctxt);

	if (value == 0)
	    return;
	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
	    /*
	     * So we are using non-UTF-8 buffers
	     * Check that the char fit on 8bits, if not
	     * generate a CharRef.
	     */
	    if (value <= 0xFF) {
		out[0] = value;
		out[1] = 0;
		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
		    (!ctxt->disableSAX))
		    ctxt->sax->characters(ctxt->userData, out, 1);
	    } else {
		/* rebuild the reference text in the notation it was read in */
		if ((hex == 'x') || (hex == 'X'))
		    snprintf((char *)out, sizeof(out), "#x%X", value);
		else
		    snprintf((char *)out, sizeof(out), "#%d", value);
		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
		    (!ctxt->disableSAX))
		    ctxt->sax->reference(ctxt->userData, out);
	    }
	} else {
	    /*
	     * Just encode the value in UTF-8
	     */
	    COPY_BUF(0 ,out, i, value);
	    out[i] = 0;
	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
		(!ctxt->disableSAX))
		ctxt->sax->characters(ctxt->userData, out, i);
	}
	return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    if (!ctxt->wellFormed)
	return;
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
	val = ent->content;
	if (val == NULL) return;
	/*
	 * inline the entity.
	 */
	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
	    (!ctxt->disableSAX))
	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
	return;
    }

    /*
     * The first reference to the entity trigger a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if (((ent->checked == 0) ||
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
	/* used below to count the references met while parsing ent */
	unsigned long oldnbent = ctxt->nbentities;

	/*
	 * This is a bit hackish but this seems the best
	 * way to make sure both SAX and DOM entity support
	 * behaves okay.
	 */
	void *user_data;
	if (ctxt->userData == ctxt)
	    user_data = NULL;
	else
	    user_data = ctxt->userData;

	/*
	 * Check that this entity is well formed
	 * 4.3.2: An internal general parsed entity is well-formed
	 * if its replacement text matches the production labeled
	 * content.
	 */
	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
	    ctxt->depth++;
	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
	                                              user_data, &list);
	    ctxt->depth--;

	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
	    ctxt->depth++;
	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
	                                   user_data, ctxt->depth, ent->URI,
					   ent->ExternalID, &list);
	    ctxt->depth--;
	} else {
	    ret = XML_ERR_ENTITY_PE_INTERNAL;
	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
			 "invalid entity type found\n", NULL);
	}

	/*
	 * Store the number of entities needing parsing for this entity
	 * content and do checkings
	 * The count is kept doubled so that the low bit of ent->checked
	 * can flag a '<' found in the entity content.
	 */
	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
	    ent->checked |= 1;
	if (ret == XML_ERR_ENTITY_LOOP) {
	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	    xmlFreeNodeList(list);
	    return;
	}
	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
	    xmlFreeNodeList(list);
	    return;
	}

	if ((ret == XML_ERR_OK) && (list != NULL)) {
	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
		(ent->children == NULL)) {
		ent->children = list;
		if (ctxt->replaceEntities) {
		    /*
		     * Prune it directly in the generated document
		     * except for single text nodes.
		     */
		    if (((list->type == XML_TEXT_NODE) &&
			 (list->next == NULL)) ||
			(ctxt->parseMode == XML_PARSE_READER)) {
			list->parent = (xmlNodePtr) ent;
			list = NULL;
			ent->owner = 1;
		    } else {
			/* reparent every parsed node to the current node */
			ent->owner = 0;
			while (list != NULL) {
			    list->parent = (xmlNodePtr) ctxt->node;
			    list->doc = ctxt->myDoc;
			    if (list->next == NULL)
				ent->last = list;
			    list = list->next;
			}
			list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
			  xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
		    }
		} else {
		    /* not substituting: the entity keeps the parsed nodes */
		    ent->owner = 1;
		    while (list != NULL) {
			list->parent = (xmlNodePtr) ent;
			xmlSetTreeDoc(list, ent->doc);
			if (list->next == NULL)
			    ent->last = list;
			list = list->next;
		    }
		}
	    } else {
		xmlFreeNodeList(list);
		list = NULL;
	    }
	} else if ((ret != XML_ERR_OK) &&
		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
		     "Entity '%s' failed to parse\n", ent->name);
	    /* empty the content of the entity that failed to parse */
	    if (ent->content != NULL)
	        ent->content[0] = 0;
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	} else if (list != NULL) {
	    xmlFreeNodeList(list);
	    list = NULL;
	}
	if (ent->checked == 0)
	    ent->checked = 2;

        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
        was_checked = 0;
    } else if (ent->checked != 1) {
	/* not parsed again: add the count stored in ent->checked */
	ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
	/*
	 * Probably running in SAX mode and the callbacks don't
	 * build the entity content. So unless we already went
	 * though parsing for first checking go though the entity
	 * content to generate callbacks associated to the entity
	 */
	if (was_checked != 0) {
	    void *user_data;
	    /*
	     * This is a bit hackish but this seems the best
	     * way to make sure both SAX and DOM entity support
	     * behaves okay.
	     */
	    if (ctxt->userData == ctxt)
		user_data = NULL;
	    else
		user_data = ctxt->userData;

	    /* same calls as for the first check, but no node list is asked */
	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
		ctxt->depth++;
		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
				   ent->content, user_data, NULL);
		ctxt->depth--;
	    } else if (ent->etype ==
		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
		ctxt->depth++;
		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
			   ctxt->sax, user_data, ctxt->depth,
			   ent->URI, ent->ExternalID, NULL);
		ctxt->depth--;
	    } else {
		ret = XML_ERR_ENTITY_PE_INTERNAL;
		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
			     "invalid entity type found\n", NULL);
	    }
	    if (ret == XML_ERR_ENTITY_LOOP) {
		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
		return;
	    }
	}
	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
	    /*
	     * Entity reference callback comes second, it's somewhat
	     * superfluous but a compatibility to historical behaviour
	     */
	    ctxt->sax->reference(ctxt->userData, ent->name);
	}
	return;
    }

    /*
     * The entity has children at this point. When entities are not
     * substituted, only report the reference and stop here.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
	/*
	 * Create a node.
	 */
	ctxt->sax->reference(ctxt->userData, ent->name);
	return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
	/*
	 * There is a problem on the handling of _private for entities
	 * (bug 155816): Should we copy the content of the field from
	 * the entity (possibly overwriting some value set by the user
	 * when a copy is created), should we leave it alone, or should
	 * we try to take care of different situations?  The problem
	 * is exacerbated by the usage of this field by the xmlReader.
	 * To fix this bug, we look at _private on the created node
	 * and, if it's NULL, we copy in whatever was in the entity.
	 * If it's not NULL we leave it alone.  This is somewhat of a
	 * hack - maybe we should have further tests to determine
	 * what to do.
	 */
	if ((ctxt->node != NULL) && (ent->children != NULL)) {
	    /*
	     * Seems we are generating the DOM content, do
	     * a simple tree copy for all references except the first
	     * In the first occurrence list contains the replacement.
	     */
	    if (((list == NULL) && (ent->owner == 0)) ||
		(ctxt->parseMode == XML_PARSE_READER)) {
		xmlNodePtr nw = NULL, cur, firstChild = NULL;

		/*
		 * We are copying here, make sure there is no abuse
		 */
		ctxt->sizeentcopy += ent->length + 5;
		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
		    return;

		/*
		 * when operating on a reader, the entities definitions
		 * are always owning the entities subtree.
		if (ctxt->parseMode == XML_PARSE_READER)
			ent->owner = 1;
		 */

		/* add a copy of each entity child under the current node */
		cur = ent->children;
		while (cur != NULL) {
		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
		    if (nw != NULL) {
			if (nw->_private == NULL)
			    nw->_private = cur->_private;
			if (firstChild == NULL){
			    firstChild = nw;
			}
			nw = xmlAddChild(ctxt->node, nw);
		    }
		    if (cur == ent->last) {
			/*
			 * needed to detect some strange empty
			 * node cases in the reader tests
			 */
			if ((ctxt->parseMode == XML_PARSE_READER) &&
			    (nw != NULL) &&
			    (nw->type == XML_ELEMENT_NODE) &&
			    (nw->children == NULL))
			    nw->extra = 1;

			break;
		    }
		    cur = cur->next;
		}
#ifdef LIBXML_LEGACY_ENABLED
		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
		  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
		xmlNodePtr nw = NULL, cur, next, last,
			   firstChild = NULL;

		/*
		 * We are copying here, make sure there is no abuse
		 */
		ctxt->sizeentcopy += ent->length + 5;
		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
		    return;

		/*
		 * Copy the entity child list and make it the new
		 * entity child list. The goal is to make sure any
		 * ID or REF referenced will be the one from the
		 * document content and not the entity copy.
		 */
		cur = ent->children;
		ent->children = NULL;
		last = ent->last;
		ent->last = NULL;
		while (cur != NULL) {
		    next = cur->next;
		    cur->next = NULL;
		    cur->parent = NULL;
		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
		    if (nw != NULL) {
			if (nw->_private == NULL)
			    nw->_private = cur->_private;
			if (firstChild == NULL){
			    firstChild = cur;
			}
			/* the copy goes to the entity, the original to the tree */
			xmlAddChild((xmlNodePtr) ent, nw);
			xmlAddChild(ctxt->node, cur);
		    }
		    if (cur == last)
			break;
		    cur = next;
		}
		if (ent->owner == 0)
		    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
		  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
	    } else {
		const xmlChar *nbktext;

		/*
		 * the name change is to avoid coalescing of the
		 * node with a possible previous text one which
		 * would make ent->children a dangling pointer
		 */
		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
					-1);
		if (ent->children->type == XML_TEXT_NODE)
		    ent->children->name = nbktext;
		if ((ent->last != ent->children) &&
		    (ent->last->type == XML_TEXT_NODE))
		    ent->last->name = nbktext;
		xmlAddChildList(ctxt->node, ent->children);
	    }

	    /*
	     * This is to avoid a nasty side effect, see
	     * characters() in SAX.c
	     */
	    ctxt->nodemem = 0;
	    ctxt->nodelen = 0;
	    return;
	}
    }
}

/**
 * xmlParseEntityRef:
 * @ctxt:  an XML parser context
 *
 * parse ENTITY references declarations
 *
 * [68] EntityRef ::= '&' Name ';'
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", the Name given in the entity reference
 * must match that in an entity declaration, except that
well-formed 7432 * documents need not declare any of the following entities: amp, lt, 7433 * gt, apos, quot. The declaration of a parameter entity must precede 7434 * any reference to it. Similarly, the declaration of a general entity 7435 * must precede any reference to it which appears in a default value in an 7436 * attribute-list declaration. Note that if entities are declared in the 7437 * external subset or in external parameter entities, a non-validating 7438 * processor is not obligated to read and process their declarations; 7439 * for such documents, the rule that an entity must be declared is a 7440 * well-formedness constraint only if standalone='yes'. 7441 * 7442 * [ WFC: Parsed Entity ] 7443 * An entity reference must not contain the name of an unparsed entity 7444 * 7445 * Returns the xmlEntityPtr if found, or NULL otherwise. 7446 */ 7447 xmlEntityPtr 7448 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7449 const xmlChar *name; 7450 xmlEntityPtr ent = NULL; 7451 7452 GROW; 7453 if (ctxt->instate == XML_PARSER_EOF) 7454 return(NULL); 7455 7456 if (RAW != '&') 7457 return(NULL); 7458 NEXT; 7459 name = xmlParseName(ctxt); 7460 if (name == NULL) { 7461 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7462 "xmlParseEntityRef: no name\n"); 7463 return(NULL); 7464 } 7465 if (RAW != ';') { 7466 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7467 return(NULL); 7468 } 7469 NEXT; 7470 7471 /* 7472 * Predefined entities override any extra definition 7473 */ 7474 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7475 ent = xmlGetPredefinedEntity(name); 7476 if (ent != NULL) 7477 return(ent); 7478 } 7479 7480 /* 7481 * Increase the number of entity references parsed 7482 */ 7483 ctxt->nbentities++; 7484 7485 /* 7486 * Ask first SAX for entity resolution, otherwise try the 7487 * entities which may have stored in the parser context. 
7488 */ 7489 if (ctxt->sax != NULL) { 7490 if (ctxt->sax->getEntity != NULL) 7491 ent = ctxt->sax->getEntity(ctxt->userData, name); 7492 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7493 (ctxt->options & XML_PARSE_OLDSAX)) 7494 ent = xmlGetPredefinedEntity(name); 7495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7496 (ctxt->userData==ctxt)) { 7497 ent = xmlSAX2GetEntity(ctxt, name); 7498 } 7499 } 7500 if (ctxt->instate == XML_PARSER_EOF) 7501 return(NULL); 7502 /* 7503 * [ WFC: Entity Declared ] 7504 * In a document without any DTD, a document with only an 7505 * internal DTD subset which contains no parameter entity 7506 * references, or a document with "standalone='yes'", the 7507 * Name given in the entity reference must match that in an 7508 * entity declaration, except that well-formed documents 7509 * need not declare any of the following entities: amp, lt, 7510 * gt, apos, quot. 7511 * The declaration of a parameter entity must precede any 7512 * reference to it. 7513 * Similarly, the declaration of a general entity must 7514 * precede any reference to it which appears in a default 7515 * value in an attribute-list declaration. Note that if 7516 * entities are declared in the external subset or in 7517 * external parameter entities, a non-validating processor 7518 * is not obligated to read and process their declarations; 7519 * for such documents, the rule that an entity must be 7520 * declared is a well-formedness constraint only if 7521 * standalone='yes'. 
7522 */ 7523 if (ent == NULL) { 7524 if ((ctxt->standalone == 1) || 7525 ((ctxt->hasExternalSubset == 0) && 7526 (ctxt->hasPErefs == 0))) { 7527 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7528 "Entity '%s' not defined\n", name); 7529 } else { 7530 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7531 "Entity '%s' not defined\n", name); 7532 if ((ctxt->inSubset == 0) && 7533 (ctxt->sax != NULL) && 7534 (ctxt->sax->reference != NULL)) { 7535 ctxt->sax->reference(ctxt->userData, name); 7536 } 7537 } 7538 xmlParserEntityCheck(ctxt, 0, ent, 0); 7539 ctxt->valid = 0; 7540 } 7541 7542 /* 7543 * [ WFC: Parsed Entity ] 7544 * An entity reference must not contain the name of an 7545 * unparsed entity 7546 */ 7547 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7548 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7549 "Entity reference to unparsed entity %s\n", name); 7550 } 7551 7552 /* 7553 * [ WFC: No External Entity References ] 7554 * Attribute values cannot contain direct or indirect 7555 * entity references to external entities. 7556 */ 7557 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7558 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7559 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7560 "Attribute references external entity '%s'\n", name); 7561 } 7562 /* 7563 * [ WFC: No < in Attribute Values ] 7564 * The replacement text of any entity referred to directly or 7565 * indirectly in an attribute value (other than "<") must 7566 * not contain a <. 7567 */ 7568 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7569 (ent != NULL) && 7570 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7571 if (((ent->checked & 1) || (ent->checked == 0)) && 7572 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7573 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7574 "'<' in entity '%s' is not allowed in attributes values\n", name); 7575 } 7576 } 7577 7578 /* 7579 * Internal check, no parameter entities here ... 
7580 */ 7581 else { 7582 switch (ent->etype) { 7583 case XML_INTERNAL_PARAMETER_ENTITY: 7584 case XML_EXTERNAL_PARAMETER_ENTITY: 7585 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7586 "Attempt to reference the parameter entity '%s'\n", 7587 name); 7588 break; 7589 default: 7590 break; 7591 } 7592 } 7593 7594 /* 7595 * [ WFC: No Recursion ] 7596 * A parsed entity must not contain a recursive reference 7597 * to itself, either directly or indirectly. 7598 * Done somewhere else 7599 */ 7600 return(ent); 7601 } 7602 7603 /** 7604 * xmlParseStringEntityRef: 7605 * @ctxt: an XML parser context 7606 * @str: a pointer to an index in the string 7607 * 7608 * parse ENTITY references declarations, but this version parses it from 7609 * a string value. 7610 * 7611 * [68] EntityRef ::= '&' Name ';' 7612 * 7613 * [ WFC: Entity Declared ] 7614 * In a document without any DTD, a document with only an internal DTD 7615 * subset which contains no parameter entity references, or a document 7616 * with "standalone='yes'", the Name given in the entity reference 7617 * must match that in an entity declaration, except that well-formed 7618 * documents need not declare any of the following entities: amp, lt, 7619 * gt, apos, quot. The declaration of a parameter entity must precede 7620 * any reference to it. Similarly, the declaration of a general entity 7621 * must precede any reference to it which appears in a default value in an 7622 * attribute-list declaration. Note that if entities are declared in the 7623 * external subset or in external parameter entities, a non-validating 7624 * processor is not obligated to read and process their declarations; 7625 * for such documents, the rule that an entity must be declared is a 7626 * well-formedness constraint only if standalone='yes'. 7627 * 7628 * [ WFC: Parsed Entity ] 7629 * An entity reference must not contain the name of an unparsed entity 7630 * 7631 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7632 * is updated to the current location in the string. 7633 */ 7634 static xmlEntityPtr 7635 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7636 xmlChar *name; 7637 const xmlChar *ptr; 7638 xmlChar cur; 7639 xmlEntityPtr ent = NULL; 7640 7641 if ((str == NULL) || (*str == NULL)) 7642 return(NULL); 7643 ptr = *str; 7644 cur = *ptr; 7645 if (cur != '&') 7646 return(NULL); 7647 7648 ptr++; 7649 name = xmlParseStringName(ctxt, &ptr); 7650 if (name == NULL) { 7651 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7652 "xmlParseStringEntityRef: no name\n"); 7653 *str = ptr; 7654 return(NULL); 7655 } 7656 if (*ptr != ';') { 7657 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7658 xmlFree(name); 7659 *str = ptr; 7660 return(NULL); 7661 } 7662 ptr++; 7663 7664 7665 /* 7666 * Predefined entities override any extra definition 7667 */ 7668 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7669 ent = xmlGetPredefinedEntity(name); 7670 if (ent != NULL) { 7671 xmlFree(name); 7672 *str = ptr; 7673 return(ent); 7674 } 7675 } 7676 7677 /* 7678 * Increate the number of entity references parsed 7679 */ 7680 ctxt->nbentities++; 7681 7682 /* 7683 * Ask first SAX for entity resolution, otherwise try the 7684 * entities which may have stored in the parser context. 
7685 */ 7686 if (ctxt->sax != NULL) { 7687 if (ctxt->sax->getEntity != NULL) 7688 ent = ctxt->sax->getEntity(ctxt->userData, name); 7689 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7690 ent = xmlGetPredefinedEntity(name); 7691 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7692 ent = xmlSAX2GetEntity(ctxt, name); 7693 } 7694 } 7695 if (ctxt->instate == XML_PARSER_EOF) { 7696 xmlFree(name); 7697 return(NULL); 7698 } 7699 7700 /* 7701 * [ WFC: Entity Declared ] 7702 * In a document without any DTD, a document with only an 7703 * internal DTD subset which contains no parameter entity 7704 * references, or a document with "standalone='yes'", the 7705 * Name given in the entity reference must match that in an 7706 * entity declaration, except that well-formed documents 7707 * need not declare any of the following entities: amp, lt, 7708 * gt, apos, quot. 7709 * The declaration of a parameter entity must precede any 7710 * reference to it. 7711 * Similarly, the declaration of a general entity must 7712 * precede any reference to it which appears in a default 7713 * value in an attribute-list declaration. Note that if 7714 * entities are declared in the external subset or in 7715 * external parameter entities, a non-validating processor 7716 * is not obligated to read and process their declarations; 7717 * for such documents, the rule that an entity must be 7718 * declared is a well-formedness constraint only if 7719 * standalone='yes'. 7720 */ 7721 if (ent == NULL) { 7722 if ((ctxt->standalone == 1) || 7723 ((ctxt->hasExternalSubset == 0) && 7724 (ctxt->hasPErefs == 0))) { 7725 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7726 "Entity '%s' not defined\n", name); 7727 } else { 7728 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7729 "Entity '%s' not defined\n", 7730 name); 7731 } 7732 xmlParserEntityCheck(ctxt, 0, ent, 0); 7733 /* TODO ? 
check regressions ctxt->valid = 0; */ 7734 } 7735 7736 /* 7737 * [ WFC: Parsed Entity ] 7738 * An entity reference must not contain the name of an 7739 * unparsed entity 7740 */ 7741 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7742 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7743 "Entity reference to unparsed entity %s\n", name); 7744 } 7745 7746 /* 7747 * [ WFC: No External Entity References ] 7748 * Attribute values cannot contain direct or indirect 7749 * entity references to external entities. 7750 */ 7751 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7752 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7753 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7754 "Attribute references external entity '%s'\n", name); 7755 } 7756 /* 7757 * [ WFC: No < in Attribute Values ] 7758 * The replacement text of any entity referred to directly or 7759 * indirectly in an attribute value (other than "<") must 7760 * not contain a <. 7761 */ 7762 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7763 (ent != NULL) && (ent->content != NULL) && 7764 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7765 (xmlStrchr(ent->content, '<'))) { 7766 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7767 "'<' in entity '%s' is not allowed in attributes values\n", 7768 name); 7769 } 7770 7771 /* 7772 * Internal check, no parameter entities here ... 7773 */ 7774 else { 7775 switch (ent->etype) { 7776 case XML_INTERNAL_PARAMETER_ENTITY: 7777 case XML_EXTERNAL_PARAMETER_ENTITY: 7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7779 "Attempt to reference the parameter entity '%s'\n", 7780 name); 7781 break; 7782 default: 7783 break; 7784 } 7785 } 7786 7787 /* 7788 * [ WFC: No Recursion ] 7789 * A parsed entity must not contain a recursive reference 7790 * to itself, either directly or indirectly. 
7791 * Done somewhere else 7792 */ 7793 7794 xmlFree(name); 7795 *str = ptr; 7796 return(ent); 7797 } 7798 7799 /** 7800 * xmlParsePEReference: 7801 * @ctxt: an XML parser context 7802 * 7803 * parse PEReference declarations 7804 * The entity content is handled directly by pushing it's content as 7805 * a new input stream. 7806 * 7807 * [69] PEReference ::= '%' Name ';' 7808 * 7809 * [ WFC: No Recursion ] 7810 * A parsed entity must not contain a recursive 7811 * reference to itself, either directly or indirectly. 7812 * 7813 * [ WFC: Entity Declared ] 7814 * In a document without any DTD, a document with only an internal DTD 7815 * subset which contains no parameter entity references, or a document 7816 * with "standalone='yes'", ... ... The declaration of a parameter 7817 * entity must precede any reference to it... 7818 * 7819 * [ VC: Entity Declared ] 7820 * In a document with an external subset or external parameter entities 7821 * with "standalone='no'", ... ... The declaration of a parameter entity 7822 * must precede any reference to it... 7823 * 7824 * [ WFC: In DTD ] 7825 * Parameter-entity references may only appear in the DTD. 7826 * NOTE: misleading but this is handled. 
7827 */ 7828 void 7829 xmlParsePEReference(xmlParserCtxtPtr ctxt) 7830 { 7831 const xmlChar *name; 7832 xmlEntityPtr entity = NULL; 7833 xmlParserInputPtr input; 7834 7835 if (RAW != '%') 7836 return; 7837 NEXT; 7838 name = xmlParseName(ctxt); 7839 if (name == NULL) { 7840 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n"); 7841 return; 7842 } 7843 if (xmlParserDebugEntities) 7844 xmlGenericError(xmlGenericErrorContext, 7845 "PEReference: %s\n", name); 7846 if (RAW != ';') { 7847 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 7848 return; 7849 } 7850 7851 NEXT; 7852 7853 /* 7854 * Increate the number of entity references parsed 7855 */ 7856 ctxt->nbentities++; 7857 7858 /* 7859 * Request the entity from SAX 7860 */ 7861 if ((ctxt->sax != NULL) && 7862 (ctxt->sax->getParameterEntity != NULL)) 7863 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7864 if (ctxt->instate == XML_PARSER_EOF) 7865 return; 7866 if (entity == NULL) { 7867 /* 7868 * [ WFC: Entity Declared ] 7869 * In a document without any DTD, a document with only an 7870 * internal DTD subset which contains no parameter entity 7871 * references, or a document with "standalone='yes'", ... 7872 * ... The declaration of a parameter entity must precede 7873 * any reference to it... 7874 */ 7875 if ((ctxt->standalone == 1) || 7876 ((ctxt->hasExternalSubset == 0) && 7877 (ctxt->hasPErefs == 0))) { 7878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7879 "PEReference: %%%s; not found\n", 7880 name); 7881 } else { 7882 /* 7883 * [ VC: Entity Declared ] 7884 * In a document with an external subset or external 7885 * parameter entities with "standalone='no'", ... 7886 * ... The declaration of a parameter entity must 7887 * precede any reference to it... 
7888 */ 7889 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 7890 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 7891 "PEReference: %%%s; not found\n", 7892 name, NULL); 7893 } else 7894 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7895 "PEReference: %%%s; not found\n", 7896 name, NULL); 7897 ctxt->valid = 0; 7898 } 7899 xmlParserEntityCheck(ctxt, 0, NULL, 0); 7900 } else { 7901 /* 7902 * Internal checking in case the entity quest barfed 7903 */ 7904 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7905 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7907 "Internal: %%%s; is not a parameter entity\n", 7908 name, NULL); 7909 } else { 7910 xmlChar start[4]; 7911 xmlCharEncoding enc; 7912 7913 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7914 ((ctxt->options & XML_PARSE_NOENT) == 0) && 7915 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 7916 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 7917 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 7918 (ctxt->replaceEntities == 0) && 7919 (ctxt->validate == 0)) 7920 return; 7921 7922 input = xmlNewEntityInputStream(ctxt, entity); 7923 if (xmlPushInput(ctxt, input) < 0) { 7924 xmlFreeInputStream(input); 7925 return; 7926 } 7927 7928 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { 7929 /* 7930 * Get the 4 first bytes and decode the charset 7931 * if enc != XML_CHAR_ENCODING_NONE 7932 * plug some encoding conversion routines. 7933 * Note that, since we may have some non-UTF8 7934 * encoding (like UTF16, bug 135229), the 'length' 7935 * is not known, but we can calculate based upon 7936 * the amount of data in the buffer. 
7937 */ 7938 GROW 7939 if (ctxt->instate == XML_PARSER_EOF) 7940 return; 7941 if ((ctxt->input->end - ctxt->input->cur)>=4) { 7942 start[0] = RAW; 7943 start[1] = NXT(1); 7944 start[2] = NXT(2); 7945 start[3] = NXT(3); 7946 enc = xmlDetectCharEncoding(start, 4); 7947 if (enc != XML_CHAR_ENCODING_NONE) { 7948 xmlSwitchEncoding(ctxt, enc); 7949 } 7950 } 7951 7952 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7953 (IS_BLANK_CH(NXT(5)))) { 7954 xmlParseTextDecl(ctxt); 7955 } 7956 } 7957 } 7958 } 7959 ctxt->hasPErefs = 1; 7960 } 7961 7962 /** 7963 * xmlLoadEntityContent: 7964 * @ctxt: an XML parser context 7965 * @entity: an unloaded system entity 7966 * 7967 * Load the original content of the given system entity from the 7968 * ExternalID/SystemID given. This is to be used for Included in Literal 7969 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7970 * 7971 * Returns 0 in case of success and -1 in case of failure 7972 */ 7973 static int 7974 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7975 xmlParserInputPtr input; 7976 xmlBufferPtr buf; 7977 int l, c; 7978 int count = 0; 7979 7980 if ((ctxt == NULL) || (entity == NULL) || 7981 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7982 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7983 (entity->content != NULL)) { 7984 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7985 "xmlLoadEntityContent parameter error"); 7986 return(-1); 7987 } 7988 7989 if (xmlParserDebugEntities) 7990 xmlGenericError(xmlGenericErrorContext, 7991 "Reading %s entity content input\n", entity->name); 7992 7993 buf = xmlBufferCreate(); 7994 if (buf == NULL) { 7995 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7996 "xmlLoadEntityContent parameter error"); 7997 return(-1); 7998 } 7999 8000 input = xmlNewEntityInputStream(ctxt, entity); 8001 if (input == NULL) { 8002 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8003 "xmlLoadEntityContent input error"); 8004 xmlBufferFree(buf); 8005 return(-1); 8006 
} 8007 8008 /* 8009 * Push the entity as the current input, read char by char 8010 * saving to the buffer until the end of the entity or an error 8011 */ 8012 if (xmlPushInput(ctxt, input) < 0) { 8013 xmlBufferFree(buf); 8014 return(-1); 8015 } 8016 8017 GROW; 8018 c = CUR_CHAR(l); 8019 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8020 (IS_CHAR(c))) { 8021 xmlBufferAdd(buf, ctxt->input->cur, l); 8022 if (count++ > XML_PARSER_CHUNK_SIZE) { 8023 count = 0; 8024 GROW; 8025 if (ctxt->instate == XML_PARSER_EOF) { 8026 xmlBufferFree(buf); 8027 return(-1); 8028 } 8029 } 8030 NEXTL(l); 8031 c = CUR_CHAR(l); 8032 if (c == 0) { 8033 count = 0; 8034 GROW; 8035 if (ctxt->instate == XML_PARSER_EOF) { 8036 xmlBufferFree(buf); 8037 return(-1); 8038 } 8039 c = CUR_CHAR(l); 8040 } 8041 } 8042 8043 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8044 xmlPopInput(ctxt); 8045 } else if (!IS_CHAR(c)) { 8046 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8047 "xmlLoadEntityContent: invalid char value %d\n", 8048 c); 8049 xmlBufferFree(buf); 8050 return(-1); 8051 } 8052 entity->content = buf->content; 8053 buf->content = NULL; 8054 xmlBufferFree(buf); 8055 8056 return(0); 8057 } 8058 8059 /** 8060 * xmlParseStringPEReference: 8061 * @ctxt: an XML parser context 8062 * @str: a pointer to an index in the string 8063 * 8064 * parse PEReference declarations 8065 * 8066 * [69] PEReference ::= '%' Name ';' 8067 * 8068 * [ WFC: No Recursion ] 8069 * A parsed entity must not contain a recursive 8070 * reference to itself, either directly or indirectly. 8071 * 8072 * [ WFC: Entity Declared ] 8073 * In a document without any DTD, a document with only an internal DTD 8074 * subset which contains no parameter entity references, or a document 8075 * with "standalone='yes'", ... ... The declaration of a parameter 8076 * entity must precede any reference to it... 
8077 * 8078 * [ VC: Entity Declared ] 8079 * In a document with an external subset or external parameter entities 8080 * with "standalone='no'", ... ... The declaration of a parameter entity 8081 * must precede any reference to it... 8082 * 8083 * [ WFC: In DTD ] 8084 * Parameter-entity references may only appear in the DTD. 8085 * NOTE: misleading but this is handled. 8086 * 8087 * Returns the string of the entity content. 8088 * str is updated to the current value of the index 8089 */ 8090 static xmlEntityPtr 8091 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8092 const xmlChar *ptr; 8093 xmlChar cur; 8094 xmlChar *name; 8095 xmlEntityPtr entity = NULL; 8096 8097 if ((str == NULL) || (*str == NULL)) return(NULL); 8098 ptr = *str; 8099 cur = *ptr; 8100 if (cur != '%') 8101 return(NULL); 8102 ptr++; 8103 name = xmlParseStringName(ctxt, &ptr); 8104 if (name == NULL) { 8105 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8106 "xmlParseStringPEReference: no name\n"); 8107 *str = ptr; 8108 return(NULL); 8109 } 8110 cur = *ptr; 8111 if (cur != ';') { 8112 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8113 xmlFree(name); 8114 *str = ptr; 8115 return(NULL); 8116 } 8117 ptr++; 8118 8119 /* 8120 * Increate the number of entity references parsed 8121 */ 8122 ctxt->nbentities++; 8123 8124 /* 8125 * Request the entity from SAX 8126 */ 8127 if ((ctxt->sax != NULL) && 8128 (ctxt->sax->getParameterEntity != NULL)) 8129 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8130 if (ctxt->instate == XML_PARSER_EOF) { 8131 xmlFree(name); 8132 *str = ptr; 8133 return(NULL); 8134 } 8135 if (entity == NULL) { 8136 /* 8137 * [ WFC: Entity Declared ] 8138 * In a document without any DTD, a document with only an 8139 * internal DTD subset which contains no parameter entity 8140 * references, or a document with "standalone='yes'", ... 8141 * ... The declaration of a parameter entity must precede 8142 * any reference to it... 
8143 */ 8144 if ((ctxt->standalone == 1) || 8145 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8146 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8147 "PEReference: %%%s; not found\n", name); 8148 } else { 8149 /* 8150 * [ VC: Entity Declared ] 8151 * In a document with an external subset or external 8152 * parameter entities with "standalone='no'", ... 8153 * ... The declaration of a parameter entity must 8154 * precede any reference to it... 8155 */ 8156 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8157 "PEReference: %%%s; not found\n", 8158 name, NULL); 8159 ctxt->valid = 0; 8160 } 8161 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8162 } else { 8163 /* 8164 * Internal checking in case the entity quest barfed 8165 */ 8166 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8167 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8168 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8169 "%%%s; is not a parameter entity\n", 8170 name, NULL); 8171 } 8172 } 8173 ctxt->hasPErefs = 1; 8174 xmlFree(name); 8175 *str = ptr; 8176 return(entity); 8177 } 8178 8179 /** 8180 * xmlParseDocTypeDecl: 8181 * @ctxt: an XML parser context 8182 * 8183 * parse a DOCTYPE declaration 8184 * 8185 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8186 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8187 * 8188 * [ VC: Root Element Type ] 8189 * The Name in the document type declaration must match the element 8190 * type of the root element. 8191 */ 8192 8193 void 8194 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8195 const xmlChar *name = NULL; 8196 xmlChar *ExternalID = NULL; 8197 xmlChar *URI = NULL; 8198 8199 /* 8200 * We know that '<!DOCTYPE' has been detected. 8201 */ 8202 SKIP(9); 8203 8204 SKIP_BLANKS; 8205 8206 /* 8207 * Parse the DOCTYPE name. 
8208 */ 8209 name = xmlParseName(ctxt); 8210 if (name == NULL) { 8211 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8212 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8213 } 8214 ctxt->intSubName = name; 8215 8216 SKIP_BLANKS; 8217 8218 /* 8219 * Check for SystemID and ExternalID 8220 */ 8221 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8222 8223 if ((URI != NULL) || (ExternalID != NULL)) { 8224 ctxt->hasExternalSubset = 1; 8225 } 8226 ctxt->extSubURI = URI; 8227 ctxt->extSubSystem = ExternalID; 8228 8229 SKIP_BLANKS; 8230 8231 /* 8232 * Create and update the internal subset. 8233 */ 8234 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8235 (!ctxt->disableSAX)) 8236 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8237 if (ctxt->instate == XML_PARSER_EOF) 8238 return; 8239 8240 /* 8241 * Is there any internal subset declarations ? 8242 * they are handled separately in xmlParseInternalSubset() 8243 */ 8244 if (RAW == '[') 8245 return; 8246 8247 /* 8248 * We should be at the end of the DOCTYPE declaration. 8249 */ 8250 if (RAW != '>') { 8251 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8252 } 8253 NEXT; 8254 } 8255 8256 /** 8257 * xmlParseInternalSubset: 8258 * @ctxt: an XML parser context 8259 * 8260 * parse the internal subset declaration 8261 * 8262 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8263 */ 8264 8265 static void 8266 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8267 /* 8268 * Is there any DTD definition ? 8269 */ 8270 if (RAW == '[') { 8271 int baseInputNr = ctxt->inputNr; 8272 ctxt->instate = XML_PARSER_DTD; 8273 NEXT; 8274 /* 8275 * Parse the succession of Markup declarations and 8276 * PEReferences. 
8277 * Subsequence (markupdecl | PEReference | S)* 8278 */ 8279 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && 8280 (ctxt->instate != XML_PARSER_EOF)) { 8281 const xmlChar *check = CUR_PTR; 8282 unsigned int cons = ctxt->input->consumed; 8283 8284 SKIP_BLANKS; 8285 xmlParseMarkupDecl(ctxt); 8286 xmlParsePEReference(ctxt); 8287 8288 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8289 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8290 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8291 if (ctxt->inputNr > baseInputNr) 8292 xmlPopInput(ctxt); 8293 else 8294 break; 8295 } 8296 } 8297 if (RAW == ']') { 8298 NEXT; 8299 SKIP_BLANKS; 8300 } 8301 } 8302 8303 /* 8304 * We should be at the end of the DOCTYPE declaration. 8305 */ 8306 if (RAW != '>') { 8307 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8308 return; 8309 } 8310 NEXT; 8311 } 8312 8313 #ifdef LIBXML_SAX1_ENABLED 8314 /** 8315 * xmlParseAttribute: 8316 * @ctxt: an XML parser context 8317 * @value: a xmlChar ** used to store the value of the attribute 8318 * 8319 * parse an attribute 8320 * 8321 * [41] Attribute ::= Name Eq AttValue 8322 * 8323 * [ WFC: No External Entity References ] 8324 * Attribute values cannot contain direct or indirect entity references 8325 * to external entities. 8326 * 8327 * [ WFC: No < in Attribute Values ] 8328 * The replacement text of any entity referred to directly or indirectly in 8329 * an attribute value (other than "<") must not contain a <. 8330 * 8331 * [ VC: Attribute Value Type ] 8332 * The attribute must have been declared; the value must be of the type 8333 * declared for it. 8334 * 8335 * [25] Eq ::= S? '=' S? 8336 * 8337 * With namespace: 8338 * 8339 * [NS 11] Attribute ::= QName Eq AttValue 8340 * 8341 * Also the case QName == xmlns:??? is handled independently as a namespace 8342 * definition. 8343 * 8344 * Returns the attribute name, and the value in *value. 
8345 */ 8346 8347 const xmlChar * 8348 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8349 const xmlChar *name; 8350 xmlChar *val; 8351 8352 *value = NULL; 8353 GROW; 8354 name = xmlParseName(ctxt); 8355 if (name == NULL) { 8356 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8357 "error parsing attribute name\n"); 8358 return(NULL); 8359 } 8360 8361 /* 8362 * read the value 8363 */ 8364 SKIP_BLANKS; 8365 if (RAW == '=') { 8366 NEXT; 8367 SKIP_BLANKS; 8368 val = xmlParseAttValue(ctxt); 8369 ctxt->instate = XML_PARSER_CONTENT; 8370 } else { 8371 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8372 "Specification mandates value for attribute %s\n", name); 8373 return(NULL); 8374 } 8375 8376 /* 8377 * Check that xml:lang conforms to the specification 8378 * No more registered as an error, just generate a warning now 8379 * since this was deprecated in XML second edition 8380 */ 8381 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8382 if (!xmlCheckLanguageID(val)) { 8383 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8384 "Malformed value for xml:lang : %s\n", 8385 val, NULL); 8386 } 8387 } 8388 8389 /* 8390 * Check that xml:space conforms to the specification 8391 */ 8392 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8393 if (xmlStrEqual(val, BAD_CAST "default")) 8394 *(ctxt->space) = 0; 8395 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8396 *(ctxt->space) = 1; 8397 else { 8398 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8399 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8400 val, NULL); 8401 } 8402 } 8403 8404 *value = val; 8405 return(name); 8406 } 8407 8408 /** 8409 * xmlParseStartTag: 8410 * @ctxt: an XML parser context 8411 * 8412 * parse a start of tag either for rule element or 8413 * EmptyElement. In both case we don't parse the tag closing chars. 8414 * 8415 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8416 * 8417 * [ WFC: Unique Att Spec ] 8418 * No attribute name may appear more than once in the same start-tag or 8419 * empty-element tag. 8420 * 8421 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8422 * 8423 * [ WFC: Unique Att Spec ] 8424 * No attribute name may appear more than once in the same start-tag or 8425 * empty-element tag. 8426 * 8427 * With namespace: 8428 * 8429 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8430 * 8431 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8432 * 8433 * Returns the element name parsed 8434 */ 8435 8436 const xmlChar * 8437 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8438 const xmlChar *name; 8439 const xmlChar *attname; 8440 xmlChar *attvalue; 8441 const xmlChar **atts = ctxt->atts; 8442 int nbatts = 0; 8443 int maxatts = ctxt->maxatts; 8444 int i; 8445 8446 if (RAW != '<') return(NULL); 8447 NEXT1; 8448 8449 name = xmlParseName(ctxt); 8450 if (name == NULL) { 8451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8452 "xmlParseStartTag: invalid element name\n"); 8453 return(NULL); 8454 } 8455 8456 /* 8457 * Now parse the attributes, it ends up with the ending 8458 * 8459 * (S Attribute)* S? 8460 */ 8461 SKIP_BLANKS; 8462 GROW; 8463 8464 while (((RAW != '>') && 8465 ((RAW != '/') || (NXT(1) != '>')) && 8466 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8467 const xmlChar *q = CUR_PTR; 8468 unsigned int cons = ctxt->input->consumed; 8469 8470 attname = xmlParseAttribute(ctxt, &attvalue); 8471 if ((attname != NULL) && (attvalue != NULL)) { 8472 /* 8473 * [ WFC: Unique Att Spec ] 8474 * No attribute name may appear more than once in the same 8475 * start-tag or empty-element tag. 
8476 */ 8477 for (i = 0; i < nbatts;i += 2) { 8478 if (xmlStrEqual(atts[i], attname)) { 8479 xmlErrAttributeDup(ctxt, NULL, attname); 8480 xmlFree(attvalue); 8481 goto failed; 8482 } 8483 } 8484 /* 8485 * Add the pair to atts 8486 */ 8487 if (atts == NULL) { 8488 maxatts = 22; /* allow for 10 attrs by default */ 8489 atts = (const xmlChar **) 8490 xmlMalloc(maxatts * sizeof(xmlChar *)); 8491 if (atts == NULL) { 8492 xmlErrMemory(ctxt, NULL); 8493 if (attvalue != NULL) 8494 xmlFree(attvalue); 8495 goto failed; 8496 } 8497 ctxt->atts = atts; 8498 ctxt->maxatts = maxatts; 8499 } else if (nbatts + 4 > maxatts) { 8500 const xmlChar **n; 8501 8502 maxatts *= 2; 8503 n = (const xmlChar **) xmlRealloc((void *) atts, 8504 maxatts * sizeof(const xmlChar *)); 8505 if (n == NULL) { 8506 xmlErrMemory(ctxt, NULL); 8507 if (attvalue != NULL) 8508 xmlFree(attvalue); 8509 goto failed; 8510 } 8511 atts = n; 8512 ctxt->atts = atts; 8513 ctxt->maxatts = maxatts; 8514 } 8515 atts[nbatts++] = attname; 8516 atts[nbatts++] = attvalue; 8517 atts[nbatts] = NULL; 8518 atts[nbatts + 1] = NULL; 8519 } else { 8520 if (attvalue != NULL) 8521 xmlFree(attvalue); 8522 } 8523 8524 failed: 8525 8526 GROW 8527 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8528 break; 8529 if (SKIP_BLANKS == 0) { 8530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8531 "attributes construct error\n"); 8532 } 8533 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8534 (attname == NULL) && (attvalue == NULL)) { 8535 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8536 "xmlParseStartTag: problem parsing attributes\n"); 8537 break; 8538 } 8539 SHRINK; 8540 GROW; 8541 } 8542 8543 /* 8544 * SAX: Start of Element ! 
8545 */ 8546 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8547 (!ctxt->disableSAX)) { 8548 if (nbatts > 0) 8549 ctxt->sax->startElement(ctxt->userData, name, atts); 8550 else 8551 ctxt->sax->startElement(ctxt->userData, name, NULL); 8552 } 8553 8554 if (atts != NULL) { 8555 /* Free only the content strings */ 8556 for (i = 1;i < nbatts;i+=2) 8557 if (atts[i] != NULL) 8558 xmlFree((xmlChar *) atts[i]); 8559 } 8560 return(name); 8561 } 8562 8563 /** 8564 * xmlParseEndTag1: 8565 * @ctxt: an XML parser context 8566 * @line: line of the start tag 8567 * @nsNr: number of namespaces on the start tag 8568 * 8569 * parse an end of tag 8570 * 8571 * [42] ETag ::= '</' Name S? '>' 8572 * 8573 * With namespace 8574 * 8575 * [NS 9] ETag ::= '</' QName S? '>' 8576 */ 8577 8578 static void 8579 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8580 const xmlChar *name; 8581 8582 GROW; 8583 if ((RAW != '<') || (NXT(1) != '/')) { 8584 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8585 "xmlParseEndTag: '</' not found\n"); 8586 return; 8587 } 8588 SKIP(2); 8589 8590 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8591 8592 /* 8593 * We should definitely be at the ending "S? '>'" part 8594 */ 8595 GROW; 8596 SKIP_BLANKS; 8597 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8598 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8599 } else 8600 NEXT1; 8601 8602 /* 8603 * [ WFC: Element Type Match ] 8604 * The Name in an element's end-tag must match the element type in the 8605 * start-tag. 
8606 * 8607 */ 8608 if (name != (xmlChar*)1) { 8609 if (name == NULL) name = BAD_CAST "unparseable"; 8610 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8611 "Opening and ending tag mismatch: %s line %d and %s\n", 8612 ctxt->name, line, name); 8613 } 8614 8615 /* 8616 * SAX: End of Tag 8617 */ 8618 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8619 (!ctxt->disableSAX)) 8620 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8621 8622 namePop(ctxt); 8623 spacePop(ctxt); 8624 return; 8625 } 8626 8627 /** 8628 * xmlParseEndTag: 8629 * @ctxt: an XML parser context 8630 * 8631 * parse an end of tag 8632 * 8633 * [42] ETag ::= '</' Name S? '>' 8634 * 8635 * With namespace 8636 * 8637 * [NS 9] ETag ::= '</' QName S? '>' 8638 */ 8639 8640 void 8641 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8642 xmlParseEndTag1(ctxt, 0); 8643 } 8644 #endif /* LIBXML_SAX1_ENABLED */ 8645 8646 /************************************************************************ 8647 * * 8648 * SAX 2 specific operations * 8649 * * 8650 ************************************************************************/ 8651 8652 /* 8653 * xmlGetNamespace: 8654 * @ctxt: an XML parser context 8655 * @prefix: the prefix to lookup 8656 * 8657 * Lookup the namespace name for the @prefix (which ca be NULL) 8658 * The prefix must come from the @ctxt->dict dictionary 8659 * 8660 * Returns the namespace name or NULL if not bound 8661 */ 8662 static const xmlChar * 8663 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8664 int i; 8665 8666 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8667 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8668 if (ctxt->nsTab[i] == prefix) { 8669 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8670 return(NULL); 8671 return(ctxt->nsTab[i + 1]); 8672 } 8673 return(NULL); 8674 } 8675 8676 /** 8677 * xmlParseQName: 8678 * @ctxt: an XML parser context 8679 * @prefix: pointer to store the prefix part 8680 * 8681 * parse an XML Namespace QName 8682 * 8683 * 
[6]  QName  ::= (Prefix ':')? LocalPart
 * [7]  Prefix  ::= NCName
 * [8]  LocalPart  ::= NCName
 *
 * *prefix is always written: it receives the prefix when one was parsed
 * and NULL otherwise, including when the function fails.
 * Malformed names (leading ':', missing local part, extra ':') raise
 * XML_NS_ERR_QNAME and are returned as a single name.
 *
 * Returns the Name parsed or NULL
 */

static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
    const xmlChar *l, *p;

    GROW;

    l = xmlParseNCName(ctxt);
    if (l == NULL) {
        /* Not an NCName: a leading ':' may still yield a plain Name. */
        if (CUR == ':') {
            l = xmlParseName(ctxt);
            if (l != NULL) {
                xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                         "Failed to parse QName '%s'\n", l, NULL, NULL);
                *prefix = NULL;
                return(l);
            }
        }
        /* Do not leave the caller's prefix slot uninitialized. */
        *prefix = NULL;
        return(NULL);
    }
    if (CUR == ':') {
        NEXT;
        p = l;
        l = xmlParseNCName(ctxt);
        if (l == NULL) {
            xmlChar *tmp;

            /*
             * "prefix:" without a valid local part: rebuild the whole
             * text as one name and return it without prefix.
             */
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                     "Failed to parse QName '%s:'\n", p, NULL, NULL);
            l = xmlParseNmtoken(ctxt);
            if (l == NULL)
                tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
            else {
                tmp = xmlBuildQName(l, p, NULL, 0);
                xmlFree((char *)l);
            }
            p = xmlDictLookup(ctxt->dict, tmp, -1);
            if (tmp != NULL) xmlFree(tmp);
            *prefix = NULL;
            return(p);
        }
        if (CUR == ':') {
            xmlChar *tmp;

            /*
             * "a:b:...": keep the first part as prefix and fold the
             * rest into the local name.
             */
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                     "Failed to parse QName '%s:%s:'\n", p, l, NULL);
            NEXT;
            tmp = (xmlChar *) xmlParseName(ctxt);
            if (tmp != NULL) {
                tmp = xmlBuildQName(tmp, l, NULL, 0);
                l = xmlDictLookup(ctxt->dict, tmp, -1);
                if (tmp != NULL) xmlFree(tmp);
                *prefix = p;
                return(l);
            }
            tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
            l = xmlDictLookup(ctxt->dict, tmp, -1);
            if (tmp != NULL) xmlFree(tmp);
            *prefix = p;
            return(l);
        }
        *prefix = p;
    } else
        *prefix = NULL;
    return(l);
}

/**
 * xmlParseQNameAndCompare:
 * @ctxt:  an XML parser context
 * @name:  the localname
 * @prefix:  the prefix, if any.
8761 * 8762 * parse an XML name and compares for match 8763 * (specialized for endtag parsing) 8764 * 8765 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8766 * and the name for mismatch 8767 */ 8768 8769 static const xmlChar * 8770 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8771 xmlChar const *prefix) { 8772 const xmlChar *cmp; 8773 const xmlChar *in; 8774 const xmlChar *ret; 8775 const xmlChar *prefix2; 8776 8777 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8778 8779 GROW; 8780 in = ctxt->input->cur; 8781 8782 cmp = prefix; 8783 while (*in != 0 && *in == *cmp) { 8784 ++in; 8785 ++cmp; 8786 } 8787 if ((*cmp == 0) && (*in == ':')) { 8788 in++; 8789 cmp = name; 8790 while (*in != 0 && *in == *cmp) { 8791 ++in; 8792 ++cmp; 8793 } 8794 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8795 /* success */ 8796 ctxt->input->cur = in; 8797 return((const xmlChar*) 1); 8798 } 8799 } 8800 /* 8801 * all strings coms from the dictionary, equality can be done directly 8802 */ 8803 ret = xmlParseQName (ctxt, &prefix2); 8804 if ((ret == name) && (prefix == prefix2)) 8805 return((const xmlChar*) 1); 8806 return ret; 8807 } 8808 8809 /** 8810 * xmlParseAttValueInternal: 8811 * @ctxt: an XML parser context 8812 * @len: attribute len result 8813 * @alloc: whether the attribute was reallocated as a new string 8814 * @normalize: if 1 then further non-CDATA normalization must be done 8815 * 8816 * parse a value for an attribute. 8817 * NOTE: if no normalization is needed, the routine will return pointers 8818 * directly from the data buffer. 
8819 * 8820 * 3.3.3 Attribute-Value Normalization: 8821 * Before the value of an attribute is passed to the application or 8822 * checked for validity, the XML processor must normalize it as follows: 8823 * - a character reference is processed by appending the referenced 8824 * character to the attribute value 8825 * - an entity reference is processed by recursively processing the 8826 * replacement text of the entity 8827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8828 * appending #x20 to the normalized value, except that only a single 8829 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8830 * parsed entity or the literal entity value of an internal parsed entity 8831 * - other characters are processed by appending them to the normalized value 8832 * If the declared value is not CDATA, then the XML processor must further 8833 * process the normalized attribute value by discarding any leading and 8834 * trailing space (#x20) characters, and by replacing sequences of space 8835 * (#x20) characters by a single space (#x20) character. 8836 * All attributes for which no declaration has been read should be treated 8837 * by a non-validating parser as if declared CDATA. 8838 * 8839 * Returns the AttValue parsed or NULL. The value has to be freed by the 8840 * caller if it was copied, this can be detected by val[*len] == 0. 
8841 */ 8842 8843 static xmlChar * 8844 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8845 int normalize) 8846 { 8847 xmlChar limit = 0; 8848 const xmlChar *in = NULL, *start, *end, *last; 8849 xmlChar *ret = NULL; 8850 int line, col; 8851 8852 GROW; 8853 in = (xmlChar *) CUR_PTR; 8854 line = ctxt->input->line; 8855 col = ctxt->input->col; 8856 if (*in != '"' && *in != '\'') { 8857 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8858 return (NULL); 8859 } 8860 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8861 8862 /* 8863 * try to handle in this routine the most common case where no 8864 * allocation of a new string is required and where content is 8865 * pure ASCII. 8866 */ 8867 limit = *in++; 8868 col++; 8869 end = ctxt->input->end; 8870 start = in; 8871 if (in >= end) { 8872 const xmlChar *oldbase = ctxt->input->base; 8873 GROW; 8874 if (oldbase != ctxt->input->base) { 8875 long delta = ctxt->input->base - oldbase; 8876 start = start + delta; 8877 in = in + delta; 8878 } 8879 end = ctxt->input->end; 8880 } 8881 if (normalize) { 8882 /* 8883 * Skip any leading spaces 8884 */ 8885 while ((in < end) && (*in != limit) && 8886 ((*in == 0x20) || (*in == 0x9) || 8887 (*in == 0xA) || (*in == 0xD))) { 8888 if (*in == 0xA) { 8889 line++; col = 1; 8890 } else { 8891 col++; 8892 } 8893 in++; 8894 start = in; 8895 if (in >= end) { 8896 const xmlChar *oldbase = ctxt->input->base; 8897 GROW; 8898 if (ctxt->instate == XML_PARSER_EOF) 8899 return(NULL); 8900 if (oldbase != ctxt->input->base) { 8901 long delta = ctxt->input->base - oldbase; 8902 start = start + delta; 8903 in = in + delta; 8904 } 8905 end = ctxt->input->end; 8906 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8907 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8908 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8909 "AttValue length too long\n"); 8910 return(NULL); 8911 } 8912 } 8913 } 8914 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8915 (*in <= 0x7f) && (*in != '&') && 
(*in != '<')) { 8916 col++; 8917 if ((*in++ == 0x20) && (*in == 0x20)) break; 8918 if (in >= end) { 8919 const xmlChar *oldbase = ctxt->input->base; 8920 GROW; 8921 if (ctxt->instate == XML_PARSER_EOF) 8922 return(NULL); 8923 if (oldbase != ctxt->input->base) { 8924 long delta = ctxt->input->base - oldbase; 8925 start = start + delta; 8926 in = in + delta; 8927 } 8928 end = ctxt->input->end; 8929 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8930 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8931 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8932 "AttValue length too long\n"); 8933 return(NULL); 8934 } 8935 } 8936 } 8937 last = in; 8938 /* 8939 * skip the trailing blanks 8940 */ 8941 while ((last[-1] == 0x20) && (last > start)) last--; 8942 while ((in < end) && (*in != limit) && 8943 ((*in == 0x20) || (*in == 0x9) || 8944 (*in == 0xA) || (*in == 0xD))) { 8945 if (*in == 0xA) { 8946 line++, col = 1; 8947 } else { 8948 col++; 8949 } 8950 in++; 8951 if (in >= end) { 8952 const xmlChar *oldbase = ctxt->input->base; 8953 GROW; 8954 if (ctxt->instate == XML_PARSER_EOF) 8955 return(NULL); 8956 if (oldbase != ctxt->input->base) { 8957 long delta = ctxt->input->base - oldbase; 8958 start = start + delta; 8959 in = in + delta; 8960 last = last + delta; 8961 } 8962 end = ctxt->input->end; 8963 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8964 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8965 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8966 "AttValue length too long\n"); 8967 return(NULL); 8968 } 8969 } 8970 } 8971 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8972 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8973 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8974 "AttValue length too long\n"); 8975 return(NULL); 8976 } 8977 if (*in != limit) goto need_complex; 8978 } else { 8979 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8980 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8981 in++; 8982 col++; 8983 if (in >= end) { 8984 const xmlChar *oldbase = 
ctxt->input->base; 8985 GROW; 8986 if (ctxt->instate == XML_PARSER_EOF) 8987 return(NULL); 8988 if (oldbase != ctxt->input->base) { 8989 long delta = ctxt->input->base - oldbase; 8990 start = start + delta; 8991 in = in + delta; 8992 } 8993 end = ctxt->input->end; 8994 if (((in - start) > XML_MAX_TEXT_LENGTH) && 8995 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8996 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8997 "AttValue length too long\n"); 8998 return(NULL); 8999 } 9000 } 9001 } 9002 last = in; 9003 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9004 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9005 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9006 "AttValue length too long\n"); 9007 return(NULL); 9008 } 9009 if (*in != limit) goto need_complex; 9010 } 9011 in++; 9012 col++; 9013 if (len != NULL) { 9014 *len = last - start; 9015 ret = (xmlChar *) start; 9016 } else { 9017 if (alloc) *alloc = 1; 9018 ret = xmlStrndup(start, last - start); 9019 } 9020 CUR_PTR = in; 9021 ctxt->input->line = line; 9022 ctxt->input->col = col; 9023 if (alloc) *alloc = 0; 9024 return ret; 9025 need_complex: 9026 if (alloc) *alloc = 1; 9027 return xmlParseAttValueComplex(ctxt, len, normalize); 9028 } 9029 9030 /** 9031 * xmlParseAttribute2: 9032 * @ctxt: an XML parser context 9033 * @pref: the element prefix 9034 * @elem: the element name 9035 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9036 * @value: a xmlChar ** used to store the value of the attribute 9037 * @len: an int * to save the length of the attribute 9038 * @alloc: an int * to indicate if the attribute was allocated 9039 * 9040 * parse an attribute in the new SAX2 framework. 9041 * 9042 * Returns the attribute name, and the value in *value, . 
9043 */ 9044 9045 static const xmlChar * 9046 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9047 const xmlChar * pref, const xmlChar * elem, 9048 const xmlChar ** prefix, xmlChar ** value, 9049 int *len, int *alloc) 9050 { 9051 const xmlChar *name; 9052 xmlChar *val, *internal_val = NULL; 9053 int normalize = 0; 9054 9055 *value = NULL; 9056 GROW; 9057 name = xmlParseQName(ctxt, prefix); 9058 if (name == NULL) { 9059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9060 "error parsing attribute name\n"); 9061 return (NULL); 9062 } 9063 9064 /* 9065 * get the type if needed 9066 */ 9067 if (ctxt->attsSpecial != NULL) { 9068 int type; 9069 9070 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial, 9071 pref, elem, *prefix, name); 9072 if (type != 0) 9073 normalize = 1; 9074 } 9075 9076 /* 9077 * read the value 9078 */ 9079 SKIP_BLANKS; 9080 if (RAW == '=') { 9081 NEXT; 9082 SKIP_BLANKS; 9083 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9084 if (normalize) { 9085 /* 9086 * Sometimes a second normalisation pass for spaces is needed 9087 * but that only happens if charrefs or entities refernces 9088 * have been used in the attribute value, i.e. the attribute 9089 * value have been extracted in an allocated string already. 
9090 */ 9091 if (*alloc) { 9092 const xmlChar *val2; 9093 9094 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9095 if ((val2 != NULL) && (val2 != val)) { 9096 xmlFree(val); 9097 val = (xmlChar *) val2; 9098 } 9099 } 9100 } 9101 ctxt->instate = XML_PARSER_CONTENT; 9102 } else { 9103 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9104 "Specification mandates value for attribute %s\n", 9105 name); 9106 return (NULL); 9107 } 9108 9109 if (*prefix == ctxt->str_xml) { 9110 /* 9111 * Check that xml:lang conforms to the specification 9112 * No more registered as an error, just generate a warning now 9113 * since this was deprecated in XML second edition 9114 */ 9115 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9116 internal_val = xmlStrndup(val, *len); 9117 if (!xmlCheckLanguageID(internal_val)) { 9118 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9119 "Malformed value for xml:lang : %s\n", 9120 internal_val, NULL); 9121 } 9122 } 9123 9124 /* 9125 * Check that xml:space conforms to the specification 9126 */ 9127 if (xmlStrEqual(name, BAD_CAST "space")) { 9128 internal_val = xmlStrndup(val, *len); 9129 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9130 *(ctxt->space) = 0; 9131 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9132 *(ctxt->space) = 1; 9133 else { 9134 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9135 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9136 internal_val, NULL); 9137 } 9138 } 9139 if (internal_val) { 9140 xmlFree(internal_val); 9141 } 9142 } 9143 9144 *value = val; 9145 return (name); 9146 } 9147 /** 9148 * xmlParseStartTag2: 9149 * @ctxt: an XML parser context 9150 * 9151 * parse a start of tag either for rule element or 9152 * EmptyElement. In both case we don't parse the tag closing chars. 9153 * This routine is called when running SAX2 parsing 9154 * 9155 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9156 * 9157 * [ WFC: Unique Att Spec ] 9158 * No attribute name may appear more than once in the same start-tag or 9159 * empty-element tag. 9160 * 9161 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9162 * 9163 * [ WFC: Unique Att Spec ] 9164 * No attribute name may appear more than once in the same start-tag or 9165 * empty-element tag. 9166 * 9167 * With namespace: 9168 * 9169 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9170 * 9171 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9172 * 9173 * Returns the element name parsed 9174 */ 9175 9176 static const xmlChar * 9177 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9178 const xmlChar **URI, int *tlen) { 9179 const xmlChar *localname; 9180 const xmlChar *prefix; 9181 const xmlChar *attname; 9182 const xmlChar *aprefix; 9183 const xmlChar *nsname; 9184 xmlChar *attvalue; 9185 const xmlChar **atts = ctxt->atts; 9186 int maxatts = ctxt->maxatts; 9187 int nratts, nbatts, nbdef, inputid; 9188 int i, j, nbNs, attval; 9189 unsigned long cur; 9190 int nsNr = ctxt->nsNr; 9191 9192 if (RAW != '<') return(NULL); 9193 NEXT1; 9194 9195 /* 9196 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9197 * point since the attribute values may be stored as pointers to 9198 * the buffer and calling SHRINK would destroy them ! 9199 * The Shrinking is only possible once the full set of attribute 9200 * callbacks have been done. 9201 */ 9202 SHRINK; 9203 cur = ctxt->input->cur - ctxt->input->base; 9204 inputid = ctxt->input->id; 9205 nbatts = 0; 9206 nratts = 0; 9207 nbdef = 0; 9208 nbNs = 0; 9209 attval = 0; 9210 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9211 ctxt->nsNr = nsNr; 9212 9213 localname = xmlParseQName(ctxt, &prefix); 9214 if (localname == NULL) { 9215 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9216 "StartTag: invalid element name\n"); 9217 return(NULL); 9218 } 9219 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9220 9221 /* 9222 * Now parse the attributes, it ends up with the ending 9223 * 9224 * (S Attribute)* S? 9225 */ 9226 SKIP_BLANKS; 9227 GROW; 9228 9229 while (((RAW != '>') && 9230 ((RAW != '/') || (NXT(1) != '>')) && 9231 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9232 const xmlChar *q = CUR_PTR; 9233 unsigned int cons = ctxt->input->consumed; 9234 int len = -1, alloc = 0; 9235 9236 attname = xmlParseAttribute2(ctxt, prefix, localname, 9237 &aprefix, &attvalue, &len, &alloc); 9238 if ((attname == NULL) || (attvalue == NULL)) 9239 goto next_attr; 9240 if (len < 0) len = xmlStrlen(attvalue); 9241 9242 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9243 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9244 xmlURIPtr uri; 9245 9246 if (URL == NULL) { 9247 xmlErrMemory(ctxt, "dictionary allocation failure"); 9248 if ((attvalue != NULL) && (alloc != 0)) 9249 xmlFree(attvalue); 9250 return(NULL); 9251 } 9252 if (*URL != 0) { 9253 uri = xmlParseURI((const char *) URL); 9254 if (uri == NULL) { 9255 xmlNsErr(ctxt, XML_WAR_NS_URI, 9256 "xmlns: '%s' is not a valid URI\n", 9257 URL, NULL, NULL); 9258 } else { 9259 if (uri->scheme == NULL) { 9260 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9261 "xmlns: URI %s is not absolute\n", 9262 URL, NULL, NULL); 9263 } 9264 xmlFreeURI(uri); 9265 } 9266 if (URL == ctxt->str_xml_ns) { 9267 if (attname != ctxt->str_xml) { 9268 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9269 "xml namespace URI cannot be the default namespace\n", 9270 NULL, NULL, NULL); 9271 } 9272 goto next_attr; 9273 } 9274 if ((len == 29) && 9275 (xmlStrEqual(URL, 9276 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9277 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 
9278 "reuse of the xmlns namespace name is forbidden\n", 9279 NULL, NULL, NULL); 9280 goto next_attr; 9281 } 9282 } 9283 /* 9284 * check that it's not a defined namespace 9285 */ 9286 for (j = 1;j <= nbNs;j++) 9287 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9288 break; 9289 if (j <= nbNs) 9290 xmlErrAttributeDup(ctxt, NULL, attname); 9291 else 9292 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9293 9294 } else if (aprefix == ctxt->str_xmlns) { 9295 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9296 xmlURIPtr uri; 9297 9298 if (attname == ctxt->str_xml) { 9299 if (URL != ctxt->str_xml_ns) { 9300 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9301 "xml namespace prefix mapped to wrong URI\n", 9302 NULL, NULL, NULL); 9303 } 9304 /* 9305 * Do not keep a namespace definition node 9306 */ 9307 goto next_attr; 9308 } 9309 if (URL == ctxt->str_xml_ns) { 9310 if (attname != ctxt->str_xml) { 9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9312 "xml namespace URI mapped to wrong prefix\n", 9313 NULL, NULL, NULL); 9314 } 9315 goto next_attr; 9316 } 9317 if (attname == ctxt->str_xmlns) { 9318 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9319 "redefinition of the xmlns prefix is forbidden\n", 9320 NULL, NULL, NULL); 9321 goto next_attr; 9322 } 9323 if ((len == 29) && 9324 (xmlStrEqual(URL, 9325 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9326 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9327 "reuse of the xmlns namespace name is forbidden\n", 9328 NULL, NULL, NULL); 9329 goto next_attr; 9330 } 9331 if ((URL == NULL) || (URL[0] == 0)) { 9332 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9333 "xmlns:%s: Empty XML namespace is not allowed\n", 9334 attname, NULL, NULL); 9335 goto next_attr; 9336 } else { 9337 uri = xmlParseURI((const char *) URL); 9338 if (uri == NULL) { 9339 xmlNsErr(ctxt, XML_WAR_NS_URI, 9340 "xmlns:%s: '%s' is not a valid URI\n", 9341 attname, URL, NULL); 9342 } else { 9343 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9344 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 
9345 "xmlns:%s: URI %s is not absolute\n", 9346 attname, URL, NULL); 9347 } 9348 xmlFreeURI(uri); 9349 } 9350 } 9351 9352 /* 9353 * check that it's not a defined namespace 9354 */ 9355 for (j = 1;j <= nbNs;j++) 9356 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9357 break; 9358 if (j <= nbNs) 9359 xmlErrAttributeDup(ctxt, aprefix, attname); 9360 else 9361 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9362 9363 } else { 9364 /* 9365 * Add the pair to atts 9366 */ 9367 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9368 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9369 goto next_attr; 9370 } 9371 maxatts = ctxt->maxatts; 9372 atts = ctxt->atts; 9373 } 9374 ctxt->attallocs[nratts++] = alloc; 9375 atts[nbatts++] = attname; 9376 atts[nbatts++] = aprefix; 9377 /* 9378 * The namespace URI field is used temporarily to point at the 9379 * base of the current input buffer for non-alloced attributes. 9380 * When the input buffer is reallocated, all the pointers become 9381 * invalid, but they can be reconstructed later. 
9382 */ 9383 if (alloc) 9384 atts[nbatts++] = NULL; 9385 else 9386 atts[nbatts++] = ctxt->input->base; 9387 atts[nbatts++] = attvalue; 9388 attvalue += len; 9389 atts[nbatts++] = attvalue; 9390 /* 9391 * tag if some deallocation is needed 9392 */ 9393 if (alloc != 0) attval = 1; 9394 attvalue = NULL; /* moved into atts */ 9395 } 9396 9397 next_attr: 9398 if ((attvalue != NULL) && (alloc != 0)) { 9399 xmlFree(attvalue); 9400 attvalue = NULL; 9401 } 9402 9403 GROW 9404 if (ctxt->instate == XML_PARSER_EOF) 9405 break; 9406 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9407 break; 9408 if (SKIP_BLANKS == 0) { 9409 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9410 "attributes construct error\n"); 9411 break; 9412 } 9413 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9414 (attname == NULL) && (attvalue == NULL)) { 9415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9416 "xmlParseStartTag: problem parsing attributes\n"); 9417 break; 9418 } 9419 GROW; 9420 } 9421 9422 if (ctxt->input->id != inputid) { 9423 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9424 "Unexpected change of input\n"); 9425 localname = NULL; 9426 goto done; 9427 } 9428 9429 /* Reconstruct attribute value pointers. */ 9430 for (i = 0, j = 0; j < nratts; i += 5, j++) { 9431 if (atts[i+2] != NULL) { 9432 /* 9433 * Arithmetic on dangling pointers is technically undefined 9434 * behavior, but well... 
9435 */ 9436 ptrdiff_t offset = ctxt->input->base - atts[i+2]; 9437 atts[i+2] = NULL; /* Reset repurposed namespace URI */ 9438 atts[i+3] += offset; /* value */ 9439 atts[i+4] += offset; /* valuend */ 9440 } 9441 } 9442 9443 /* 9444 * The attributes defaulting 9445 */ 9446 if (ctxt->attsDefault != NULL) { 9447 xmlDefAttrsPtr defaults; 9448 9449 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9450 if (defaults != NULL) { 9451 for (i = 0;i < defaults->nbAttrs;i++) { 9452 attname = defaults->values[5 * i]; 9453 aprefix = defaults->values[5 * i + 1]; 9454 9455 /* 9456 * special work for namespaces defaulted defs 9457 */ 9458 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9459 /* 9460 * check that it's not a defined namespace 9461 */ 9462 for (j = 1;j <= nbNs;j++) 9463 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9464 break; 9465 if (j <= nbNs) continue; 9466 9467 nsname = xmlGetNamespace(ctxt, NULL); 9468 if (nsname != defaults->values[5 * i + 2]) { 9469 if (nsPush(ctxt, NULL, 9470 defaults->values[5 * i + 2]) > 0) 9471 nbNs++; 9472 } 9473 } else if (aprefix == ctxt->str_xmlns) { 9474 /* 9475 * check that it's not a defined namespace 9476 */ 9477 for (j = 1;j <= nbNs;j++) 9478 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9479 break; 9480 if (j <= nbNs) continue; 9481 9482 nsname = xmlGetNamespace(ctxt, attname); 9483 if (nsname != defaults->values[2]) { 9484 if (nsPush(ctxt, attname, 9485 defaults->values[5 * i + 2]) > 0) 9486 nbNs++; 9487 } 9488 } else { 9489 /* 9490 * check that it's not a defined attribute 9491 */ 9492 for (j = 0;j < nbatts;j+=5) { 9493 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9494 break; 9495 } 9496 if (j < nbatts) continue; 9497 9498 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9499 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9500 return(NULL); 9501 } 9502 maxatts = ctxt->maxatts; 9503 atts = ctxt->atts; 9504 } 9505 atts[nbatts++] = attname; 9506 atts[nbatts++] = aprefix; 9507 if (aprefix == NULL) 9508 
atts[nbatts++] = NULL; 9509 else 9510 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9511 atts[nbatts++] = defaults->values[5 * i + 2]; 9512 atts[nbatts++] = defaults->values[5 * i + 3]; 9513 if ((ctxt->standalone == 1) && 9514 (defaults->values[5 * i + 4] != NULL)) { 9515 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9516 "standalone: attribute %s on %s defaulted from external subset\n", 9517 attname, localname); 9518 } 9519 nbdef++; 9520 } 9521 } 9522 } 9523 } 9524 9525 /* 9526 * The attributes checkings 9527 */ 9528 for (i = 0; i < nbatts;i += 5) { 9529 /* 9530 * The default namespace does not apply to attribute names. 9531 */ 9532 if (atts[i + 1] != NULL) { 9533 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9534 if (nsname == NULL) { 9535 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9536 "Namespace prefix %s for %s on %s is not defined\n", 9537 atts[i + 1], atts[i], localname); 9538 } 9539 atts[i + 2] = nsname; 9540 } else 9541 nsname = NULL; 9542 /* 9543 * [ WFC: Unique Att Spec ] 9544 * No attribute name may appear more than once in the same 9545 * start-tag or empty-element tag. 9546 * As extended by the Namespace in XML REC. 9547 */ 9548 for (j = 0; j < i;j += 5) { 9549 if (atts[i] == atts[j]) { 9550 if (atts[i+1] == atts[j+1]) { 9551 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9552 break; 9553 } 9554 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9555 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9556 "Namespaced Attribute %s in '%s' redefined\n", 9557 atts[i], nsname, NULL); 9558 break; 9559 } 9560 } 9561 } 9562 } 9563 9564 nsname = xmlGetNamespace(ctxt, prefix); 9565 if ((prefix != NULL) && (nsname == NULL)) { 9566 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9567 "Namespace prefix %s on %s is not defined\n", 9568 prefix, localname, NULL); 9569 } 9570 *pref = prefix; 9571 *URI = nsname; 9572 9573 /* 9574 * SAX: Start of Element ! 
9575 */ 9576 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9577 (!ctxt->disableSAX)) { 9578 if (nbNs > 0) 9579 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9580 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9581 nbatts / 5, nbdef, atts); 9582 else 9583 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9584 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9585 } 9586 9587 done: 9588 /* 9589 * Free up attribute allocated strings if needed 9590 */ 9591 if (attval != 0) { 9592 for (i = 3,j = 0; j < nratts;i += 5,j++) 9593 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9594 xmlFree((xmlChar *) atts[i]); 9595 } 9596 9597 return(localname); 9598 } 9599 9600 /** 9601 * xmlParseEndTag2: 9602 * @ctxt: an XML parser context 9603 * @line: line of the start tag 9604 * @nsNr: number of namespaces on the start tag 9605 * 9606 * parse an end of tag 9607 * 9608 * [42] ETag ::= '</' Name S? '>' 9609 * 9610 * With namespace 9611 * 9612 * [NS 9] ETag ::= '</' QName S? 
'>' 9613 */ 9614 9615 static void 9616 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9617 const xmlChar *URI, int line, int nsNr, int tlen) { 9618 const xmlChar *name; 9619 size_t curLength; 9620 9621 GROW; 9622 if ((RAW != '<') || (NXT(1) != '/')) { 9623 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9624 return; 9625 } 9626 SKIP(2); 9627 9628 curLength = ctxt->input->end - ctxt->input->cur; 9629 if ((tlen > 0) && (curLength >= (size_t)tlen) && 9630 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9631 if ((curLength >= (size_t)(tlen + 1)) && 9632 (ctxt->input->cur[tlen] == '>')) { 9633 ctxt->input->cur += tlen + 1; 9634 ctxt->input->col += tlen + 1; 9635 goto done; 9636 } 9637 ctxt->input->cur += tlen; 9638 ctxt->input->col += tlen; 9639 name = (xmlChar*)1; 9640 } else { 9641 if (prefix == NULL) 9642 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9643 else 9644 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9645 } 9646 9647 /* 9648 * We should definitely be at the ending "S? '>'" part 9649 */ 9650 GROW; 9651 if (ctxt->instate == XML_PARSER_EOF) 9652 return; 9653 SKIP_BLANKS; 9654 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9655 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9656 } else 9657 NEXT1; 9658 9659 /* 9660 * [ WFC: Element Type Match ] 9661 * The Name in an element's end-tag must match the element type in the 9662 * start-tag. 
9663 * 9664 */ 9665 if (name != (xmlChar*)1) { 9666 if (name == NULL) name = BAD_CAST "unparseable"; 9667 if ((line == 0) && (ctxt->node != NULL)) 9668 line = ctxt->node->line; 9669 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9670 "Opening and ending tag mismatch: %s line %d and %s\n", 9671 ctxt->name, line, name); 9672 } 9673 9674 /* 9675 * SAX: End of Tag 9676 */ 9677 done: 9678 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9679 (!ctxt->disableSAX)) 9680 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9681 9682 spacePop(ctxt); 9683 if (nsNr != 0) 9684 nsPop(ctxt, nsNr); 9685 return; 9686 } 9687 9688 /** 9689 * xmlParseCDSect: 9690 * @ctxt: an XML parser context 9691 * 9692 * Parse escaped pure raw content. 9693 * 9694 * [18] CDSect ::= CDStart CData CDEnd 9695 * 9696 * [19] CDStart ::= '<![CDATA[' 9697 * 9698 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9699 * 9700 * [21] CDEnd ::= ']]>' 9701 */ 9702 void 9703 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9704 xmlChar *buf = NULL; 9705 int len = 0; 9706 int size = XML_PARSER_BUFFER_SIZE; 9707 int r, rl; 9708 int s, sl; 9709 int cur, l; 9710 int count = 0; 9711 9712 /* Check 2.6.0 was NXT(0) not RAW */ 9713 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9714 SKIP(9); 9715 } else 9716 return; 9717 9718 ctxt->instate = XML_PARSER_CDATA_SECTION; 9719 r = CUR_CHAR(rl); 9720 if (!IS_CHAR(r)) { 9721 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9722 ctxt->instate = XML_PARSER_CONTENT; 9723 return; 9724 } 9725 NEXTL(rl); 9726 s = CUR_CHAR(sl); 9727 if (!IS_CHAR(s)) { 9728 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9729 ctxt->instate = XML_PARSER_CONTENT; 9730 return; 9731 } 9732 NEXTL(sl); 9733 cur = CUR_CHAR(l); 9734 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9735 if (buf == NULL) { 9736 xmlErrMemory(ctxt, NULL); 9737 return; 9738 } 9739 while (IS_CHAR(cur) && 9740 ((r != ']') || (s != ']') || (cur != '>'))) { 9741 if (len + 
5 >= size) { 9742 xmlChar *tmp; 9743 9744 if ((size > XML_MAX_TEXT_LENGTH) && 9745 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9746 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9747 "CData section too big found", NULL); 9748 xmlFree (buf); 9749 return; 9750 } 9751 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9752 if (tmp == NULL) { 9753 xmlFree(buf); 9754 xmlErrMemory(ctxt, NULL); 9755 return; 9756 } 9757 buf = tmp; 9758 size *= 2; 9759 } 9760 COPY_BUF(rl,buf,len,r); 9761 r = s; 9762 rl = sl; 9763 s = cur; 9764 sl = l; 9765 count++; 9766 if (count > 50) { 9767 GROW; 9768 if (ctxt->instate == XML_PARSER_EOF) { 9769 xmlFree(buf); 9770 return; 9771 } 9772 count = 0; 9773 } 9774 NEXTL(l); 9775 cur = CUR_CHAR(l); 9776 } 9777 buf[len] = 0; 9778 ctxt->instate = XML_PARSER_CONTENT; 9779 if (cur != '>') { 9780 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9781 "CData section not finished\n%.50s\n", buf); 9782 xmlFree(buf); 9783 return; 9784 } 9785 NEXTL(l); 9786 9787 /* 9788 * OK the buffer is to be consumed as cdata. 9789 */ 9790 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9791 if (ctxt->sax->cdataBlock != NULL) 9792 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9793 else if (ctxt->sax->characters != NULL) 9794 ctxt->sax->characters(ctxt->userData, buf, len); 9795 } 9796 xmlFree(buf); 9797 } 9798 9799 /** 9800 * xmlParseContent: 9801 * @ctxt: an XML parser context 9802 * 9803 * Parse a content: 9804 * 9805 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9806 */ 9807 9808 void 9809 xmlParseContent(xmlParserCtxtPtr ctxt) { 9810 GROW; 9811 while ((RAW != 0) && 9812 ((RAW != '<') || (NXT(1) != '/')) && 9813 (ctxt->instate != XML_PARSER_EOF)) { 9814 const xmlChar *test = CUR_PTR; 9815 unsigned int cons = ctxt->input->consumed; 9816 const xmlChar *cur = ctxt->input->cur; 9817 9818 /* 9819 * First case : a Processing Instruction. 
9820 */ 9821 if ((*cur == '<') && (cur[1] == '?')) { 9822 xmlParsePI(ctxt); 9823 } 9824 9825 /* 9826 * Second case : a CDSection 9827 */ 9828 /* 2.6.0 test was *cur not RAW */ 9829 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9830 xmlParseCDSect(ctxt); 9831 } 9832 9833 /* 9834 * Third case : a comment 9835 */ 9836 else if ((*cur == '<') && (NXT(1) == '!') && 9837 (NXT(2) == '-') && (NXT(3) == '-')) { 9838 xmlParseComment(ctxt); 9839 ctxt->instate = XML_PARSER_CONTENT; 9840 } 9841 9842 /* 9843 * Fourth case : a sub-element. 9844 */ 9845 else if (*cur == '<') { 9846 xmlParseElement(ctxt); 9847 } 9848 9849 /* 9850 * Fifth case : a reference. If if has not been resolved, 9851 * parsing returns it's Name, create the node 9852 */ 9853 9854 else if (*cur == '&') { 9855 xmlParseReference(ctxt); 9856 } 9857 9858 /* 9859 * Last case, text. Note that References are handled directly. 9860 */ 9861 else { 9862 xmlParseCharData(ctxt, 0); 9863 } 9864 9865 GROW; 9866 SHRINK; 9867 9868 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9869 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9870 "detected an error in element content\n"); 9871 xmlHaltParser(ctxt); 9872 break; 9873 } 9874 } 9875 } 9876 9877 /** 9878 * xmlParseElement: 9879 * @ctxt: an XML parser context 9880 * 9881 * parse an XML element, this is highly recursive 9882 * 9883 * [39] element ::= EmptyElemTag | STag content ETag 9884 * 9885 * [ WFC: Element Type Match ] 9886 * The Name in an element's end-tag must match the element type in the 9887 * start-tag. 
9888 * 9889 */ 9890 9891 void 9892 xmlParseElement(xmlParserCtxtPtr ctxt) { 9893 const xmlChar *name; 9894 const xmlChar *prefix = NULL; 9895 const xmlChar *URI = NULL; 9896 xmlParserNodeInfo node_info; 9897 int line, tlen = 0; 9898 xmlNodePtr ret; 9899 int nsNr = ctxt->nsNr; 9900 9901 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9902 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9903 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9904 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9905 xmlParserMaxDepth); 9906 xmlHaltParser(ctxt); 9907 return; 9908 } 9909 9910 /* Capture start position */ 9911 if (ctxt->record_info) { 9912 node_info.begin_pos = ctxt->input->consumed + 9913 (CUR_PTR - ctxt->input->base); 9914 node_info.begin_line = ctxt->input->line; 9915 } 9916 9917 if (ctxt->spaceNr == 0) 9918 spacePush(ctxt, -1); 9919 else if (*ctxt->space == -2) 9920 spacePush(ctxt, -1); 9921 else 9922 spacePush(ctxt, *ctxt->space); 9923 9924 line = ctxt->input->line; 9925 #ifdef LIBXML_SAX1_ENABLED 9926 if (ctxt->sax2) 9927 #endif /* LIBXML_SAX1_ENABLED */ 9928 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9929 #ifdef LIBXML_SAX1_ENABLED 9930 else 9931 name = xmlParseStartTag(ctxt); 9932 #endif /* LIBXML_SAX1_ENABLED */ 9933 if (ctxt->instate == XML_PARSER_EOF) 9934 return; 9935 if (name == NULL) { 9936 spacePop(ctxt); 9937 return; 9938 } 9939 namePush(ctxt, name); 9940 ret = ctxt->node; 9941 9942 #ifdef LIBXML_VALID_ENABLED 9943 /* 9944 * [ VC: Root Element Type ] 9945 * The Name in the document type declaration must match the element 9946 * type of the root element. 9947 */ 9948 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9949 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9950 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9951 #endif /* LIBXML_VALID_ENABLED */ 9952 9953 /* 9954 * Check for an Empty Element. 
9955 */ 9956 if ((RAW == '/') && (NXT(1) == '>')) { 9957 SKIP(2); 9958 if (ctxt->sax2) { 9959 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9960 (!ctxt->disableSAX)) 9961 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 9962 #ifdef LIBXML_SAX1_ENABLED 9963 } else { 9964 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 9965 (!ctxt->disableSAX)) 9966 ctxt->sax->endElement(ctxt->userData, name); 9967 #endif /* LIBXML_SAX1_ENABLED */ 9968 } 9969 namePop(ctxt); 9970 spacePop(ctxt); 9971 if (nsNr != ctxt->nsNr) 9972 nsPop(ctxt, ctxt->nsNr - nsNr); 9973 if ( ret != NULL && ctxt->record_info ) { 9974 node_info.end_pos = ctxt->input->consumed + 9975 (CUR_PTR - ctxt->input->base); 9976 node_info.end_line = ctxt->input->line; 9977 node_info.node = ret; 9978 xmlParserAddNodeInfo(ctxt, &node_info); 9979 } 9980 return; 9981 } 9982 if (RAW == '>') { 9983 NEXT1; 9984 } else { 9985 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 9986 "Couldn't find end of Start Tag %s line %d\n", 9987 name, line, NULL); 9988 9989 /* 9990 * end of parsing of this node. 9991 */ 9992 nodePop(ctxt); 9993 namePop(ctxt); 9994 spacePop(ctxt); 9995 if (nsNr != ctxt->nsNr) 9996 nsPop(ctxt, ctxt->nsNr - nsNr); 9997 9998 /* 9999 * Capture end position and add node 10000 */ 10001 if ( ret != NULL && ctxt->record_info ) { 10002 node_info.end_pos = ctxt->input->consumed + 10003 (CUR_PTR - ctxt->input->base); 10004 node_info.end_line = ctxt->input->line; 10005 node_info.node = ret; 10006 xmlParserAddNodeInfo(ctxt, &node_info); 10007 } 10008 return; 10009 } 10010 10011 /* 10012 * Parse the content of the element: 10013 */ 10014 xmlParseContent(ctxt); 10015 if (ctxt->instate == XML_PARSER_EOF) 10016 return; 10017 if (!IS_BYTE_CHAR(RAW)) { 10018 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10019 "Premature end of data in tag %s line %d\n", 10020 name, line, NULL); 10021 10022 /* 10023 * end of parsing of this node. 
10024 */ 10025 nodePop(ctxt); 10026 namePop(ctxt); 10027 spacePop(ctxt); 10028 if (nsNr != ctxt->nsNr) 10029 nsPop(ctxt, ctxt->nsNr - nsNr); 10030 return; 10031 } 10032 10033 /* 10034 * parse the end of tag: '</' should be here. 10035 */ 10036 if (ctxt->sax2) { 10037 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10038 namePop(ctxt); 10039 } 10040 #ifdef LIBXML_SAX1_ENABLED 10041 else 10042 xmlParseEndTag1(ctxt, line); 10043 #endif /* LIBXML_SAX1_ENABLED */ 10044 10045 /* 10046 * Capture end position and add node 10047 */ 10048 if ( ret != NULL && ctxt->record_info ) { 10049 node_info.end_pos = ctxt->input->consumed + 10050 (CUR_PTR - ctxt->input->base); 10051 node_info.end_line = ctxt->input->line; 10052 node_info.node = ret; 10053 xmlParserAddNodeInfo(ctxt, &node_info); 10054 } 10055 } 10056 10057 /** 10058 * xmlParseVersionNum: 10059 * @ctxt: an XML parser context 10060 * 10061 * parse the XML version value. 10062 * 10063 * [26] VersionNum ::= '1.' [0-9]+ 10064 * 10065 * In practice allow [0-9].[0-9]+ at that level 10066 * 10067 * Returns the string giving the XML version number, or NULL 10068 */ 10069 xmlChar * 10070 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10071 xmlChar *buf = NULL; 10072 int len = 0; 10073 int size = 10; 10074 xmlChar cur; 10075 10076 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10077 if (buf == NULL) { 10078 xmlErrMemory(ctxt, NULL); 10079 return(NULL); 10080 } 10081 cur = CUR; 10082 if (!((cur >= '0') && (cur <= '9'))) { 10083 xmlFree(buf); 10084 return(NULL); 10085 } 10086 buf[len++] = cur; 10087 NEXT; 10088 cur=CUR; 10089 if (cur != '.') { 10090 xmlFree(buf); 10091 return(NULL); 10092 } 10093 buf[len++] = cur; 10094 NEXT; 10095 cur=CUR; 10096 while ((cur >= '0') && (cur <= '9')) { 10097 if (len + 1 >= size) { 10098 xmlChar *tmp; 10099 10100 size *= 2; 10101 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10102 if (tmp == NULL) { 10103 xmlFree(buf); 10104 xmlErrMemory(ctxt, NULL); 10105 
return(NULL); 10106 } 10107 buf = tmp; 10108 } 10109 buf[len++] = cur; 10110 NEXT; 10111 cur=CUR; 10112 } 10113 buf[len] = 0; 10114 return(buf); 10115 } 10116 10117 /** 10118 * xmlParseVersionInfo: 10119 * @ctxt: an XML parser context 10120 * 10121 * parse the XML version. 10122 * 10123 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10124 * 10125 * [25] Eq ::= S? '=' S? 10126 * 10127 * Returns the version string, e.g. "1.0" 10128 */ 10129 10130 xmlChar * 10131 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10132 xmlChar *version = NULL; 10133 10134 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10135 SKIP(7); 10136 SKIP_BLANKS; 10137 if (RAW != '=') { 10138 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10139 return(NULL); 10140 } 10141 NEXT; 10142 SKIP_BLANKS; 10143 if (RAW == '"') { 10144 NEXT; 10145 version = xmlParseVersionNum(ctxt); 10146 if (RAW != '"') { 10147 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10148 } else 10149 NEXT; 10150 } else if (RAW == '\''){ 10151 NEXT; 10152 version = xmlParseVersionNum(ctxt); 10153 if (RAW != '\'') { 10154 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10155 } else 10156 NEXT; 10157 } else { 10158 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10159 } 10160 } 10161 return(version); 10162 } 10163 10164 /** 10165 * xmlParseEncName: 10166 * @ctxt: an XML parser context 10167 * 10168 * parse the XML encoding name 10169 * 10170 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10171 * 10172 * Returns the encoding name value or NULL 10173 */ 10174 xmlChar * 10175 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10176 xmlChar *buf = NULL; 10177 int len = 0; 10178 int size = 10; 10179 xmlChar cur; 10180 10181 cur = CUR; 10182 if (((cur >= 'a') && (cur <= 'z')) || 10183 ((cur >= 'A') && (cur <= 'Z'))) { 10184 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10185 if (buf == NULL) { 10186 xmlErrMemory(ctxt, NULL); 10187 return(NULL); 10188 } 10189 10190 buf[len++] = cur; 10191 
NEXT; 10192 cur = CUR; 10193 while (((cur >= 'a') && (cur <= 'z')) || 10194 ((cur >= 'A') && (cur <= 'Z')) || 10195 ((cur >= '0') && (cur <= '9')) || 10196 (cur == '.') || (cur == '_') || 10197 (cur == '-')) { 10198 if (len + 1 >= size) { 10199 xmlChar *tmp; 10200 10201 size *= 2; 10202 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10203 if (tmp == NULL) { 10204 xmlErrMemory(ctxt, NULL); 10205 xmlFree(buf); 10206 return(NULL); 10207 } 10208 buf = tmp; 10209 } 10210 buf[len++] = cur; 10211 NEXT; 10212 cur = CUR; 10213 if (cur == 0) { 10214 SHRINK; 10215 GROW; 10216 cur = CUR; 10217 } 10218 } 10219 buf[len] = 0; 10220 } else { 10221 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10222 } 10223 return(buf); 10224 } 10225 10226 /** 10227 * xmlParseEncodingDecl: 10228 * @ctxt: an XML parser context 10229 * 10230 * parse the XML encoding declaration 10231 * 10232 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10233 * 10234 * this setups the conversion filters. 
10235 * 10236 * Returns the encoding value or NULL 10237 */ 10238 10239 const xmlChar * 10240 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10241 xmlChar *encoding = NULL; 10242 10243 SKIP_BLANKS; 10244 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10245 SKIP(8); 10246 SKIP_BLANKS; 10247 if (RAW != '=') { 10248 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10249 return(NULL); 10250 } 10251 NEXT; 10252 SKIP_BLANKS; 10253 if (RAW == '"') { 10254 NEXT; 10255 encoding = xmlParseEncName(ctxt); 10256 if (RAW != '"') { 10257 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10258 xmlFree((xmlChar *) encoding); 10259 return(NULL); 10260 } else 10261 NEXT; 10262 } else if (RAW == '\''){ 10263 NEXT; 10264 encoding = xmlParseEncName(ctxt); 10265 if (RAW != '\'') { 10266 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10267 xmlFree((xmlChar *) encoding); 10268 return(NULL); 10269 } else 10270 NEXT; 10271 } else { 10272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10273 } 10274 10275 /* 10276 * Non standard parsing, allowing the user to ignore encoding 10277 */ 10278 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10279 xmlFree((xmlChar *) encoding); 10280 return(NULL); 10281 } 10282 10283 /* 10284 * UTF-16 encoding stwich has already taken place at this stage, 10285 * more over the little-endian/big-endian selection is already done 10286 */ 10287 if ((encoding != NULL) && 10288 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10289 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10290 /* 10291 * If no encoding was passed to the parser, that we are 10292 * using UTF-16 and no decoder is present i.e. 
the 10293 * document is apparently UTF-8 compatible, then raise an 10294 * encoding mismatch fatal error 10295 */ 10296 if ((ctxt->encoding == NULL) && 10297 (ctxt->input->buf != NULL) && 10298 (ctxt->input->buf->encoder == NULL)) { 10299 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10300 "Document labelled UTF-16 but has UTF-8 content\n"); 10301 } 10302 if (ctxt->encoding != NULL) 10303 xmlFree((xmlChar *) ctxt->encoding); 10304 ctxt->encoding = encoding; 10305 } 10306 /* 10307 * UTF-8 encoding is handled natively 10308 */ 10309 else if ((encoding != NULL) && 10310 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10311 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10312 if (ctxt->encoding != NULL) 10313 xmlFree((xmlChar *) ctxt->encoding); 10314 ctxt->encoding = encoding; 10315 } 10316 else if (encoding != NULL) { 10317 xmlCharEncodingHandlerPtr handler; 10318 10319 if (ctxt->input->encoding != NULL) 10320 xmlFree((xmlChar *) ctxt->input->encoding); 10321 ctxt->input->encoding = encoding; 10322 10323 handler = xmlFindCharEncodingHandler((const char *) encoding); 10324 if (handler != NULL) { 10325 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10326 /* failed to convert */ 10327 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10328 return(NULL); 10329 } 10330 } else { 10331 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10332 "Unsupported encoding %s\n", encoding); 10333 return(NULL); 10334 } 10335 } 10336 } 10337 return(encoding); 10338 } 10339 10340 /** 10341 * xmlParseSDDecl: 10342 * @ctxt: an XML parser context 10343 * 10344 * parse the XML standalone declaration 10345 * 10346 * [32] SDDecl ::= S 'standalone' Eq 10347 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10348 * 10349 * [ VC: Standalone Document Declaration ] 10350 * TODO The standalone document declaration must have the value "no" 10351 * if any external markup declarations contain declarations of: 10352 * - attributes with default values, if elements to which these 10353 * attributes 
apply appear in the document without specifications 10354 * of values for these attributes, or 10355 * - entities (other than amp, lt, gt, apos, quot), if references 10356 * to those entities appear in the document, or 10357 * - attributes with values subject to normalization, where the 10358 * attribute appears in the document with a value which will change 10359 * as a result of normalization, or 10360 * - element types with element content, if white space occurs directly 10361 * within any instance of those types. 10362 * 10363 * Returns: 10364 * 1 if standalone="yes" 10365 * 0 if standalone="no" 10366 * -2 if standalone attribute is missing or invalid 10367 * (A standalone value of -2 means that the XML declaration was found, 10368 * but no value was specified for the standalone attribute). 10369 */ 10370 10371 int 10372 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10373 int standalone = -2; 10374 10375 SKIP_BLANKS; 10376 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10377 SKIP(10); 10378 SKIP_BLANKS; 10379 if (RAW != '=') { 10380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10381 return(standalone); 10382 } 10383 NEXT; 10384 SKIP_BLANKS; 10385 if (RAW == '\''){ 10386 NEXT; 10387 if ((RAW == 'n') && (NXT(1) == 'o')) { 10388 standalone = 0; 10389 SKIP(2); 10390 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10391 (NXT(2) == 's')) { 10392 standalone = 1; 10393 SKIP(3); 10394 } else { 10395 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10396 } 10397 if (RAW != '\'') { 10398 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10399 } else 10400 NEXT; 10401 } else if (RAW == '"'){ 10402 NEXT; 10403 if ((RAW == 'n') && (NXT(1) == 'o')) { 10404 standalone = 0; 10405 SKIP(2); 10406 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10407 (NXT(2) == 's')) { 10408 standalone = 1; 10409 SKIP(3); 10410 } else { 10411 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10412 } 10413 if (RAW != '"') { 10414 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, 
NULL); 10415 } else 10416 NEXT; 10417 } else { 10418 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10419 } 10420 } 10421 return(standalone); 10422 } 10423 10424 /** 10425 * xmlParseXMLDecl: 10426 * @ctxt: an XML parser context 10427 * 10428 * parse an XML declaration header 10429 * 10430 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10431 */ 10432 10433 void 10434 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10435 xmlChar *version; 10436 10437 /* 10438 * This value for standalone indicates that the document has an 10439 * XML declaration but it does not have a standalone attribute. 10440 * It will be overwritten later if a standalone attribute is found. 10441 */ 10442 ctxt->input->standalone = -2; 10443 10444 /* 10445 * We know that '<?xml' is here. 10446 */ 10447 SKIP(5); 10448 10449 if (!IS_BLANK_CH(RAW)) { 10450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10451 "Blank needed after '<?xml'\n"); 10452 } 10453 SKIP_BLANKS; 10454 10455 /* 10456 * We must have the VersionInfo here. 
10457 */ 10458 version = xmlParseVersionInfo(ctxt); 10459 if (version == NULL) { 10460 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10461 } else { 10462 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10463 /* 10464 * Changed here for XML-1.0 5th edition 10465 */ 10466 if (ctxt->options & XML_PARSE_OLD10) { 10467 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10468 "Unsupported version '%s'\n", 10469 version); 10470 } else { 10471 if ((version[0] == '1') && ((version[1] == '.'))) { 10472 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10473 "Unsupported version '%s'\n", 10474 version, NULL); 10475 } else { 10476 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10477 "Unsupported version '%s'\n", 10478 version); 10479 } 10480 } 10481 } 10482 if (ctxt->version != NULL) 10483 xmlFree((void *) ctxt->version); 10484 ctxt->version = version; 10485 } 10486 10487 /* 10488 * We may have the encoding declaration 10489 */ 10490 if (!IS_BLANK_CH(RAW)) { 10491 if ((RAW == '?') && (NXT(1) == '>')) { 10492 SKIP(2); 10493 return; 10494 } 10495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10496 } 10497 xmlParseEncodingDecl(ctxt); 10498 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10499 (ctxt->instate == XML_PARSER_EOF)) { 10500 /* 10501 * The XML REC instructs us to stop parsing right here 10502 */ 10503 return; 10504 } 10505 10506 /* 10507 * We may have the standalone status. 
10508 */ 10509 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10510 if ((RAW == '?') && (NXT(1) == '>')) { 10511 SKIP(2); 10512 return; 10513 } 10514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10515 } 10516 10517 /* 10518 * We can grow the input buffer freely at that point 10519 */ 10520 GROW; 10521 10522 SKIP_BLANKS; 10523 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10524 10525 SKIP_BLANKS; 10526 if ((RAW == '?') && (NXT(1) == '>')) { 10527 SKIP(2); 10528 } else if (RAW == '>') { 10529 /* Deprecated old WD ... */ 10530 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10531 NEXT; 10532 } else { 10533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10534 MOVETO_ENDTAG(CUR_PTR); 10535 NEXT; 10536 } 10537 } 10538 10539 /** 10540 * xmlParseMisc: 10541 * @ctxt: an XML parser context 10542 * 10543 * parse an XML Misc* optional field. 10544 * 10545 * [27] Misc ::= Comment | PI | S 10546 */ 10547 10548 void 10549 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10550 while ((ctxt->instate != XML_PARSER_EOF) && 10551 (((RAW == '<') && (NXT(1) == '?')) || 10552 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10553 IS_BLANK_CH(CUR))) { 10554 if ((RAW == '<') && (NXT(1) == '?')) { 10555 xmlParsePI(ctxt); 10556 } else if (IS_BLANK_CH(CUR)) { 10557 NEXT; 10558 } else 10559 xmlParseComment(ctxt); 10560 } 10561 } 10562 10563 /** 10564 * xmlParseDocument: 10565 * @ctxt: an XML parser context 10566 * 10567 * parse an XML document (and build a tree if using the standard SAX 10568 * interface). 10569 * 10570 * [1] document ::= prolog element Misc* 10571 * 10572 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10573 * 10574 * Returns 0, -1 in case of error. the parser context is augmented 10575 * as a result of the parsing. 
10576 */ 10577 10578 int 10579 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10580 xmlChar start[4]; 10581 xmlCharEncoding enc; 10582 10583 xmlInitParser(); 10584 10585 if ((ctxt == NULL) || (ctxt->input == NULL)) 10586 return(-1); 10587 10588 GROW; 10589 10590 /* 10591 * SAX: detecting the level. 10592 */ 10593 xmlDetectSAX2(ctxt); 10594 10595 /* 10596 * SAX: beginning of the document processing. 10597 */ 10598 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10599 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10600 if (ctxt->instate == XML_PARSER_EOF) 10601 return(-1); 10602 10603 if ((ctxt->encoding == NULL) && 10604 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10605 /* 10606 * Get the 4 first bytes and decode the charset 10607 * if enc != XML_CHAR_ENCODING_NONE 10608 * plug some encoding conversion routines. 10609 */ 10610 start[0] = RAW; 10611 start[1] = NXT(1); 10612 start[2] = NXT(2); 10613 start[3] = NXT(3); 10614 enc = xmlDetectCharEncoding(&start[0], 4); 10615 if (enc != XML_CHAR_ENCODING_NONE) { 10616 xmlSwitchEncoding(ctxt, enc); 10617 } 10618 } 10619 10620 10621 if (CUR == 0) { 10622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10623 return(-1); 10624 } 10625 10626 /* 10627 * Check for the XMLDecl in the Prolog. 10628 * do not GROW here to avoid the detected encoder to decode more 10629 * than just the first line, unless the amount of data is really 10630 * too small to hold "<?xml version="1.0" encoding="foo" 10631 */ 10632 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10633 GROW; 10634 } 10635 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10636 10637 /* 10638 * Note that we will switch encoding on the fly. 
10639 */ 10640 xmlParseXMLDecl(ctxt); 10641 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10642 (ctxt->instate == XML_PARSER_EOF)) { 10643 /* 10644 * The XML REC instructs us to stop parsing right here 10645 */ 10646 return(-1); 10647 } 10648 ctxt->standalone = ctxt->input->standalone; 10649 SKIP_BLANKS; 10650 } else { 10651 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10652 } 10653 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10654 ctxt->sax->startDocument(ctxt->userData); 10655 if (ctxt->instate == XML_PARSER_EOF) 10656 return(-1); 10657 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10658 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10659 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10660 } 10661 10662 /* 10663 * The Misc part of the Prolog 10664 */ 10665 GROW; 10666 xmlParseMisc(ctxt); 10667 10668 /* 10669 * Then possibly doc type declaration(s) and more Misc 10670 * (doctypedecl Misc*)? 10671 */ 10672 GROW; 10673 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10674 10675 ctxt->inSubset = 1; 10676 xmlParseDocTypeDecl(ctxt); 10677 if (RAW == '[') { 10678 ctxt->instate = XML_PARSER_DTD; 10679 xmlParseInternalSubset(ctxt); 10680 if (ctxt->instate == XML_PARSER_EOF) 10681 return(-1); 10682 } 10683 10684 /* 10685 * Create and update the external subset. 
10686 */ 10687 ctxt->inSubset = 2; 10688 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10689 (!ctxt->disableSAX)) 10690 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10691 ctxt->extSubSystem, ctxt->extSubURI); 10692 if (ctxt->instate == XML_PARSER_EOF) 10693 return(-1); 10694 ctxt->inSubset = 0; 10695 10696 xmlCleanSpecialAttr(ctxt); 10697 10698 ctxt->instate = XML_PARSER_PROLOG; 10699 xmlParseMisc(ctxt); 10700 } 10701 10702 /* 10703 * Time to start parsing the tree itself 10704 */ 10705 GROW; 10706 if (RAW != '<') { 10707 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10708 "Start tag expected, '<' not found\n"); 10709 } else { 10710 ctxt->instate = XML_PARSER_CONTENT; 10711 xmlParseElement(ctxt); 10712 ctxt->instate = XML_PARSER_EPILOG; 10713 10714 10715 /* 10716 * The Misc part at the end 10717 */ 10718 xmlParseMisc(ctxt); 10719 10720 if (RAW != 0) { 10721 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10722 } 10723 ctxt->instate = XML_PARSER_EOF; 10724 } 10725 10726 /* 10727 * SAX: end of the document processing. 10728 */ 10729 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10730 ctxt->sax->endDocument(ctxt->userData); 10731 10732 /* 10733 * Remove locally kept entity definitions if the tree was not built 10734 */ 10735 if ((ctxt->myDoc != NULL) && 10736 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10737 xmlFreeDoc(ctxt->myDoc); 10738 ctxt->myDoc = NULL; 10739 } 10740 10741 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10742 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10743 if (ctxt->valid) 10744 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10745 if (ctxt->nsWellFormed) 10746 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10747 if (ctxt->options & XML_PARSE_OLD10) 10748 ctxt->myDoc->properties |= XML_DOC_OLD10; 10749 } 10750 if (! 
ctxt->wellFormed) { 10751 ctxt->valid = 0; 10752 return(-1); 10753 } 10754 return(0); 10755 } 10756 10757 /** 10758 * xmlParseExtParsedEnt: 10759 * @ctxt: an XML parser context 10760 * 10761 * parse a general parsed entity 10762 * An external general parsed entity is well-formed if it matches the 10763 * production labeled extParsedEnt. 10764 * 10765 * [78] extParsedEnt ::= TextDecl? content 10766 * 10767 * Returns 0, -1 in case of error. the parser context is augmented 10768 * as a result of the parsing. 10769 */ 10770 10771 int 10772 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10773 xmlChar start[4]; 10774 xmlCharEncoding enc; 10775 10776 if ((ctxt == NULL) || (ctxt->input == NULL)) 10777 return(-1); 10778 10779 xmlDefaultSAXHandlerInit(); 10780 10781 xmlDetectSAX2(ctxt); 10782 10783 GROW; 10784 10785 /* 10786 * SAX: beginning of the document processing. 10787 */ 10788 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10789 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10790 10791 /* 10792 * Get the 4 first bytes and decode the charset 10793 * if enc != XML_CHAR_ENCODING_NONE 10794 * plug some encoding conversion routines. 10795 */ 10796 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10797 start[0] = RAW; 10798 start[1] = NXT(1); 10799 start[2] = NXT(2); 10800 start[3] = NXT(3); 10801 enc = xmlDetectCharEncoding(start, 4); 10802 if (enc != XML_CHAR_ENCODING_NONE) { 10803 xmlSwitchEncoding(ctxt, enc); 10804 } 10805 } 10806 10807 10808 if (CUR == 0) { 10809 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10810 } 10811 10812 /* 10813 * Check for the XMLDecl in the Prolog. 10814 */ 10815 GROW; 10816 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10817 10818 /* 10819 * Note that we will switch encoding on the fly. 
10820 */ 10821 xmlParseXMLDecl(ctxt); 10822 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10823 /* 10824 * The XML REC instructs us to stop parsing right here 10825 */ 10826 return(-1); 10827 } 10828 SKIP_BLANKS; 10829 } else { 10830 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10831 } 10832 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10833 ctxt->sax->startDocument(ctxt->userData); 10834 if (ctxt->instate == XML_PARSER_EOF) 10835 return(-1); 10836 10837 /* 10838 * Doing validity checking on chunk doesn't make sense 10839 */ 10840 ctxt->instate = XML_PARSER_CONTENT; 10841 ctxt->validate = 0; 10842 ctxt->loadsubset = 0; 10843 ctxt->depth = 0; 10844 10845 xmlParseContent(ctxt); 10846 if (ctxt->instate == XML_PARSER_EOF) 10847 return(-1); 10848 10849 if ((RAW == '<') && (NXT(1) == '/')) { 10850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10851 } else if (RAW != 0) { 10852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10853 } 10854 10855 /* 10856 * SAX: end of the document processing. 10857 */ 10858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10859 ctxt->sax->endDocument(ctxt->userData); 10860 10861 if (! ctxt->wellFormed) return(-1); 10862 return(0); 10863 } 10864 10865 #ifdef LIBXML_PUSH_ENABLED 10866 /************************************************************************ 10867 * * 10868 * Progressive parsing interfaces * 10869 * * 10870 ************************************************************************/ 10871 10872 /** 10873 * xmlParseLookupSequence: 10874 * @ctxt: an XML parser context 10875 * @first: the first char to lookup 10876 * @next: the next char to lookup or zero 10877 * @third: the next char to lookup or zero 10878 * 10879 * Try to find if a sequence (first, next, third) or just (first next) or 10880 * (first) is available in the input stream. 
10881 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10882 * to avoid rescanning sequences of bytes, it DOES change the state of the 10883 * parser, do not use liberally. 10884 * 10885 * Returns the index to the current parsing point if the full sequence 10886 * is available, -1 otherwise. 10887 */ 10888 static int 10889 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10890 xmlChar next, xmlChar third) { 10891 int base, len; 10892 xmlParserInputPtr in; 10893 const xmlChar *buf; 10894 10895 in = ctxt->input; 10896 if (in == NULL) return(-1); 10897 base = in->cur - in->base; 10898 if (base < 0) return(-1); 10899 if (ctxt->checkIndex > base) 10900 base = ctxt->checkIndex; 10901 if (in->buf == NULL) { 10902 buf = in->base; 10903 len = in->length; 10904 } else { 10905 buf = xmlBufContent(in->buf->buffer); 10906 len = xmlBufUse(in->buf->buffer); 10907 } 10908 /* take into account the sequence length */ 10909 if (third) len -= 2; 10910 else if (next) len --; 10911 for (;base < len;base++) { 10912 if (buf[base] == first) { 10913 if (third != 0) { 10914 if ((buf[base + 1] != next) || 10915 (buf[base + 2] != third)) continue; 10916 } else if (next != 0) { 10917 if (buf[base + 1] != next) continue; 10918 } 10919 ctxt->checkIndex = 0; 10920 #ifdef DEBUG_PUSH 10921 if (next == 0) 10922 xmlGenericError(xmlGenericErrorContext, 10923 "PP: lookup '%c' found at %d\n", 10924 first, base); 10925 else if (third == 0) 10926 xmlGenericError(xmlGenericErrorContext, 10927 "PP: lookup '%c%c' found at %d\n", 10928 first, next, base); 10929 else 10930 xmlGenericError(xmlGenericErrorContext, 10931 "PP: lookup '%c%c%c' found at %d\n", 10932 first, next, third, base); 10933 #endif 10934 return(base - (in->cur - in->base)); 10935 } 10936 } 10937 ctxt->checkIndex = base; 10938 #ifdef DEBUG_PUSH 10939 if (next == 0) 10940 xmlGenericError(xmlGenericErrorContext, 10941 "PP: lookup '%c' failed\n", first); 10942 else if (third == 0) 10943 
xmlGenericError(xmlGenericErrorContext, 10944 "PP: lookup '%c%c' failed\n", first, next); 10945 else 10946 xmlGenericError(xmlGenericErrorContext, 10947 "PP: lookup '%c%c%c' failed\n", first, next, third); 10948 #endif 10949 return(-1); 10950 } 10951 10952 /** 10953 * xmlParseGetLasts: 10954 * @ctxt: an XML parser context 10955 * @lastlt: pointer to store the last '<' from the input 10956 * @lastgt: pointer to store the last '>' from the input 10957 * 10958 * Lookup the last < and > in the current chunk 10959 */ 10960 static void 10961 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10962 const xmlChar **lastgt) { 10963 const xmlChar *tmp; 10964 10965 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 10966 xmlGenericError(xmlGenericErrorContext, 10967 "Internal error: xmlParseGetLasts\n"); 10968 return; 10969 } 10970 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 10971 tmp = ctxt->input->end; 10972 tmp--; 10973 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 10974 if (tmp < ctxt->input->base) { 10975 *lastlt = NULL; 10976 *lastgt = NULL; 10977 } else { 10978 *lastlt = tmp; 10979 tmp++; 10980 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 10981 if (*tmp == '\'') { 10982 tmp++; 10983 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 10984 if (tmp < ctxt->input->end) tmp++; 10985 } else if (*tmp == '"') { 10986 tmp++; 10987 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 10988 if (tmp < ctxt->input->end) tmp++; 10989 } else 10990 tmp++; 10991 } 10992 if (tmp < ctxt->input->end) 10993 *lastgt = tmp; 10994 else { 10995 tmp = *lastlt; 10996 tmp--; 10997 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 10998 if (tmp >= ctxt->input->base) 10999 *lastgt = tmp; 11000 else 11001 *lastgt = NULL; 11002 } 11003 } 11004 } else { 11005 *lastlt = NULL; 11006 *lastgt = NULL; 11007 } 11008 } 11009 /** 11010 * xmlCheckCdataPush: 11011 * @cur: pointer to the block of characters 11012 * @len: length of the 
block in bytes 11013 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11014 * 11015 * Check that the block of characters is okay as SCdata content [20] 11016 * 11017 * Returns the number of bytes to pass if okay, a negative index where an 11018 * UTF-8 error occurred otherwise 11019 */ 11020 static int 11021 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11022 int ix; 11023 unsigned char c; 11024 int codepoint; 11025 11026 if ((utf == NULL) || (len <= 0)) 11027 return(0); 11028 11029 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11030 c = utf[ix]; 11031 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11032 if (c >= 0x20) 11033 ix++; 11034 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11035 ix++; 11036 else 11037 return(-ix); 11038 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11039 if (ix + 2 > len) return(complete ? -ix : ix); 11040 if ((utf[ix+1] & 0xc0 ) != 0x80) 11041 return(-ix); 11042 codepoint = (utf[ix] & 0x1f) << 6; 11043 codepoint |= utf[ix+1] & 0x3f; 11044 if (!xmlIsCharQ(codepoint)) 11045 return(-ix); 11046 ix += 2; 11047 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11048 if (ix + 3 > len) return(complete ? -ix : ix); 11049 if (((utf[ix+1] & 0xc0) != 0x80) || 11050 ((utf[ix+2] & 0xc0) != 0x80)) 11051 return(-ix); 11052 codepoint = (utf[ix] & 0xf) << 12; 11053 codepoint |= (utf[ix+1] & 0x3f) << 6; 11054 codepoint |= utf[ix+2] & 0x3f; 11055 if (!xmlIsCharQ(codepoint)) 11056 return(-ix); 11057 ix += 3; 11058 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11059 if (ix + 4 > len) return(complete ? 
-ix : ix); 11060 if (((utf[ix+1] & 0xc0) != 0x80) || 11061 ((utf[ix+2] & 0xc0) != 0x80) || 11062 ((utf[ix+3] & 0xc0) != 0x80)) 11063 return(-ix); 11064 codepoint = (utf[ix] & 0x7) << 18; 11065 codepoint |= (utf[ix+1] & 0x3f) << 12; 11066 codepoint |= (utf[ix+2] & 0x3f) << 6; 11067 codepoint |= utf[ix+3] & 0x3f; 11068 if (!xmlIsCharQ(codepoint)) 11069 return(-ix); 11070 ix += 4; 11071 } else /* unknown encoding */ 11072 return(-ix); 11073 } 11074 return(ix); 11075 } 11076 11077 /** 11078 * xmlParseTryOrFinish: 11079 * @ctxt: an XML parser context 11080 * @terminate: last chunk indicator 11081 * 11082 * Try to progress on parsing 11083 * 11084 * Returns zero if no parsing was possible 11085 */ 11086 static int 11087 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11088 int ret = 0; 11089 int avail, tlen; 11090 xmlChar cur, next; 11091 const xmlChar *lastlt, *lastgt; 11092 11093 if (ctxt->input == NULL) 11094 return(0); 11095 11096 #ifdef DEBUG_PUSH 11097 switch (ctxt->instate) { 11098 case XML_PARSER_EOF: 11099 xmlGenericError(xmlGenericErrorContext, 11100 "PP: try EOF\n"); break; 11101 case XML_PARSER_START: 11102 xmlGenericError(xmlGenericErrorContext, 11103 "PP: try START\n"); break; 11104 case XML_PARSER_MISC: 11105 xmlGenericError(xmlGenericErrorContext, 11106 "PP: try MISC\n");break; 11107 case XML_PARSER_COMMENT: 11108 xmlGenericError(xmlGenericErrorContext, 11109 "PP: try COMMENT\n");break; 11110 case XML_PARSER_PROLOG: 11111 xmlGenericError(xmlGenericErrorContext, 11112 "PP: try PROLOG\n");break; 11113 case XML_PARSER_START_TAG: 11114 xmlGenericError(xmlGenericErrorContext, 11115 "PP: try START_TAG\n");break; 11116 case XML_PARSER_CONTENT: 11117 xmlGenericError(xmlGenericErrorContext, 11118 "PP: try CONTENT\n");break; 11119 case XML_PARSER_CDATA_SECTION: 11120 xmlGenericError(xmlGenericErrorContext, 11121 "PP: try CDATA_SECTION\n");break; 11122 case XML_PARSER_END_TAG: 11123 xmlGenericError(xmlGenericErrorContext, 11124 "PP: try 
END_TAG\n");break; 11125 case XML_PARSER_ENTITY_DECL: 11126 xmlGenericError(xmlGenericErrorContext, 11127 "PP: try ENTITY_DECL\n");break; 11128 case XML_PARSER_ENTITY_VALUE: 11129 xmlGenericError(xmlGenericErrorContext, 11130 "PP: try ENTITY_VALUE\n");break; 11131 case XML_PARSER_ATTRIBUTE_VALUE: 11132 xmlGenericError(xmlGenericErrorContext, 11133 "PP: try ATTRIBUTE_VALUE\n");break; 11134 case XML_PARSER_DTD: 11135 xmlGenericError(xmlGenericErrorContext, 11136 "PP: try DTD\n");break; 11137 case XML_PARSER_EPILOG: 11138 xmlGenericError(xmlGenericErrorContext, 11139 "PP: try EPILOG\n");break; 11140 case XML_PARSER_PI: 11141 xmlGenericError(xmlGenericErrorContext, 11142 "PP: try PI\n");break; 11143 case XML_PARSER_IGNORE: 11144 xmlGenericError(xmlGenericErrorContext, 11145 "PP: try IGNORE\n");break; 11146 } 11147 #endif 11148 11149 if ((ctxt->input != NULL) && 11150 (ctxt->input->cur - ctxt->input->base > 4096)) { 11151 xmlSHRINK(ctxt); 11152 ctxt->checkIndex = 0; 11153 } 11154 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11155 11156 while (ctxt->instate != XML_PARSER_EOF) { 11157 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11158 return(0); 11159 11160 if (ctxt->input == NULL) break; 11161 if (ctxt->input->buf == NULL) 11162 avail = ctxt->input->length - 11163 (ctxt->input->cur - ctxt->input->base); 11164 else { 11165 /* 11166 * If we are operating on converted input, try to flush 11167 * remainng chars to avoid them stalling in the non-converted 11168 * buffer. But do not do this in document start where 11169 * encoding="..." may not have been read and we work on a 11170 * guessed encoding. 
11171 */ 11172 if ((ctxt->instate != XML_PARSER_START) && 11173 (ctxt->input->buf->raw != NULL) && 11174 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11175 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11176 ctxt->input); 11177 size_t current = ctxt->input->cur - ctxt->input->base; 11178 11179 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11180 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11181 base, current); 11182 } 11183 avail = xmlBufUse(ctxt->input->buf->buffer) - 11184 (ctxt->input->cur - ctxt->input->base); 11185 } 11186 if (avail < 1) 11187 goto done; 11188 switch (ctxt->instate) { 11189 case XML_PARSER_EOF: 11190 /* 11191 * Document parsing is done ! 11192 */ 11193 goto done; 11194 case XML_PARSER_START: 11195 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11196 xmlChar start[4]; 11197 xmlCharEncoding enc; 11198 11199 /* 11200 * Very first chars read from the document flow. 11201 */ 11202 if (avail < 4) 11203 goto done; 11204 11205 /* 11206 * Get the 4 first bytes and decode the charset 11207 * if enc != XML_CHAR_ENCODING_NONE 11208 * plug some encoding conversion routines, 11209 * else xmlSwitchEncoding will set to (default) 11210 * UTF8. 
11211 */ 11212 start[0] = RAW; 11213 start[1] = NXT(1); 11214 start[2] = NXT(2); 11215 start[3] = NXT(3); 11216 enc = xmlDetectCharEncoding(start, 4); 11217 xmlSwitchEncoding(ctxt, enc); 11218 break; 11219 } 11220 11221 if (avail < 2) 11222 goto done; 11223 cur = ctxt->input->cur[0]; 11224 next = ctxt->input->cur[1]; 11225 if (cur == 0) { 11226 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11227 ctxt->sax->setDocumentLocator(ctxt->userData, 11228 &xmlDefaultSAXLocator); 11229 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11230 xmlHaltParser(ctxt); 11231 #ifdef DEBUG_PUSH 11232 xmlGenericError(xmlGenericErrorContext, 11233 "PP: entering EOF\n"); 11234 #endif 11235 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11236 ctxt->sax->endDocument(ctxt->userData); 11237 goto done; 11238 } 11239 if ((cur == '<') && (next == '?')) { 11240 /* PI or XML decl */ 11241 if (avail < 5) return(ret); 11242 if ((!terminate) && 11243 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11244 return(ret); 11245 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11246 ctxt->sax->setDocumentLocator(ctxt->userData, 11247 &xmlDefaultSAXLocator); 11248 if ((ctxt->input->cur[2] == 'x') && 11249 (ctxt->input->cur[3] == 'm') && 11250 (ctxt->input->cur[4] == 'l') && 11251 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11252 ret += 5; 11253 #ifdef DEBUG_PUSH 11254 xmlGenericError(xmlGenericErrorContext, 11255 "PP: Parsing XML Decl\n"); 11256 #endif 11257 xmlParseXMLDecl(ctxt); 11258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11259 /* 11260 * The XML REC instructs us to stop parsing right 11261 * here 11262 */ 11263 xmlHaltParser(ctxt); 11264 return(0); 11265 } 11266 ctxt->standalone = ctxt->input->standalone; 11267 if ((ctxt->encoding == NULL) && 11268 (ctxt->input->encoding != NULL)) 11269 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11270 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11271 (!ctxt->disableSAX)) 11272 ctxt->sax->startDocument(ctxt->userData); 11273 
ctxt->instate = XML_PARSER_MISC; 11274 #ifdef DEBUG_PUSH 11275 xmlGenericError(xmlGenericErrorContext, 11276 "PP: entering MISC\n"); 11277 #endif 11278 } else { 11279 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11280 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11281 (!ctxt->disableSAX)) 11282 ctxt->sax->startDocument(ctxt->userData); 11283 ctxt->instate = XML_PARSER_MISC; 11284 #ifdef DEBUG_PUSH 11285 xmlGenericError(xmlGenericErrorContext, 11286 "PP: entering MISC\n"); 11287 #endif 11288 } 11289 } else { 11290 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11291 ctxt->sax->setDocumentLocator(ctxt->userData, 11292 &xmlDefaultSAXLocator); 11293 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11294 if (ctxt->version == NULL) { 11295 xmlErrMemory(ctxt, NULL); 11296 break; 11297 } 11298 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11299 (!ctxt->disableSAX)) 11300 ctxt->sax->startDocument(ctxt->userData); 11301 ctxt->instate = XML_PARSER_MISC; 11302 #ifdef DEBUG_PUSH 11303 xmlGenericError(xmlGenericErrorContext, 11304 "PP: entering MISC\n"); 11305 #endif 11306 } 11307 break; 11308 case XML_PARSER_START_TAG: { 11309 const xmlChar *name; 11310 const xmlChar *prefix = NULL; 11311 const xmlChar *URI = NULL; 11312 int nsNr = ctxt->nsNr; 11313 11314 if ((avail < 2) && (ctxt->inputNr == 1)) 11315 goto done; 11316 cur = ctxt->input->cur[0]; 11317 if (cur != '<') { 11318 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11319 xmlHaltParser(ctxt); 11320 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11321 ctxt->sax->endDocument(ctxt->userData); 11322 goto done; 11323 } 11324 if (!terminate) { 11325 if (ctxt->progressive) { 11326 /* > can be found unescaped in attribute values */ 11327 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11328 goto done; 11329 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11330 goto done; 11331 } 11332 } 11333 if (ctxt->spaceNr == 0) 11334 spacePush(ctxt, -1); 11335 else if (*ctxt->space == -2) 11336 
spacePush(ctxt, -1); 11337 else 11338 spacePush(ctxt, *ctxt->space); 11339 #ifdef LIBXML_SAX1_ENABLED 11340 if (ctxt->sax2) 11341 #endif /* LIBXML_SAX1_ENABLED */ 11342 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11343 #ifdef LIBXML_SAX1_ENABLED 11344 else 11345 name = xmlParseStartTag(ctxt); 11346 #endif /* LIBXML_SAX1_ENABLED */ 11347 if (ctxt->instate == XML_PARSER_EOF) 11348 goto done; 11349 if (name == NULL) { 11350 spacePop(ctxt); 11351 xmlHaltParser(ctxt); 11352 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11353 ctxt->sax->endDocument(ctxt->userData); 11354 goto done; 11355 } 11356 #ifdef LIBXML_VALID_ENABLED 11357 /* 11358 * [ VC: Root Element Type ] 11359 * The Name in the document type declaration must match 11360 * the element type of the root element. 11361 */ 11362 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11363 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11364 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11365 #endif /* LIBXML_VALID_ENABLED */ 11366 11367 /* 11368 * Check for an Empty Element. 
11369 */ 11370 if ((RAW == '/') && (NXT(1) == '>')) { 11371 SKIP(2); 11372 11373 if (ctxt->sax2) { 11374 if ((ctxt->sax != NULL) && 11375 (ctxt->sax->endElementNs != NULL) && 11376 (!ctxt->disableSAX)) 11377 ctxt->sax->endElementNs(ctxt->userData, name, 11378 prefix, URI); 11379 if (ctxt->nsNr - nsNr > 0) 11380 nsPop(ctxt, ctxt->nsNr - nsNr); 11381 #ifdef LIBXML_SAX1_ENABLED 11382 } else { 11383 if ((ctxt->sax != NULL) && 11384 (ctxt->sax->endElement != NULL) && 11385 (!ctxt->disableSAX)) 11386 ctxt->sax->endElement(ctxt->userData, name); 11387 #endif /* LIBXML_SAX1_ENABLED */ 11388 } 11389 if (ctxt->instate == XML_PARSER_EOF) 11390 goto done; 11391 spacePop(ctxt); 11392 if (ctxt->nameNr == 0) { 11393 ctxt->instate = XML_PARSER_EPILOG; 11394 } else { 11395 ctxt->instate = XML_PARSER_CONTENT; 11396 } 11397 ctxt->progressive = 1; 11398 break; 11399 } 11400 if (RAW == '>') { 11401 NEXT; 11402 } else { 11403 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11404 "Couldn't find end of Start Tag %s\n", 11405 name); 11406 nodePop(ctxt); 11407 spacePop(ctxt); 11408 } 11409 if (ctxt->sax2) 11410 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11411 #ifdef LIBXML_SAX1_ENABLED 11412 else 11413 namePush(ctxt, name); 11414 #endif /* LIBXML_SAX1_ENABLED */ 11415 11416 ctxt->instate = XML_PARSER_CONTENT; 11417 ctxt->progressive = 1; 11418 break; 11419 } 11420 case XML_PARSER_CONTENT: { 11421 const xmlChar *test; 11422 unsigned int cons; 11423 if ((avail < 2) && (ctxt->inputNr == 1)) 11424 goto done; 11425 cur = ctxt->input->cur[0]; 11426 next = ctxt->input->cur[1]; 11427 11428 test = CUR_PTR; 11429 cons = ctxt->input->consumed; 11430 if ((cur == '<') && (next == '/')) { 11431 ctxt->instate = XML_PARSER_END_TAG; 11432 break; 11433 } else if ((cur == '<') && (next == '?')) { 11434 if ((!terminate) && 11435 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11436 ctxt->progressive = XML_PARSER_PI; 11437 goto done; 11438 } 11439 xmlParsePI(ctxt); 11440 ctxt->instate = 
XML_PARSER_CONTENT; 11441 ctxt->progressive = 1; 11442 } else if ((cur == '<') && (next != '!')) { 11443 ctxt->instate = XML_PARSER_START_TAG; 11444 break; 11445 } else if ((cur == '<') && (next == '!') && 11446 (ctxt->input->cur[2] == '-') && 11447 (ctxt->input->cur[3] == '-')) { 11448 int term; 11449 11450 if (avail < 4) 11451 goto done; 11452 ctxt->input->cur += 4; 11453 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11454 ctxt->input->cur -= 4; 11455 if ((!terminate) && (term < 0)) { 11456 ctxt->progressive = XML_PARSER_COMMENT; 11457 goto done; 11458 } 11459 xmlParseComment(ctxt); 11460 ctxt->instate = XML_PARSER_CONTENT; 11461 ctxt->progressive = 1; 11462 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11463 (ctxt->input->cur[2] == '[') && 11464 (ctxt->input->cur[3] == 'C') && 11465 (ctxt->input->cur[4] == 'D') && 11466 (ctxt->input->cur[5] == 'A') && 11467 (ctxt->input->cur[6] == 'T') && 11468 (ctxt->input->cur[7] == 'A') && 11469 (ctxt->input->cur[8] == '[')) { 11470 SKIP(9); 11471 ctxt->instate = XML_PARSER_CDATA_SECTION; 11472 break; 11473 } else if ((cur == '<') && (next == '!') && 11474 (avail < 9)) { 11475 goto done; 11476 } else if (cur == '&') { 11477 if ((!terminate) && 11478 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11479 goto done; 11480 xmlParseReference(ctxt); 11481 } else { 11482 /* TODO Avoid the extra copy, handle directly !!! */ 11483 /* 11484 * Goal of the following test is: 11485 * - minimize calls to the SAX 'character' callback 11486 * when they are mergeable 11487 * - handle an problem for isBlank when we only parse 11488 * a sequence of blank chars and the next one is 11489 * not available to check against '<' presence. 11490 * - tries to homogenize the differences in SAX 11491 * callbacks between the push and pull versions 11492 * of the parser. 
11493 */ 11494 if ((ctxt->inputNr == 1) && 11495 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11496 if (!terminate) { 11497 if (ctxt->progressive) { 11498 if ((lastlt == NULL) || 11499 (ctxt->input->cur > lastlt)) 11500 goto done; 11501 } else if (xmlParseLookupSequence(ctxt, 11502 '<', 0, 0) < 0) { 11503 goto done; 11504 } 11505 } 11506 } 11507 ctxt->checkIndex = 0; 11508 xmlParseCharData(ctxt, 0); 11509 } 11510 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11511 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11512 "detected an error in element content\n"); 11513 xmlHaltParser(ctxt); 11514 break; 11515 } 11516 break; 11517 } 11518 case XML_PARSER_END_TAG: 11519 if (avail < 2) 11520 goto done; 11521 if (!terminate) { 11522 if (ctxt->progressive) { 11523 /* > can be found unescaped in attribute values */ 11524 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11525 goto done; 11526 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11527 goto done; 11528 } 11529 } 11530 if (ctxt->sax2) { 11531 xmlParseEndTag2(ctxt, 11532 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11533 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11534 (int) (ptrdiff_t) 11535 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11536 nameNsPop(ctxt); 11537 } 11538 #ifdef LIBXML_SAX1_ENABLED 11539 else 11540 xmlParseEndTag1(ctxt, 0); 11541 #endif /* LIBXML_SAX1_ENABLED */ 11542 if (ctxt->instate == XML_PARSER_EOF) { 11543 /* Nothing */ 11544 } else if (ctxt->nameNr == 0) { 11545 ctxt->instate = XML_PARSER_EPILOG; 11546 } else { 11547 ctxt->instate = XML_PARSER_CONTENT; 11548 } 11549 break; 11550 case XML_PARSER_CDATA_SECTION: { 11551 /* 11552 * The Push mode need to have the SAX callback for 11553 * cdataBlock merge back contiguous callbacks. 
11554 */ 11555 int base; 11556 11557 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11558 if (base < 0) { 11559 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11560 int tmp; 11561 11562 tmp = xmlCheckCdataPush(ctxt->input->cur, 11563 XML_PARSER_BIG_BUFFER_SIZE, 0); 11564 if (tmp < 0) { 11565 tmp = -tmp; 11566 ctxt->input->cur += tmp; 11567 goto encoding_error; 11568 } 11569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11570 if (ctxt->sax->cdataBlock != NULL) 11571 ctxt->sax->cdataBlock(ctxt->userData, 11572 ctxt->input->cur, tmp); 11573 else if (ctxt->sax->characters != NULL) 11574 ctxt->sax->characters(ctxt->userData, 11575 ctxt->input->cur, tmp); 11576 } 11577 if (ctxt->instate == XML_PARSER_EOF) 11578 goto done; 11579 SKIPL(tmp); 11580 ctxt->checkIndex = 0; 11581 } 11582 goto done; 11583 } else { 11584 int tmp; 11585 11586 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11587 if ((tmp < 0) || (tmp != base)) { 11588 tmp = -tmp; 11589 ctxt->input->cur += tmp; 11590 goto encoding_error; 11591 } 11592 if ((ctxt->sax != NULL) && (base == 0) && 11593 (ctxt->sax->cdataBlock != NULL) && 11594 (!ctxt->disableSAX)) { 11595 /* 11596 * Special case to provide identical behaviour 11597 * between pull and push parsers on enpty CDATA 11598 * sections 11599 */ 11600 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11601 (!strncmp((const char *)&ctxt->input->cur[-9], 11602 "<![CDATA[", 9))) 11603 ctxt->sax->cdataBlock(ctxt->userData, 11604 BAD_CAST "", 0); 11605 } else if ((ctxt->sax != NULL) && (base > 0) && 11606 (!ctxt->disableSAX)) { 11607 if (ctxt->sax->cdataBlock != NULL) 11608 ctxt->sax->cdataBlock(ctxt->userData, 11609 ctxt->input->cur, base); 11610 else if (ctxt->sax->characters != NULL) 11611 ctxt->sax->characters(ctxt->userData, 11612 ctxt->input->cur, base); 11613 } 11614 if (ctxt->instate == XML_PARSER_EOF) 11615 goto done; 11616 SKIPL(base + 3); 11617 ctxt->checkIndex = 0; 11618 ctxt->instate = XML_PARSER_CONTENT; 11619 #ifdef DEBUG_PUSH 11620 
xmlGenericError(xmlGenericErrorContext, 11621 "PP: entering CONTENT\n"); 11622 #endif 11623 } 11624 break; 11625 } 11626 case XML_PARSER_MISC: 11627 SKIP_BLANKS; 11628 if (ctxt->input->buf == NULL) 11629 avail = ctxt->input->length - 11630 (ctxt->input->cur - ctxt->input->base); 11631 else 11632 avail = xmlBufUse(ctxt->input->buf->buffer) - 11633 (ctxt->input->cur - ctxt->input->base); 11634 if (avail < 2) 11635 goto done; 11636 cur = ctxt->input->cur[0]; 11637 next = ctxt->input->cur[1]; 11638 if ((cur == '<') && (next == '?')) { 11639 if ((!terminate) && 11640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11641 ctxt->progressive = XML_PARSER_PI; 11642 goto done; 11643 } 11644 #ifdef DEBUG_PUSH 11645 xmlGenericError(xmlGenericErrorContext, 11646 "PP: Parsing PI\n"); 11647 #endif 11648 xmlParsePI(ctxt); 11649 if (ctxt->instate == XML_PARSER_EOF) 11650 goto done; 11651 ctxt->instate = XML_PARSER_MISC; 11652 ctxt->progressive = 1; 11653 ctxt->checkIndex = 0; 11654 } else if ((cur == '<') && (next == '!') && 11655 (ctxt->input->cur[2] == '-') && 11656 (ctxt->input->cur[3] == '-')) { 11657 if ((!terminate) && 11658 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11659 ctxt->progressive = XML_PARSER_COMMENT; 11660 goto done; 11661 } 11662 #ifdef DEBUG_PUSH 11663 xmlGenericError(xmlGenericErrorContext, 11664 "PP: Parsing Comment\n"); 11665 #endif 11666 xmlParseComment(ctxt); 11667 if (ctxt->instate == XML_PARSER_EOF) 11668 goto done; 11669 ctxt->instate = XML_PARSER_MISC; 11670 ctxt->progressive = 1; 11671 ctxt->checkIndex = 0; 11672 } else if ((cur == '<') && (next == '!') && 11673 (ctxt->input->cur[2] == 'D') && 11674 (ctxt->input->cur[3] == 'O') && 11675 (ctxt->input->cur[4] == 'C') && 11676 (ctxt->input->cur[5] == 'T') && 11677 (ctxt->input->cur[6] == 'Y') && 11678 (ctxt->input->cur[7] == 'P') && 11679 (ctxt->input->cur[8] == 'E')) { 11680 if ((!terminate) && 11681 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11682 ctxt->progressive = XML_PARSER_DTD; 
11683 goto done; 11684 } 11685 #ifdef DEBUG_PUSH 11686 xmlGenericError(xmlGenericErrorContext, 11687 "PP: Parsing internal subset\n"); 11688 #endif 11689 ctxt->inSubset = 1; 11690 ctxt->progressive = 0; 11691 ctxt->checkIndex = 0; 11692 xmlParseDocTypeDecl(ctxt); 11693 if (ctxt->instate == XML_PARSER_EOF) 11694 goto done; 11695 if (RAW == '[') { 11696 ctxt->instate = XML_PARSER_DTD; 11697 #ifdef DEBUG_PUSH 11698 xmlGenericError(xmlGenericErrorContext, 11699 "PP: entering DTD\n"); 11700 #endif 11701 } else { 11702 /* 11703 * Create and update the external subset. 11704 */ 11705 ctxt->inSubset = 2; 11706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11707 (ctxt->sax->externalSubset != NULL)) 11708 ctxt->sax->externalSubset(ctxt->userData, 11709 ctxt->intSubName, ctxt->extSubSystem, 11710 ctxt->extSubURI); 11711 ctxt->inSubset = 0; 11712 xmlCleanSpecialAttr(ctxt); 11713 ctxt->instate = XML_PARSER_PROLOG; 11714 #ifdef DEBUG_PUSH 11715 xmlGenericError(xmlGenericErrorContext, 11716 "PP: entering PROLOG\n"); 11717 #endif 11718 } 11719 } else if ((cur == '<') && (next == '!') && 11720 (avail < 9)) { 11721 goto done; 11722 } else { 11723 ctxt->instate = XML_PARSER_START_TAG; 11724 ctxt->progressive = XML_PARSER_START_TAG; 11725 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11726 #ifdef DEBUG_PUSH 11727 xmlGenericError(xmlGenericErrorContext, 11728 "PP: entering START_TAG\n"); 11729 #endif 11730 } 11731 break; 11732 case XML_PARSER_PROLOG: 11733 SKIP_BLANKS; 11734 if (ctxt->input->buf == NULL) 11735 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11736 else 11737 avail = xmlBufUse(ctxt->input->buf->buffer) - 11738 (ctxt->input->cur - ctxt->input->base); 11739 if (avail < 2) 11740 goto done; 11741 cur = ctxt->input->cur[0]; 11742 next = ctxt->input->cur[1]; 11743 if ((cur == '<') && (next == '?')) { 11744 if ((!terminate) && 11745 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11746 ctxt->progressive = XML_PARSER_PI; 11747 goto done; 11748 } 11749 
#ifdef DEBUG_PUSH 11750 xmlGenericError(xmlGenericErrorContext, 11751 "PP: Parsing PI\n"); 11752 #endif 11753 xmlParsePI(ctxt); 11754 if (ctxt->instate == XML_PARSER_EOF) 11755 goto done; 11756 ctxt->instate = XML_PARSER_PROLOG; 11757 ctxt->progressive = 1; 11758 } else if ((cur == '<') && (next == '!') && 11759 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11760 if ((!terminate) && 11761 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11762 ctxt->progressive = XML_PARSER_COMMENT; 11763 goto done; 11764 } 11765 #ifdef DEBUG_PUSH 11766 xmlGenericError(xmlGenericErrorContext, 11767 "PP: Parsing Comment\n"); 11768 #endif 11769 xmlParseComment(ctxt); 11770 if (ctxt->instate == XML_PARSER_EOF) 11771 goto done; 11772 ctxt->instate = XML_PARSER_PROLOG; 11773 ctxt->progressive = 1; 11774 } else if ((cur == '<') && (next == '!') && 11775 (avail < 4)) { 11776 goto done; 11777 } else { 11778 ctxt->instate = XML_PARSER_START_TAG; 11779 if (ctxt->progressive == 0) 11780 ctxt->progressive = XML_PARSER_START_TAG; 11781 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11782 #ifdef DEBUG_PUSH 11783 xmlGenericError(xmlGenericErrorContext, 11784 "PP: entering START_TAG\n"); 11785 #endif 11786 } 11787 break; 11788 case XML_PARSER_EPILOG: 11789 SKIP_BLANKS; 11790 if (ctxt->input->buf == NULL) 11791 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11792 else 11793 avail = xmlBufUse(ctxt->input->buf->buffer) - 11794 (ctxt->input->cur - ctxt->input->base); 11795 if (avail < 2) 11796 goto done; 11797 cur = ctxt->input->cur[0]; 11798 next = ctxt->input->cur[1]; 11799 if ((cur == '<') && (next == '?')) { 11800 if ((!terminate) && 11801 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11802 ctxt->progressive = XML_PARSER_PI; 11803 goto done; 11804 } 11805 #ifdef DEBUG_PUSH 11806 xmlGenericError(xmlGenericErrorContext, 11807 "PP: Parsing PI\n"); 11808 #endif 11809 xmlParsePI(ctxt); 11810 if (ctxt->instate == XML_PARSER_EOF) 11811 goto done; 11812 
ctxt->instate = XML_PARSER_EPILOG; 11813 ctxt->progressive = 1; 11814 } else if ((cur == '<') && (next == '!') && 11815 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11816 if ((!terminate) && 11817 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11818 ctxt->progressive = XML_PARSER_COMMENT; 11819 goto done; 11820 } 11821 #ifdef DEBUG_PUSH 11822 xmlGenericError(xmlGenericErrorContext, 11823 "PP: Parsing Comment\n"); 11824 #endif 11825 xmlParseComment(ctxt); 11826 if (ctxt->instate == XML_PARSER_EOF) 11827 goto done; 11828 ctxt->instate = XML_PARSER_EPILOG; 11829 ctxt->progressive = 1; 11830 } else if ((cur == '<') && (next == '!') && 11831 (avail < 4)) { 11832 goto done; 11833 } else { 11834 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11835 xmlHaltParser(ctxt); 11836 #ifdef DEBUG_PUSH 11837 xmlGenericError(xmlGenericErrorContext, 11838 "PP: entering EOF\n"); 11839 #endif 11840 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11841 ctxt->sax->endDocument(ctxt->userData); 11842 goto done; 11843 } 11844 break; 11845 case XML_PARSER_DTD: { 11846 /* 11847 * Sorry but progressive parsing of the internal subset 11848 * is not expected to be supported. We first check that 11849 * the full content of the internal subset is available and 11850 * the parsing is launched only at that point. 11851 * Internal subset ends up with "']' S? '>'" in an unescaped 11852 * section and not in a ']]>' sequence which are conditional 11853 * sections (whoever argued to keep that crap in XML deserve 11854 * a place in hell !). 
11855 */ 11856 int base, i; 11857 xmlChar *buf; 11858 xmlChar quote = 0; 11859 size_t use; 11860 11861 base = ctxt->input->cur - ctxt->input->base; 11862 if (base < 0) return(0); 11863 if (ctxt->checkIndex > base) 11864 base = ctxt->checkIndex; 11865 buf = xmlBufContent(ctxt->input->buf->buffer); 11866 use = xmlBufUse(ctxt->input->buf->buffer); 11867 for (;(unsigned int) base < use; base++) { 11868 if (quote != 0) { 11869 if (buf[base] == quote) 11870 quote = 0; 11871 continue; 11872 } 11873 if ((quote == 0) && (buf[base] == '<')) { 11874 int found = 0; 11875 /* special handling of comments */ 11876 if (((unsigned int) base + 4 < use) && 11877 (buf[base + 1] == '!') && 11878 (buf[base + 2] == '-') && 11879 (buf[base + 3] == '-')) { 11880 for (;(unsigned int) base + 3 < use; base++) { 11881 if ((buf[base] == '-') && 11882 (buf[base + 1] == '-') && 11883 (buf[base + 2] == '>')) { 11884 found = 1; 11885 base += 2; 11886 break; 11887 } 11888 } 11889 if (!found) { 11890 #if 0 11891 fprintf(stderr, "unfinished comment\n"); 11892 #endif 11893 break; /* for */ 11894 } 11895 continue; 11896 } 11897 } 11898 if (buf[base] == '"') { 11899 quote = '"'; 11900 continue; 11901 } 11902 if (buf[base] == '\'') { 11903 quote = '\''; 11904 continue; 11905 } 11906 if (buf[base] == ']') { 11907 #if 0 11908 fprintf(stderr, "%c%c%c%c: ", buf[base], 11909 buf[base + 1], buf[base + 2], buf[base + 3]); 11910 #endif 11911 if ((unsigned int) base +1 >= use) 11912 break; 11913 if (buf[base + 1] == ']') { 11914 /* conditional crap, skip both ']' ! 
*/ 11915 base++; 11916 continue; 11917 } 11918 for (i = 1; (unsigned int) base + i < use; i++) { 11919 if (buf[base + i] == '>') { 11920 #if 0 11921 fprintf(stderr, "found\n"); 11922 #endif 11923 goto found_end_int_subset; 11924 } 11925 if (!IS_BLANK_CH(buf[base + i])) { 11926 #if 0 11927 fprintf(stderr, "not found\n"); 11928 #endif 11929 goto not_end_of_int_subset; 11930 } 11931 } 11932 #if 0 11933 fprintf(stderr, "end of stream\n"); 11934 #endif 11935 break; 11936 11937 } 11938 not_end_of_int_subset: 11939 continue; /* for */ 11940 } 11941 /* 11942 * We didn't found the end of the Internal subset 11943 */ 11944 if (quote == 0) 11945 ctxt->checkIndex = base; 11946 else 11947 ctxt->checkIndex = 0; 11948 #ifdef DEBUG_PUSH 11949 if (next == 0) 11950 xmlGenericError(xmlGenericErrorContext, 11951 "PP: lookup of int subset end filed\n"); 11952 #endif 11953 goto done; 11954 11955 found_end_int_subset: 11956 ctxt->checkIndex = 0; 11957 xmlParseInternalSubset(ctxt); 11958 if (ctxt->instate == XML_PARSER_EOF) 11959 goto done; 11960 ctxt->inSubset = 2; 11961 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11962 (ctxt->sax->externalSubset != NULL)) 11963 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11964 ctxt->extSubSystem, ctxt->extSubURI); 11965 ctxt->inSubset = 0; 11966 xmlCleanSpecialAttr(ctxt); 11967 if (ctxt->instate == XML_PARSER_EOF) 11968 goto done; 11969 ctxt->instate = XML_PARSER_PROLOG; 11970 ctxt->checkIndex = 0; 11971 #ifdef DEBUG_PUSH 11972 xmlGenericError(xmlGenericErrorContext, 11973 "PP: entering PROLOG\n"); 11974 #endif 11975 break; 11976 } 11977 case XML_PARSER_COMMENT: 11978 xmlGenericError(xmlGenericErrorContext, 11979 "PP: internal error, state == COMMENT\n"); 11980 ctxt->instate = XML_PARSER_CONTENT; 11981 #ifdef DEBUG_PUSH 11982 xmlGenericError(xmlGenericErrorContext, 11983 "PP: entering CONTENT\n"); 11984 #endif 11985 break; 11986 case XML_PARSER_IGNORE: 11987 xmlGenericError(xmlGenericErrorContext, 11988 "PP: internal error, state 
== IGNORE"); 11989 ctxt->instate = XML_PARSER_DTD; 11990 #ifdef DEBUG_PUSH 11991 xmlGenericError(xmlGenericErrorContext, 11992 "PP: entering DTD\n"); 11993 #endif 11994 break; 11995 case XML_PARSER_PI: 11996 xmlGenericError(xmlGenericErrorContext, 11997 "PP: internal error, state == PI\n"); 11998 ctxt->instate = XML_PARSER_CONTENT; 11999 #ifdef DEBUG_PUSH 12000 xmlGenericError(xmlGenericErrorContext, 12001 "PP: entering CONTENT\n"); 12002 #endif 12003 break; 12004 case XML_PARSER_ENTITY_DECL: 12005 xmlGenericError(xmlGenericErrorContext, 12006 "PP: internal error, state == ENTITY_DECL\n"); 12007 ctxt->instate = XML_PARSER_DTD; 12008 #ifdef DEBUG_PUSH 12009 xmlGenericError(xmlGenericErrorContext, 12010 "PP: entering DTD\n"); 12011 #endif 12012 break; 12013 case XML_PARSER_ENTITY_VALUE: 12014 xmlGenericError(xmlGenericErrorContext, 12015 "PP: internal error, state == ENTITY_VALUE\n"); 12016 ctxt->instate = XML_PARSER_CONTENT; 12017 #ifdef DEBUG_PUSH 12018 xmlGenericError(xmlGenericErrorContext, 12019 "PP: entering DTD\n"); 12020 #endif 12021 break; 12022 case XML_PARSER_ATTRIBUTE_VALUE: 12023 xmlGenericError(xmlGenericErrorContext, 12024 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12025 ctxt->instate = XML_PARSER_START_TAG; 12026 #ifdef DEBUG_PUSH 12027 xmlGenericError(xmlGenericErrorContext, 12028 "PP: entering START_TAG\n"); 12029 #endif 12030 break; 12031 case XML_PARSER_SYSTEM_LITERAL: 12032 xmlGenericError(xmlGenericErrorContext, 12033 "PP: internal error, state == SYSTEM_LITERAL\n"); 12034 ctxt->instate = XML_PARSER_START_TAG; 12035 #ifdef DEBUG_PUSH 12036 xmlGenericError(xmlGenericErrorContext, 12037 "PP: entering START_TAG\n"); 12038 #endif 12039 break; 12040 case XML_PARSER_PUBLIC_LITERAL: 12041 xmlGenericError(xmlGenericErrorContext, 12042 "PP: internal error, state == PUBLIC_LITERAL\n"); 12043 ctxt->instate = XML_PARSER_START_TAG; 12044 #ifdef DEBUG_PUSH 12045 xmlGenericError(xmlGenericErrorContext, 12046 "PP: entering START_TAG\n"); 12047 #endif 
12048 break; 12049 } 12050 } 12051 done: 12052 #ifdef DEBUG_PUSH 12053 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12054 #endif 12055 return(ret); 12056 encoding_error: 12057 { 12058 char buffer[150]; 12059 12060 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12061 ctxt->input->cur[0], ctxt->input->cur[1], 12062 ctxt->input->cur[2], ctxt->input->cur[3]); 12063 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12064 "Input is not proper UTF-8, indicate encoding !\n%s", 12065 BAD_CAST buffer, NULL); 12066 } 12067 return(0); 12068 } 12069 12070 /** 12071 * xmlParseCheckTransition: 12072 * @ctxt: an XML parser context 12073 * @chunk: a char array 12074 * @size: the size in byte of the chunk 12075 * 12076 * Check depending on the current parser state if the chunk given must be 12077 * processed immediately or one need more data to advance on parsing. 12078 * 12079 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12080 */ 12081 static int 12082 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12083 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12084 return(-1); 12085 if (ctxt->instate == XML_PARSER_START_TAG) { 12086 if (memchr(chunk, '>', size) != NULL) 12087 return(1); 12088 return(0); 12089 } 12090 if (ctxt->progressive == XML_PARSER_COMMENT) { 12091 if (memchr(chunk, '>', size) != NULL) 12092 return(1); 12093 return(0); 12094 } 12095 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12096 if (memchr(chunk, '>', size) != NULL) 12097 return(1); 12098 return(0); 12099 } 12100 if (ctxt->progressive == XML_PARSER_PI) { 12101 if (memchr(chunk, '>', size) != NULL) 12102 return(1); 12103 return(0); 12104 } 12105 if (ctxt->instate == XML_PARSER_END_TAG) { 12106 if (memchr(chunk, '>', size) != NULL) 12107 return(1); 12108 return(0); 12109 } 12110 if ((ctxt->progressive == XML_PARSER_DTD) || 12111 (ctxt->instate == XML_PARSER_DTD)) { 12112 if (memchr(chunk, '>', size) != NULL) 12113 
return(1); 12114 return(0); 12115 } 12116 return(1); 12117 } 12118 12119 /** 12120 * xmlParseChunk: 12121 * @ctxt: an XML parser context 12122 * @chunk: an char array 12123 * @size: the size in byte of the chunk 12124 * @terminate: last chunk indicator 12125 * 12126 * Parse a Chunk of memory 12127 * 12128 * Returns zero if no error, the xmlParserErrors otherwise. 12129 */ 12130 int 12131 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12132 int terminate) { 12133 int end_in_lf = 0; 12134 int remain = 0; 12135 size_t old_avail = 0; 12136 size_t avail = 0; 12137 12138 if (ctxt == NULL) 12139 return(XML_ERR_INTERNAL_ERROR); 12140 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12141 return(ctxt->errNo); 12142 if (ctxt->instate == XML_PARSER_EOF) 12143 return(-1); 12144 if (ctxt->instate == XML_PARSER_START) 12145 xmlDetectSAX2(ctxt); 12146 if ((size > 0) && (chunk != NULL) && (!terminate) && 12147 (chunk[size - 1] == '\r')) { 12148 end_in_lf = 1; 12149 size--; 12150 } 12151 12152 xmldecl_done: 12153 12154 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12155 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12156 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12157 size_t cur = ctxt->input->cur - ctxt->input->base; 12158 int res; 12159 12160 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12161 /* 12162 * Specific handling if we autodetected an encoding, we should not 12163 * push more than the first line ... 
which depend on the encoding 12164 * And only push the rest once the final encoding was detected 12165 */ 12166 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12167 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12168 unsigned int len = 45; 12169 12170 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12171 BAD_CAST "UTF-16")) || 12172 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12173 BAD_CAST "UTF16"))) 12174 len = 90; 12175 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12176 BAD_CAST "UCS-4")) || 12177 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12178 BAD_CAST "UCS4"))) 12179 len = 180; 12180 12181 if (ctxt->input->buf->rawconsumed < len) 12182 len -= ctxt->input->buf->rawconsumed; 12183 12184 /* 12185 * Change size for reading the initial declaration only 12186 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12187 * will blindly copy extra bytes from memory. 12188 */ 12189 if ((unsigned int) size > len) { 12190 remain = size - len; 12191 size = len; 12192 } else { 12193 remain = 0; 12194 } 12195 } 12196 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12197 if (res < 0) { 12198 ctxt->errNo = XML_PARSER_EOF; 12199 xmlHaltParser(ctxt); 12200 return (XML_PARSER_EOF); 12201 } 12202 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12203 #ifdef DEBUG_PUSH 12204 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12205 #endif 12206 12207 } else if (ctxt->instate != XML_PARSER_EOF) { 12208 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12209 xmlParserInputBufferPtr in = ctxt->input->buf; 12210 if ((in->encoder != NULL) && (in->buffer != NULL) && 12211 (in->raw != NULL)) { 12212 int nbchars; 12213 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12214 size_t current = ctxt->input->cur - ctxt->input->base; 12215 12216 nbchars = xmlCharEncInput(in, terminate); 12217 if (nbchars < 0) { 
12218 /* TODO 2.6.0 */ 12219 xmlGenericError(xmlGenericErrorContext, 12220 "xmlParseChunk: encoder error\n"); 12221 xmlHaltParser(ctxt); 12222 return(XML_ERR_INVALID_ENCODING); 12223 } 12224 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12225 } 12226 } 12227 } 12228 if (remain != 0) { 12229 xmlParseTryOrFinish(ctxt, 0); 12230 } else { 12231 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12232 avail = xmlBufUse(ctxt->input->buf->buffer); 12233 /* 12234 * Depending on the current state it may not be such 12235 * a good idea to try parsing if there is nothing in the chunk 12236 * which would be worth doing a parser state transition and we 12237 * need to wait for more data 12238 */ 12239 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12240 (old_avail == 0) || (avail == 0) || 12241 (xmlParseCheckTransition(ctxt, 12242 (const char *)&ctxt->input->base[old_avail], 12243 avail - old_avail))) 12244 xmlParseTryOrFinish(ctxt, terminate); 12245 } 12246 if (ctxt->instate == XML_PARSER_EOF) 12247 return(ctxt->errNo); 12248 12249 if ((ctxt->input != NULL) && 12250 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12251 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12252 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12253 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12254 xmlHaltParser(ctxt); 12255 } 12256 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12257 return(ctxt->errNo); 12258 12259 if (remain != 0) { 12260 chunk += size; 12261 size = remain; 12262 remain = 0; 12263 goto xmldecl_done; 12264 } 12265 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12266 (ctxt->input->buf != NULL)) { 12267 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12268 ctxt->input); 12269 size_t current = ctxt->input->cur - ctxt->input->base; 12270 12271 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12272 12273 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12274 base, 
current); 12275 } 12276 if (terminate) { 12277 /* 12278 * Check for termination 12279 */ 12280 int cur_avail = 0; 12281 12282 if (ctxt->input != NULL) { 12283 if (ctxt->input->buf == NULL) 12284 cur_avail = ctxt->input->length - 12285 (ctxt->input->cur - ctxt->input->base); 12286 else 12287 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12288 (ctxt->input->cur - ctxt->input->base); 12289 } 12290 12291 if ((ctxt->instate != XML_PARSER_EOF) && 12292 (ctxt->instate != XML_PARSER_EPILOG)) { 12293 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12294 } 12295 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12296 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12297 } 12298 if (ctxt->instate != XML_PARSER_EOF) { 12299 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12300 ctxt->sax->endDocument(ctxt->userData); 12301 } 12302 ctxt->instate = XML_PARSER_EOF; 12303 } 12304 if (ctxt->wellFormed == 0) 12305 return((xmlParserErrors) ctxt->errNo); 12306 else 12307 return(0); 12308 } 12309 12310 /************************************************************************ 12311 * * 12312 * I/O front end functions to the parser * 12313 * * 12314 ************************************************************************/ 12315 12316 /** 12317 * xmlCreatePushParserCtxt: 12318 * @sax: a SAX handler 12319 * @user_data: The user data returned on SAX callbacks 12320 * @chunk: a pointer to an array of chars 12321 * @size: number of chars in the array 12322 * @filename: an optional file name or URI 12323 * 12324 * Create a parser context for using the XML parser in push mode. 12325 * If @buffer and @size are non-NULL, the data is used to detect 12326 * the encoding. The remaining characters will be parsed so they 12327 * don't need to be fed in again through xmlParseChunk. 12328 * To allow content encoding detection, @size should be >= 4 12329 * The value of @filename is used for fetching external entities 12330 * and error/warning reports. 
12331 * 12332 * Returns the new parser context or NULL 12333 */ 12334 12335 xmlParserCtxtPtr 12336 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12337 const char *chunk, int size, const char *filename) { 12338 xmlParserCtxtPtr ctxt; 12339 xmlParserInputPtr inputStream; 12340 xmlParserInputBufferPtr buf; 12341 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12342 12343 /* 12344 * plug some encoding conversion routines 12345 */ 12346 if ((chunk != NULL) && (size >= 4)) 12347 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12348 12349 buf = xmlAllocParserInputBuffer(enc); 12350 if (buf == NULL) return(NULL); 12351 12352 ctxt = xmlNewParserCtxt(); 12353 if (ctxt == NULL) { 12354 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12355 xmlFreeParserInputBuffer(buf); 12356 return(NULL); 12357 } 12358 ctxt->dictNames = 1; 12359 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12360 if (ctxt->pushTab == NULL) { 12361 xmlErrMemory(ctxt, NULL); 12362 xmlFreeParserInputBuffer(buf); 12363 xmlFreeParserCtxt(ctxt); 12364 return(NULL); 12365 } 12366 if (sax != NULL) { 12367 #ifdef LIBXML_SAX1_ENABLED 12368 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12369 #endif /* LIBXML_SAX1_ENABLED */ 12370 xmlFree(ctxt->sax); 12371 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12372 if (ctxt->sax == NULL) { 12373 xmlErrMemory(ctxt, NULL); 12374 xmlFreeParserInputBuffer(buf); 12375 xmlFreeParserCtxt(ctxt); 12376 return(NULL); 12377 } 12378 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12379 if (sax->initialized == XML_SAX2_MAGIC) 12380 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12381 else 12382 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12383 if (user_data != NULL) 12384 ctxt->userData = user_data; 12385 } 12386 if (filename == NULL) { 12387 ctxt->directory = NULL; 12388 } else { 12389 ctxt->directory = xmlParserGetDirectory(filename); 12390 } 12391 12392 inputStream = xmlNewInputStream(ctxt); 
12393 if (inputStream == NULL) { 12394 xmlFreeParserCtxt(ctxt); 12395 xmlFreeParserInputBuffer(buf); 12396 return(NULL); 12397 } 12398 12399 if (filename == NULL) 12400 inputStream->filename = NULL; 12401 else { 12402 inputStream->filename = (char *) 12403 xmlCanonicPath((const xmlChar *) filename); 12404 if (inputStream->filename == NULL) { 12405 xmlFreeParserCtxt(ctxt); 12406 xmlFreeParserInputBuffer(buf); 12407 return(NULL); 12408 } 12409 } 12410 inputStream->buf = buf; 12411 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12412 inputPush(ctxt, inputStream); 12413 12414 /* 12415 * If the caller didn't provide an initial 'chunk' for determining 12416 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12417 * that it can be automatically determined later 12418 */ 12419 if ((size == 0) || (chunk == NULL)) { 12420 ctxt->charset = XML_CHAR_ENCODING_NONE; 12421 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12422 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12423 size_t cur = ctxt->input->cur - ctxt->input->base; 12424 12425 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12426 12427 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12428 #ifdef DEBUG_PUSH 12429 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12430 #endif 12431 } 12432 12433 if (enc != XML_CHAR_ENCODING_NONE) { 12434 xmlSwitchEncoding(ctxt, enc); 12435 } 12436 12437 return(ctxt); 12438 } 12439 #endif /* LIBXML_PUSH_ENABLED */ 12440 12441 /** 12442 * xmlHaltParser: 12443 * @ctxt: an XML parser context 12444 * 12445 * Blocks further parser processing don't override error 12446 * for internal use 12447 */ 12448 static void 12449 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12450 if (ctxt == NULL) 12451 return; 12452 ctxt->instate = XML_PARSER_EOF; 12453 ctxt->disableSAX = 1; 12454 while (ctxt->inputNr > 1) 12455 xmlFreeInputStream(inputPop(ctxt)); 12456 if (ctxt->input != NULL) { 12457 /* 
12458 * in case there was a specific allocation deallocate before 12459 * overriding base 12460 */ 12461 if (ctxt->input->free != NULL) { 12462 ctxt->input->free((xmlChar *) ctxt->input->base); 12463 ctxt->input->free = NULL; 12464 } 12465 if (ctxt->input->buf != NULL) { 12466 xmlFreeParserInputBuffer(ctxt->input->buf); 12467 ctxt->input->buf = NULL; 12468 } 12469 ctxt->input->cur = BAD_CAST""; 12470 ctxt->input->length = 0; 12471 ctxt->input->base = ctxt->input->cur; 12472 ctxt->input->end = ctxt->input->cur; 12473 } 12474 } 12475 12476 /** 12477 * xmlStopParser: 12478 * @ctxt: an XML parser context 12479 * 12480 * Blocks further parser processing 12481 */ 12482 void 12483 xmlStopParser(xmlParserCtxtPtr ctxt) { 12484 if (ctxt == NULL) 12485 return; 12486 xmlHaltParser(ctxt); 12487 ctxt->errNo = XML_ERR_USER_STOP; 12488 } 12489 12490 /** 12491 * xmlCreateIOParserCtxt: 12492 * @sax: a SAX handler 12493 * @user_data: The user data returned on SAX callbacks 12494 * @ioread: an I/O read function 12495 * @ioclose: an I/O close function 12496 * @ioctx: an I/O handler 12497 * @enc: the charset encoding if known 12498 * 12499 * Create a parser context for using the XML parser with an existing 12500 * I/O stream 12501 * 12502 * Returns the new parser context or NULL 12503 */ 12504 xmlParserCtxtPtr 12505 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12506 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12507 void *ioctx, xmlCharEncoding enc) { 12508 xmlParserCtxtPtr ctxt; 12509 xmlParserInputPtr inputStream; 12510 xmlParserInputBufferPtr buf; 12511 12512 if (ioread == NULL) return(NULL); 12513 12514 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12515 if (buf == NULL) { 12516 if (ioclose != NULL) 12517 ioclose(ioctx); 12518 return (NULL); 12519 } 12520 12521 ctxt = xmlNewParserCtxt(); 12522 if (ctxt == NULL) { 12523 xmlFreeParserInputBuffer(buf); 12524 return(NULL); 12525 } 12526 if (sax != NULL) { 12527 #ifdef LIBXML_SAX1_ENABLED 
12528 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12529 #endif /* LIBXML_SAX1_ENABLED */ 12530 xmlFree(ctxt->sax); 12531 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12532 if (ctxt->sax == NULL) { 12533 xmlErrMemory(ctxt, NULL); 12534 xmlFreeParserCtxt(ctxt); 12535 return(NULL); 12536 } 12537 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12538 if (sax->initialized == XML_SAX2_MAGIC) 12539 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12540 else 12541 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12542 if (user_data != NULL) 12543 ctxt->userData = user_data; 12544 } 12545 12546 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12547 if (inputStream == NULL) { 12548 xmlFreeParserCtxt(ctxt); 12549 return(NULL); 12550 } 12551 inputPush(ctxt, inputStream); 12552 12553 return(ctxt); 12554 } 12555 12556 #ifdef LIBXML_VALID_ENABLED 12557 /************************************************************************ 12558 * * 12559 * Front ends when parsing a DTD * 12560 * * 12561 ************************************************************************/ 12562 12563 /** 12564 * xmlIOParseDTD: 12565 * @sax: the SAX handler block or NULL 12566 * @input: an Input Buffer 12567 * @enc: the charset encoding if known 12568 * 12569 * Load and parse a DTD 12570 * 12571 * Returns the resulting xmlDtdPtr or NULL in case of error. 12572 * @input will be freed by the function in any case. 
12573 */ 12574 12575 xmlDtdPtr 12576 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12577 xmlCharEncoding enc) { 12578 xmlDtdPtr ret = NULL; 12579 xmlParserCtxtPtr ctxt; 12580 xmlParserInputPtr pinput = NULL; 12581 xmlChar start[4]; 12582 12583 if (input == NULL) 12584 return(NULL); 12585 12586 ctxt = xmlNewParserCtxt(); 12587 if (ctxt == NULL) { 12588 xmlFreeParserInputBuffer(input); 12589 return(NULL); 12590 } 12591 12592 /* We are loading a DTD */ 12593 ctxt->options |= XML_PARSE_DTDLOAD; 12594 12595 /* 12596 * Set-up the SAX context 12597 */ 12598 if (sax != NULL) { 12599 if (ctxt->sax != NULL) 12600 xmlFree(ctxt->sax); 12601 ctxt->sax = sax; 12602 ctxt->userData = ctxt; 12603 } 12604 xmlDetectSAX2(ctxt); 12605 12606 /* 12607 * generate a parser input from the I/O handler 12608 */ 12609 12610 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12611 if (pinput == NULL) { 12612 if (sax != NULL) ctxt->sax = NULL; 12613 xmlFreeParserInputBuffer(input); 12614 xmlFreeParserCtxt(ctxt); 12615 return(NULL); 12616 } 12617 12618 /* 12619 * plug some encoding conversion routines here. 12620 */ 12621 if (xmlPushInput(ctxt, pinput) < 0) { 12622 if (sax != NULL) ctxt->sax = NULL; 12623 xmlFreeParserCtxt(ctxt); 12624 return(NULL); 12625 } 12626 if (enc != XML_CHAR_ENCODING_NONE) { 12627 xmlSwitchEncoding(ctxt, enc); 12628 } 12629 12630 pinput->filename = NULL; 12631 pinput->line = 1; 12632 pinput->col = 1; 12633 pinput->base = ctxt->input->cur; 12634 pinput->cur = ctxt->input->cur; 12635 pinput->free = NULL; 12636 12637 /* 12638 * let's parse that entity knowing it's an external subset. 
12639 */ 12640 ctxt->inSubset = 2; 12641 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12642 if (ctxt->myDoc == NULL) { 12643 xmlErrMemory(ctxt, "New Doc failed"); 12644 return(NULL); 12645 } 12646 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12647 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12648 BAD_CAST "none", BAD_CAST "none"); 12649 12650 if ((enc == XML_CHAR_ENCODING_NONE) && 12651 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12652 /* 12653 * Get the 4 first bytes and decode the charset 12654 * if enc != XML_CHAR_ENCODING_NONE 12655 * plug some encoding conversion routines. 12656 */ 12657 start[0] = RAW; 12658 start[1] = NXT(1); 12659 start[2] = NXT(2); 12660 start[3] = NXT(3); 12661 enc = xmlDetectCharEncoding(start, 4); 12662 if (enc != XML_CHAR_ENCODING_NONE) { 12663 xmlSwitchEncoding(ctxt, enc); 12664 } 12665 } 12666 12667 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12668 12669 if (ctxt->myDoc != NULL) { 12670 if (ctxt->wellFormed) { 12671 ret = ctxt->myDoc->extSubset; 12672 ctxt->myDoc->extSubset = NULL; 12673 if (ret != NULL) { 12674 xmlNodePtr tmp; 12675 12676 ret->doc = NULL; 12677 tmp = ret->children; 12678 while (tmp != NULL) { 12679 tmp->doc = NULL; 12680 tmp = tmp->next; 12681 } 12682 } 12683 } else { 12684 ret = NULL; 12685 } 12686 xmlFreeDoc(ctxt->myDoc); 12687 ctxt->myDoc = NULL; 12688 } 12689 if (sax != NULL) ctxt->sax = NULL; 12690 xmlFreeParserCtxt(ctxt); 12691 12692 return(ret); 12693 } 12694 12695 /** 12696 * xmlSAXParseDTD: 12697 * @sax: the SAX handler block 12698 * @ExternalID: a NAME* containing the External ID of the DTD 12699 * @SystemID: a NAME* containing the URL to the DTD 12700 * 12701 * Load and parse an external subset. 12702 * 12703 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12704 */ 12705 12706 xmlDtdPtr 12707 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12708 const xmlChar *SystemID) { 12709 xmlDtdPtr ret = NULL; 12710 xmlParserCtxtPtr ctxt; 12711 xmlParserInputPtr input = NULL; 12712 xmlCharEncoding enc; 12713 xmlChar* systemIdCanonic; 12714 12715 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12716 12717 ctxt = xmlNewParserCtxt(); 12718 if (ctxt == NULL) { 12719 return(NULL); 12720 } 12721 12722 /* We are loading a DTD */ 12723 ctxt->options |= XML_PARSE_DTDLOAD; 12724 12725 /* 12726 * Set-up the SAX context 12727 */ 12728 if (sax != NULL) { 12729 if (ctxt->sax != NULL) 12730 xmlFree(ctxt->sax); 12731 ctxt->sax = sax; 12732 ctxt->userData = ctxt; 12733 } 12734 12735 /* 12736 * Canonicalise the system ID 12737 */ 12738 systemIdCanonic = xmlCanonicPath(SystemID); 12739 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12740 xmlFreeParserCtxt(ctxt); 12741 return(NULL); 12742 } 12743 12744 /* 12745 * Ask the Entity resolver to load the damn thing 12746 */ 12747 12748 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12749 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12750 systemIdCanonic); 12751 if (input == NULL) { 12752 if (sax != NULL) ctxt->sax = NULL; 12753 xmlFreeParserCtxt(ctxt); 12754 if (systemIdCanonic != NULL) 12755 xmlFree(systemIdCanonic); 12756 return(NULL); 12757 } 12758 12759 /* 12760 * plug some encoding conversion routines here. 
12761 */ 12762 if (xmlPushInput(ctxt, input) < 0) { 12763 if (sax != NULL) ctxt->sax = NULL; 12764 xmlFreeParserCtxt(ctxt); 12765 if (systemIdCanonic != NULL) 12766 xmlFree(systemIdCanonic); 12767 return(NULL); 12768 } 12769 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12770 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12771 xmlSwitchEncoding(ctxt, enc); 12772 } 12773 12774 if (input->filename == NULL) 12775 input->filename = (char *) systemIdCanonic; 12776 else 12777 xmlFree(systemIdCanonic); 12778 input->line = 1; 12779 input->col = 1; 12780 input->base = ctxt->input->cur; 12781 input->cur = ctxt->input->cur; 12782 input->free = NULL; 12783 12784 /* 12785 * let's parse that entity knowing it's an external subset. 12786 */ 12787 ctxt->inSubset = 2; 12788 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12789 if (ctxt->myDoc == NULL) { 12790 xmlErrMemory(ctxt, "New Doc failed"); 12791 if (sax != NULL) ctxt->sax = NULL; 12792 xmlFreeParserCtxt(ctxt); 12793 return(NULL); 12794 } 12795 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12796 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12797 ExternalID, SystemID); 12798 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12799 12800 if (ctxt->myDoc != NULL) { 12801 if (ctxt->wellFormed) { 12802 ret = ctxt->myDoc->extSubset; 12803 ctxt->myDoc->extSubset = NULL; 12804 if (ret != NULL) { 12805 xmlNodePtr tmp; 12806 12807 ret->doc = NULL; 12808 tmp = ret->children; 12809 while (tmp != NULL) { 12810 tmp->doc = NULL; 12811 tmp = tmp->next; 12812 } 12813 } 12814 } else { 12815 ret = NULL; 12816 } 12817 xmlFreeDoc(ctxt->myDoc); 12818 ctxt->myDoc = NULL; 12819 } 12820 if (sax != NULL) ctxt->sax = NULL; 12821 xmlFreeParserCtxt(ctxt); 12822 12823 return(ret); 12824 } 12825 12826 12827 /** 12828 * xmlParseDTD: 12829 * @ExternalID: a NAME* containing the External ID of the DTD 12830 * @SystemID: a NAME* containing the URL to the DTD 12831 * 12832 * Load and parse an external subset. 
12833 * 12834 * Returns the resulting xmlDtdPtr or NULL in case of error. 12835 */ 12836 12837 xmlDtdPtr 12838 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12839 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12840 } 12841 #endif /* LIBXML_VALID_ENABLED */ 12842 12843 /************************************************************************ 12844 * * 12845 * Front ends when parsing an Entity * 12846 * * 12847 ************************************************************************/ 12848 12849 /** 12850 * xmlParseCtxtExternalEntity: 12851 * @ctx: the existing parsing context 12852 * @URL: the URL for the entity to load 12853 * @ID: the System ID for the entity to load 12854 * @lst: the return value for the set of parsed nodes 12855 * 12856 * Parse an external general entity within an existing parsing context 12857 * An external general parsed entity is well-formed if it matches the 12858 * production labeled extParsedEnt. 12859 * 12860 * [78] extParsedEnt ::= TextDecl? 
content 12861 * 12862 * Returns 0 if the entity is well formed, -1 in case of args problem and 12863 * the parser error code otherwise 12864 */ 12865 12866 int 12867 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12868 const xmlChar *ID, xmlNodePtr *lst) { 12869 xmlParserCtxtPtr ctxt; 12870 xmlDocPtr newDoc; 12871 xmlNodePtr newRoot; 12872 xmlSAXHandlerPtr oldsax = NULL; 12873 int ret = 0; 12874 xmlChar start[4]; 12875 xmlCharEncoding enc; 12876 12877 if (ctx == NULL) return(-1); 12878 12879 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12880 (ctx->depth > 1024)) { 12881 return(XML_ERR_ENTITY_LOOP); 12882 } 12883 12884 if (lst != NULL) 12885 *lst = NULL; 12886 if ((URL == NULL) && (ID == NULL)) 12887 return(-1); 12888 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12889 return(-1); 12890 12891 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12892 if (ctxt == NULL) { 12893 return(-1); 12894 } 12895 12896 oldsax = ctxt->sax; 12897 ctxt->sax = ctx->sax; 12898 xmlDetectSAX2(ctxt); 12899 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12900 if (newDoc == NULL) { 12901 xmlFreeParserCtxt(ctxt); 12902 return(-1); 12903 } 12904 newDoc->properties = XML_DOC_INTERNAL; 12905 if (ctx->myDoc->dict) { 12906 newDoc->dict = ctx->myDoc->dict; 12907 xmlDictReference(newDoc->dict); 12908 } 12909 if (ctx->myDoc != NULL) { 12910 newDoc->intSubset = ctx->myDoc->intSubset; 12911 newDoc->extSubset = ctx->myDoc->extSubset; 12912 } 12913 if (ctx->myDoc->URL != NULL) { 12914 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12915 } 12916 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12917 if (newRoot == NULL) { 12918 ctxt->sax = oldsax; 12919 xmlFreeParserCtxt(ctxt); 12920 newDoc->intSubset = NULL; 12921 newDoc->extSubset = NULL; 12922 xmlFreeDoc(newDoc); 12923 return(-1); 12924 } 12925 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12926 nodePush(ctxt, newDoc->children); 12927 if (ctx->myDoc == NULL) { 12928 
ctxt->myDoc = newDoc; 12929 } else { 12930 ctxt->myDoc = ctx->myDoc; 12931 newDoc->children->doc = ctx->myDoc; 12932 } 12933 12934 /* 12935 * Get the 4 first bytes and decode the charset 12936 * if enc != XML_CHAR_ENCODING_NONE 12937 * plug some encoding conversion routines. 12938 */ 12939 GROW 12940 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12941 start[0] = RAW; 12942 start[1] = NXT(1); 12943 start[2] = NXT(2); 12944 start[3] = NXT(3); 12945 enc = xmlDetectCharEncoding(start, 4); 12946 if (enc != XML_CHAR_ENCODING_NONE) { 12947 xmlSwitchEncoding(ctxt, enc); 12948 } 12949 } 12950 12951 /* 12952 * Parse a possible text declaration first 12953 */ 12954 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12955 xmlParseTextDecl(ctxt); 12956 /* 12957 * An XML-1.0 document can't reference an entity not XML-1.0 12958 */ 12959 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12960 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12961 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12962 "Version mismatch between document and entity\n"); 12963 } 12964 } 12965 12966 /* 12967 * If the user provided its own SAX callbacks then reuse the 12968 * useData callback field, otherwise the expected setup in a 12969 * DOM builder is to have userData == ctxt 12970 */ 12971 if (ctx->userData == ctx) 12972 ctxt->userData = ctxt; 12973 else 12974 ctxt->userData = ctx->userData; 12975 12976 /* 12977 * Doing validity checking on chunk doesn't make sense 12978 */ 12979 ctxt->instate = XML_PARSER_CONTENT; 12980 ctxt->validate = ctx->validate; 12981 ctxt->valid = ctx->valid; 12982 ctxt->loadsubset = ctx->loadsubset; 12983 ctxt->depth = ctx->depth + 1; 12984 ctxt->replaceEntities = ctx->replaceEntities; 12985 if (ctxt->validate) { 12986 ctxt->vctxt.error = ctx->vctxt.error; 12987 ctxt->vctxt.warning = ctx->vctxt.warning; 12988 } else { 12989 ctxt->vctxt.error = NULL; 12990 ctxt->vctxt.warning = NULL; 12991 } 12992 ctxt->vctxt.nodeTab = NULL; 12993 
ctxt->vctxt.nodeNr = 0; 12994 ctxt->vctxt.nodeMax = 0; 12995 ctxt->vctxt.node = NULL; 12996 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12997 ctxt->dict = ctx->dict; 12998 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12999 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13000 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13001 ctxt->dictNames = ctx->dictNames; 13002 ctxt->attsDefault = ctx->attsDefault; 13003 ctxt->attsSpecial = ctx->attsSpecial; 13004 ctxt->linenumbers = ctx->linenumbers; 13005 13006 xmlParseContent(ctxt); 13007 13008 ctx->validate = ctxt->validate; 13009 ctx->valid = ctxt->valid; 13010 if ((RAW == '<') && (NXT(1) == '/')) { 13011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13012 } else if (RAW != 0) { 13013 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13014 } 13015 if (ctxt->node != newDoc->children) { 13016 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13017 } 13018 13019 if (!ctxt->wellFormed) { 13020 if (ctxt->errNo == 0) 13021 ret = 1; 13022 else 13023 ret = ctxt->errNo; 13024 } else { 13025 if (lst != NULL) { 13026 xmlNodePtr cur; 13027 13028 /* 13029 * Return the newly created nodeset after unlinking it from 13030 * they pseudo parent. 
13031 */ 13032 cur = newDoc->children->children; 13033 *lst = cur; 13034 while (cur != NULL) { 13035 cur->parent = NULL; 13036 cur = cur->next; 13037 } 13038 newDoc->children->children = NULL; 13039 } 13040 ret = 0; 13041 } 13042 ctxt->sax = oldsax; 13043 ctxt->dict = NULL; 13044 ctxt->attsDefault = NULL; 13045 ctxt->attsSpecial = NULL; 13046 xmlFreeParserCtxt(ctxt); 13047 newDoc->intSubset = NULL; 13048 newDoc->extSubset = NULL; 13049 xmlFreeDoc(newDoc); 13050 13051 return(ret); 13052 } 13053 13054 /** 13055 * xmlParseExternalEntityPrivate: 13056 * @doc: the document the chunk pertains to 13057 * @oldctxt: the previous parser context if available 13058 * @sax: the SAX handler bloc (possibly NULL) 13059 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13060 * @depth: Used for loop detection, use 0 13061 * @URL: the URL for the entity to load 13062 * @ID: the System ID for the entity to load 13063 * @list: the return value for the set of parsed nodes 13064 * 13065 * Private version of xmlParseExternalEntity() 13066 * 13067 * Returns 0 if the entity is well formed, -1 in case of args problem and 13068 * the parser error code otherwise 13069 */ 13070 13071 static xmlParserErrors 13072 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13073 xmlSAXHandlerPtr sax, 13074 void *user_data, int depth, const xmlChar *URL, 13075 const xmlChar *ID, xmlNodePtr *list) { 13076 xmlParserCtxtPtr ctxt; 13077 xmlDocPtr newDoc; 13078 xmlNodePtr newRoot; 13079 xmlSAXHandlerPtr oldsax = NULL; 13080 xmlParserErrors ret = XML_ERR_OK; 13081 xmlChar start[4]; 13082 xmlCharEncoding enc; 13083 13084 if (((depth > 40) && 13085 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13086 (depth > 1024)) { 13087 return(XML_ERR_ENTITY_LOOP); 13088 } 13089 13090 if (list != NULL) 13091 *list = NULL; 13092 if ((URL == NULL) && (ID == NULL)) 13093 return(XML_ERR_INTERNAL_ERROR); 13094 if (doc == NULL) 13095 return(XML_ERR_INTERNAL_ERROR); 13096 
13097 13098 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13099 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13100 ctxt->userData = ctxt; 13101 if (oldctxt != NULL) { 13102 ctxt->_private = oldctxt->_private; 13103 ctxt->loadsubset = oldctxt->loadsubset; 13104 ctxt->validate = oldctxt->validate; 13105 ctxt->external = oldctxt->external; 13106 ctxt->record_info = oldctxt->record_info; 13107 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13108 ctxt->node_seq.length = oldctxt->node_seq.length; 13109 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13110 } else { 13111 /* 13112 * Doing validity checking on chunk without context 13113 * doesn't make sense 13114 */ 13115 ctxt->_private = NULL; 13116 ctxt->validate = 0; 13117 ctxt->external = 2; 13118 ctxt->loadsubset = 0; 13119 } 13120 if (sax != NULL) { 13121 oldsax = ctxt->sax; 13122 ctxt->sax = sax; 13123 if (user_data != NULL) 13124 ctxt->userData = user_data; 13125 } 13126 xmlDetectSAX2(ctxt); 13127 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13128 if (newDoc == NULL) { 13129 ctxt->node_seq.maximum = 0; 13130 ctxt->node_seq.length = 0; 13131 ctxt->node_seq.buffer = NULL; 13132 xmlFreeParserCtxt(ctxt); 13133 return(XML_ERR_INTERNAL_ERROR); 13134 } 13135 newDoc->properties = XML_DOC_INTERNAL; 13136 newDoc->intSubset = doc->intSubset; 13137 newDoc->extSubset = doc->extSubset; 13138 newDoc->dict = doc->dict; 13139 xmlDictReference(newDoc->dict); 13140 13141 if (doc->URL != NULL) { 13142 newDoc->URL = xmlStrdup(doc->URL); 13143 } 13144 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13145 if (newRoot == NULL) { 13146 if (sax != NULL) 13147 ctxt->sax = oldsax; 13148 ctxt->node_seq.maximum = 0; 13149 ctxt->node_seq.length = 0; 13150 ctxt->node_seq.buffer = NULL; 13151 xmlFreeParserCtxt(ctxt); 13152 newDoc->intSubset = NULL; 13153 newDoc->extSubset = NULL; 13154 xmlFreeDoc(newDoc); 13155 return(XML_ERR_INTERNAL_ERROR); 13156 } 13157 xmlAddChild((xmlNodePtr) newDoc, newRoot); 
13158 nodePush(ctxt, newDoc->children); 13159 ctxt->myDoc = doc; 13160 newRoot->doc = doc; 13161 13162 /* 13163 * Get the 4 first bytes and decode the charset 13164 * if enc != XML_CHAR_ENCODING_NONE 13165 * plug some encoding conversion routines. 13166 */ 13167 GROW; 13168 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13169 start[0] = RAW; 13170 start[1] = NXT(1); 13171 start[2] = NXT(2); 13172 start[3] = NXT(3); 13173 enc = xmlDetectCharEncoding(start, 4); 13174 if (enc != XML_CHAR_ENCODING_NONE) { 13175 xmlSwitchEncoding(ctxt, enc); 13176 } 13177 } 13178 13179 /* 13180 * Parse a possible text declaration first 13181 */ 13182 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13183 xmlParseTextDecl(ctxt); 13184 } 13185 13186 ctxt->instate = XML_PARSER_CONTENT; 13187 ctxt->depth = depth; 13188 13189 xmlParseContent(ctxt); 13190 13191 if ((RAW == '<') && (NXT(1) == '/')) { 13192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13193 } else if (RAW != 0) { 13194 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13195 } 13196 if (ctxt->node != newDoc->children) { 13197 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13198 } 13199 13200 if (!ctxt->wellFormed) { 13201 if (ctxt->errNo == 0) 13202 ret = XML_ERR_INTERNAL_ERROR; 13203 else 13204 ret = (xmlParserErrors)ctxt->errNo; 13205 } else { 13206 if (list != NULL) { 13207 xmlNodePtr cur; 13208 13209 /* 13210 * Return the newly created nodeset after unlinking it from 13211 * they pseudo parent. 13212 */ 13213 cur = newDoc->children->children; 13214 *list = cur; 13215 while (cur != NULL) { 13216 cur->parent = NULL; 13217 cur = cur->next; 13218 } 13219 newDoc->children->children = NULL; 13220 } 13221 ret = XML_ERR_OK; 13222 } 13223 13224 /* 13225 * Record in the parent context the number of entities replacement 13226 * done when parsing that reference. 
13227 */ 13228 if (oldctxt != NULL) 13229 oldctxt->nbentities += ctxt->nbentities; 13230 13231 /* 13232 * Also record the size of the entity parsed 13233 */ 13234 if (ctxt->input != NULL && oldctxt != NULL) { 13235 oldctxt->sizeentities += ctxt->input->consumed; 13236 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13237 } 13238 /* 13239 * And record the last error if any 13240 */ 13241 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK)) 13242 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13243 13244 if (sax != NULL) 13245 ctxt->sax = oldsax; 13246 if (oldctxt != NULL) { 13247 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13248 oldctxt->node_seq.length = ctxt->node_seq.length; 13249 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13250 } 13251 ctxt->node_seq.maximum = 0; 13252 ctxt->node_seq.length = 0; 13253 ctxt->node_seq.buffer = NULL; 13254 xmlFreeParserCtxt(ctxt); 13255 newDoc->intSubset = NULL; 13256 newDoc->extSubset = NULL; 13257 xmlFreeDoc(newDoc); 13258 13259 return(ret); 13260 } 13261 13262 #ifdef LIBXML_SAX1_ENABLED 13263 /** 13264 * xmlParseExternalEntity: 13265 * @doc: the document the chunk pertains to 13266 * @sax: the SAX handler bloc (possibly NULL) 13267 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13268 * @depth: Used for loop detection, use 0 13269 * @URL: the URL for the entity to load 13270 * @ID: the System ID for the entity to load 13271 * @lst: the return value for the set of parsed nodes 13272 * 13273 * Parse an external general entity 13274 * An external general parsed entity is well-formed if it matches the 13275 * production labeled extParsedEnt. 13276 * 13277 * [78] extParsedEnt ::= TextDecl? 
content 13278 * 13279 * Returns 0 if the entity is well formed, -1 in case of args problem and 13280 * the parser error code otherwise 13281 */ 13282 13283 int 13284 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13285 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13286 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13287 ID, lst)); 13288 } 13289 13290 /** 13291 * xmlParseBalancedChunkMemory: 13292 * @doc: the document the chunk pertains to 13293 * @sax: the SAX handler bloc (possibly NULL) 13294 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13295 * @depth: Used for loop detection, use 0 13296 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13297 * @lst: the return value for the set of parsed nodes 13298 * 13299 * Parse a well-balanced chunk of an XML document 13300 * called by the parser 13301 * The allowed sequence for the Well Balanced Chunk is the one defined by 13302 * the content production in the XML grammar: 13303 * 13304 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13305 * 13306 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13307 * the parser error code otherwise 13308 */ 13309 13310 int 13311 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13312 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13313 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13314 depth, string, lst, 0 ); 13315 } 13316 #endif /* LIBXML_SAX1_ENABLED */ 13317 13318 /** 13319 * xmlParseBalancedChunkMemoryInternal: 13320 * @oldctxt: the existing parsing context 13321 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13322 * @user_data: the user data field for the parser context 13323 * @lst: the return value for the set of parsed nodes 13324 * 13325 * 13326 * Parse a well-balanced chunk of an XML document 13327 * called by the parser 
13328 * The allowed sequence for the Well Balanced Chunk is the one defined by 13329 * the content production in the XML grammar: 13330 * 13331 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13332 * 13333 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13334 * error code otherwise 13335 * 13336 * In case recover is set to 1, the nodelist will not be empty even if 13337 * the parsed chunk is not well balanced. 13338 */ 13339 static xmlParserErrors 13340 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13341 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13342 xmlParserCtxtPtr ctxt; 13343 xmlDocPtr newDoc = NULL; 13344 xmlNodePtr newRoot; 13345 xmlSAXHandlerPtr oldsax = NULL; 13346 xmlNodePtr content = NULL; 13347 xmlNodePtr last = NULL; 13348 int size; 13349 xmlParserErrors ret = XML_ERR_OK; 13350 #ifdef SAX2 13351 int i; 13352 #endif 13353 13354 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13355 (oldctxt->depth > 1024)) { 13356 return(XML_ERR_ENTITY_LOOP); 13357 } 13358 13359 13360 if (lst != NULL) 13361 *lst = NULL; 13362 if (string == NULL) 13363 return(XML_ERR_INTERNAL_ERROR); 13364 13365 size = xmlStrlen(string); 13366 13367 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13368 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13369 if (user_data != NULL) 13370 ctxt->userData = user_data; 13371 else 13372 ctxt->userData = ctxt; 13373 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13374 ctxt->dict = oldctxt->dict; 13375 ctxt->input_id = oldctxt->input_id + 1; 13376 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13377 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13378 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13379 13380 #ifdef SAX2 13381 /* propagate namespaces down the entity */ 13382 for (i = 0;i < oldctxt->nsNr;i += 2) { 13383 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 
13384 } 13385 #endif 13386 13387 oldsax = ctxt->sax; 13388 ctxt->sax = oldctxt->sax; 13389 xmlDetectSAX2(ctxt); 13390 ctxt->replaceEntities = oldctxt->replaceEntities; 13391 ctxt->options = oldctxt->options; 13392 13393 ctxt->_private = oldctxt->_private; 13394 if (oldctxt->myDoc == NULL) { 13395 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13396 if (newDoc == NULL) { 13397 ctxt->sax = oldsax; 13398 ctxt->dict = NULL; 13399 xmlFreeParserCtxt(ctxt); 13400 return(XML_ERR_INTERNAL_ERROR); 13401 } 13402 newDoc->properties = XML_DOC_INTERNAL; 13403 newDoc->dict = ctxt->dict; 13404 xmlDictReference(newDoc->dict); 13405 ctxt->myDoc = newDoc; 13406 } else { 13407 ctxt->myDoc = oldctxt->myDoc; 13408 content = ctxt->myDoc->children; 13409 last = ctxt->myDoc->last; 13410 } 13411 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13412 if (newRoot == NULL) { 13413 ctxt->sax = oldsax; 13414 ctxt->dict = NULL; 13415 xmlFreeParserCtxt(ctxt); 13416 if (newDoc != NULL) { 13417 xmlFreeDoc(newDoc); 13418 } 13419 return(XML_ERR_INTERNAL_ERROR); 13420 } 13421 ctxt->myDoc->children = NULL; 13422 ctxt->myDoc->last = NULL; 13423 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13424 nodePush(ctxt, ctxt->myDoc->children); 13425 ctxt->instate = XML_PARSER_CONTENT; 13426 ctxt->depth = oldctxt->depth + 1; 13427 13428 ctxt->validate = 0; 13429 ctxt->loadsubset = oldctxt->loadsubset; 13430 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13431 /* 13432 * ID/IDREF registration will be done in xmlValidateElement below 13433 */ 13434 ctxt->loadsubset |= XML_SKIP_IDS; 13435 } 13436 ctxt->dictNames = oldctxt->dictNames; 13437 ctxt->attsDefault = oldctxt->attsDefault; 13438 ctxt->attsSpecial = oldctxt->attsSpecial; 13439 13440 xmlParseContent(ctxt); 13441 if ((RAW == '<') && (NXT(1) == '/')) { 13442 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13443 } else if (RAW != 0) { 13444 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13445 } 13446 if (ctxt->node != 
ctxt->myDoc->children) { 13447 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13448 } 13449 13450 if (!ctxt->wellFormed) { 13451 if (ctxt->errNo == 0) 13452 ret = XML_ERR_INTERNAL_ERROR; 13453 else 13454 ret = (xmlParserErrors)ctxt->errNo; 13455 } else { 13456 ret = XML_ERR_OK; 13457 } 13458 13459 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13460 xmlNodePtr cur; 13461 13462 /* 13463 * Return the newly created nodeset after unlinking it from 13464 * they pseudo parent. 13465 */ 13466 cur = ctxt->myDoc->children->children; 13467 *lst = cur; 13468 while (cur != NULL) { 13469 #ifdef LIBXML_VALID_ENABLED 13470 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13471 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13472 (cur->type == XML_ELEMENT_NODE)) { 13473 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13474 oldctxt->myDoc, cur); 13475 } 13476 #endif /* LIBXML_VALID_ENABLED */ 13477 cur->parent = NULL; 13478 cur = cur->next; 13479 } 13480 ctxt->myDoc->children->children = NULL; 13481 } 13482 if (ctxt->myDoc != NULL) { 13483 xmlFreeNode(ctxt->myDoc->children); 13484 ctxt->myDoc->children = content; 13485 ctxt->myDoc->last = last; 13486 } 13487 13488 /* 13489 * Record in the parent context the number of entities replacement 13490 * done when parsing that reference. 
13491 */ 13492 if (oldctxt != NULL) 13493 oldctxt->nbentities += ctxt->nbentities; 13494 13495 /* 13496 * Also record the last error if any 13497 */ 13498 if (ctxt->lastError.code != XML_ERR_OK) 13499 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13500 13501 ctxt->sax = oldsax; 13502 ctxt->dict = NULL; 13503 ctxt->attsDefault = NULL; 13504 ctxt->attsSpecial = NULL; 13505 xmlFreeParserCtxt(ctxt); 13506 if (newDoc != NULL) { 13507 xmlFreeDoc(newDoc); 13508 } 13509 13510 return(ret); 13511 } 13512 13513 /** 13514 * xmlParseInNodeContext: 13515 * @node: the context node 13516 * @data: the input string 13517 * @datalen: the input string length in bytes 13518 * @options: a combination of xmlParserOption 13519 * @lst: the return value for the set of parsed nodes 13520 * 13521 * Parse a well-balanced chunk of an XML document 13522 * within the context (DTD, namespaces, etc ...) of the given node. 13523 * 13524 * The allowed sequence for the data is a Well Balanced Chunk defined by 13525 * the content production in the XML grammar: 13526 * 13527 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13528 * 13529 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13530 * error code otherwise 13531 */ 13532 xmlParserErrors 13533 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13534 int options, xmlNodePtr *lst) { 13535 #ifdef SAX2 13536 xmlParserCtxtPtr ctxt; 13537 xmlDocPtr doc = NULL; 13538 xmlNodePtr fake, cur; 13539 int nsnr = 0; 13540 13541 xmlParserErrors ret = XML_ERR_OK; 13542 13543 /* 13544 * check all input parameters, grab the document 13545 */ 13546 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13547 return(XML_ERR_INTERNAL_ERROR); 13548 switch (node->type) { 13549 case XML_ELEMENT_NODE: 13550 case XML_ATTRIBUTE_NODE: 13551 case XML_TEXT_NODE: 13552 case XML_CDATA_SECTION_NODE: 13553 case XML_ENTITY_REF_NODE: 13554 case XML_PI_NODE: 13555 case XML_COMMENT_NODE: 13556 case 
XML_DOCUMENT_NODE: 13557 case XML_HTML_DOCUMENT_NODE: 13558 break; 13559 default: 13560 return(XML_ERR_INTERNAL_ERROR); 13561 13562 } 13563 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13564 (node->type != XML_DOCUMENT_NODE) && 13565 (node->type != XML_HTML_DOCUMENT_NODE)) 13566 node = node->parent; 13567 if (node == NULL) 13568 return(XML_ERR_INTERNAL_ERROR); 13569 if (node->type == XML_ELEMENT_NODE) 13570 doc = node->doc; 13571 else 13572 doc = (xmlDocPtr) node; 13573 if (doc == NULL) 13574 return(XML_ERR_INTERNAL_ERROR); 13575 13576 /* 13577 * allocate a context and set-up everything not related to the 13578 * node position in the tree 13579 */ 13580 if (doc->type == XML_DOCUMENT_NODE) 13581 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13582 #ifdef LIBXML_HTML_ENABLED 13583 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13584 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13585 /* 13586 * When parsing in context, it makes no sense to add implied 13587 * elements like html/body/etc... 13588 */ 13589 options |= HTML_PARSE_NOIMPLIED; 13590 } 13591 #endif 13592 else 13593 return(XML_ERR_INTERNAL_ERROR); 13594 13595 if (ctxt == NULL) 13596 return(XML_ERR_NO_MEMORY); 13597 13598 /* 13599 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13600 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13601 * we must wait until the last moment to free the original one. 
13602 */ 13603 if (doc->dict != NULL) { 13604 if (ctxt->dict != NULL) 13605 xmlDictFree(ctxt->dict); 13606 ctxt->dict = doc->dict; 13607 } else 13608 options |= XML_PARSE_NODICT; 13609 13610 if (doc->encoding != NULL) { 13611 xmlCharEncodingHandlerPtr hdlr; 13612 13613 if (ctxt->encoding != NULL) 13614 xmlFree((xmlChar *) ctxt->encoding); 13615 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13616 13617 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13618 if (hdlr != NULL) { 13619 xmlSwitchToEncoding(ctxt, hdlr); 13620 } else { 13621 return(XML_ERR_UNSUPPORTED_ENCODING); 13622 } 13623 } 13624 13625 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13626 xmlDetectSAX2(ctxt); 13627 ctxt->myDoc = doc; 13628 /* parsing in context, i.e. as within existing content */ 13629 ctxt->input_id = 2; 13630 ctxt->instate = XML_PARSER_CONTENT; 13631 13632 fake = xmlNewComment(NULL); 13633 if (fake == NULL) { 13634 xmlFreeParserCtxt(ctxt); 13635 return(XML_ERR_NO_MEMORY); 13636 } 13637 xmlAddChild(node, fake); 13638 13639 if (node->type == XML_ELEMENT_NODE) { 13640 nodePush(ctxt, node); 13641 /* 13642 * initialize the SAX2 namespaces stack 13643 */ 13644 cur = node; 13645 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13646 xmlNsPtr ns = cur->nsDef; 13647 const xmlChar *iprefix, *ihref; 13648 13649 while (ns != NULL) { 13650 if (ctxt->dict) { 13651 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13652 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13653 } else { 13654 iprefix = ns->prefix; 13655 ihref = ns->href; 13656 } 13657 13658 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13659 nsPush(ctxt, iprefix, ihref); 13660 nsnr++; 13661 } 13662 ns = ns->next; 13663 } 13664 cur = cur->parent; 13665 } 13666 } 13667 13668 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13669 /* 13670 * ID/IDREF registration will be done in xmlValidateElement below 13671 */ 13672 ctxt->loadsubset |= XML_SKIP_IDS; 13673 } 13674 13675 #ifdef 
LIBXML_HTML_ENABLED 13676 if (doc->type == XML_HTML_DOCUMENT_NODE) 13677 __htmlParseContent(ctxt); 13678 else 13679 #endif 13680 xmlParseContent(ctxt); 13681 13682 nsPop(ctxt, nsnr); 13683 if ((RAW == '<') && (NXT(1) == '/')) { 13684 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13685 } else if (RAW != 0) { 13686 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13687 } 13688 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13689 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13690 ctxt->wellFormed = 0; 13691 } 13692 13693 if (!ctxt->wellFormed) { 13694 if (ctxt->errNo == 0) 13695 ret = XML_ERR_INTERNAL_ERROR; 13696 else 13697 ret = (xmlParserErrors)ctxt->errNo; 13698 } else { 13699 ret = XML_ERR_OK; 13700 } 13701 13702 /* 13703 * Return the newly created nodeset after unlinking it from 13704 * the pseudo sibling. 13705 */ 13706 13707 cur = fake->next; 13708 fake->next = NULL; 13709 node->last = fake; 13710 13711 if (cur != NULL) { 13712 cur->prev = NULL; 13713 } 13714 13715 *lst = cur; 13716 13717 while (cur != NULL) { 13718 cur->parent = NULL; 13719 cur = cur->next; 13720 } 13721 13722 xmlUnlinkNode(fake); 13723 xmlFreeNode(fake); 13724 13725 13726 if (ret != XML_ERR_OK) { 13727 xmlFreeNodeList(*lst); 13728 *lst = NULL; 13729 } 13730 13731 if (doc->dict != NULL) 13732 ctxt->dict = NULL; 13733 xmlFreeParserCtxt(ctxt); 13734 13735 return(ret); 13736 #else /* !SAX2 */ 13737 return(XML_ERR_INTERNAL_ERROR); 13738 #endif 13739 } 13740 13741 #ifdef LIBXML_SAX1_ENABLED 13742 /** 13743 * xmlParseBalancedChunkMemoryRecover: 13744 * @doc: the document the chunk pertains to 13745 * @sax: the SAX handler bloc (possibly NULL) 13746 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13747 * @depth: Used for loop detection, use 0 13748 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13749 * @lst: the return value for the set of parsed nodes 13750 * @recover: return nodes even if the data is broken (use 0) 13751 * 13752 * 13753 * 
Parse a well-balanced chunk of an XML document 13754 * called by the parser 13755 * The allowed sequence for the Well Balanced Chunk is the one defined by 13756 * the content production in the XML grammar: 13757 * 13758 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13759 * 13760 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13761 * the parser error code otherwise 13762 * 13763 * In case recover is set to 1, the nodelist will not be empty even if 13764 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13765 * some extent. 13766 */ 13767 int 13768 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13769 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13770 int recover) { 13771 xmlParserCtxtPtr ctxt; 13772 xmlDocPtr newDoc; 13773 xmlSAXHandlerPtr oldsax = NULL; 13774 xmlNodePtr content, newRoot; 13775 int size; 13776 int ret = 0; 13777 13778 if (depth > 40) { 13779 return(XML_ERR_ENTITY_LOOP); 13780 } 13781 13782 13783 if (lst != NULL) 13784 *lst = NULL; 13785 if (string == NULL) 13786 return(-1); 13787 13788 size = xmlStrlen(string); 13789 13790 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13791 if (ctxt == NULL) return(-1); 13792 ctxt->userData = ctxt; 13793 if (sax != NULL) { 13794 oldsax = ctxt->sax; 13795 ctxt->sax = sax; 13796 if (user_data != NULL) 13797 ctxt->userData = user_data; 13798 } 13799 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13800 if (newDoc == NULL) { 13801 xmlFreeParserCtxt(ctxt); 13802 return(-1); 13803 } 13804 newDoc->properties = XML_DOC_INTERNAL; 13805 if ((doc != NULL) && (doc->dict != NULL)) { 13806 xmlDictFree(ctxt->dict); 13807 ctxt->dict = doc->dict; 13808 xmlDictReference(ctxt->dict); 13809 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13810 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13811 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13812 
ctxt->dictNames = 1; 13813 } else { 13814 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13815 } 13816 if (doc != NULL) { 13817 newDoc->intSubset = doc->intSubset; 13818 newDoc->extSubset = doc->extSubset; 13819 } 13820 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13821 if (newRoot == NULL) { 13822 if (sax != NULL) 13823 ctxt->sax = oldsax; 13824 xmlFreeParserCtxt(ctxt); 13825 newDoc->intSubset = NULL; 13826 newDoc->extSubset = NULL; 13827 xmlFreeDoc(newDoc); 13828 return(-1); 13829 } 13830 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13831 nodePush(ctxt, newRoot); 13832 if (doc == NULL) { 13833 ctxt->myDoc = newDoc; 13834 } else { 13835 ctxt->myDoc = newDoc; 13836 newDoc->children->doc = doc; 13837 /* Ensure that doc has XML spec namespace */ 13838 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13839 newDoc->oldNs = doc->oldNs; 13840 } 13841 ctxt->instate = XML_PARSER_CONTENT; 13842 ctxt->input_id = 2; 13843 ctxt->depth = depth; 13844 13845 /* 13846 * Doing validity checking on chunk doesn't make sense 13847 */ 13848 ctxt->validate = 0; 13849 ctxt->loadsubset = 0; 13850 xmlDetectSAX2(ctxt); 13851 13852 if ( doc != NULL ){ 13853 content = doc->children; 13854 doc->children = NULL; 13855 xmlParseContent(ctxt); 13856 doc->children = content; 13857 } 13858 else { 13859 xmlParseContent(ctxt); 13860 } 13861 if ((RAW == '<') && (NXT(1) == '/')) { 13862 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13863 } else if (RAW != 0) { 13864 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13865 } 13866 if (ctxt->node != newDoc->children) { 13867 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13868 } 13869 13870 if (!ctxt->wellFormed) { 13871 if (ctxt->errNo == 0) 13872 ret = 1; 13873 else 13874 ret = ctxt->errNo; 13875 } else { 13876 ret = 0; 13877 } 13878 13879 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13880 xmlNodePtr cur; 13881 13882 /* 13883 * Return the newly created nodeset after unlinking it from 13884 * 
they pseudo parent. 13885 */ 13886 cur = newDoc->children->children; 13887 *lst = cur; 13888 while (cur != NULL) { 13889 xmlSetTreeDoc(cur, doc); 13890 cur->parent = NULL; 13891 cur = cur->next; 13892 } 13893 newDoc->children->children = NULL; 13894 } 13895 13896 if (sax != NULL) 13897 ctxt->sax = oldsax; 13898 xmlFreeParserCtxt(ctxt); 13899 newDoc->intSubset = NULL; 13900 newDoc->extSubset = NULL; 13901 newDoc->oldNs = NULL; 13902 xmlFreeDoc(newDoc); 13903 13904 return(ret); 13905 } 13906 13907 /** 13908 * xmlSAXParseEntity: 13909 * @sax: the SAX handler block 13910 * @filename: the filename 13911 * 13912 * parse an XML external entity out of context and build a tree. 13913 * It use the given SAX function block to handle the parsing callback. 13914 * If sax is NULL, fallback to the default DOM tree building routines. 13915 * 13916 * [78] extParsedEnt ::= TextDecl? content 13917 * 13918 * This correspond to a "Well Balanced" chunk 13919 * 13920 * Returns the resulting document tree 13921 */ 13922 13923 xmlDocPtr 13924 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13925 xmlDocPtr ret; 13926 xmlParserCtxtPtr ctxt; 13927 13928 ctxt = xmlCreateFileParserCtxt(filename); 13929 if (ctxt == NULL) { 13930 return(NULL); 13931 } 13932 if (sax != NULL) { 13933 if (ctxt->sax != NULL) 13934 xmlFree(ctxt->sax); 13935 ctxt->sax = sax; 13936 ctxt->userData = NULL; 13937 } 13938 13939 xmlParseExtParsedEnt(ctxt); 13940 13941 if (ctxt->wellFormed) 13942 ret = ctxt->myDoc; 13943 else { 13944 ret = NULL; 13945 xmlFreeDoc(ctxt->myDoc); 13946 ctxt->myDoc = NULL; 13947 } 13948 if (sax != NULL) 13949 ctxt->sax = NULL; 13950 xmlFreeParserCtxt(ctxt); 13951 13952 return(ret); 13953 } 13954 13955 /** 13956 * xmlParseEntity: 13957 * @filename: the filename 13958 * 13959 * parse an XML external entity out of context and build a tree. 13960 * 13961 * [78] extParsedEnt ::= TextDecl? 
content 13962 * 13963 * This correspond to a "Well Balanced" chunk 13964 * 13965 * Returns the resulting document tree 13966 */ 13967 13968 xmlDocPtr 13969 xmlParseEntity(const char *filename) { 13970 return(xmlSAXParseEntity(NULL, filename)); 13971 } 13972 #endif /* LIBXML_SAX1_ENABLED */ 13973 13974 /** 13975 * xmlCreateEntityParserCtxtInternal: 13976 * @URL: the entity URL 13977 * @ID: the entity PUBLIC ID 13978 * @base: a possible base for the target URI 13979 * @pctx: parser context used to set options on new context 13980 * 13981 * Create a parser context for an external entity 13982 * Automatic support for ZLIB/Compress compressed document is provided 13983 * by default if found at compile-time. 13984 * 13985 * Returns the new parser context or NULL 13986 */ 13987 static xmlParserCtxtPtr 13988 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13989 const xmlChar *base, xmlParserCtxtPtr pctx) { 13990 xmlParserCtxtPtr ctxt; 13991 xmlParserInputPtr inputStream; 13992 char *directory = NULL; 13993 xmlChar *uri; 13994 13995 ctxt = xmlNewParserCtxt(); 13996 if (ctxt == NULL) { 13997 return(NULL); 13998 } 13999 14000 if (pctx != NULL) { 14001 ctxt->options = pctx->options; 14002 ctxt->_private = pctx->_private; 14003 /* 14004 * this is a subparser of pctx, so the input_id should be 14005 * incremented to distinguish from main entity 14006 */ 14007 ctxt->input_id = pctx->input_id + 1; 14008 } 14009 14010 uri = xmlBuildURI(URL, base); 14011 14012 if (uri == NULL) { 14013 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14014 if (inputStream == NULL) { 14015 xmlFreeParserCtxt(ctxt); 14016 return(NULL); 14017 } 14018 14019 inputPush(ctxt, inputStream); 14020 14021 if ((ctxt->directory == NULL) && (directory == NULL)) 14022 directory = xmlParserGetDirectory((char *)URL); 14023 if ((ctxt->directory == NULL) && (directory != NULL)) 14024 ctxt->directory = directory; 14025 } else { 14026 inputStream = xmlLoadExternalEntity((char 
*)uri, (char *)ID, ctxt); 14027 if (inputStream == NULL) { 14028 xmlFree(uri); 14029 xmlFreeParserCtxt(ctxt); 14030 return(NULL); 14031 } 14032 14033 inputPush(ctxt, inputStream); 14034 14035 if ((ctxt->directory == NULL) && (directory == NULL)) 14036 directory = xmlParserGetDirectory((char *)uri); 14037 if ((ctxt->directory == NULL) && (directory != NULL)) 14038 ctxt->directory = directory; 14039 xmlFree(uri); 14040 } 14041 return(ctxt); 14042 } 14043 14044 /** 14045 * xmlCreateEntityParserCtxt: 14046 * @URL: the entity URL 14047 * @ID: the entity PUBLIC ID 14048 * @base: a possible base for the target URI 14049 * 14050 * Create a parser context for an external entity 14051 * Automatic support for ZLIB/Compress compressed document is provided 14052 * by default if found at compile-time. 14053 * 14054 * Returns the new parser context or NULL 14055 */ 14056 xmlParserCtxtPtr 14057 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14058 const xmlChar *base) { 14059 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14060 14061 } 14062 14063 /************************************************************************ 14064 * * 14065 * Front ends when parsing from a file * 14066 * * 14067 ************************************************************************/ 14068 14069 /** 14070 * xmlCreateURLParserCtxt: 14071 * @filename: the filename or URL 14072 * @options: a combination of xmlParserOption 14073 * 14074 * Create a parser context for a file or URL content. 
14075 * Automatic support for ZLIB/Compress compressed document is provided 14076 * by default if found at compile-time and for file accesses 14077 * 14078 * Returns the new parser context or NULL 14079 */ 14080 xmlParserCtxtPtr 14081 xmlCreateURLParserCtxt(const char *filename, int options) 14082 { 14083 xmlParserCtxtPtr ctxt; 14084 xmlParserInputPtr inputStream; 14085 char *directory = NULL; 14086 14087 ctxt = xmlNewParserCtxt(); 14088 if (ctxt == NULL) { 14089 xmlErrMemory(NULL, "cannot allocate parser context"); 14090 return(NULL); 14091 } 14092 14093 if (options) 14094 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14095 ctxt->linenumbers = 1; 14096 14097 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14098 if (inputStream == NULL) { 14099 xmlFreeParserCtxt(ctxt); 14100 return(NULL); 14101 } 14102 14103 inputPush(ctxt, inputStream); 14104 if ((ctxt->directory == NULL) && (directory == NULL)) 14105 directory = xmlParserGetDirectory(filename); 14106 if ((ctxt->directory == NULL) && (directory != NULL)) 14107 ctxt->directory = directory; 14108 14109 return(ctxt); 14110 } 14111 14112 /** 14113 * xmlCreateFileParserCtxt: 14114 * @filename: the filename 14115 * 14116 * Create a parser context for a file content. 14117 * Automatic support for ZLIB/Compress compressed document is provided 14118 * by default if found at compile-time. 14119 * 14120 * Returns the new parser context or NULL 14121 */ 14122 xmlParserCtxtPtr 14123 xmlCreateFileParserCtxt(const char *filename) 14124 { 14125 return(xmlCreateURLParserCtxt(filename, 0)); 14126 } 14127 14128 #ifdef LIBXML_SAX1_ENABLED 14129 /** 14130 * xmlSAXParseFileWithData: 14131 * @sax: the SAX handler block 14132 * @filename: the filename 14133 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14134 * documents 14135 * @data: the userdata 14136 * 14137 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14138 * compressed document is provided by default if found at compile-time. 14139 * It use the given SAX function block to handle the parsing callback. 14140 * If sax is NULL, fallback to the default DOM tree building routines. 14141 * 14142 * User data (void *) is stored within the parser context in the 14143 * context's _private member, so it is available nearly everywhere in libxml 14144 * 14145 * Returns the resulting document tree 14146 */ 14147 14148 xmlDocPtr 14149 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14150 int recovery, void *data) { 14151 xmlDocPtr ret; 14152 xmlParserCtxtPtr ctxt; 14153 14154 xmlInitParser(); 14155 14156 ctxt = xmlCreateFileParserCtxt(filename); 14157 if (ctxt == NULL) { 14158 return(NULL); 14159 } 14160 if (sax != NULL) { 14161 if (ctxt->sax != NULL) 14162 xmlFree(ctxt->sax); 14163 ctxt->sax = sax; 14164 } 14165 xmlDetectSAX2(ctxt); 14166 if (data!=NULL) { 14167 ctxt->_private = data; 14168 } 14169 14170 if (ctxt->directory == NULL) 14171 ctxt->directory = xmlParserGetDirectory(filename); 14172 14173 ctxt->recovery = recovery; 14174 14175 xmlParseDocument(ctxt); 14176 14177 if ((ctxt->wellFormed) || recovery) { 14178 ret = ctxt->myDoc; 14179 if (ret != NULL) { 14180 if (ctxt->input->buf->compressed > 0) 14181 ret->compression = 9; 14182 else 14183 ret->compression = ctxt->input->buf->compressed; 14184 } 14185 } 14186 else { 14187 ret = NULL; 14188 xmlFreeDoc(ctxt->myDoc); 14189 ctxt->myDoc = NULL; 14190 } 14191 if (sax != NULL) 14192 ctxt->sax = NULL; 14193 xmlFreeParserCtxt(ctxt); 14194 14195 return(ret); 14196 } 14197 14198 /** 14199 * xmlSAXParseFile: 14200 * @sax: the SAX handler block 14201 * @filename: the filename 14202 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14203 * documents 14204 * 14205 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14206 * compressed document is provided by default if found at compile-time. 14207 * It use the given SAX function block to handle the parsing callback. 14208 * If sax is NULL, fallback to the default DOM tree building routines. 14209 * 14210 * Returns the resulting document tree 14211 */ 14212 14213 xmlDocPtr 14214 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14215 int recovery) { 14216 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14217 } 14218 14219 /** 14220 * xmlRecoverDoc: 14221 * @cur: a pointer to an array of xmlChar 14222 * 14223 * parse an XML in-memory document and build a tree. 14224 * In the case the document is not Well Formed, a attempt to build a 14225 * tree is tried anyway 14226 * 14227 * Returns the resulting document tree or NULL in case of failure 14228 */ 14229 14230 xmlDocPtr 14231 xmlRecoverDoc(const xmlChar *cur) { 14232 return(xmlSAXParseDoc(NULL, cur, 1)); 14233 } 14234 14235 /** 14236 * xmlParseFile: 14237 * @filename: the filename 14238 * 14239 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14240 * compressed document is provided by default if found at compile-time. 14241 * 14242 * Returns the resulting document tree if the file was wellformed, 14243 * NULL otherwise. 14244 */ 14245 14246 xmlDocPtr 14247 xmlParseFile(const char *filename) { 14248 return(xmlSAXParseFile(NULL, filename, 0)); 14249 } 14250 14251 /** 14252 * xmlRecoverFile: 14253 * @filename: the filename 14254 * 14255 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14256 * compressed document is provided by default if found at compile-time. 
14257 * In the case the document is not Well Formed, it attempts to build 14258 * a tree anyway 14259 * 14260 * Returns the resulting document tree or NULL in case of failure 14261 */ 14262 14263 xmlDocPtr 14264 xmlRecoverFile(const char *filename) { 14265 return(xmlSAXParseFile(NULL, filename, 1)); 14266 } 14267 14268 14269 /** 14270 * xmlSetupParserForBuffer: 14271 * @ctxt: an XML parser context 14272 * @buffer: a xmlChar * buffer 14273 * @filename: a file name 14274 * 14275 * Setup the parser context to parse a new buffer; Clears any prior 14276 * contents from the parser context. The buffer parameter must not be 14277 * NULL, but the filename parameter can be 14278 */ 14279 void 14280 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14281 const char* filename) 14282 { 14283 xmlParserInputPtr input; 14284 14285 if ((ctxt == NULL) || (buffer == NULL)) 14286 return; 14287 14288 input = xmlNewInputStream(ctxt); 14289 if (input == NULL) { 14290 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14291 xmlClearParserCtxt(ctxt); 14292 return; 14293 } 14294 14295 xmlClearParserCtxt(ctxt); 14296 if (filename != NULL) 14297 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14298 input->base = buffer; 14299 input->cur = buffer; 14300 input->end = &buffer[xmlStrlen(buffer)]; 14301 inputPush(ctxt, input); 14302 } 14303 14304 /** 14305 * xmlSAXUserParseFile: 14306 * @sax: a SAX handler 14307 * @user_data: The user data returned on SAX callbacks 14308 * @filename: a file name 14309 * 14310 * parse an XML file and call the given SAX handler routines. 
14311 * Automatic support for ZLIB/Compress compressed document is provided 14312 * 14313 * Returns 0 in case of success or a error number otherwise 14314 */ 14315 int 14316 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14317 const char *filename) { 14318 int ret = 0; 14319 xmlParserCtxtPtr ctxt; 14320 14321 ctxt = xmlCreateFileParserCtxt(filename); 14322 if (ctxt == NULL) return -1; 14323 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14324 xmlFree(ctxt->sax); 14325 ctxt->sax = sax; 14326 xmlDetectSAX2(ctxt); 14327 14328 if (user_data != NULL) 14329 ctxt->userData = user_data; 14330 14331 xmlParseDocument(ctxt); 14332 14333 if (ctxt->wellFormed) 14334 ret = 0; 14335 else { 14336 if (ctxt->errNo != 0) 14337 ret = ctxt->errNo; 14338 else 14339 ret = -1; 14340 } 14341 if (sax != NULL) 14342 ctxt->sax = NULL; 14343 if (ctxt->myDoc != NULL) { 14344 xmlFreeDoc(ctxt->myDoc); 14345 ctxt->myDoc = NULL; 14346 } 14347 xmlFreeParserCtxt(ctxt); 14348 14349 return ret; 14350 } 14351 #endif /* LIBXML_SAX1_ENABLED */ 14352 14353 /************************************************************************ 14354 * * 14355 * Front ends when parsing from memory * 14356 * * 14357 ************************************************************************/ 14358 14359 /** 14360 * xmlCreateMemoryParserCtxt: 14361 * @buffer: a pointer to a char array 14362 * @size: the size of the array 14363 * 14364 * Create a parser context for an XML in-memory document. 
14365 * 14366 * Returns the new parser context or NULL 14367 */ 14368 xmlParserCtxtPtr 14369 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14370 xmlParserCtxtPtr ctxt; 14371 xmlParserInputPtr input; 14372 xmlParserInputBufferPtr buf; 14373 14374 if (buffer == NULL) 14375 return(NULL); 14376 if (size <= 0) 14377 return(NULL); 14378 14379 ctxt = xmlNewParserCtxt(); 14380 if (ctxt == NULL) 14381 return(NULL); 14382 14383 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14384 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14385 if (buf == NULL) { 14386 xmlFreeParserCtxt(ctxt); 14387 return(NULL); 14388 } 14389 14390 input = xmlNewInputStream(ctxt); 14391 if (input == NULL) { 14392 xmlFreeParserInputBuffer(buf); 14393 xmlFreeParserCtxt(ctxt); 14394 return(NULL); 14395 } 14396 14397 input->filename = NULL; 14398 input->buf = buf; 14399 xmlBufResetInput(input->buf->buffer, input); 14400 14401 inputPush(ctxt, input); 14402 return(ctxt); 14403 } 14404 14405 #ifdef LIBXML_SAX1_ENABLED 14406 /** 14407 * xmlSAXParseMemoryWithData: 14408 * @sax: the SAX handler block 14409 * @buffer: an pointer to a char array 14410 * @size: the size of the array 14411 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14412 * documents 14413 * @data: the userdata 14414 * 14415 * parse an XML in-memory block and use the given SAX function block 14416 * to handle the parsing callback. If sax is NULL, fallback to the default 14417 * DOM tree building routines. 
14418 * 14419 * User data (void *) is stored within the parser context in the 14420 * context's _private member, so it is available nearly everywhere in libxml 14421 * 14422 * Returns the resulting document tree 14423 */ 14424 14425 xmlDocPtr 14426 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14427 int size, int recovery, void *data) { 14428 xmlDocPtr ret; 14429 xmlParserCtxtPtr ctxt; 14430 14431 xmlInitParser(); 14432 14433 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14434 if (ctxt == NULL) return(NULL); 14435 if (sax != NULL) { 14436 if (ctxt->sax != NULL) 14437 xmlFree(ctxt->sax); 14438 ctxt->sax = sax; 14439 } 14440 xmlDetectSAX2(ctxt); 14441 if (data!=NULL) { 14442 ctxt->_private=data; 14443 } 14444 14445 ctxt->recovery = recovery; 14446 14447 xmlParseDocument(ctxt); 14448 14449 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14450 else { 14451 ret = NULL; 14452 xmlFreeDoc(ctxt->myDoc); 14453 ctxt->myDoc = NULL; 14454 } 14455 if (sax != NULL) 14456 ctxt->sax = NULL; 14457 xmlFreeParserCtxt(ctxt); 14458 14459 return(ret); 14460 } 14461 14462 /** 14463 * xmlSAXParseMemory: 14464 * @sax: the SAX handler block 14465 * @buffer: an pointer to a char array 14466 * @size: the size of the array 14467 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14468 * documents 14469 * 14470 * parse an XML in-memory block and use the given SAX function block 14471 * to handle the parsing callback. If sax is NULL, fallback to the default 14472 * DOM tree building routines. 14473 * 14474 * Returns the resulting document tree 14475 */ 14476 xmlDocPtr 14477 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14478 int size, int recovery) { 14479 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14480 } 14481 14482 /** 14483 * xmlParseMemory: 14484 * @buffer: an pointer to a char array 14485 * @size: the size of the array 14486 * 14487 * parse an XML in-memory block and build a tree. 
14488 * 14489 * Returns the resulting document tree 14490 */ 14491 14492 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14493 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14494 } 14495 14496 /** 14497 * xmlRecoverMemory: 14498 * @buffer: an pointer to a char array 14499 * @size: the size of the array 14500 * 14501 * parse an XML in-memory block and build a tree. 14502 * In the case the document is not Well Formed, an attempt to 14503 * build a tree is tried anyway 14504 * 14505 * Returns the resulting document tree or NULL in case of error 14506 */ 14507 14508 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14509 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14510 } 14511 14512 /** 14513 * xmlSAXUserParseMemory: 14514 * @sax: a SAX handler 14515 * @user_data: The user data returned on SAX callbacks 14516 * @buffer: an in-memory XML document input 14517 * @size: the length of the XML document in bytes 14518 * 14519 * A better SAX parsing routine. 14520 * parse an XML in-memory buffer and call the given SAX handler routines. 
14521 * 14522 * Returns 0 in case of success or a error number otherwise 14523 */ 14524 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14525 const char *buffer, int size) { 14526 int ret = 0; 14527 xmlParserCtxtPtr ctxt; 14528 14529 xmlInitParser(); 14530 14531 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14532 if (ctxt == NULL) return -1; 14533 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14534 xmlFree(ctxt->sax); 14535 ctxt->sax = sax; 14536 xmlDetectSAX2(ctxt); 14537 14538 if (user_data != NULL) 14539 ctxt->userData = user_data; 14540 14541 xmlParseDocument(ctxt); 14542 14543 if (ctxt->wellFormed) 14544 ret = 0; 14545 else { 14546 if (ctxt->errNo != 0) 14547 ret = ctxt->errNo; 14548 else 14549 ret = -1; 14550 } 14551 if (sax != NULL) 14552 ctxt->sax = NULL; 14553 if (ctxt->myDoc != NULL) { 14554 xmlFreeDoc(ctxt->myDoc); 14555 ctxt->myDoc = NULL; 14556 } 14557 xmlFreeParserCtxt(ctxt); 14558 14559 return ret; 14560 } 14561 #endif /* LIBXML_SAX1_ENABLED */ 14562 14563 /** 14564 * xmlCreateDocParserCtxt: 14565 * @cur: a pointer to an array of xmlChar 14566 * 14567 * Creates a parser context for an XML in-memory document. 14568 * 14569 * Returns the new parser context or NULL 14570 */ 14571 xmlParserCtxtPtr 14572 xmlCreateDocParserCtxt(const xmlChar *cur) { 14573 int len; 14574 14575 if (cur == NULL) 14576 return(NULL); 14577 len = xmlStrlen(cur); 14578 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14579 } 14580 14581 #ifdef LIBXML_SAX1_ENABLED 14582 /** 14583 * xmlSAXParseDoc: 14584 * @sax: the SAX handler block 14585 * @cur: a pointer to an array of xmlChar 14586 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14587 * documents 14588 * 14589 * parse an XML in-memory document and build a tree. 14590 * It use the given SAX function block to handle the parsing callback. 14591 * If sax is NULL, fallback to the default DOM tree building routines. 
14592 * 14593 * Returns the resulting document tree 14594 */ 14595 14596 xmlDocPtr 14597 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14598 xmlDocPtr ret; 14599 xmlParserCtxtPtr ctxt; 14600 xmlSAXHandlerPtr oldsax = NULL; 14601 14602 if (cur == NULL) return(NULL); 14603 14604 14605 ctxt = xmlCreateDocParserCtxt(cur); 14606 if (ctxt == NULL) return(NULL); 14607 if (sax != NULL) { 14608 oldsax = ctxt->sax; 14609 ctxt->sax = sax; 14610 ctxt->userData = NULL; 14611 } 14612 xmlDetectSAX2(ctxt); 14613 14614 xmlParseDocument(ctxt); 14615 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14616 else { 14617 ret = NULL; 14618 xmlFreeDoc(ctxt->myDoc); 14619 ctxt->myDoc = NULL; 14620 } 14621 if (sax != NULL) 14622 ctxt->sax = oldsax; 14623 xmlFreeParserCtxt(ctxt); 14624 14625 return(ret); 14626 } 14627 14628 /** 14629 * xmlParseDoc: 14630 * @cur: a pointer to an array of xmlChar 14631 * 14632 * parse an XML in-memory document and build a tree. 14633 * 14634 * Returns the resulting document tree 14635 */ 14636 14637 xmlDocPtr 14638 xmlParseDoc(const xmlChar *cur) { 14639 return(xmlSAXParseDoc(NULL, cur, 0)); 14640 } 14641 #endif /* LIBXML_SAX1_ENABLED */ 14642 14643 #ifdef LIBXML_LEGACY_ENABLED 14644 /************************************************************************ 14645 * * 14646 * Specific function to keep track of entities references * 14647 * and used by the XSLT debugger * 14648 * * 14649 ************************************************************************/ 14650 14651 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14652 14653 /** 14654 * xmlAddEntityReference: 14655 * @ent : A valid entity 14656 * @firstNode : A valid first node for children of entity 14657 * @lastNode : A valid last node of children entity 14658 * 14659 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14660 */ 14661 static void 14662 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14663 xmlNodePtr lastNode) 
14664 { 14665 if (xmlEntityRefFunc != NULL) { 14666 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14667 } 14668 } 14669 14670 14671 /** 14672 * xmlSetEntityReferenceFunc: 14673 * @func: A valid function 14674 * 14675 * Set the function to call call back when a xml reference has been made 14676 */ 14677 void 14678 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14679 { 14680 xmlEntityRefFunc = func; 14681 } 14682 #endif /* LIBXML_LEGACY_ENABLED */ 14683 14684 /************************************************************************ 14685 * * 14686 * Miscellaneous * 14687 * * 14688 ************************************************************************/ 14689 14690 #ifdef LIBXML_XPATH_ENABLED 14691 #include <libxml/xpath.h> 14692 #endif 14693 14694 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14695 static int xmlParserInitialized = 0; 14696 14697 /** 14698 * xmlInitParser: 14699 * 14700 * Initialization function for the XML parser. 14701 * This is not reentrant. Call once before processing in case of 14702 * use in multithreaded programs. 
14703 */ 14704 14705 void 14706 xmlInitParser(void) { 14707 if (xmlParserInitialized != 0) 14708 return; 14709 14710 #ifdef LIBXML_THREAD_ENABLED 14711 __xmlGlobalInitMutexLock(); 14712 if (xmlParserInitialized == 0) { 14713 #endif 14714 xmlInitThreads(); 14715 xmlInitGlobals(); 14716 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14717 (xmlGenericError == NULL)) 14718 initGenericErrorDefaultFunc(NULL); 14719 xmlInitMemory(); 14720 xmlInitializeDict(); 14721 xmlInitCharEncodingHandlers(); 14722 xmlDefaultSAXHandlerInit(); 14723 xmlRegisterDefaultInputCallbacks(); 14724 #ifdef LIBXML_OUTPUT_ENABLED 14725 xmlRegisterDefaultOutputCallbacks(); 14726 #endif /* LIBXML_OUTPUT_ENABLED */ 14727 #ifdef LIBXML_HTML_ENABLED 14728 htmlInitAutoClose(); 14729 htmlDefaultSAXHandlerInit(); 14730 #endif 14731 #ifdef LIBXML_XPATH_ENABLED 14732 xmlXPathInit(); 14733 #endif 14734 xmlParserInitialized = 1; 14735 #ifdef LIBXML_THREAD_ENABLED 14736 } 14737 __xmlGlobalInitMutexUnlock(); 14738 #endif 14739 } 14740 14741 /** 14742 * xmlCleanupParser: 14743 * 14744 * This function name is somewhat misleading. It does not clean up 14745 * parser state, it cleans up memory allocated by the library itself. 14746 * It is a cleanup function for the XML library. It tries to reclaim all 14747 * related global memory allocated for the library processing. 14748 * It doesn't deallocate any document related memory. One should 14749 * call xmlCleanupParser() only when the process has finished using 14750 * the library and all XML/HTML documents built with it. 14751 * See also xmlInitParser() which has the opposite function of preparing 14752 * the library for operations. 14753 * 14754 * WARNING: if your application is multithreaded or has plugin support 14755 * calling this may crash the application if another thread or 14756 * a plugin is still using libxml2. 
It's sometimes very hard to 14757 * guess if libxml2 is in use in the application, some libraries 14758 * or plugins may use it without notice. In case of doubt abstain 14759 * from calling this function or do it just before calling exit() 14760 * to avoid leak reports from valgrind ! 14761 */ 14762 14763 void 14764 xmlCleanupParser(void) { 14765 if (!xmlParserInitialized) 14766 return; 14767 14768 xmlCleanupCharEncodingHandlers(); 14769 #ifdef LIBXML_CATALOG_ENABLED 14770 xmlCatalogCleanup(); 14771 #endif 14772 xmlDictCleanup(); 14773 xmlCleanupInputCallbacks(); 14774 #ifdef LIBXML_OUTPUT_ENABLED 14775 xmlCleanupOutputCallbacks(); 14776 #endif 14777 #ifdef LIBXML_SCHEMAS_ENABLED 14778 xmlSchemaCleanupTypes(); 14779 xmlRelaxNGCleanupTypes(); 14780 #endif 14781 xmlResetLastError(); 14782 xmlCleanupGlobals(); 14783 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14784 xmlCleanupMemory(); 14785 xmlParserInitialized = 0; 14786 } 14787 14788 /************************************************************************ 14789 * * 14790 * New set (2.6.0) of simpler and more flexible APIs * 14791 * * 14792 ************************************************************************/ 14793 14794 /** 14795 * DICT_FREE: 14796 * @str: a string 14797 * 14798 * Free a string if it is not owned by the "dict" dictionary in the 14799 * current scope 14800 */ 14801 #define DICT_FREE(str) \ 14802 if ((str) && ((!dict) || \ 14803 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14804 xmlFree((char *)(str)); 14805 14806 /** 14807 * xmlCtxtReset: 14808 * @ctxt: an XML parser context 14809 * 14810 * Reset a parser context 14811 */ 14812 void 14813 xmlCtxtReset(xmlParserCtxtPtr ctxt) 14814 { 14815 xmlParserInputPtr input; 14816 xmlDictPtr dict; 14817 14818 if (ctxt == NULL) 14819 return; 14820 14821 dict = ctxt->dict; 14822 14823 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14824 xmlFreeInputStream(input); 14825 } 14826 ctxt->inputNr = 0; 14827 
ctxt->input = NULL; 14828 14829 ctxt->spaceNr = 0; 14830 if (ctxt->spaceTab != NULL) { 14831 ctxt->spaceTab[0] = -1; 14832 ctxt->space = &ctxt->spaceTab[0]; 14833 } else { 14834 ctxt->space = NULL; 14835 } 14836 14837 14838 ctxt->nodeNr = 0; 14839 ctxt->node = NULL; 14840 14841 ctxt->nameNr = 0; 14842 ctxt->name = NULL; 14843 14844 DICT_FREE(ctxt->version); 14845 ctxt->version = NULL; 14846 DICT_FREE(ctxt->encoding); 14847 ctxt->encoding = NULL; 14848 DICT_FREE(ctxt->directory); 14849 ctxt->directory = NULL; 14850 DICT_FREE(ctxt->extSubURI); 14851 ctxt->extSubURI = NULL; 14852 DICT_FREE(ctxt->extSubSystem); 14853 ctxt->extSubSystem = NULL; 14854 if (ctxt->myDoc != NULL) 14855 xmlFreeDoc(ctxt->myDoc); 14856 ctxt->myDoc = NULL; 14857 14858 ctxt->standalone = -1; 14859 ctxt->hasExternalSubset = 0; 14860 ctxt->hasPErefs = 0; 14861 ctxt->html = 0; 14862 ctxt->external = 0; 14863 ctxt->instate = XML_PARSER_START; 14864 ctxt->token = 0; 14865 14866 ctxt->wellFormed = 1; 14867 ctxt->nsWellFormed = 1; 14868 ctxt->disableSAX = 0; 14869 ctxt->valid = 1; 14870 #if 0 14871 ctxt->vctxt.userData = ctxt; 14872 ctxt->vctxt.error = xmlParserValidityError; 14873 ctxt->vctxt.warning = xmlParserValidityWarning; 14874 #endif 14875 ctxt->record_info = 0; 14876 ctxt->nbChars = 0; 14877 ctxt->checkIndex = 0; 14878 ctxt->inSubset = 0; 14879 ctxt->errNo = XML_ERR_OK; 14880 ctxt->depth = 0; 14881 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14882 ctxt->catalogs = NULL; 14883 ctxt->nbentities = 0; 14884 ctxt->sizeentities = 0; 14885 ctxt->sizeentcopy = 0; 14886 xmlInitNodeInfoSeq(&ctxt->node_seq); 14887 14888 if (ctxt->attsDefault != NULL) { 14889 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 14890 ctxt->attsDefault = NULL; 14891 } 14892 if (ctxt->attsSpecial != NULL) { 14893 xmlHashFree(ctxt->attsSpecial, NULL); 14894 ctxt->attsSpecial = NULL; 14895 } 14896 14897 #ifdef LIBXML_CATALOG_ENABLED 14898 if (ctxt->catalogs != NULL) 14899 xmlCatalogFreeLocal(ctxt->catalogs); 14900 #endif 
14901 if (ctxt->lastError.code != XML_ERR_OK) 14902 xmlResetError(&ctxt->lastError); 14903 } 14904 14905 /** 14906 * xmlCtxtResetPush: 14907 * @ctxt: an XML parser context 14908 * @chunk: a pointer to an array of chars 14909 * @size: number of chars in the array 14910 * @filename: an optional file name or URI 14911 * @encoding: the document encoding, or NULL 14912 * 14913 * Reset a push parser context 14914 * 14915 * Returns 0 in case of success and 1 in case of error 14916 */ 14917 int 14918 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14919 int size, const char *filename, const char *encoding) 14920 { 14921 xmlParserInputPtr inputStream; 14922 xmlParserInputBufferPtr buf; 14923 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14924 14925 if (ctxt == NULL) 14926 return(1); 14927 14928 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14929 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14930 14931 buf = xmlAllocParserInputBuffer(enc); 14932 if (buf == NULL) 14933 return(1); 14934 14935 if (ctxt == NULL) { 14936 xmlFreeParserInputBuffer(buf); 14937 return(1); 14938 } 14939 14940 xmlCtxtReset(ctxt); 14941 14942 if (ctxt->pushTab == NULL) { 14943 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14944 sizeof(xmlChar *)); 14945 if (ctxt->pushTab == NULL) { 14946 xmlErrMemory(ctxt, NULL); 14947 xmlFreeParserInputBuffer(buf); 14948 return(1); 14949 } 14950 } 14951 14952 if (filename == NULL) { 14953 ctxt->directory = NULL; 14954 } else { 14955 ctxt->directory = xmlParserGetDirectory(filename); 14956 } 14957 14958 inputStream = xmlNewInputStream(ctxt); 14959 if (inputStream == NULL) { 14960 xmlFreeParserInputBuffer(buf); 14961 return(1); 14962 } 14963 14964 if (filename == NULL) 14965 inputStream->filename = NULL; 14966 else 14967 inputStream->filename = (char *) 14968 xmlCanonicPath((const xmlChar *) filename); 14969 inputStream->buf = buf; 14970 xmlBufResetInput(buf->buffer, inputStream); 14971 14972 inputPush(ctxt, inputStream); 
14973 14974 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14975 (ctxt->input->buf != NULL)) { 14976 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14977 size_t cur = ctxt->input->cur - ctxt->input->base; 14978 14979 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14980 14981 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14982 #ifdef DEBUG_PUSH 14983 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14984 #endif 14985 } 14986 14987 if (encoding != NULL) { 14988 xmlCharEncodingHandlerPtr hdlr; 14989 14990 if (ctxt->encoding != NULL) 14991 xmlFree((xmlChar *) ctxt->encoding); 14992 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14993 14994 hdlr = xmlFindCharEncodingHandler(encoding); 14995 if (hdlr != NULL) { 14996 xmlSwitchToEncoding(ctxt, hdlr); 14997 } else { 14998 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14999 "Unsupported encoding %s\n", BAD_CAST encoding); 15000 } 15001 } else if (enc != XML_CHAR_ENCODING_NONE) { 15002 xmlSwitchEncoding(ctxt, enc); 15003 } 15004 15005 return(0); 15006 } 15007 15008 15009 /** 15010 * xmlCtxtUseOptionsInternal: 15011 * @ctxt: an XML parser context 15012 * @options: a combination of xmlParserOption 15013 * @encoding: the user provided encoding to use 15014 * 15015 * Applies the options to the parser context 15016 * 15017 * Returns 0 in case of success, the set of unknown or unimplemented options 15018 * in case of error. 
15019 */ 15020 static int 15021 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15022 { 15023 if (ctxt == NULL) 15024 return(-1); 15025 if (encoding != NULL) { 15026 if (ctxt->encoding != NULL) 15027 xmlFree((xmlChar *) ctxt->encoding); 15028 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15029 } 15030 if (options & XML_PARSE_RECOVER) { 15031 ctxt->recovery = 1; 15032 options -= XML_PARSE_RECOVER; 15033 ctxt->options |= XML_PARSE_RECOVER; 15034 } else 15035 ctxt->recovery = 0; 15036 if (options & XML_PARSE_DTDLOAD) { 15037 ctxt->loadsubset = XML_DETECT_IDS; 15038 options -= XML_PARSE_DTDLOAD; 15039 ctxt->options |= XML_PARSE_DTDLOAD; 15040 } else 15041 ctxt->loadsubset = 0; 15042 if (options & XML_PARSE_DTDATTR) { 15043 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15044 options -= XML_PARSE_DTDATTR; 15045 ctxt->options |= XML_PARSE_DTDATTR; 15046 } 15047 if (options & XML_PARSE_NOENT) { 15048 ctxt->replaceEntities = 1; 15049 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15050 options -= XML_PARSE_NOENT; 15051 ctxt->options |= XML_PARSE_NOENT; 15052 } else 15053 ctxt->replaceEntities = 0; 15054 if (options & XML_PARSE_PEDANTIC) { 15055 ctxt->pedantic = 1; 15056 options -= XML_PARSE_PEDANTIC; 15057 ctxt->options |= XML_PARSE_PEDANTIC; 15058 } else 15059 ctxt->pedantic = 0; 15060 if (options & XML_PARSE_NOBLANKS) { 15061 ctxt->keepBlanks = 0; 15062 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15063 options -= XML_PARSE_NOBLANKS; 15064 ctxt->options |= XML_PARSE_NOBLANKS; 15065 } else 15066 ctxt->keepBlanks = 1; 15067 if (options & XML_PARSE_DTDVALID) { 15068 ctxt->validate = 1; 15069 if (options & XML_PARSE_NOWARNING) 15070 ctxt->vctxt.warning = NULL; 15071 if (options & XML_PARSE_NOERROR) 15072 ctxt->vctxt.error = NULL; 15073 options -= XML_PARSE_DTDVALID; 15074 ctxt->options |= XML_PARSE_DTDVALID; 15075 } else 15076 ctxt->validate = 0; 15077 if (options & XML_PARSE_NOWARNING) { 15078 ctxt->sax->warning = NULL; 
15079 options -= XML_PARSE_NOWARNING; 15080 } 15081 if (options & XML_PARSE_NOERROR) { 15082 ctxt->sax->error = NULL; 15083 ctxt->sax->fatalError = NULL; 15084 options -= XML_PARSE_NOERROR; 15085 } 15086 #ifdef LIBXML_SAX1_ENABLED 15087 if (options & XML_PARSE_SAX1) { 15088 ctxt->sax->startElement = xmlSAX2StartElement; 15089 ctxt->sax->endElement = xmlSAX2EndElement; 15090 ctxt->sax->startElementNs = NULL; 15091 ctxt->sax->endElementNs = NULL; 15092 ctxt->sax->initialized = 1; 15093 options -= XML_PARSE_SAX1; 15094 ctxt->options |= XML_PARSE_SAX1; 15095 } 15096 #endif /* LIBXML_SAX1_ENABLED */ 15097 if (options & XML_PARSE_NODICT) { 15098 ctxt->dictNames = 0; 15099 options -= XML_PARSE_NODICT; 15100 ctxt->options |= XML_PARSE_NODICT; 15101 } else { 15102 ctxt->dictNames = 1; 15103 } 15104 if (options & XML_PARSE_NOCDATA) { 15105 ctxt->sax->cdataBlock = NULL; 15106 options -= XML_PARSE_NOCDATA; 15107 ctxt->options |= XML_PARSE_NOCDATA; 15108 } 15109 if (options & XML_PARSE_NSCLEAN) { 15110 ctxt->options |= XML_PARSE_NSCLEAN; 15111 options -= XML_PARSE_NSCLEAN; 15112 } 15113 if (options & XML_PARSE_NONET) { 15114 ctxt->options |= XML_PARSE_NONET; 15115 options -= XML_PARSE_NONET; 15116 } 15117 if (options & XML_PARSE_COMPACT) { 15118 ctxt->options |= XML_PARSE_COMPACT; 15119 options -= XML_PARSE_COMPACT; 15120 } 15121 if (options & XML_PARSE_OLD10) { 15122 ctxt->options |= XML_PARSE_OLD10; 15123 options -= XML_PARSE_OLD10; 15124 } 15125 if (options & XML_PARSE_NOBASEFIX) { 15126 ctxt->options |= XML_PARSE_NOBASEFIX; 15127 options -= XML_PARSE_NOBASEFIX; 15128 } 15129 if (options & XML_PARSE_HUGE) { 15130 ctxt->options |= XML_PARSE_HUGE; 15131 options -= XML_PARSE_HUGE; 15132 if (ctxt->dict != NULL) 15133 xmlDictSetLimit(ctxt->dict, 0); 15134 } 15135 if (options & XML_PARSE_OLDSAX) { 15136 ctxt->options |= XML_PARSE_OLDSAX; 15137 options -= XML_PARSE_OLDSAX; 15138 } 15139 if (options & XML_PARSE_IGNORE_ENC) { 15140 ctxt->options |= XML_PARSE_IGNORE_ENC; 15141 options 
-= XML_PARSE_IGNORE_ENC; 15142 } 15143 if (options & XML_PARSE_BIG_LINES) { 15144 ctxt->options |= XML_PARSE_BIG_LINES; 15145 options -= XML_PARSE_BIG_LINES; 15146 } 15147 ctxt->linenumbers = 1; 15148 return (options); 15149 } 15150 15151 /** 15152 * xmlCtxtUseOptions: 15153 * @ctxt: an XML parser context 15154 * @options: a combination of xmlParserOption 15155 * 15156 * Applies the options to the parser context 15157 * 15158 * Returns 0 in case of success, the set of unknown or unimplemented options 15159 * in case of error. 15160 */ 15161 int 15162 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15163 { 15164 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15165 } 15166 15167 /** 15168 * xmlDoRead: 15169 * @ctxt: an XML parser context 15170 * @URL: the base URL to use for the document 15171 * @encoding: the document encoding, or NULL 15172 * @options: a combination of xmlParserOption 15173 * @reuse: keep the context for reuse 15174 * 15175 * Common front-end for the xmlRead functions 15176 * 15177 * Returns the resulting document tree or NULL 15178 */ 15179 static xmlDocPtr 15180 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15181 int options, int reuse) 15182 { 15183 xmlDocPtr ret; 15184 15185 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15186 if (encoding != NULL) { 15187 xmlCharEncodingHandlerPtr hdlr; 15188 15189 hdlr = xmlFindCharEncodingHandler(encoding); 15190 if (hdlr != NULL) 15191 xmlSwitchToEncoding(ctxt, hdlr); 15192 } 15193 if ((URL != NULL) && (ctxt->input != NULL) && 15194 (ctxt->input->filename == NULL)) 15195 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15196 xmlParseDocument(ctxt); 15197 if ((ctxt->wellFormed) || ctxt->recovery) 15198 ret = ctxt->myDoc; 15199 else { 15200 ret = NULL; 15201 if (ctxt->myDoc != NULL) { 15202 xmlFreeDoc(ctxt->myDoc); 15203 } 15204 } 15205 ctxt->myDoc = NULL; 15206 if (!reuse) { 15207 xmlFreeParserCtxt(ctxt); 15208 } 15209 15210 return (ret); 15211 
} 15212 15213 /** 15214 * xmlReadDoc: 15215 * @cur: a pointer to a zero terminated string 15216 * @URL: the base URL to use for the document 15217 * @encoding: the document encoding, or NULL 15218 * @options: a combination of xmlParserOption 15219 * 15220 * parse an XML in-memory document and build a tree. 15221 * 15222 * Returns the resulting document tree 15223 */ 15224 xmlDocPtr 15225 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15226 { 15227 xmlParserCtxtPtr ctxt; 15228 15229 if (cur == NULL) 15230 return (NULL); 15231 xmlInitParser(); 15232 15233 ctxt = xmlCreateDocParserCtxt(cur); 15234 if (ctxt == NULL) 15235 return (NULL); 15236 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15237 } 15238 15239 /** 15240 * xmlReadFile: 15241 * @filename: a file or URL 15242 * @encoding: the document encoding, or NULL 15243 * @options: a combination of xmlParserOption 15244 * 15245 * parse an XML file from the filesystem or the network. 15246 * 15247 * Returns the resulting document tree 15248 */ 15249 xmlDocPtr 15250 xmlReadFile(const char *filename, const char *encoding, int options) 15251 { 15252 xmlParserCtxtPtr ctxt; 15253 15254 xmlInitParser(); 15255 ctxt = xmlCreateURLParserCtxt(filename, options); 15256 if (ctxt == NULL) 15257 return (NULL); 15258 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15259 } 15260 15261 /** 15262 * xmlReadMemory: 15263 * @buffer: a pointer to a char array 15264 * @size: the size of the array 15265 * @URL: the base URL to use for the document 15266 * @encoding: the document encoding, or NULL 15267 * @options: a combination of xmlParserOption 15268 * 15269 * parse an XML in-memory document and build a tree. 
15270 * 15271 * Returns the resulting document tree 15272 */ 15273 xmlDocPtr 15274 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15275 { 15276 xmlParserCtxtPtr ctxt; 15277 15278 xmlInitParser(); 15279 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15280 if (ctxt == NULL) 15281 return (NULL); 15282 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15283 } 15284 15285 /** 15286 * xmlReadFd: 15287 * @fd: an open file descriptor 15288 * @URL: the base URL to use for the document 15289 * @encoding: the document encoding, or NULL 15290 * @options: a combination of xmlParserOption 15291 * 15292 * parse an XML from a file descriptor and build a tree. 15293 * NOTE that the file descriptor will not be closed when the 15294 * reader is closed or reset. 15295 * 15296 * Returns the resulting document tree 15297 */ 15298 xmlDocPtr 15299 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15300 { 15301 xmlParserCtxtPtr ctxt; 15302 xmlParserInputBufferPtr input; 15303 xmlParserInputPtr stream; 15304 15305 if (fd < 0) 15306 return (NULL); 15307 xmlInitParser(); 15308 15309 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15310 if (input == NULL) 15311 return (NULL); 15312 input->closecallback = NULL; 15313 ctxt = xmlNewParserCtxt(); 15314 if (ctxt == NULL) { 15315 xmlFreeParserInputBuffer(input); 15316 return (NULL); 15317 } 15318 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15319 if (stream == NULL) { 15320 xmlFreeParserInputBuffer(input); 15321 xmlFreeParserCtxt(ctxt); 15322 return (NULL); 15323 } 15324 inputPush(ctxt, stream); 15325 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15326 } 15327 15328 /** 15329 * xmlReadIO: 15330 * @ioread: an I/O read function 15331 * @ioclose: an I/O close function 15332 * @ioctx: an I/O handler 15333 * @URL: the base URL to use for the document 15334 * @encoding: the document encoding, or NULL 15335 * @options: a combination of 
xmlParserOption 15336 * 15337 * parse an XML document from I/O functions and source and build a tree. 15338 * 15339 * Returns the resulting document tree 15340 */ 15341 xmlDocPtr 15342 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15343 void *ioctx, const char *URL, const char *encoding, int options) 15344 { 15345 xmlParserCtxtPtr ctxt; 15346 xmlParserInputBufferPtr input; 15347 xmlParserInputPtr stream; 15348 15349 if (ioread == NULL) 15350 return (NULL); 15351 xmlInitParser(); 15352 15353 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15354 XML_CHAR_ENCODING_NONE); 15355 if (input == NULL) { 15356 if (ioclose != NULL) 15357 ioclose(ioctx); 15358 return (NULL); 15359 } 15360 ctxt = xmlNewParserCtxt(); 15361 if (ctxt == NULL) { 15362 xmlFreeParserInputBuffer(input); 15363 return (NULL); 15364 } 15365 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15366 if (stream == NULL) { 15367 xmlFreeParserInputBuffer(input); 15368 xmlFreeParserCtxt(ctxt); 15369 return (NULL); 15370 } 15371 inputPush(ctxt, stream); 15372 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15373 } 15374 15375 /** 15376 * xmlCtxtReadDoc: 15377 * @ctxt: an XML parser context 15378 * @cur: a pointer to a zero terminated string 15379 * @URL: the base URL to use for the document 15380 * @encoding: the document encoding, or NULL 15381 * @options: a combination of xmlParserOption 15382 * 15383 * parse an XML in-memory document and build a tree. 
15384 * This reuses the existing @ctxt parser context 15385 * 15386 * Returns the resulting document tree 15387 */ 15388 xmlDocPtr 15389 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15390 const char *URL, const char *encoding, int options) 15391 { 15392 xmlParserInputPtr stream; 15393 15394 if (cur == NULL) 15395 return (NULL); 15396 if (ctxt == NULL) 15397 return (NULL); 15398 xmlInitParser(); 15399 15400 xmlCtxtReset(ctxt); 15401 15402 stream = xmlNewStringInputStream(ctxt, cur); 15403 if (stream == NULL) { 15404 return (NULL); 15405 } 15406 inputPush(ctxt, stream); 15407 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15408 } 15409 15410 /** 15411 * xmlCtxtReadFile: 15412 * @ctxt: an XML parser context 15413 * @filename: a file or URL 15414 * @encoding: the document encoding, or NULL 15415 * @options: a combination of xmlParserOption 15416 * 15417 * parse an XML file from the filesystem or the network. 15418 * This reuses the existing @ctxt parser context 15419 * 15420 * Returns the resulting document tree 15421 */ 15422 xmlDocPtr 15423 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15424 const char *encoding, int options) 15425 { 15426 xmlParserInputPtr stream; 15427 15428 if (filename == NULL) 15429 return (NULL); 15430 if (ctxt == NULL) 15431 return (NULL); 15432 xmlInitParser(); 15433 15434 xmlCtxtReset(ctxt); 15435 15436 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15437 if (stream == NULL) { 15438 return (NULL); 15439 } 15440 inputPush(ctxt, stream); 15441 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15442 } 15443 15444 /** 15445 * xmlCtxtReadMemory: 15446 * @ctxt: an XML parser context 15447 * @buffer: a pointer to a char array 15448 * @size: the size of the array 15449 * @URL: the base URL to use for the document 15450 * @encoding: the document encoding, or NULL 15451 * @options: a combination of xmlParserOption 15452 * 15453 * parse an XML in-memory document and build a tree. 
15454 * This reuses the existing @ctxt parser context 15455 * 15456 * Returns the resulting document tree 15457 */ 15458 xmlDocPtr 15459 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15460 const char *URL, const char *encoding, int options) 15461 { 15462 xmlParserInputBufferPtr input; 15463 xmlParserInputPtr stream; 15464 15465 if (ctxt == NULL) 15466 return (NULL); 15467 if (buffer == NULL) 15468 return (NULL); 15469 xmlInitParser(); 15470 15471 xmlCtxtReset(ctxt); 15472 15473 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15474 if (input == NULL) { 15475 return(NULL); 15476 } 15477 15478 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15479 if (stream == NULL) { 15480 xmlFreeParserInputBuffer(input); 15481 return(NULL); 15482 } 15483 15484 inputPush(ctxt, stream); 15485 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15486 } 15487 15488 /** 15489 * xmlCtxtReadFd: 15490 * @ctxt: an XML parser context 15491 * @fd: an open file descriptor 15492 * @URL: the base URL to use for the document 15493 * @encoding: the document encoding, or NULL 15494 * @options: a combination of xmlParserOption 15495 * 15496 * parse an XML from a file descriptor and build a tree. 15497 * This reuses the existing @ctxt parser context 15498 * NOTE that the file descriptor will not be closed when the 15499 * reader is closed or reset. 
15500 * 15501 * Returns the resulting document tree 15502 */ 15503 xmlDocPtr 15504 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15505 const char *URL, const char *encoding, int options) 15506 { 15507 xmlParserInputBufferPtr input; 15508 xmlParserInputPtr stream; 15509 15510 if (fd < 0) 15511 return (NULL); 15512 if (ctxt == NULL) 15513 return (NULL); 15514 xmlInitParser(); 15515 15516 xmlCtxtReset(ctxt); 15517 15518 15519 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15520 if (input == NULL) 15521 return (NULL); 15522 input->closecallback = NULL; 15523 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15524 if (stream == NULL) { 15525 xmlFreeParserInputBuffer(input); 15526 return (NULL); 15527 } 15528 inputPush(ctxt, stream); 15529 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15530 } 15531 15532 /** 15533 * xmlCtxtReadIO: 15534 * @ctxt: an XML parser context 15535 * @ioread: an I/O read function 15536 * @ioclose: an I/O close function 15537 * @ioctx: an I/O handler 15538 * @URL: the base URL to use for the document 15539 * @encoding: the document encoding, or NULL 15540 * @options: a combination of xmlParserOption 15541 * 15542 * parse an XML document from I/O functions and source and build a tree. 
15543 * This reuses the existing @ctxt parser context 15544 * 15545 * Returns the resulting document tree 15546 */ 15547 xmlDocPtr 15548 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15549 xmlInputCloseCallback ioclose, void *ioctx, 15550 const char *URL, 15551 const char *encoding, int options) 15552 { 15553 xmlParserInputBufferPtr input; 15554 xmlParserInputPtr stream; 15555 15556 if (ioread == NULL) 15557 return (NULL); 15558 if (ctxt == NULL) 15559 return (NULL); 15560 xmlInitParser(); 15561 15562 xmlCtxtReset(ctxt); 15563 15564 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15565 XML_CHAR_ENCODING_NONE); 15566 if (input == NULL) { 15567 if (ioclose != NULL) 15568 ioclose(ioctx); 15569 return (NULL); 15570 } 15571 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15572 if (stream == NULL) { 15573 xmlFreeParserInputBuffer(input); 15574 return (NULL); 15575 } 15576 inputPush(ctxt, stream); 15577 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15578 } 15579 15580 #define bottom_parser 15581 #include "elfgcchack.h" 15582