1 /* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10 #define IN_LIBXML 11 #include "libxml.h" 12 13 #if defined(_WIN32) && !defined (__CYGWIN__) 14 #define XML_DIR_SEP '\\' 15 #else 16 #define XML_DIR_SEP '/' 17 #endif 18 19 #include <string.h> 20 #ifdef HAVE_CTYPE_H 21 #include <ctype.h> 22 #endif 23 #ifdef HAVE_STDLIB_H 24 #include <stdlib.h> 25 #endif 26 #ifdef HAVE_SYS_STAT_H 27 #include <sys/stat.h> 28 #endif 29 #ifdef HAVE_FCNTL_H 30 #include <fcntl.h> 31 #endif 32 #ifdef HAVE_UNISTD_H 33 #include <unistd.h> 34 #endif 35 #ifdef HAVE_ZLIB_H 36 #include <zlib.h> 37 #endif 38 39 #include <libxml/xmlmemory.h> 40 #include <libxml/tree.h> 41 #include <libxml/parser.h> 42 #include <libxml/parserInternals.h> 43 #include <libxml/valid.h> 44 #include <libxml/entities.h> 45 #include <libxml/xmlerror.h> 46 #include <libxml/encoding.h> 47 #include <libxml/valid.h> 48 #include <libxml/xmlIO.h> 49 #include <libxml/uri.h> 50 #include <libxml/dict.h> 51 #include <libxml/SAX.h> 52 #ifdef LIBXML_CATALOG_ENABLED 53 #include <libxml/catalog.h> 54 #endif 55 #include <libxml/globals.h> 56 #include <libxml/chvalid.h> 57 58 #define CUR(ctxt) ctxt->input->cur 59 #define END(ctxt) ctxt->input->end 60 #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) 61 62 #include "buf.h" 63 #include "enc.h" 64 65 /* 66 * Various global defaults for parsing 67 */ 68 69 /** 70 * xmlCheckVersion: 71 * @version: the include version number 72 * 73 * check the compiled lib version against the include one. 74 * This can warn or immediately kill the application 75 */ 76 void 77 xmlCheckVersion(int version) { 78 int myversion = (int) LIBXML_VERSION; 79 80 xmlInitParser(); 81 82 if ((myversion / 10000) != (version / 10000)) { 83 xmlGenericError(xmlGenericErrorContext, 84 "Fatal: program compiled against libxml %d using libxml %d\n", 85 (version / 10000), (myversion / 10000)); 86 fprintf(stderr, 87 "Fatal: program compiled against libxml %d using libxml %d\n", 88 (version / 10000), (myversion / 10000)); 89 } 90 if ((myversion / 100) < (version / 100)) { 91 xmlGenericError(xmlGenericErrorContext, 92 "Warning: program compiled against libxml %d using older %d\n", 93 (version / 100), (myversion / 100)); 94 } 95 } 96 97 98 /************************************************************************ 99 * * 100 * Some factorized error routines * 101 * * 102 ************************************************************************/ 103 104 105 /** 106 * xmlErrMemory: 107 * @ctxt: an XML parser context 108 * @extra: extra informations 109 * 110 * Handle a redefinition of attribute error 111 */ 112 void 113 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 114 { 115 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 116 (ctxt->instate == XML_PARSER_EOF)) 117 return; 118 if (ctxt != NULL) { 119 ctxt->errNo = XML_ERR_NO_MEMORY; 120 ctxt->instate = XML_PARSER_EOF; 121 ctxt->disableSAX = 1; 122 } 123 if (extra) 124 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 125 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 126 NULL, NULL, 0, 0, 127 "Memory allocation failed : %s\n", extra); 128 else 129 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 130 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 131 NULL, NULL, 0, 0, "Memory allocation failed\n"); 132 } 133 134 /** 135 * __xmlErrEncoding: 136 * @ctxt: an XML parser context 137 * @xmlerr: the error number 138 * @msg: the error message 139 * @str1: an string info 140 * @str2: an string info 141 * 142 * Handle an encoding error 143 */ 144 void 145 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 146 const char *msg, const xmlChar * str1, const xmlChar * str2) 147 { 148 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 149 (ctxt->instate == XML_PARSER_EOF)) 150 return; 151 if (ctxt != NULL) 152 ctxt->errNo = xmlerr; 153 __xmlRaiseError(NULL, NULL, NULL, 154 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 155 NULL, 0, (const char *) str1, (const char *) str2, 156 NULL, 0, 0, msg, str1, str2); 157 if (ctxt != NULL) { 158 ctxt->wellFormed = 0; 159 if (ctxt->recovery == 0) 160 ctxt->disableSAX = 1; 161 } 162 } 163 164 /** 165 * xmlErrInternal: 166 * @ctxt: an XML parser context 167 * @msg: the error message 168 * @str: error informations 169 * 170 * Handle an internal error 171 */ 172 static void LIBXML_ATTR_FORMAT(2,0) 173 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 174 { 175 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 176 (ctxt->instate == XML_PARSER_EOF)) 177 return; 178 if (ctxt != NULL) 179 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 180 __xmlRaiseError(NULL, NULL, NULL, 181 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 182 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 183 0, 0, msg, str); 184 if (ctxt != NULL) { 185 ctxt->wellFormed = 0; 186 if (ctxt->recovery == 0) 187 ctxt->disableSAX = 1; 188 } 189 } 190 191 /** 192 * xmlErrEncodingInt: 193 * @ctxt: an XML parser context 194 * @error: the error number 195 * @msg: the error message 196 * @val: an integer value 197 * 198 * n encoding error 199 */ 200 static void LIBXML_ATTR_FORMAT(3,0) 201 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 202 const char *msg, int val) 203 { 204 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 205 (ctxt->instate == XML_PARSER_EOF)) 206 return; 207 if (ctxt != NULL) 208 ctxt->errNo = error; 209 __xmlRaiseError(NULL, NULL, NULL, 210 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 211 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 212 if (ctxt != NULL) { 213 ctxt->wellFormed = 0; 214 if (ctxt->recovery == 0) 215 ctxt->disableSAX = 1; 216 } 217 } 218 219 /** 220 * xmlIsLetter: 221 * @c: an unicode character (int) 222 * 223 * Check whether the character is allowed by the production 224 * [84] Letter ::= BaseChar | Ideographic 225 * 226 * Returns 0 if not, non-zero otherwise 227 */ 228 int 229 xmlIsLetter(int c) { 230 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 231 } 232 233 /************************************************************************ 234 * * 235 * Input handling functions for progressive parsing * 236 * * 237 ************************************************************************/ 238 239 /* #define DEBUG_INPUT */ 240 /* #define DEBUG_STACK */ 241 /* #define DEBUG_PUSH */ 242 243 244 /* we need to keep enough input to show errors in context */ 245 #define LINE_LEN 80 246 247 #ifdef DEBUG_INPUT 248 #define CHECK_BUFFER(in) check_buffer(in) 249 250 static 251 void check_buffer(xmlParserInputPtr in) { 252 if (in->base != xmlBufContent(in->buf->buffer)) { 253 xmlGenericError(xmlGenericErrorContext, 254 "xmlParserInput: base mismatch problem\n"); 255 } 256 if (in->cur < in->base) { 257 xmlGenericError(xmlGenericErrorContext, 258 "xmlParserInput: cur < base problem\n"); 259 } 260 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 261 xmlGenericError(xmlGenericErrorContext, 262 "xmlParserInput: cur > base + use problem\n"); 263 } 264 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 265 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 266 xmlBufUse(in->buf->buffer)); 267 } 268 269 #else 270 #define CHECK_BUFFER(in) 271 #endif 272 273 274 /** 275 * xmlParserInputRead: 276 * @in: an XML parser input 277 * @len: an indicative size for the lookahead 278 * 279 * This function was internal and is deprecated. 280 * 281 * Returns -1 as this is an error to use it. 282 */ 283 int 284 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 285 return(-1); 286 } 287 288 /** 289 * xmlParserInputGrow: 290 * @in: an XML parser input 291 * @len: an indicative size for the lookahead 292 * 293 * This function increase the input for the parser. It tries to 294 * preserve pointers to the input buffer, and keep already read data 295 * 296 * Returns the amount of char read, or -1 in case of error, 0 indicate the 297 * end of this entity 298 */ 299 int 300 xmlParserInputGrow(xmlParserInputPtr in, int len) { 301 int ret; 302 size_t indx; 303 const xmlChar *content; 304 305 if ((in == NULL) || (len < 0)) return(-1); 306 #ifdef DEBUG_INPUT 307 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 308 #endif 309 if (in->buf == NULL) return(-1); 310 if (in->base == NULL) return(-1); 311 if (in->cur == NULL) return(-1); 312 if (in->buf->buffer == NULL) return(-1); 313 314 CHECK_BUFFER(in); 315 316 indx = in->cur - in->base; 317 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 318 319 CHECK_BUFFER(in); 320 321 return(0); 322 } 323 if (in->buf->readcallback != NULL) { 324 ret = xmlParserInputBufferGrow(in->buf, len); 325 } else 326 return(0); 327 328 /* 329 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 330 * block, but we use it really as an integer to do some 331 * pointer arithmetic. Insure will raise it as a bug but in 332 * that specific case, that's not ! 333 */ 334 335 content = xmlBufContent(in->buf->buffer); 336 if (in->base != content) { 337 /* 338 * the buffer has been reallocated 339 */ 340 indx = in->cur - in->base; 341 in->base = content; 342 in->cur = &content[indx]; 343 } 344 in->end = xmlBufEnd(in->buf->buffer); 345 346 CHECK_BUFFER(in); 347 348 return(ret); 349 } 350 351 /** 352 * xmlParserInputShrink: 353 * @in: an XML parser input 354 * 355 * This function removes used input for the parser. 356 */ 357 void 358 xmlParserInputShrink(xmlParserInputPtr in) { 359 size_t used; 360 size_t ret; 361 size_t indx; 362 const xmlChar *content; 363 364 #ifdef DEBUG_INPUT 365 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 366 #endif 367 if (in == NULL) return; 368 if (in->buf == NULL) return; 369 if (in->base == NULL) return; 370 if (in->cur == NULL) return; 371 if (in->buf->buffer == NULL) return; 372 373 CHECK_BUFFER(in); 374 375 used = in->cur - xmlBufContent(in->buf->buffer); 376 /* 377 * Do not shrink on large buffers whose only a tiny fraction 378 * was consumed 379 */ 380 if (used > INPUT_CHUNK) { 381 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 382 if (ret > 0) { 383 in->cur -= ret; 384 in->consumed += ret; 385 } 386 in->end = xmlBufEnd(in->buf->buffer); 387 } 388 389 CHECK_BUFFER(in); 390 391 if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { 392 return; 393 } 394 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 395 content = xmlBufContent(in->buf->buffer); 396 if (in->base != content) { 397 /* 398 * the buffer has been reallocated 399 */ 400 indx = in->cur - in->base; 401 in->base = content; 402 in->cur = &content[indx]; 403 } 404 in->end = xmlBufEnd(in->buf->buffer); 405 406 CHECK_BUFFER(in); 407 } 408 409 /************************************************************************ 410 * * 411 * UTF8 character input and related functions * 412 * * 413 ************************************************************************/ 414 415 /** 416 * xmlNextChar: 417 * @ctxt: the XML parser context 418 * 419 * Skip to the next char input char. 420 */ 421 422 void 423 xmlNextChar(xmlParserCtxtPtr ctxt) 424 { 425 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 426 (ctxt->input == NULL)) 427 return; 428 429 if (!(VALID_CTXT(ctxt))) { 430 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); 431 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 432 xmlStopParser(ctxt); 433 return; 434 } 435 436 if ((*ctxt->input->cur == 0) && 437 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { 438 return; 439 } 440 441 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 442 const unsigned char *cur; 443 unsigned char c; 444 445 /* 446 * 2.11 End-of-Line Handling 447 * the literal two-character sequence "#xD#xA" or a standalone 448 * literal #xD, an XML processor must pass to the application 449 * the single character #xA. 450 */ 451 if (*(ctxt->input->cur) == '\n') { 452 ctxt->input->line++; ctxt->input->col = 1; 453 } else 454 ctxt->input->col++; 455 456 /* 457 * We are supposed to handle UTF8, check it's valid 458 * From rfc2044: encoding of the Unicode values on UTF-8: 459 * 460 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 461 * 0000 0000-0000 007F 0xxxxxxx 462 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 463 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 464 * 465 * Check for the 0x110000 limit too 466 */ 467 cur = ctxt->input->cur; 468 469 c = *cur; 470 if (c & 0x80) { 471 if (c == 0xC0) 472 goto encoding_error; 473 if (cur[1] == 0) { 474 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 475 cur = ctxt->input->cur; 476 } 477 if ((cur[1] & 0xc0) != 0x80) 478 goto encoding_error; 479 if ((c & 0xe0) == 0xe0) { 480 unsigned int val; 481 482 if (cur[2] == 0) { 483 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 484 cur = ctxt->input->cur; 485 } 486 if ((cur[2] & 0xc0) != 0x80) 487 goto encoding_error; 488 if ((c & 0xf0) == 0xf0) { 489 if (cur[3] == 0) { 490 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 491 cur = ctxt->input->cur; 492 } 493 if (((c & 0xf8) != 0xf0) || 494 ((cur[3] & 0xc0) != 0x80)) 495 goto encoding_error; 496 /* 4-byte code */ 497 ctxt->input->cur += 4; 498 val = (cur[0] & 0x7) << 18; 499 val |= (cur[1] & 0x3f) << 12; 500 val |= (cur[2] & 0x3f) << 6; 501 val |= cur[3] & 0x3f; 502 } else { 503 /* 3-byte code */ 504 ctxt->input->cur += 3; 505 val = (cur[0] & 0xf) << 12; 506 val |= (cur[1] & 0x3f) << 6; 507 val |= cur[2] & 0x3f; 508 } 509 if (((val > 0xd7ff) && (val < 0xe000)) || 510 ((val > 0xfffd) && (val < 0x10000)) || 511 (val >= 0x110000)) { 512 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 513 "Char 0x%X out of allowed range\n", 514 val); 515 } 516 } else 517 /* 2-byte code */ 518 ctxt->input->cur += 2; 519 } else 520 /* 1-byte code */ 521 ctxt->input->cur++; 522 523 ctxt->nbChars++; 524 } else { 525 /* 526 * Assume it's a fixed length encoding (1) with 527 * a compatible encoding for the ASCII set, since 528 * XML constructs only use < 128 chars 529 */ 530 531 if (*(ctxt->input->cur) == '\n') { 532 ctxt->input->line++; ctxt->input->col = 1; 533 } else 534 ctxt->input->col++; 535 ctxt->input->cur++; 536 ctxt->nbChars++; 537 } 538 if (*ctxt->input->cur == 0) 539 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 540 return; 541 encoding_error: 542 /* 543 * If we detect an UTF8 error that probably mean that the 544 * input encoding didn't get properly advertised in the 545 * declaration header. Report the error and switch the encoding 546 * to ISO-Latin-1 (if you don't like this policy, just declare the 547 * encoding !) 548 */ 549 if ((ctxt == NULL) || (ctxt->input == NULL) || 550 (ctxt->input->end - ctxt->input->cur < 4)) { 551 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 552 "Input is not proper UTF-8, indicate encoding !\n", 553 NULL, NULL); 554 } else { 555 char buffer[150]; 556 557 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 558 ctxt->input->cur[0], ctxt->input->cur[1], 559 ctxt->input->cur[2], ctxt->input->cur[3]); 560 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 561 "Input is not proper UTF-8, indicate encoding !\n%s", 562 BAD_CAST buffer, NULL); 563 } 564 ctxt->charset = XML_CHAR_ENCODING_8859_1; 565 ctxt->input->cur++; 566 return; 567 } 568 569 /** 570 * xmlCurrentChar: 571 * @ctxt: the XML parser context 572 * @len: pointer to the length of the char read 573 * 574 * The current char value, if using UTF-8 this may actually span multiple 575 * bytes in the input buffer. Implement the end of line normalization: 576 * 2.11 End-of-Line Handling 577 * Wherever an external parsed entity or the literal entity value 578 * of an internal parsed entity contains either the literal two-character 579 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 580 * must pass to the application the single character #xA. 581 * This behavior can conveniently be produced by normalizing all 582 * line breaks to #xA on input, before parsing.) 583 * 584 * Returns the current char value and its length 585 */ 586 587 int 588 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 589 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 590 if (ctxt->instate == XML_PARSER_EOF) 591 return(0); 592 593 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 594 *len = 1; 595 return((int) *ctxt->input->cur); 596 } 597 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 598 /* 599 * We are supposed to handle UTF8, check it's valid 600 * From rfc2044: encoding of the Unicode values on UTF-8: 601 * 602 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 603 * 0000 0000-0000 007F 0xxxxxxx 604 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 605 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 606 * 607 * Check for the 0x110000 limit too 608 */ 609 const unsigned char *cur = ctxt->input->cur; 610 unsigned char c; 611 unsigned int val; 612 613 c = *cur; 614 if (c & 0x80) { 615 if (((c & 0x40) == 0) || (c == 0xC0)) 616 goto encoding_error; 617 if (cur[1] == 0) { 618 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 619 cur = ctxt->input->cur; 620 } 621 if ((cur[1] & 0xc0) != 0x80) 622 goto encoding_error; 623 if ((c & 0xe0) == 0xe0) { 624 if (cur[2] == 0) { 625 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 626 cur = ctxt->input->cur; 627 } 628 if ((cur[2] & 0xc0) != 0x80) 629 goto encoding_error; 630 if ((c & 0xf0) == 0xf0) { 631 if (cur[3] == 0) { 632 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 633 cur = ctxt->input->cur; 634 } 635 if (((c & 0xf8) != 0xf0) || 636 ((cur[3] & 0xc0) != 0x80)) 637 goto encoding_error; 638 /* 4-byte code */ 639 *len = 4; 640 val = (cur[0] & 0x7) << 18; 641 val |= (cur[1] & 0x3f) << 12; 642 val |= (cur[2] & 0x3f) << 6; 643 val |= cur[3] & 0x3f; 644 if (val < 0x10000) 645 goto encoding_error; 646 } else { 647 /* 3-byte code */ 648 *len = 3; 649 val = (cur[0] & 0xf) << 12; 650 val |= (cur[1] & 0x3f) << 6; 651 val |= cur[2] & 0x3f; 652 if (val < 0x800) 653 goto encoding_error; 654 } 655 } else { 656 /* 2-byte code */ 657 *len = 2; 658 val = (cur[0] & 0x1f) << 6; 659 val |= cur[1] & 0x3f; 660 if (val < 0x80) 661 goto encoding_error; 662 } 663 if (!IS_CHAR(val)) { 664 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 665 "Char 0x%X out of allowed range\n", val); 666 } 667 return(val); 668 } else { 669 /* 1-byte code */ 670 *len = 1; 671 if (*ctxt->input->cur == 0) 672 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 673 if ((*ctxt->input->cur == 0) && 674 (ctxt->input->end > ctxt->input->cur)) { 675 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 676 "Char 0x0 out of allowed range\n", 0); 677 } 678 if (*ctxt->input->cur == 0xD) { 679 if (ctxt->input->cur[1] == 0xA) { 680 ctxt->nbChars++; 681 ctxt->input->cur++; 682 } 683 return(0xA); 684 } 685 return((int) *ctxt->input->cur); 686 } 687 } 688 /* 689 * Assume it's a fixed length encoding (1) with 690 * a compatible encoding for the ASCII set, since 691 * XML constructs only use < 128 chars 692 */ 693 *len = 1; 694 if (*ctxt->input->cur == 0xD) { 695 if (ctxt->input->cur[1] == 0xA) { 696 ctxt->nbChars++; 697 ctxt->input->cur++; 698 } 699 return(0xA); 700 } 701 return((int) *ctxt->input->cur); 702 encoding_error: 703 /* 704 * An encoding problem may arise from a truncated input buffer 705 * splitting a character in the middle. In that case do not raise 706 * an error but return 0 to endicate an end of stream problem 707 */ 708 if (ctxt->input->end - ctxt->input->cur < 4) { 709 *len = 0; 710 return(0); 711 } 712 713 /* 714 * If we detect an UTF8 error that probably mean that the 715 * input encoding didn't get properly advertised in the 716 * declaration header. Report the error and switch the encoding 717 * to ISO-Latin-1 (if you don't like this policy, just declare the 718 * encoding !) 719 */ 720 { 721 char buffer[150]; 722 723 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 724 ctxt->input->cur[0], ctxt->input->cur[1], 725 ctxt->input->cur[2], ctxt->input->cur[3]); 726 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 727 "Input is not proper UTF-8, indicate encoding !\n%s", 728 BAD_CAST buffer, NULL); 729 } 730 ctxt->charset = XML_CHAR_ENCODING_8859_1; 731 *len = 1; 732 return((int) *ctxt->input->cur); 733 } 734 735 /** 736 * xmlStringCurrentChar: 737 * @ctxt: the XML parser context 738 * @cur: pointer to the beginning of the char 739 * @len: pointer to the length of the char read 740 * 741 * The current char value, if using UTF-8 this may actually span multiple 742 * bytes in the input buffer. 743 * 744 * Returns the current char value and its length 745 */ 746 747 int 748 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 749 { 750 if ((len == NULL) || (cur == NULL)) return(0); 751 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 752 /* 753 * We are supposed to handle UTF8, check it's valid 754 * From rfc2044: encoding of the Unicode values on UTF-8: 755 * 756 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 757 * 0000 0000-0000 007F 0xxxxxxx 758 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 759 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 760 * 761 * Check for the 0x110000 limit too 762 */ 763 unsigned char c; 764 unsigned int val; 765 766 c = *cur; 767 if (c & 0x80) { 768 if ((cur[1] & 0xc0) != 0x80) 769 goto encoding_error; 770 if ((c & 0xe0) == 0xe0) { 771 772 if ((cur[2] & 0xc0) != 0x80) 773 goto encoding_error; 774 if ((c & 0xf0) == 0xf0) { 775 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 776 goto encoding_error; 777 /* 4-byte code */ 778 *len = 4; 779 val = (cur[0] & 0x7) << 18; 780 val |= (cur[1] & 0x3f) << 12; 781 val |= (cur[2] & 0x3f) << 6; 782 val |= cur[3] & 0x3f; 783 } else { 784 /* 3-byte code */ 785 *len = 3; 786 val = (cur[0] & 0xf) << 12; 787 val |= (cur[1] & 0x3f) << 6; 788 val |= cur[2] & 0x3f; 789 } 790 } else { 791 /* 2-byte code */ 792 *len = 2; 793 val = (cur[0] & 0x1f) << 6; 794 val |= cur[1] & 0x3f; 795 } 796 if (!IS_CHAR(val)) { 797 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 798 "Char 0x%X out of allowed range\n", val); 799 } 800 return (val); 801 } else { 802 /* 1-byte code */ 803 *len = 1; 804 return ((int) *cur); 805 } 806 } 807 /* 808 * Assume it's a fixed length encoding (1) with 809 * a compatible encoding for the ASCII set, since 810 * XML constructs only use < 128 chars 811 */ 812 *len = 1; 813 return ((int) *cur); 814 encoding_error: 815 816 /* 817 * An encoding problem may arise from a truncated input buffer 818 * splitting a character in the middle. In that case do not raise 819 * an error but return 0 to endicate an end of stream problem 820 */ 821 if ((ctxt == NULL) || (ctxt->input == NULL) || 822 (ctxt->input->end - ctxt->input->cur < 4)) { 823 *len = 0; 824 return(0); 825 } 826 /* 827 * If we detect an UTF8 error that probably mean that the 828 * input encoding didn't get properly advertised in the 829 * declaration header. Report the error and switch the encoding 830 * to ISO-Latin-1 (if you don't like this policy, just declare the 831 * encoding !) 832 */ 833 { 834 char buffer[150]; 835 836 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 837 ctxt->input->cur[0], ctxt->input->cur[1], 838 ctxt->input->cur[2], ctxt->input->cur[3]); 839 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 840 "Input is not proper UTF-8, indicate encoding !\n%s", 841 BAD_CAST buffer, NULL); 842 } 843 *len = 1; 844 return ((int) *cur); 845 } 846 847 /** 848 * xmlCopyCharMultiByte: 849 * @out: pointer to an array of xmlChar 850 * @val: the char value 851 * 852 * append the char value in the array 853 * 854 * Returns the number of xmlChar written 855 */ 856 int 857 xmlCopyCharMultiByte(xmlChar *out, int val) { 858 if (out == NULL) return(0); 859 /* 860 * We are supposed to handle UTF8, check it's valid 861 * From rfc2044: encoding of the Unicode values on UTF-8: 862 * 863 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 864 * 0000 0000-0000 007F 0xxxxxxx 865 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 866 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 867 */ 868 if (val >= 0x80) { 869 xmlChar *savedout = out; 870 int bits; 871 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 872 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 873 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 874 else { 875 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 876 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 877 val); 878 return(0); 879 } 880 for ( ; bits >= 0; bits-= 6) 881 *out++= ((val >> bits) & 0x3F) | 0x80 ; 882 return (out - savedout); 883 } 884 *out = (xmlChar) val; 885 return 1; 886 } 887 888 /** 889 * xmlCopyChar: 890 * @len: Ignored, compatibility 891 * @out: pointer to an array of xmlChar 892 * @val: the char value 893 * 894 * append the char value in the array 895 * 896 * Returns the number of xmlChar written 897 */ 898 899 int 900 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 901 if (out == NULL) return(0); 902 /* the len parameter is ignored */ 903 if (val >= 0x80) { 904 return(xmlCopyCharMultiByte (out, val)); 905 } 906 *out = (xmlChar) val; 907 return 1; 908 } 909 910 /************************************************************************ 911 * * 912 * Commodity functions to switch encodings * 913 * * 914 ************************************************************************/ 915 916 static int 917 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 918 xmlCharEncodingHandlerPtr handler, int len); 919 static int 920 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 921 xmlCharEncodingHandlerPtr handler, int len); 922 /** 923 * xmlSwitchEncoding: 924 * @ctxt: the parser context 925 * @enc: the encoding value (number) 926 * 927 * change the input functions when discovering the character encoding 928 * of a given entity. 929 * 930 * Returns 0 in case of success, -1 otherwise 931 */ 932 int 933 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 934 { 935 xmlCharEncodingHandlerPtr handler; 936 int len = -1; 937 int ret; 938 939 if (ctxt == NULL) return(-1); 940 switch (enc) { 941 case XML_CHAR_ENCODING_ERROR: 942 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 943 "encoding unknown\n", NULL, NULL); 944 return(-1); 945 case XML_CHAR_ENCODING_NONE: 946 /* let's assume it's UTF-8 without the XML decl */ 947 ctxt->charset = XML_CHAR_ENCODING_UTF8; 948 return(0); 949 case XML_CHAR_ENCODING_UTF8: 950 /* default encoding, no conversion should be needed */ 951 ctxt->charset = XML_CHAR_ENCODING_UTF8; 952 953 /* 954 * Errata on XML-1.0 June 20 2001 955 * Specific handling of the Byte Order Mark for 956 * UTF-8 957 */ 958 if ((ctxt->input != NULL) && 959 (ctxt->input->cur[0] == 0xEF) && 960 (ctxt->input->cur[1] == 0xBB) && 961 (ctxt->input->cur[2] == 0xBF)) { 962 ctxt->input->cur += 3; 963 } 964 return(0); 965 case XML_CHAR_ENCODING_UTF16LE: 966 case XML_CHAR_ENCODING_UTF16BE: 967 /*The raw input characters are encoded 968 *in UTF-16. As we expect this function 969 *to be called after xmlCharEncInFunc, we expect 970 *ctxt->input->cur to contain UTF-8 encoded characters. 971 *So the raw UTF16 Byte Order Mark 972 *has also been converted into 973 *an UTF-8 BOM. Let's skip that BOM. 974 */ 975 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 976 (ctxt->input->cur[0] == 0xEF) && 977 (ctxt->input->cur[1] == 0xBB) && 978 (ctxt->input->cur[2] == 0xBF)) { 979 ctxt->input->cur += 3; 980 } 981 len = 90; 982 break; 983 case XML_CHAR_ENCODING_UCS2: 984 len = 90; 985 break; 986 case XML_CHAR_ENCODING_UCS4BE: 987 case XML_CHAR_ENCODING_UCS4LE: 988 case XML_CHAR_ENCODING_UCS4_2143: 989 case XML_CHAR_ENCODING_UCS4_3412: 990 len = 180; 991 break; 992 case XML_CHAR_ENCODING_EBCDIC: 993 case XML_CHAR_ENCODING_8859_1: 994 case XML_CHAR_ENCODING_8859_2: 995 case XML_CHAR_ENCODING_8859_3: 996 case XML_CHAR_ENCODING_8859_4: 997 case XML_CHAR_ENCODING_8859_5: 998 case XML_CHAR_ENCODING_8859_6: 999 case XML_CHAR_ENCODING_8859_7: 1000 case XML_CHAR_ENCODING_8859_8: 1001 case XML_CHAR_ENCODING_8859_9: 1002 case XML_CHAR_ENCODING_ASCII: 1003 case XML_CHAR_ENCODING_2022_JP: 1004 case XML_CHAR_ENCODING_SHIFT_JIS: 1005 case XML_CHAR_ENCODING_EUC_JP: 1006 len = 45; 1007 break; 1008 } 1009 handler = xmlGetCharEncodingHandler(enc); 1010 if (handler == NULL) { 1011 /* 1012 * Default handlers. 1013 */ 1014 switch (enc) { 1015 case XML_CHAR_ENCODING_ASCII: 1016 /* default encoding, no conversion should be needed */ 1017 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1018 return(0); 1019 case XML_CHAR_ENCODING_UTF16LE: 1020 break; 1021 case XML_CHAR_ENCODING_UTF16BE: 1022 break; 1023 case XML_CHAR_ENCODING_UCS4LE: 1024 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1025 "encoding not supported %s\n", 1026 BAD_CAST "USC4 little endian", NULL); 1027 break; 1028 case XML_CHAR_ENCODING_UCS4BE: 1029 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1030 "encoding not supported %s\n", 1031 BAD_CAST "USC4 big endian", NULL); 1032 break; 1033 case XML_CHAR_ENCODING_EBCDIC: 1034 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1035 "encoding not supported %s\n", 1036 BAD_CAST "EBCDIC", NULL); 1037 break; 1038 case XML_CHAR_ENCODING_UCS4_2143: 1039 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1040 "encoding not supported %s\n", 1041 BAD_CAST "UCS4 2143", NULL); 1042 break; 1043 case XML_CHAR_ENCODING_UCS4_3412: 1044 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1045 "encoding not supported %s\n", 1046 BAD_CAST "UCS4 3412", NULL); 1047 break; 1048 case XML_CHAR_ENCODING_UCS2: 1049 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1050 "encoding not supported %s\n", 1051 BAD_CAST "UCS2", NULL); 1052 break; 1053 case XML_CHAR_ENCODING_8859_1: 1054 case XML_CHAR_ENCODING_8859_2: 1055 case XML_CHAR_ENCODING_8859_3: 1056 case XML_CHAR_ENCODING_8859_4: 1057 case XML_CHAR_ENCODING_8859_5: 1058 case XML_CHAR_ENCODING_8859_6: 1059 case XML_CHAR_ENCODING_8859_7: 1060 case XML_CHAR_ENCODING_8859_8: 1061 case XML_CHAR_ENCODING_8859_9: 1062 /* 1063 * We used to keep the internal content in the 1064 * document encoding however this turns being unmaintainable 1065 * So xmlGetCharEncodingHandler() will return non-null 1066 * values for this now. 1067 */ 1068 if ((ctxt->inputNr == 1) && 1069 (ctxt->encoding == NULL) && 1070 (ctxt->input != NULL) && 1071 (ctxt->input->encoding != NULL)) { 1072 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1073 } 1074 ctxt->charset = enc; 1075 return(0); 1076 case XML_CHAR_ENCODING_2022_JP: 1077 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1078 "encoding not supported %s\n", 1079 BAD_CAST "ISO-2022-JP", NULL); 1080 break; 1081 case XML_CHAR_ENCODING_SHIFT_JIS: 1082 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1083 "encoding not supported %s\n", 1084 BAD_CAST "Shift_JIS", NULL); 1085 break; 1086 case XML_CHAR_ENCODING_EUC_JP: 1087 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1088 "encoding not supported %s\n", 1089 BAD_CAST "EUC-JP", NULL); 1090 break; 1091 default: 1092 break; 1093 } 1094 } 1095 /* 1096 * TODO: We could recover from errors in external entites if we 1097 * didn't stop the parser. But most callers of this function don't 1098 * check the return value. 1099 */ 1100 if (handler == NULL) { 1101 xmlStopParser(ctxt); 1102 return(-1); 1103 } 1104 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1105 ret = xmlSwitchToEncodingInt(ctxt, handler, len); 1106 if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) { 1107 /* 1108 * on encoding conversion errors, stop the parser 1109 */ 1110 xmlStopParser(ctxt); 1111 ctxt->errNo = XML_I18N_CONV_FAILED; 1112 } 1113 return(ret); 1114 } 1115 1116 /** 1117 * xmlSwitchInputEncoding: 1118 * @ctxt: the parser context 1119 * @input: the input stream 1120 * @handler: the encoding handler 1121 * @len: the number of bytes to convert for the first line or -1 1122 * 1123 * change the input functions when discovering the character encoding 1124 * of a given entity. 1125 * 1126 * Returns 0 in case of success, -1 otherwise 1127 */ 1128 static int 1129 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1130 xmlCharEncodingHandlerPtr handler, int len) 1131 { 1132 int nbchars; 1133 1134 if (handler == NULL) 1135 return (-1); 1136 if (input == NULL) 1137 return (-1); 1138 if (input->buf != NULL) { 1139 if (input->buf->encoder != NULL) { 1140 /* 1141 * Check in case the auto encoding detetection triggered 1142 * in already. 1143 */ 1144 if (input->buf->encoder == handler) 1145 return (0); 1146 1147 /* 1148 * "UTF-16" can be used for both LE and BE 1149 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1150 BAD_CAST "UTF-16", 6)) && 1151 (!xmlStrncmp(BAD_CAST handler->name, 1152 BAD_CAST "UTF-16", 6))) { 1153 return(0); 1154 } 1155 */ 1156 1157 /* 1158 * Note: this is a bit dangerous, but that's what it 1159 * takes to use nearly compatible signature for different 1160 * encodings. 1161 */ 1162 xmlCharEncCloseFunc(input->buf->encoder); 1163 input->buf->encoder = handler; 1164 return (0); 1165 } 1166 input->buf->encoder = handler; 1167 1168 /* 1169 * Is there already some content down the pipe to convert ? 1170 */ 1171 if (xmlBufIsEmpty(input->buf->buffer) == 0) { 1172 int processed; 1173 unsigned int use; 1174 1175 /* 1176 * Specific handling of the Byte Order Mark for 1177 * UTF-16 1178 */ 1179 if ((handler->name != NULL) && 1180 (!strcmp(handler->name, "UTF-16LE") || 1181 !strcmp(handler->name, "UTF-16")) && 1182 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1183 input->cur += 2; 1184 } 1185 if ((handler->name != NULL) && 1186 (!strcmp(handler->name, "UTF-16BE")) && 1187 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1188 input->cur += 2; 1189 } 1190 /* 1191 * Errata on XML-1.0 June 20 2001 1192 * Specific handling of the Byte Order Mark for 1193 * UTF-8 1194 */ 1195 if ((handler->name != NULL) && 1196 (!strcmp(handler->name, "UTF-8")) && 1197 (input->cur[0] == 0xEF) && 1198 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1199 input->cur += 3; 1200 } 1201 1202 /* 1203 * Shrink the current input buffer. 1204 * Move it as the raw buffer and create a new input buffer 1205 */ 1206 processed = input->cur - input->base; 1207 xmlBufShrink(input->buf->buffer, processed); 1208 input->buf->raw = input->buf->buffer; 1209 input->buf->buffer = xmlBufCreate(); 1210 input->buf->rawconsumed = processed; 1211 use = xmlBufUse(input->buf->raw); 1212 1213 if (ctxt->html) { 1214 /* 1215 * convert as much as possible of the buffer 1216 */ 1217 nbchars = xmlCharEncInput(input->buf, 1); 1218 } else { 1219 /* 1220 * convert just enough to get 1221 * '<?xml version="1.0" encoding="xxx"?>' 1222 * parsed with the autodetected encoding 1223 * into the parser reading buffer. 1224 */ 1225 nbchars = xmlCharEncFirstLineInput(input->buf, len); 1226 } 1227 xmlBufResetInput(input->buf->buffer, input); 1228 if (nbchars < 0) { 1229 xmlErrInternal(ctxt, 1230 "switching encoding: encoder error\n", 1231 NULL); 1232 return (-1); 1233 } 1234 input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); 1235 } 1236 return (0); 1237 } else if (input->length == 0) { 1238 /* 1239 * When parsing a static memory array one must know the 1240 * size to be able to convert the buffer. 1241 */ 1242 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1243 return (-1); 1244 } 1245 return (0); 1246 } 1247 1248 /** 1249 * xmlSwitchInputEncoding: 1250 * @ctxt: the parser context 1251 * @input: the input stream 1252 * @handler: the encoding handler 1253 * 1254 * change the input functions when discovering the character encoding 1255 * of a given entity. 1256 * 1257 * Returns 0 in case of success, -1 otherwise 1258 */ 1259 int 1260 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1261 xmlCharEncodingHandlerPtr handler) { 1262 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1263 } 1264 1265 /** 1266 * xmlSwitchToEncodingInt: 1267 * @ctxt: the parser context 1268 * @handler: the encoding handler 1269 * @len: the length to convert or -1 1270 * 1271 * change the input functions when discovering the character encoding 1272 * of a given entity, and convert only @len bytes of the output, this 1273 * is needed on auto detect to allows any declared encoding later to 1274 * convert the actual content after the xmlDecl 1275 * 1276 * Returns 0 in case of success, -1 otherwise 1277 */ 1278 static int 1279 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1280 xmlCharEncodingHandlerPtr handler, int len) { 1281 int ret = 0; 1282 1283 if (handler != NULL) { 1284 if (ctxt->input != NULL) { 1285 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1286 } else { 1287 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1288 NULL); 1289 return(-1); 1290 } 1291 /* 1292 * The parsing is now done in UTF8 natively 1293 */ 1294 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1295 } else 1296 return(-1); 1297 return(ret); 1298 } 1299 1300 /** 1301 * xmlSwitchToEncoding: 1302 * @ctxt: the parser context 1303 * @handler: the encoding handler 1304 * 1305 * change the input functions when discovering the character encoding 1306 * of a given entity. 1307 * 1308 * Returns 0 in case of success, -1 otherwise 1309 */ 1310 int 1311 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1312 { 1313 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1314 } 1315 1316 /************************************************************************ 1317 * * 1318 * Commodity functions to handle entities processing * 1319 * * 1320 ************************************************************************/ 1321 1322 /** 1323 * xmlFreeInputStream: 1324 * @input: an xmlParserInputPtr 1325 * 1326 * Free up an input stream. 1327 */ 1328 void 1329 xmlFreeInputStream(xmlParserInputPtr input) { 1330 if (input == NULL) return; 1331 1332 if (input->filename != NULL) xmlFree((char *) input->filename); 1333 if (input->directory != NULL) xmlFree((char *) input->directory); 1334 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1335 if (input->version != NULL) xmlFree((char *) input->version); 1336 if ((input->free != NULL) && (input->base != NULL)) 1337 input->free((xmlChar *) input->base); 1338 if (input->buf != NULL) 1339 xmlFreeParserInputBuffer(input->buf); 1340 xmlFree(input); 1341 } 1342 1343 /** 1344 * xmlNewInputStream: 1345 * @ctxt: an XML parser context 1346 * 1347 * Create a new input stream structure. 1348 * 1349 * Returns the new input stream or NULL 1350 */ 1351 xmlParserInputPtr 1352 xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1353 xmlParserInputPtr input; 1354 1355 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1356 if (input == NULL) { 1357 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1358 return(NULL); 1359 } 1360 memset(input, 0, sizeof(xmlParserInput)); 1361 input->line = 1; 1362 input->col = 1; 1363 input->standalone = -1; 1364 1365 /* 1366 * If the context is NULL the id cannot be initialized, but that 1367 * should not happen while parsing which is the situation where 1368 * the id is actually needed. 1369 */ 1370 if (ctxt != NULL) 1371 input->id = ctxt->input_id++; 1372 1373 return(input); 1374 } 1375 1376 /** 1377 * xmlNewIOInputStream: 1378 * @ctxt: an XML parser context 1379 * @input: an I/O Input 1380 * @enc: the charset encoding if known 1381 * 1382 * Create a new input stream structure encapsulating the @input into 1383 * a stream suitable for the parser. 1384 * 1385 * Returns the new input stream or NULL 1386 */ 1387 xmlParserInputPtr 1388 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1389 xmlCharEncoding enc) { 1390 xmlParserInputPtr inputStream; 1391 1392 if (input == NULL) return(NULL); 1393 if (xmlParserDebugEntities) 1394 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1395 inputStream = xmlNewInputStream(ctxt); 1396 if (inputStream == NULL) { 1397 return(NULL); 1398 } 1399 inputStream->filename = NULL; 1400 inputStream->buf = input; 1401 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1402 1403 if (enc != XML_CHAR_ENCODING_NONE) { 1404 xmlSwitchEncoding(ctxt, enc); 1405 } 1406 1407 return(inputStream); 1408 } 1409 1410 /** 1411 * xmlNewEntityInputStream: 1412 * @ctxt: an XML parser context 1413 * @entity: an Entity pointer 1414 * 1415 * Create a new input stream based on an xmlEntityPtr 1416 * 1417 * Returns the new input stream or NULL 1418 */ 1419 xmlParserInputPtr 1420 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1421 xmlParserInputPtr input; 1422 1423 if (entity == NULL) { 1424 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1425 NULL); 1426 return(NULL); 1427 } 1428 if (xmlParserDebugEntities) 1429 xmlGenericError(xmlGenericErrorContext, 1430 "new input from entity: %s\n", entity->name); 1431 if (entity->content == NULL) { 1432 switch (entity->etype) { 1433 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1434 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1435 entity->name); 1436 break; 1437 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1438 case XML_EXTERNAL_PARAMETER_ENTITY: 1439 return(xmlLoadExternalEntity((char *) entity->URI, 1440 (char *) entity->ExternalID, ctxt)); 1441 case XML_INTERNAL_GENERAL_ENTITY: 1442 xmlErrInternal(ctxt, 1443 "Internal entity %s without content !\n", 1444 entity->name); 1445 break; 1446 case XML_INTERNAL_PARAMETER_ENTITY: 1447 xmlErrInternal(ctxt, 1448 "Internal parameter entity %s without content !\n", 1449 entity->name); 1450 break; 1451 case XML_INTERNAL_PREDEFINED_ENTITY: 1452 xmlErrInternal(ctxt, 1453 "Predefined entity %s without content !\n", 1454 entity->name); 1455 break; 1456 } 1457 return(NULL); 1458 } 1459 input = xmlNewInputStream(ctxt); 1460 if (input == NULL) { 1461 return(NULL); 1462 } 1463 if (entity->URI != NULL) 1464 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1465 input->base = entity->content; 1466 if (entity->length == 0) 1467 entity->length = xmlStrlen(entity->content); 1468 input->cur = entity->content; 1469 input->length = entity->length; 1470 input->end = &entity->content[input->length]; 1471 return(input); 1472 } 1473 1474 /** 1475 * xmlNewStringInputStream: 1476 * @ctxt: an XML parser context 1477 * @buffer: an memory buffer 1478 * 1479 * Create a new input stream based on a memory buffer. 1480 * Returns the new input stream 1481 */ 1482 xmlParserInputPtr 1483 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1484 xmlParserInputPtr input; 1485 1486 if (buffer == NULL) { 1487 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1488 NULL); 1489 return(NULL); 1490 } 1491 if (xmlParserDebugEntities) 1492 xmlGenericError(xmlGenericErrorContext, 1493 "new fixed input: %.30s\n", buffer); 1494 input = xmlNewInputStream(ctxt); 1495 if (input == NULL) { 1496 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1497 return(NULL); 1498 } 1499 input->base = buffer; 1500 input->cur = buffer; 1501 input->length = xmlStrlen(buffer); 1502 input->end = &buffer[input->length]; 1503 return(input); 1504 } 1505 1506 /** 1507 * xmlNewInputFromFile: 1508 * @ctxt: an XML parser context 1509 * @filename: the filename to use as entity 1510 * 1511 * Create a new input stream based on a file or an URL. 1512 * 1513 * Returns the new input stream or NULL in case of error 1514 */ 1515 xmlParserInputPtr 1516 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1517 xmlParserInputBufferPtr buf; 1518 xmlParserInputPtr inputStream; 1519 char *directory = NULL; 1520 xmlChar *URI = NULL; 1521 1522 if (xmlParserDebugEntities) 1523 xmlGenericError(xmlGenericErrorContext, 1524 "new input from file: %s\n", filename); 1525 if (ctxt == NULL) return(NULL); 1526 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1527 if (buf == NULL) { 1528 if (filename == NULL) 1529 __xmlLoaderErr(ctxt, 1530 "failed to load external entity: NULL filename \n", 1531 NULL); 1532 else 1533 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1534 (const char *) filename); 1535 return(NULL); 1536 } 1537 1538 inputStream = xmlNewInputStream(ctxt); 1539 if (inputStream == NULL) 1540 return(NULL); 1541 1542 inputStream->buf = buf; 1543 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1544 if (inputStream == NULL) 1545 return(NULL); 1546 1547 if (inputStream->filename == NULL) 1548 URI = xmlStrdup((xmlChar *) filename); 1549 else 1550 URI = xmlStrdup((xmlChar *) inputStream->filename); 1551 directory = xmlParserGetDirectory((const char *) URI); 1552 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1553 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1554 if (URI != NULL) xmlFree((char *) URI); 1555 inputStream->directory = directory; 1556 1557 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1558 if ((ctxt->directory == NULL) && (directory != NULL)) 1559 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1560 return(inputStream); 1561 } 1562 1563 /************************************************************************ 1564 * * 1565 * Commodity functions to handle parser contexts * 1566 * * 1567 ************************************************************************/ 1568 1569 /** 1570 * xmlInitParserCtxt: 1571 * @ctxt: an XML parser context 1572 * 1573 * Initialize a parser context 1574 * 1575 * Returns 0 in case of success and -1 in case of error 1576 */ 1577 1578 int 1579 xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1580 { 1581 xmlParserInputPtr input; 1582 1583 if(ctxt==NULL) { 1584 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1585 return(-1); 1586 } 1587 1588 xmlDefaultSAXHandlerInit(); 1589 1590 if (ctxt->dict == NULL) 1591 ctxt->dict = xmlDictCreate(); 1592 if (ctxt->dict == NULL) { 1593 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1594 return(-1); 1595 } 1596 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1597 1598 if (ctxt->sax == NULL) 1599 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1600 if (ctxt->sax == NULL) { 1601 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1602 return(-1); 1603 } 1604 else 1605 xmlSAXVersion(ctxt->sax, 2); 1606 1607 ctxt->maxatts = 0; 1608 ctxt->atts = NULL; 1609 /* Allocate the Input stack */ 1610 if (ctxt->inputTab == NULL) { 1611 ctxt->inputTab = (xmlParserInputPtr *) 1612 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1613 ctxt->inputMax = 5; 1614 } 1615 if (ctxt->inputTab == NULL) { 1616 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1617 ctxt->inputNr = 0; 1618 ctxt->inputMax = 0; 1619 ctxt->input = NULL; 1620 return(-1); 1621 } 1622 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1623 xmlFreeInputStream(input); 1624 } 1625 ctxt->inputNr = 0; 1626 ctxt->input = NULL; 1627 1628 ctxt->version = NULL; 1629 ctxt->encoding = NULL; 1630 ctxt->standalone = -1; 1631 ctxt->hasExternalSubset = 0; 1632 ctxt->hasPErefs = 0; 1633 ctxt->html = 0; 1634 ctxt->external = 0; 1635 ctxt->instate = XML_PARSER_START; 1636 ctxt->token = 0; 1637 ctxt->directory = NULL; 1638 1639 /* Allocate the Node stack */ 1640 if (ctxt->nodeTab == NULL) { 1641 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1642 ctxt->nodeMax = 10; 1643 } 1644 if (ctxt->nodeTab == NULL) { 1645 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1646 ctxt->nodeNr = 0; 1647 ctxt->nodeMax = 0; 1648 ctxt->node = NULL; 1649 ctxt->inputNr = 0; 1650 ctxt->inputMax = 0; 1651 ctxt->input = NULL; 1652 return(-1); 1653 } 1654 ctxt->nodeNr = 0; 1655 ctxt->node = NULL; 1656 1657 /* Allocate the Name stack */ 1658 if (ctxt->nameTab == NULL) { 1659 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1660 ctxt->nameMax = 10; 1661 } 1662 if (ctxt->nameTab == NULL) { 1663 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1664 ctxt->nodeNr = 0; 1665 ctxt->nodeMax = 0; 1666 ctxt->node = NULL; 1667 ctxt->inputNr = 0; 1668 ctxt->inputMax = 0; 1669 ctxt->input = NULL; 1670 ctxt->nameNr = 0; 1671 ctxt->nameMax = 0; 1672 ctxt->name = NULL; 1673 return(-1); 1674 } 1675 ctxt->nameNr = 0; 1676 ctxt->name = NULL; 1677 1678 /* Allocate the space stack */ 1679 if (ctxt->spaceTab == NULL) { 1680 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1681 ctxt->spaceMax = 10; 1682 } 1683 if (ctxt->spaceTab == NULL) { 1684 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1685 ctxt->nodeNr = 0; 1686 ctxt->nodeMax = 0; 1687 ctxt->node = NULL; 1688 ctxt->inputNr = 0; 1689 ctxt->inputMax = 0; 1690 ctxt->input = NULL; 1691 ctxt->nameNr = 0; 1692 ctxt->nameMax = 0; 1693 ctxt->name = NULL; 1694 ctxt->spaceNr = 0; 1695 ctxt->spaceMax = 0; 1696 ctxt->space = NULL; 1697 return(-1); 1698 } 1699 ctxt->spaceNr = 1; 1700 ctxt->spaceMax = 10; 1701 ctxt->spaceTab[0] = -1; 1702 ctxt->space = &ctxt->spaceTab[0]; 1703 ctxt->userData = ctxt; 1704 ctxt->myDoc = NULL; 1705 ctxt->wellFormed = 1; 1706 ctxt->nsWellFormed = 1; 1707 ctxt->valid = 1; 1708 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1709 if (ctxt->loadsubset) { 1710 ctxt->options |= XML_PARSE_DTDLOAD; 1711 } 1712 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1713 ctxt->pedantic = xmlPedanticParserDefaultValue; 1714 if (ctxt->pedantic) { 1715 ctxt->options |= XML_PARSE_PEDANTIC; 1716 } 1717 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1718 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1719 if (ctxt->keepBlanks == 0) { 1720 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1721 ctxt->options |= XML_PARSE_NOBLANKS; 1722 } 1723 1724 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1725 ctxt->vctxt.userData = ctxt; 1726 ctxt->vctxt.error = xmlParserValidityError; 1727 ctxt->vctxt.warning = xmlParserValidityWarning; 1728 if (ctxt->validate) { 1729 if (xmlGetWarningsDefaultValue == 0) 1730 ctxt->vctxt.warning = NULL; 1731 else 1732 ctxt->vctxt.warning = xmlParserValidityWarning; 1733 ctxt->vctxt.nodeMax = 0; 1734 ctxt->options |= XML_PARSE_DTDVALID; 1735 } 1736 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1737 if (ctxt->replaceEntities) { 1738 ctxt->options |= XML_PARSE_NOENT; 1739 } 1740 ctxt->record_info = 0; 1741 ctxt->nbChars = 0; 1742 ctxt->checkIndex = 0; 1743 ctxt->inSubset = 0; 1744 ctxt->errNo = XML_ERR_OK; 1745 ctxt->depth = 0; 1746 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1747 ctxt->catalogs = NULL; 1748 ctxt->nbentities = 0; 1749 ctxt->sizeentities = 0; 1750 ctxt->sizeentcopy = 0; 1751 ctxt->input_id = 1; 1752 xmlInitNodeInfoSeq(&ctxt->node_seq); 1753 return(0); 1754 } 1755 1756 /** 1757 * xmlFreeParserCtxt: 1758 * @ctxt: an XML parser context 1759 * 1760 * Free all the memory used by a parser context. However the parsed 1761 * document in ctxt->myDoc is not freed. 1762 */ 1763 1764 void 1765 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1766 { 1767 xmlParserInputPtr input; 1768 1769 if (ctxt == NULL) return; 1770 1771 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1772 xmlFreeInputStream(input); 1773 } 1774 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1775 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1776 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1777 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1778 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1779 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1780 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1781 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1782 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1783 #ifdef LIBXML_SAX1_ENABLED 1784 if ((ctxt->sax != NULL) && 1785 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1786 #else 1787 if (ctxt->sax != NULL) 1788 #endif /* LIBXML_SAX1_ENABLED */ 1789 xmlFree(ctxt->sax); 1790 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1791 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1792 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1793 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1794 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1795 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1796 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1797 if (ctxt->attsDefault != NULL) 1798 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1799 if (ctxt->attsSpecial != NULL) 1800 xmlHashFree(ctxt->attsSpecial, NULL); 1801 if (ctxt->freeElems != NULL) { 1802 xmlNodePtr cur, next; 1803 1804 cur = ctxt->freeElems; 1805 while (cur != NULL) { 1806 next = cur->next; 1807 xmlFree(cur); 1808 cur = next; 1809 } 1810 } 1811 if (ctxt->freeAttrs != NULL) { 1812 xmlAttrPtr cur, next; 1813 1814 cur = ctxt->freeAttrs; 1815 while (cur != NULL) { 1816 next = cur->next; 1817 xmlFree(cur); 1818 cur = next; 1819 } 1820 } 1821 /* 1822 * cleanup the error strings 1823 */ 1824 if (ctxt->lastError.message != NULL) 1825 xmlFree(ctxt->lastError.message); 1826 if (ctxt->lastError.file != NULL) 1827 xmlFree(ctxt->lastError.file); 1828 if (ctxt->lastError.str1 != NULL) 1829 xmlFree(ctxt->lastError.str1); 1830 if (ctxt->lastError.str2 != NULL) 1831 xmlFree(ctxt->lastError.str2); 1832 if (ctxt->lastError.str3 != NULL) 1833 xmlFree(ctxt->lastError.str3); 1834 1835 #ifdef LIBXML_CATALOG_ENABLED 1836 if (ctxt->catalogs != NULL) 1837 xmlCatalogFreeLocal(ctxt->catalogs); 1838 #endif 1839 xmlFree(ctxt); 1840 } 1841 1842 /** 1843 * xmlNewParserCtxt: 1844 * 1845 * Allocate and initialize a new parser context. 1846 * 1847 * Returns the xmlParserCtxtPtr or NULL 1848 */ 1849 1850 xmlParserCtxtPtr 1851 xmlNewParserCtxt(void) 1852 { 1853 xmlParserCtxtPtr ctxt; 1854 1855 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1856 if (ctxt == NULL) { 1857 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1858 return(NULL); 1859 } 1860 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1861 if (xmlInitParserCtxt(ctxt) < 0) { 1862 xmlFreeParserCtxt(ctxt); 1863 return(NULL); 1864 } 1865 return(ctxt); 1866 } 1867 1868 /************************************************************************ 1869 * * 1870 * Handling of node informations * 1871 * * 1872 ************************************************************************/ 1873 1874 /** 1875 * xmlClearParserCtxt: 1876 * @ctxt: an XML parser context 1877 * 1878 * Clear (release owned resources) and reinitialize a parser context 1879 */ 1880 1881 void 1882 xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1883 { 1884 if (ctxt==NULL) 1885 return; 1886 xmlClearNodeInfoSeq(&ctxt->node_seq); 1887 xmlCtxtReset(ctxt); 1888 } 1889 1890 1891 /** 1892 * xmlParserFindNodeInfo: 1893 * @ctx: an XML parser context 1894 * @node: an XML node within the tree 1895 * 1896 * Find the parser node info struct for a given node 1897 * 1898 * Returns an xmlParserNodeInfo block pointer or NULL 1899 */ 1900 const xmlParserNodeInfo * 1901 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1902 { 1903 unsigned long pos; 1904 1905 if ((ctx == NULL) || (node == NULL)) 1906 return (NULL); 1907 /* Find position where node should be at */ 1908 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1909 if (pos < ctx->node_seq.length 1910 && ctx->node_seq.buffer[pos].node == node) 1911 return &ctx->node_seq.buffer[pos]; 1912 else 1913 return NULL; 1914 } 1915 1916 1917 /** 1918 * xmlInitNodeInfoSeq: 1919 * @seq: a node info sequence pointer 1920 * 1921 * -- Initialize (set to initial state) node info sequence 1922 */ 1923 void 1924 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1925 { 1926 if (seq == NULL) 1927 return; 1928 seq->length = 0; 1929 seq->maximum = 0; 1930 seq->buffer = NULL; 1931 } 1932 1933 /** 1934 * xmlClearNodeInfoSeq: 1935 * @seq: a node info sequence pointer 1936 * 1937 * -- Clear (release memory and reinitialize) node 1938 * info sequence 1939 */ 1940 void 1941 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1942 { 1943 if (seq == NULL) 1944 return; 1945 if (seq->buffer != NULL) 1946 xmlFree(seq->buffer); 1947 xmlInitNodeInfoSeq(seq); 1948 } 1949 1950 /** 1951 * xmlParserFindNodeInfoIndex: 1952 * @seq: a node info sequence pointer 1953 * @node: an XML node pointer 1954 * 1955 * 1956 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1957 * the given node is or should be at in a sorted sequence 1958 * 1959 * Returns a long indicating the position of the record 1960 */ 1961 unsigned long 1962 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1963 const xmlNodePtr node) 1964 { 1965 unsigned long upper, lower, middle; 1966 int found = 0; 1967 1968 if ((seq == NULL) || (node == NULL)) 1969 return ((unsigned long) -1); 1970 1971 /* Do a binary search for the key */ 1972 lower = 1; 1973 upper = seq->length; 1974 middle = 0; 1975 while (lower <= upper && !found) { 1976 middle = lower + (upper - lower) / 2; 1977 if (node == seq->buffer[middle - 1].node) 1978 found = 1; 1979 else if (node < seq->buffer[middle - 1].node) 1980 upper = middle - 1; 1981 else 1982 lower = middle + 1; 1983 } 1984 1985 /* Return position */ 1986 if (middle == 0 || seq->buffer[middle - 1].node < node) 1987 return middle; 1988 else 1989 return middle - 1; 1990 } 1991 1992 1993 /** 1994 * xmlParserAddNodeInfo: 1995 * @ctxt: an XML parser context 1996 * @info: a node info sequence pointer 1997 * 1998 * Insert node info record into the sorted sequence 1999 */ 2000 void 2001 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 2002 const xmlParserNodeInfoPtr info) 2003 { 2004 unsigned long pos; 2005 2006 if ((ctxt == NULL) || (info == NULL)) return; 2007 2008 /* Find pos and check to see if node is already in the sequence */ 2009 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2010 info->node); 2011 2012 if ((pos < ctxt->node_seq.length) && 2013 (ctxt->node_seq.buffer != NULL) && 2014 (ctxt->node_seq.buffer[pos].node == info->node)) { 2015 ctxt->node_seq.buffer[pos] = *info; 2016 } 2017 2018 /* Otherwise, we need to add new node to buffer */ 2019 else { 2020 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || 2021 (ctxt->node_seq.buffer == NULL)) { 2022 xmlParserNodeInfo *tmp_buffer; 2023 unsigned int byte_size; 2024 2025 if (ctxt->node_seq.maximum == 0) 2026 ctxt->node_seq.maximum = 2; 2027 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2028 (2 * ctxt->node_seq.maximum)); 2029 2030 if (ctxt->node_seq.buffer == NULL) 2031 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2032 else 2033 tmp_buffer = 2034 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2035 byte_size); 2036 2037 if (tmp_buffer == NULL) { 2038 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2039 return; 2040 } 2041 ctxt->node_seq.buffer = tmp_buffer; 2042 ctxt->node_seq.maximum *= 2; 2043 } 2044 2045 /* If position is not at end, move elements out of the way */ 2046 if (pos != ctxt->node_seq.length) { 2047 unsigned long i; 2048 2049 for (i = ctxt->node_seq.length; i > pos; i--) 2050 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2051 } 2052 2053 /* Copy element and increase length */ 2054 ctxt->node_seq.buffer[pos] = *info; 2055 ctxt->node_seq.length++; 2056 } 2057 } 2058 2059 /************************************************************************ 2060 * * 2061 * Defaults settings * 2062 * * 2063 ************************************************************************/ 2064 /** 2065 * xmlPedanticParserDefault: 2066 * @val: int 0 or 1 2067 * 2068 * Set and return the previous value for enabling pedantic warnings. 2069 * 2070 * Returns the last value for 0 for no substitution, 1 for substitution. 2071 */ 2072 2073 int 2074 xmlPedanticParserDefault(int val) { 2075 int old = xmlPedanticParserDefaultValue; 2076 2077 xmlPedanticParserDefaultValue = val; 2078 return(old); 2079 } 2080 2081 /** 2082 * xmlLineNumbersDefault: 2083 * @val: int 0 or 1 2084 * 2085 * Set and return the previous value for enabling line numbers in elements 2086 * contents. This may break on old application and is turned off by default. 2087 * 2088 * Returns the last value for 0 for no substitution, 1 for substitution. 2089 */ 2090 2091 int 2092 xmlLineNumbersDefault(int val) { 2093 int old = xmlLineNumbersDefaultValue; 2094 2095 xmlLineNumbersDefaultValue = val; 2096 return(old); 2097 } 2098 2099 /** 2100 * xmlSubstituteEntitiesDefault: 2101 * @val: int 0 or 1 2102 * 2103 * Set and return the previous value for default entity support. 2104 * Initially the parser always keep entity references instead of substituting 2105 * entity values in the output. This function has to be used to change the 2106 * default parser behavior 2107 * SAX::substituteEntities() has to be used for changing that on a file by 2108 * file basis. 2109 * 2110 * Returns the last value for 0 for no substitution, 1 for substitution. 2111 */ 2112 2113 int 2114 xmlSubstituteEntitiesDefault(int val) { 2115 int old = xmlSubstituteEntitiesDefaultValue; 2116 2117 xmlSubstituteEntitiesDefaultValue = val; 2118 return(old); 2119 } 2120 2121 /** 2122 * xmlKeepBlanksDefault: 2123 * @val: int 0 or 1 2124 * 2125 * Set and return the previous value for default blanks text nodes support. 2126 * The 1.x version of the parser used an heuristic to try to detect 2127 * ignorable white spaces. As a result the SAX callback was generating 2128 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2129 * using the DOM output text nodes containing those blanks were not generated. 2130 * The 2.x and later version will switch to the XML standard way and 2131 * ignorableWhitespace() are only generated when running the parser in 2132 * validating mode and when the current element doesn't allow CDATA or 2133 * mixed content. 2134 * This function is provided as a way to force the standard behavior 2135 * on 1.X libs and to switch back to the old mode for compatibility when 2136 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2137 * by using xmlIsBlankNode() commodity function to detect the "empty" 2138 * nodes generated. 2139 * This value also affect autogeneration of indentation when saving code 2140 * if blanks sections are kept, indentation is not generated. 2141 * 2142 * Returns the last value for 0 for no substitution, 1 for substitution. 2143 */ 2144 2145 int 2146 xmlKeepBlanksDefault(int val) { 2147 int old = xmlKeepBlanksDefaultValue; 2148 2149 xmlKeepBlanksDefaultValue = val; 2150 if (!val) xmlIndentTreeOutput = 1; 2151 return(old); 2152 } 2153 2154 #define bottom_parserInternals 2155 #include "elfgcchack.h" 2156