1 /* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10 #define IN_LIBXML 11 #include "libxml.h" 12 13 #if defined(_WIN32) && !defined (__CYGWIN__) 14 #define XML_DIR_SEP '\\' 15 #else 16 #define XML_DIR_SEP '/' 17 #endif 18 19 #include <string.h> 20 #ifdef HAVE_CTYPE_H 21 #include <ctype.h> 22 #endif 23 #ifdef HAVE_STDLIB_H 24 #include <stdlib.h> 25 #endif 26 #ifdef HAVE_SYS_STAT_H 27 #include <sys/stat.h> 28 #endif 29 #ifdef HAVE_FCNTL_H 30 #include <fcntl.h> 31 #endif 32 #ifdef HAVE_UNISTD_H 33 #include <unistd.h> 34 #endif 35 #ifdef LIBXML_ZLIB_ENABLED 36 #include <zlib.h> 37 #endif 38 39 #include <libxml/xmlmemory.h> 40 #include <libxml/tree.h> 41 #include <libxml/parser.h> 42 #include <libxml/parserInternals.h> 43 #include <libxml/valid.h> 44 #include <libxml/entities.h> 45 #include <libxml/xmlerror.h> 46 #include <libxml/encoding.h> 47 #include <libxml/valid.h> 48 #include <libxml/xmlIO.h> 49 #include <libxml/uri.h> 50 #include <libxml/dict.h> 51 #include <libxml/SAX.h> 52 #ifdef LIBXML_CATALOG_ENABLED 53 #include <libxml/catalog.h> 54 #endif 55 #include <libxml/globals.h> 56 #include <libxml/chvalid.h> 57 58 #define CUR(ctxt) ctxt->input->cur 59 #define END(ctxt) ctxt->input->end 60 #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) 61 62 #include "buf.h" 63 #include "enc.h" 64 65 /* 66 * Various global defaults for parsing 67 */ 68 69 /** 70 * xmlCheckVersion: 71 * @version: the include version number 72 * 73 * check the compiled lib version against the include one. 74 * This can warn or immediately kill the application 75 */ 76 void 77 xmlCheckVersion(int version) { 78 int myversion = (int) LIBXML_VERSION; 79 80 xmlInitParser(); 81 82 if ((myversion / 10000) != (version / 10000)) { 83 xmlGenericError(xmlGenericErrorContext, 84 "Fatal: program compiled against libxml %d using libxml %d\n", 85 (version / 10000), (myversion / 10000)); 86 fprintf(stderr, 87 "Fatal: program compiled against libxml %d using libxml %d\n", 88 (version / 10000), (myversion / 10000)); 89 } 90 if ((myversion / 100) < (version / 100)) { 91 xmlGenericError(xmlGenericErrorContext, 92 "Warning: program compiled against libxml %d using older %d\n", 93 (version / 100), (myversion / 100)); 94 } 95 } 96 97 98 /************************************************************************ 99 * * 100 * Some factorized error routines * 101 * * 102 ************************************************************************/ 103 104 105 /** 106 * xmlErrMemory: 107 * @ctxt: an XML parser context 108 * @extra: extra informations 109 * 110 * Handle a redefinition of attribute error 111 */ 112 void 113 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 114 { 115 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 116 (ctxt->instate == XML_PARSER_EOF)) 117 return; 118 if (ctxt != NULL) { 119 ctxt->errNo = XML_ERR_NO_MEMORY; 120 ctxt->instate = XML_PARSER_EOF; 121 ctxt->disableSAX = 1; 122 } 123 if (extra) 124 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 125 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 126 NULL, NULL, 0, 0, 127 "Memory allocation failed : %s\n", extra); 128 else 129 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 130 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 131 NULL, NULL, 0, 0, "Memory allocation failed\n"); 132 } 133 134 /** 135 * __xmlErrEncoding: 136 * @ctxt: an XML parser context 137 * @xmlerr: the error number 138 * @msg: the error message 139 * @str1: an string info 140 * @str2: an string info 141 * 142 * Handle an encoding error 143 */ 144 void 145 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 146 const char *msg, const xmlChar * str1, const xmlChar * str2) 147 { 148 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 149 (ctxt->instate == XML_PARSER_EOF)) 150 return; 151 if (ctxt != NULL) 152 ctxt->errNo = xmlerr; 153 __xmlRaiseError(NULL, NULL, NULL, 154 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 155 NULL, 0, (const char *) str1, (const char *) str2, 156 NULL, 0, 0, msg, str1, str2); 157 if (ctxt != NULL) { 158 ctxt->wellFormed = 0; 159 if (ctxt->recovery == 0) 160 ctxt->disableSAX = 1; 161 } 162 } 163 164 /** 165 * xmlErrInternal: 166 * @ctxt: an XML parser context 167 * @msg: the error message 168 * @str: error informations 169 * 170 * Handle an internal error 171 */ 172 static void LIBXML_ATTR_FORMAT(2,0) 173 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 174 { 175 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 176 (ctxt->instate == XML_PARSER_EOF)) 177 return; 178 if (ctxt != NULL) 179 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 180 __xmlRaiseError(NULL, NULL, NULL, 181 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 182 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 183 0, 0, msg, str); 184 if (ctxt != NULL) { 185 ctxt->wellFormed = 0; 186 if (ctxt->recovery == 0) 187 ctxt->disableSAX = 1; 188 } 189 } 190 191 /** 192 * xmlErrEncodingInt: 193 * @ctxt: an XML parser context 194 * @error: the error number 195 * @msg: the error message 196 * @val: an integer value 197 * 198 * n encoding error 199 */ 200 static void LIBXML_ATTR_FORMAT(3,0) 201 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 202 const char *msg, int val) 203 { 204 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 205 (ctxt->instate == XML_PARSER_EOF)) 206 return; 207 if (ctxt != NULL) 208 ctxt->errNo = error; 209 __xmlRaiseError(NULL, NULL, NULL, 210 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 211 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 212 if (ctxt != NULL) { 213 ctxt->wellFormed = 0; 214 if (ctxt->recovery == 0) 215 ctxt->disableSAX = 1; 216 } 217 } 218 219 /** 220 * xmlIsLetter: 221 * @c: an unicode character (int) 222 * 223 * Check whether the character is allowed by the production 224 * [84] Letter ::= BaseChar | Ideographic 225 * 226 * Returns 0 if not, non-zero otherwise 227 */ 228 int 229 xmlIsLetter(int c) { 230 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 231 } 232 233 /************************************************************************ 234 * * 235 * Input handling functions for progressive parsing * 236 * * 237 ************************************************************************/ 238 239 /* #define DEBUG_INPUT */ 240 /* #define DEBUG_STACK */ 241 /* #define DEBUG_PUSH */ 242 243 244 /* we need to keep enough input to show errors in context */ 245 #define LINE_LEN 80 246 247 #ifdef DEBUG_INPUT 248 #define CHECK_BUFFER(in) check_buffer(in) 249 250 static 251 void check_buffer(xmlParserInputPtr in) { 252 if (in->base != xmlBufContent(in->buf->buffer)) { 253 xmlGenericError(xmlGenericErrorContext, 254 "xmlParserInput: base mismatch problem\n"); 255 } 256 if (in->cur < in->base) { 257 xmlGenericError(xmlGenericErrorContext, 258 "xmlParserInput: cur < base problem\n"); 259 } 260 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 261 xmlGenericError(xmlGenericErrorContext, 262 "xmlParserInput: cur > base + use problem\n"); 263 } 264 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 265 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 266 xmlBufUse(in->buf->buffer)); 267 } 268 269 #else 270 #define CHECK_BUFFER(in) 271 #endif 272 273 274 /** 275 * xmlParserInputRead: 276 * @in: an XML parser input 277 * @len: an indicative size for the lookahead 278 * 279 * This function was internal and is deprecated. 280 * 281 * Returns -1 as this is an error to use it. 282 */ 283 int 284 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 285 return(-1); 286 } 287 288 /** 289 * xmlParserInputGrow: 290 * @in: an XML parser input 291 * @len: an indicative size for the lookahead 292 * 293 * This function increase the input for the parser. It tries to 294 * preserve pointers to the input buffer, and keep already read data 295 * 296 * Returns the amount of char read, or -1 in case of error, 0 indicate the 297 * end of this entity 298 */ 299 int 300 xmlParserInputGrow(xmlParserInputPtr in, int len) { 301 int ret; 302 size_t indx; 303 const xmlChar *content; 304 305 if ((in == NULL) || (len < 0)) return(-1); 306 #ifdef DEBUG_INPUT 307 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 308 #endif 309 if (in->buf == NULL) return(-1); 310 if (in->base == NULL) return(-1); 311 if (in->cur == NULL) return(-1); 312 if (in->buf->buffer == NULL) return(-1); 313 314 CHECK_BUFFER(in); 315 316 indx = in->cur - in->base; 317 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 318 319 CHECK_BUFFER(in); 320 321 return(0); 322 } 323 if (in->buf->readcallback != NULL) { 324 ret = xmlParserInputBufferGrow(in->buf, len); 325 } else 326 return(0); 327 328 /* 329 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 330 * block, but we use it really as an integer to do some 331 * pointer arithmetic. Insure will raise it as a bug but in 332 * that specific case, that's not ! 333 */ 334 335 content = xmlBufContent(in->buf->buffer); 336 if (in->base != content) { 337 /* 338 * the buffer has been reallocated 339 */ 340 indx = in->cur - in->base; 341 in->base = content; 342 in->cur = &content[indx]; 343 } 344 in->end = xmlBufEnd(in->buf->buffer); 345 346 CHECK_BUFFER(in); 347 348 return(ret); 349 } 350 351 /** 352 * xmlParserInputShrink: 353 * @in: an XML parser input 354 * 355 * This function removes used input for the parser. 356 */ 357 void 358 xmlParserInputShrink(xmlParserInputPtr in) { 359 size_t used; 360 size_t ret; 361 size_t indx; 362 const xmlChar *content; 363 364 #ifdef DEBUG_INPUT 365 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 366 #endif 367 if (in == NULL) return; 368 if (in->buf == NULL) return; 369 if (in->base == NULL) return; 370 if (in->cur == NULL) return; 371 if (in->buf->buffer == NULL) return; 372 373 CHECK_BUFFER(in); 374 375 used = in->cur - xmlBufContent(in->buf->buffer); 376 /* 377 * Do not shrink on large buffers whose only a tiny fraction 378 * was consumed 379 */ 380 if (used > INPUT_CHUNK) { 381 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 382 if (ret > 0) { 383 in->cur -= ret; 384 in->consumed += ret; 385 } 386 in->end = xmlBufEnd(in->buf->buffer); 387 } 388 389 CHECK_BUFFER(in); 390 391 if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { 392 return; 393 } 394 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 395 content = xmlBufContent(in->buf->buffer); 396 if (in->base != content) { 397 /* 398 * the buffer has been reallocated 399 */ 400 indx = in->cur - in->base; 401 in->base = content; 402 in->cur = &content[indx]; 403 } 404 in->end = xmlBufEnd(in->buf->buffer); 405 406 CHECK_BUFFER(in); 407 } 408 409 /************************************************************************ 410 * * 411 * UTF8 character input and related functions * 412 * * 413 ************************************************************************/ 414 415 /** 416 * xmlNextChar: 417 * @ctxt: the XML parser context 418 * 419 * Skip to the next char input char. 420 */ 421 422 void 423 xmlNextChar(xmlParserCtxtPtr ctxt) 424 { 425 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 426 (ctxt->input == NULL)) 427 return; 428 429 if (!(VALID_CTXT(ctxt))) { 430 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); 431 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 432 xmlStopParser(ctxt); 433 return; 434 } 435 436 if ((*ctxt->input->cur == 0) && 437 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { 438 return; 439 } 440 441 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 442 const unsigned char *cur; 443 unsigned char c; 444 445 /* 446 * 2.11 End-of-Line Handling 447 * the literal two-character sequence "#xD#xA" or a standalone 448 * literal #xD, an XML processor must pass to the application 449 * the single character #xA. 450 */ 451 if (*(ctxt->input->cur) == '\n') { 452 ctxt->input->line++; ctxt->input->col = 1; 453 } else 454 ctxt->input->col++; 455 456 /* 457 * We are supposed to handle UTF8, check it's valid 458 * From rfc2044: encoding of the Unicode values on UTF-8: 459 * 460 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 461 * 0000 0000-0000 007F 0xxxxxxx 462 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 463 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 464 * 465 * Check for the 0x110000 limit too 466 */ 467 cur = ctxt->input->cur; 468 469 c = *cur; 470 if (c & 0x80) { 471 if (c == 0xC0) 472 goto encoding_error; 473 if (cur[1] == 0) { 474 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 475 cur = ctxt->input->cur; 476 } 477 if ((cur[1] & 0xc0) != 0x80) 478 goto encoding_error; 479 if ((c & 0xe0) == 0xe0) { 480 unsigned int val; 481 482 if (cur[2] == 0) { 483 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 484 cur = ctxt->input->cur; 485 } 486 if ((cur[2] & 0xc0) != 0x80) 487 goto encoding_error; 488 if ((c & 0xf0) == 0xf0) { 489 if (cur[3] == 0) { 490 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 491 cur = ctxt->input->cur; 492 } 493 if (((c & 0xf8) != 0xf0) || 494 ((cur[3] & 0xc0) != 0x80)) 495 goto encoding_error; 496 /* 4-byte code */ 497 ctxt->input->cur += 4; 498 val = (cur[0] & 0x7) << 18; 499 val |= (cur[1] & 0x3f) << 12; 500 val |= (cur[2] & 0x3f) << 6; 501 val |= cur[3] & 0x3f; 502 } else { 503 /* 3-byte code */ 504 ctxt->input->cur += 3; 505 val = (cur[0] & 0xf) << 12; 506 val |= (cur[1] & 0x3f) << 6; 507 val |= cur[2] & 0x3f; 508 } 509 if (((val > 0xd7ff) && (val < 0xe000)) || 510 ((val > 0xfffd) && (val < 0x10000)) || 511 (val >= 0x110000)) { 512 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 513 "Char 0x%X out of allowed range\n", 514 val); 515 } 516 } else 517 /* 2-byte code */ 518 ctxt->input->cur += 2; 519 } else 520 /* 1-byte code */ 521 ctxt->input->cur++; 522 523 ctxt->nbChars++; 524 } else { 525 /* 526 * Assume it's a fixed length encoding (1) with 527 * a compatible encoding for the ASCII set, since 528 * XML constructs only use < 128 chars 529 */ 530 531 if (*(ctxt->input->cur) == '\n') { 532 ctxt->input->line++; ctxt->input->col = 1; 533 } else 534 ctxt->input->col++; 535 ctxt->input->cur++; 536 ctxt->nbChars++; 537 } 538 if (*ctxt->input->cur == 0) 539 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 540 return; 541 encoding_error: 542 /* 543 * If we detect an UTF8 error that probably mean that the 544 * input encoding didn't get properly advertised in the 545 * declaration header. Report the error and switch the encoding 546 * to ISO-Latin-1 (if you don't like this policy, just declare the 547 * encoding !) 548 */ 549 if ((ctxt == NULL) || (ctxt->input == NULL) || 550 (ctxt->input->end - ctxt->input->cur < 4)) { 551 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 552 "Input is not proper UTF-8, indicate encoding !\n", 553 NULL, NULL); 554 } else { 555 char buffer[150]; 556 557 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 558 ctxt->input->cur[0], ctxt->input->cur[1], 559 ctxt->input->cur[2], ctxt->input->cur[3]); 560 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 561 "Input is not proper UTF-8, indicate encoding !\n%s", 562 BAD_CAST buffer, NULL); 563 } 564 ctxt->charset = XML_CHAR_ENCODING_8859_1; 565 ctxt->input->cur++; 566 return; 567 } 568 569 /** 570 * xmlCurrentChar: 571 * @ctxt: the XML parser context 572 * @len: pointer to the length of the char read 573 * 574 * The current char value, if using UTF-8 this may actually span multiple 575 * bytes in the input buffer. Implement the end of line normalization: 576 * 2.11 End-of-Line Handling 577 * Wherever an external parsed entity or the literal entity value 578 * of an internal parsed entity contains either the literal two-character 579 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 580 * must pass to the application the single character #xA. 581 * This behavior can conveniently be produced by normalizing all 582 * line breaks to #xA on input, before parsing.) 583 * 584 * Returns the current char value and its length 585 */ 586 587 int 588 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 589 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 590 if (ctxt->instate == XML_PARSER_EOF) 591 return(0); 592 593 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 594 *len = 1; 595 return((int) *ctxt->input->cur); 596 } 597 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 598 /* 599 * We are supposed to handle UTF8, check it's valid 600 * From rfc2044: encoding of the Unicode values on UTF-8: 601 * 602 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 603 * 0000 0000-0000 007F 0xxxxxxx 604 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 605 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 606 * 607 * Check for the 0x110000 limit too 608 */ 609 const unsigned char *cur = ctxt->input->cur; 610 unsigned char c; 611 unsigned int val; 612 613 c = *cur; 614 if (c & 0x80) { 615 if (((c & 0x40) == 0) || (c == 0xC0)) 616 goto encoding_error; 617 if (cur[1] == 0) { 618 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 619 cur = ctxt->input->cur; 620 } 621 if ((cur[1] & 0xc0) != 0x80) 622 goto encoding_error; 623 if ((c & 0xe0) == 0xe0) { 624 if (cur[2] == 0) { 625 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 626 cur = ctxt->input->cur; 627 } 628 if ((cur[2] & 0xc0) != 0x80) 629 goto encoding_error; 630 if ((c & 0xf0) == 0xf0) { 631 if (cur[3] == 0) { 632 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 633 cur = ctxt->input->cur; 634 } 635 if (((c & 0xf8) != 0xf0) || 636 ((cur[3] & 0xc0) != 0x80)) 637 goto encoding_error; 638 /* 4-byte code */ 639 *len = 4; 640 val = (cur[0] & 0x7) << 18; 641 val |= (cur[1] & 0x3f) << 12; 642 val |= (cur[2] & 0x3f) << 6; 643 val |= cur[3] & 0x3f; 644 if (val < 0x10000) 645 goto encoding_error; 646 } else { 647 /* 3-byte code */ 648 *len = 3; 649 val = (cur[0] & 0xf) << 12; 650 val |= (cur[1] & 0x3f) << 6; 651 val |= cur[2] & 0x3f; 652 if (val < 0x800) 653 goto encoding_error; 654 } 655 } else { 656 /* 2-byte code */ 657 *len = 2; 658 val = (cur[0] & 0x1f) << 6; 659 val |= cur[1] & 0x3f; 660 if (val < 0x80) 661 goto encoding_error; 662 } 663 if (!IS_CHAR(val)) { 664 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 665 "Char 0x%X out of allowed range\n", val); 666 } 667 return(val); 668 } else { 669 /* 1-byte code */ 670 *len = 1; 671 if (*ctxt->input->cur == 0) 672 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 673 if ((*ctxt->input->cur == 0) && 674 (ctxt->input->end > ctxt->input->cur)) { 675 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 676 "Char 0x0 out of allowed range\n", 0); 677 } 678 if (*ctxt->input->cur == 0xD) { 679 if (ctxt->input->cur[1] == 0xA) { 680 ctxt->nbChars++; 681 ctxt->input->cur++; 682 } 683 return(0xA); 684 } 685 return((int) *ctxt->input->cur); 686 } 687 } 688 /* 689 * Assume it's a fixed length encoding (1) with 690 * a compatible encoding for the ASCII set, since 691 * XML constructs only use < 128 chars 692 */ 693 *len = 1; 694 if (*ctxt->input->cur == 0xD) { 695 if (ctxt->input->cur[1] == 0xA) { 696 ctxt->nbChars++; 697 ctxt->input->cur++; 698 } 699 return(0xA); 700 } 701 return((int) *ctxt->input->cur); 702 encoding_error: 703 /* 704 * An encoding problem may arise from a truncated input buffer 705 * splitting a character in the middle. In that case do not raise 706 * an error but return 0 to indicate an end of stream problem 707 */ 708 if (ctxt->input->end - ctxt->input->cur < 4) { 709 *len = 0; 710 return(0); 711 } 712 713 /* 714 * If we detect an UTF8 error that probably mean that the 715 * input encoding didn't get properly advertised in the 716 * declaration header. Report the error and switch the encoding 717 * to ISO-Latin-1 (if you don't like this policy, just declare the 718 * encoding !) 719 */ 720 { 721 char buffer[150]; 722 723 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 724 ctxt->input->cur[0], ctxt->input->cur[1], 725 ctxt->input->cur[2], ctxt->input->cur[3]); 726 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 727 "Input is not proper UTF-8, indicate encoding !\n%s", 728 BAD_CAST buffer, NULL); 729 } 730 ctxt->charset = XML_CHAR_ENCODING_8859_1; 731 *len = 1; 732 return((int) *ctxt->input->cur); 733 } 734 735 /** 736 * xmlStringCurrentChar: 737 * @ctxt: the XML parser context 738 * @cur: pointer to the beginning of the char 739 * @len: pointer to the length of the char read 740 * 741 * The current char value, if using UTF-8 this may actually span multiple 742 * bytes in the input buffer. 743 * 744 * Returns the current char value and its length 745 */ 746 747 int 748 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 749 { 750 if ((len == NULL) || (cur == NULL)) return(0); 751 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 752 /* 753 * We are supposed to handle UTF8, check it's valid 754 * From rfc2044: encoding of the Unicode values on UTF-8: 755 * 756 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 757 * 0000 0000-0000 007F 0xxxxxxx 758 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 759 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 760 * 761 * Check for the 0x110000 limit too 762 */ 763 unsigned char c; 764 unsigned int val; 765 766 c = *cur; 767 if (c & 0x80) { 768 if ((cur[1] & 0xc0) != 0x80) 769 goto encoding_error; 770 if ((c & 0xe0) == 0xe0) { 771 772 if ((cur[2] & 0xc0) != 0x80) 773 goto encoding_error; 774 if ((c & 0xf0) == 0xf0) { 775 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 776 goto encoding_error; 777 /* 4-byte code */ 778 *len = 4; 779 val = (cur[0] & 0x7) << 18; 780 val |= (cur[1] & 0x3f) << 12; 781 val |= (cur[2] & 0x3f) << 6; 782 val |= cur[3] & 0x3f; 783 } else { 784 /* 3-byte code */ 785 *len = 3; 786 val = (cur[0] & 0xf) << 12; 787 val |= (cur[1] & 0x3f) << 6; 788 val |= cur[2] & 0x3f; 789 } 790 } else { 791 /* 2-byte code */ 792 *len = 2; 793 val = (cur[0] & 0x1f) << 6; 794 val |= cur[1] & 0x3f; 795 } 796 if (!IS_CHAR(val)) { 797 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 798 "Char 0x%X out of allowed range\n", val); 799 } 800 return (val); 801 } else { 802 /* 1-byte code */ 803 *len = 1; 804 return ((int) *cur); 805 } 806 } 807 /* 808 * Assume it's a fixed length encoding (1) with 809 * a compatible encoding for the ASCII set, since 810 * XML constructs only use < 128 chars 811 */ 812 *len = 1; 813 return ((int) *cur); 814 encoding_error: 815 816 /* 817 * An encoding problem may arise from a truncated input buffer 818 * splitting a character in the middle. In that case do not raise 819 * an error but return 0 to indicate an end of stream problem 820 */ 821 if ((ctxt == NULL) || (ctxt->input == NULL) || 822 (ctxt->input->end - ctxt->input->cur < 4)) { 823 *len = 0; 824 return(0); 825 } 826 /* 827 * If we detect an UTF8 error that probably mean that the 828 * input encoding didn't get properly advertised in the 829 * declaration header. Report the error and switch the encoding 830 * to ISO-Latin-1 (if you don't like this policy, just declare the 831 * encoding !) 832 */ 833 { 834 char buffer[150]; 835 836 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 837 ctxt->input->cur[0], ctxt->input->cur[1], 838 ctxt->input->cur[2], ctxt->input->cur[3]); 839 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 840 "Input is not proper UTF-8, indicate encoding !\n%s", 841 BAD_CAST buffer, NULL); 842 } 843 *len = 1; 844 return ((int) *cur); 845 } 846 847 /** 848 * xmlCopyCharMultiByte: 849 * @out: pointer to an array of xmlChar 850 * @val: the char value 851 * 852 * append the char value in the array 853 * 854 * Returns the number of xmlChar written 855 */ 856 int 857 xmlCopyCharMultiByte(xmlChar *out, int val) { 858 if (out == NULL) return(0); 859 /* 860 * We are supposed to handle UTF8, check it's valid 861 * From rfc2044: encoding of the Unicode values on UTF-8: 862 * 863 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 864 * 0000 0000-0000 007F 0xxxxxxx 865 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 866 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 867 */ 868 if (val >= 0x80) { 869 xmlChar *savedout = out; 870 int bits; 871 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 872 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 873 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 874 else { 875 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 876 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 877 val); 878 return(0); 879 } 880 for ( ; bits >= 0; bits-= 6) 881 *out++= ((val >> bits) & 0x3F) | 0x80 ; 882 return (out - savedout); 883 } 884 *out = (xmlChar) val; 885 return 1; 886 } 887 888 /** 889 * xmlCopyChar: 890 * @len: Ignored, compatibility 891 * @out: pointer to an array of xmlChar 892 * @val: the char value 893 * 894 * append the char value in the array 895 * 896 * Returns the number of xmlChar written 897 */ 898 899 int 900 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 901 if (out == NULL) return(0); 902 /* the len parameter is ignored */ 903 if (val >= 0x80) { 904 return(xmlCopyCharMultiByte (out, val)); 905 } 906 *out = (xmlChar) val; 907 return 1; 908 } 909 910 /************************************************************************ 911 * * 912 * Commodity functions to switch encodings * 913 * * 914 ************************************************************************/ 915 916 static int 917 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 918 xmlCharEncodingHandlerPtr handler, int len); 919 static int 920 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 921 xmlCharEncodingHandlerPtr handler, int len); 922 /** 923 * xmlSwitchEncoding: 924 * @ctxt: the parser context 925 * @enc: the encoding value (number) 926 * 927 * change the input functions when discovering the character encoding 928 * of a given entity. 929 * 930 * Returns 0 in case of success, -1 otherwise 931 */ 932 int 933 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 934 { 935 xmlCharEncodingHandlerPtr handler; 936 int len = -1; 937 int ret; 938 939 if (ctxt == NULL) return(-1); 940 switch (enc) { 941 case XML_CHAR_ENCODING_ERROR: 942 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 943 "encoding unknown\n", NULL, NULL); 944 return(-1); 945 case XML_CHAR_ENCODING_NONE: 946 /* let's assume it's UTF-8 without the XML decl */ 947 ctxt->charset = XML_CHAR_ENCODING_UTF8; 948 return(0); 949 case XML_CHAR_ENCODING_UTF8: 950 /* default encoding, no conversion should be needed */ 951 ctxt->charset = XML_CHAR_ENCODING_UTF8; 952 953 /* 954 * Errata on XML-1.0 June 20 2001 955 * Specific handling of the Byte Order Mark for 956 * UTF-8 957 */ 958 if ((ctxt->input != NULL) && 959 (ctxt->input->cur[0] == 0xEF) && 960 (ctxt->input->cur[1] == 0xBB) && 961 (ctxt->input->cur[2] == 0xBF)) { 962 ctxt->input->cur += 3; 963 } 964 return(0); 965 case XML_CHAR_ENCODING_UTF16LE: 966 case XML_CHAR_ENCODING_UTF16BE: 967 /*The raw input characters are encoded 968 *in UTF-16. As we expect this function 969 *to be called after xmlCharEncInFunc, we expect 970 *ctxt->input->cur to contain UTF-8 encoded characters. 971 *So the raw UTF16 Byte Order Mark 972 *has also been converted into 973 *an UTF-8 BOM. Let's skip that BOM. 974 */ 975 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 976 (ctxt->input->cur[0] == 0xEF) && 977 (ctxt->input->cur[1] == 0xBB) && 978 (ctxt->input->cur[2] == 0xBF)) { 979 ctxt->input->cur += 3; 980 } 981 len = 90; 982 break; 983 case XML_CHAR_ENCODING_UCS2: 984 len = 90; 985 break; 986 case XML_CHAR_ENCODING_UCS4BE: 987 case XML_CHAR_ENCODING_UCS4LE: 988 case XML_CHAR_ENCODING_UCS4_2143: 989 case XML_CHAR_ENCODING_UCS4_3412: 990 len = 180; 991 break; 992 case XML_CHAR_ENCODING_EBCDIC: 993 case XML_CHAR_ENCODING_8859_1: 994 case XML_CHAR_ENCODING_8859_2: 995 case XML_CHAR_ENCODING_8859_3: 996 case XML_CHAR_ENCODING_8859_4: 997 case XML_CHAR_ENCODING_8859_5: 998 case XML_CHAR_ENCODING_8859_6: 999 case XML_CHAR_ENCODING_8859_7: 1000 case XML_CHAR_ENCODING_8859_8: 1001 case XML_CHAR_ENCODING_8859_9: 1002 case XML_CHAR_ENCODING_ASCII: 1003 case XML_CHAR_ENCODING_2022_JP: 1004 case XML_CHAR_ENCODING_SHIFT_JIS: 1005 case XML_CHAR_ENCODING_EUC_JP: 1006 len = 45; 1007 break; 1008 } 1009 handler = xmlGetCharEncodingHandler(enc); 1010 if (handler == NULL) { 1011 /* 1012 * Default handlers. 1013 */ 1014 switch (enc) { 1015 case XML_CHAR_ENCODING_ASCII: 1016 /* default encoding, no conversion should be needed */ 1017 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1018 return(0); 1019 case XML_CHAR_ENCODING_UTF16LE: 1020 break; 1021 case XML_CHAR_ENCODING_UTF16BE: 1022 break; 1023 case XML_CHAR_ENCODING_UCS4LE: 1024 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1025 "encoding not supported %s\n", 1026 BAD_CAST "USC4 little endian", NULL); 1027 break; 1028 case XML_CHAR_ENCODING_UCS4BE: 1029 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1030 "encoding not supported %s\n", 1031 BAD_CAST "USC4 big endian", NULL); 1032 break; 1033 case XML_CHAR_ENCODING_EBCDIC: 1034 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1035 "encoding not supported %s\n", 1036 BAD_CAST "EBCDIC", NULL); 1037 break; 1038 case XML_CHAR_ENCODING_UCS4_2143: 1039 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1040 "encoding not supported %s\n", 1041 BAD_CAST "UCS4 2143", NULL); 1042 break; 1043 case XML_CHAR_ENCODING_UCS4_3412: 1044 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1045 "encoding not supported %s\n", 1046 BAD_CAST "UCS4 3412", NULL); 1047 break; 1048 case XML_CHAR_ENCODING_UCS2: 1049 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1050 "encoding not supported %s\n", 1051 BAD_CAST "UCS2", NULL); 1052 break; 1053 case XML_CHAR_ENCODING_8859_1: 1054 case XML_CHAR_ENCODING_8859_2: 1055 case XML_CHAR_ENCODING_8859_3: 1056 case XML_CHAR_ENCODING_8859_4: 1057 case XML_CHAR_ENCODING_8859_5: 1058 case XML_CHAR_ENCODING_8859_6: 1059 case XML_CHAR_ENCODING_8859_7: 1060 case XML_CHAR_ENCODING_8859_8: 1061 case XML_CHAR_ENCODING_8859_9: 1062 /* 1063 * We used to keep the internal content in the 1064 * document encoding however this turns being unmaintainable 1065 * So xmlGetCharEncodingHandler() will return non-null 1066 * values for this now. 1067 */ 1068 if ((ctxt->inputNr == 1) && 1069 (ctxt->encoding == NULL) && 1070 (ctxt->input != NULL) && 1071 (ctxt->input->encoding != NULL)) { 1072 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1073 } 1074 ctxt->charset = enc; 1075 return(0); 1076 case XML_CHAR_ENCODING_2022_JP: 1077 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1078 "encoding not supported %s\n", 1079 BAD_CAST "ISO-2022-JP", NULL); 1080 break; 1081 case XML_CHAR_ENCODING_SHIFT_JIS: 1082 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1083 "encoding not supported %s\n", 1084 BAD_CAST "Shift_JIS", NULL); 1085 break; 1086 case XML_CHAR_ENCODING_EUC_JP: 1087 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1088 "encoding not supported %s\n", 1089 BAD_CAST "EUC-JP", NULL); 1090 break; 1091 default: 1092 break; 1093 } 1094 } 1095 /* 1096 * TODO: We could recover from errors in external entities if we 1097 * didn't stop the parser. But most callers of this function don't 1098 * check the return value. 1099 */ 1100 if (handler == NULL) { 1101 xmlStopParser(ctxt); 1102 return(-1); 1103 } 1104 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1105 ret = xmlSwitchToEncodingInt(ctxt, handler, len); 1106 if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) { 1107 /* 1108 * on encoding conversion errors, stop the parser 1109 */ 1110 xmlStopParser(ctxt); 1111 ctxt->errNo = XML_I18N_CONV_FAILED; 1112 } 1113 return(ret); 1114 } 1115 1116 /** 1117 * xmlSwitchInputEncoding: 1118 * @ctxt: the parser context 1119 * @input: the input stream 1120 * @handler: the encoding handler 1121 * @len: the number of bytes to convert for the first line or -1 1122 * 1123 * change the input functions when discovering the character encoding 1124 * of a given entity. 1125 * 1126 * Returns 0 in case of success, -1 otherwise 1127 */ 1128 static int 1129 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1130 xmlCharEncodingHandlerPtr handler, int len) 1131 { 1132 int nbchars; 1133 1134 if (handler == NULL) 1135 return (-1); 1136 if (input == NULL) 1137 return (-1); 1138 if (input->buf != NULL) { 1139 if (input->buf->encoder != NULL) { 1140 /* 1141 * Check in case the auto encoding detection triggered 1142 * in already. 1143 */ 1144 if (input->buf->encoder == handler) 1145 return (0); 1146 1147 /* 1148 * "UTF-16" can be used for both LE and BE 1149 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1150 BAD_CAST "UTF-16", 6)) && 1151 (!xmlStrncmp(BAD_CAST handler->name, 1152 BAD_CAST "UTF-16", 6))) { 1153 return(0); 1154 } 1155 */ 1156 1157 /* 1158 * Note: this is a bit dangerous, but that's what it 1159 * takes to use nearly compatible signature for different 1160 * encodings. 1161 */ 1162 xmlCharEncCloseFunc(input->buf->encoder); 1163 input->buf->encoder = handler; 1164 return (0); 1165 } 1166 input->buf->encoder = handler; 1167 1168 /* 1169 * Is there already some content down the pipe to convert ? 1170 */ 1171 if (xmlBufIsEmpty(input->buf->buffer) == 0) { 1172 int processed; 1173 unsigned int use; 1174 1175 /* 1176 * Specific handling of the Byte Order Mark for 1177 * UTF-16 1178 */ 1179 if ((handler->name != NULL) && 1180 (!strcmp(handler->name, "UTF-16LE") || 1181 !strcmp(handler->name, "UTF-16")) && 1182 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1183 input->cur += 2; 1184 } 1185 if ((handler->name != NULL) && 1186 (!strcmp(handler->name, "UTF-16BE")) && 1187 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1188 input->cur += 2; 1189 } 1190 /* 1191 * Errata on XML-1.0 June 20 2001 1192 * Specific handling of the Byte Order Mark for 1193 * UTF-8 1194 */ 1195 if ((handler->name != NULL) && 1196 (!strcmp(handler->name, "UTF-8")) && 1197 (input->cur[0] == 0xEF) && 1198 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1199 input->cur += 3; 1200 } 1201 1202 /* 1203 * Shrink the current input buffer. 1204 * Move it as the raw buffer and create a new input buffer 1205 */ 1206 processed = input->cur - input->base; 1207 xmlBufShrink(input->buf->buffer, processed); 1208 input->buf->raw = input->buf->buffer; 1209 input->buf->buffer = xmlBufCreate(); 1210 input->buf->rawconsumed = processed; 1211 use = xmlBufUse(input->buf->raw); 1212 1213 if (ctxt->html) { 1214 /* 1215 * convert as much as possible of the buffer 1216 */ 1217 nbchars = xmlCharEncInput(input->buf, 1); 1218 } else { 1219 /* 1220 * convert just enough to get 1221 * '<?xml version="1.0" encoding="xxx"?>' 1222 * parsed with the autodetected encoding 1223 * into the parser reading buffer. 1224 */ 1225 nbchars = xmlCharEncFirstLineInput(input->buf, len); 1226 } 1227 xmlBufResetInput(input->buf->buffer, input); 1228 if (nbchars < 0) { 1229 xmlErrInternal(ctxt, 1230 "switching encoding: encoder error\n", 1231 NULL); 1232 return (-1); 1233 } 1234 input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); 1235 } 1236 return (0); 1237 } else if (input->length == 0) { 1238 /* 1239 * When parsing a static memory array one must know the 1240 * size to be able to convert the buffer. 1241 */ 1242 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1243 /* 1244 * Callers assume that the input buffer takes ownership of the 1245 * encoding handler. xmlCharEncCloseFunc frees unregistered 1246 * handlers and avoids a memory leak. 1247 */ 1248 xmlCharEncCloseFunc(handler); 1249 return (-1); 1250 } 1251 /* 1252 * We should actually raise an error here, see issue #34. 1253 */ 1254 xmlCharEncCloseFunc(handler); 1255 return (0); 1256 } 1257 1258 /** 1259 * xmlSwitchInputEncoding: 1260 * @ctxt: the parser context 1261 * @input: the input stream 1262 * @handler: the encoding handler 1263 * 1264 * change the input functions when discovering the character encoding 1265 * of a given entity. 1266 * 1267 * Returns 0 in case of success, -1 otherwise 1268 */ 1269 int 1270 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1271 xmlCharEncodingHandlerPtr handler) { 1272 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1273 } 1274 1275 /** 1276 * xmlSwitchToEncodingInt: 1277 * @ctxt: the parser context 1278 * @handler: the encoding handler 1279 * @len: the length to convert or -1 1280 * 1281 * change the input functions when discovering the character encoding 1282 * of a given entity, and convert only @len bytes of the output, this 1283 * is needed on auto detect to allows any declared encoding later to 1284 * convert the actual content after the xmlDecl 1285 * 1286 * Returns 0 in case of success, -1 otherwise 1287 */ 1288 static int 1289 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1290 xmlCharEncodingHandlerPtr handler, int len) { 1291 int ret = 0; 1292 1293 if (handler != NULL) { 1294 if (ctxt->input != NULL) { 1295 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1296 } else { 1297 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1298 NULL); 1299 return(-1); 1300 } 1301 /* 1302 * The parsing is now done in UTF8 natively 1303 */ 1304 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1305 } else 1306 return(-1); 1307 return(ret); 1308 } 1309 1310 /** 1311 * xmlSwitchToEncoding: 1312 * @ctxt: the parser context 1313 * @handler: the encoding handler 1314 * 1315 * change the input functions when discovering the character encoding 1316 * of a given entity. 1317 * 1318 * Returns 0 in case of success, -1 otherwise 1319 */ 1320 int 1321 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1322 { 1323 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1324 } 1325 1326 /************************************************************************ 1327 * * 1328 * Commodity functions to handle entities processing * 1329 * * 1330 ************************************************************************/ 1331 1332 /** 1333 * xmlFreeInputStream: 1334 * @input: an xmlParserInputPtr 1335 * 1336 * Free up an input stream. 1337 */ 1338 void 1339 xmlFreeInputStream(xmlParserInputPtr input) { 1340 if (input == NULL) return; 1341 1342 if (input->filename != NULL) xmlFree((char *) input->filename); 1343 if (input->directory != NULL) xmlFree((char *) input->directory); 1344 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1345 if (input->version != NULL) xmlFree((char *) input->version); 1346 if ((input->free != NULL) && (input->base != NULL)) 1347 input->free((xmlChar *) input->base); 1348 if (input->buf != NULL) 1349 xmlFreeParserInputBuffer(input->buf); 1350 xmlFree(input); 1351 } 1352 1353 /** 1354 * xmlNewInputStream: 1355 * @ctxt: an XML parser context 1356 * 1357 * Create a new input stream structure. 1358 * 1359 * Returns the new input stream or NULL 1360 */ 1361 xmlParserInputPtr 1362 xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1363 xmlParserInputPtr input; 1364 1365 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1366 if (input == NULL) { 1367 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1368 return(NULL); 1369 } 1370 memset(input, 0, sizeof(xmlParserInput)); 1371 input->line = 1; 1372 input->col = 1; 1373 input->standalone = -1; 1374 1375 /* 1376 * If the context is NULL the id cannot be initialized, but that 1377 * should not happen while parsing which is the situation where 1378 * the id is actually needed. 1379 */ 1380 if (ctxt != NULL) 1381 input->id = ctxt->input_id++; 1382 1383 return(input); 1384 } 1385 1386 /** 1387 * xmlNewIOInputStream: 1388 * @ctxt: an XML parser context 1389 * @input: an I/O Input 1390 * @enc: the charset encoding if known 1391 * 1392 * Create a new input stream structure encapsulating the @input into 1393 * a stream suitable for the parser. 1394 * 1395 * Returns the new input stream or NULL 1396 */ 1397 xmlParserInputPtr 1398 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1399 xmlCharEncoding enc) { 1400 xmlParserInputPtr inputStream; 1401 1402 if (input == NULL) return(NULL); 1403 if (xmlParserDebugEntities) 1404 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1405 inputStream = xmlNewInputStream(ctxt); 1406 if (inputStream == NULL) { 1407 return(NULL); 1408 } 1409 inputStream->filename = NULL; 1410 inputStream->buf = input; 1411 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1412 1413 if (enc != XML_CHAR_ENCODING_NONE) { 1414 xmlSwitchEncoding(ctxt, enc); 1415 } 1416 1417 return(inputStream); 1418 } 1419 1420 /** 1421 * xmlNewEntityInputStream: 1422 * @ctxt: an XML parser context 1423 * @entity: an Entity pointer 1424 * 1425 * Create a new input stream based on an xmlEntityPtr 1426 * 1427 * Returns the new input stream or NULL 1428 */ 1429 xmlParserInputPtr 1430 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1431 xmlParserInputPtr input; 1432 1433 if (entity == NULL) { 1434 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1435 NULL); 1436 return(NULL); 1437 } 1438 if (xmlParserDebugEntities) 1439 xmlGenericError(xmlGenericErrorContext, 1440 "new input from entity: %s\n", entity->name); 1441 if (entity->content == NULL) { 1442 switch (entity->etype) { 1443 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1444 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1445 entity->name); 1446 break; 1447 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1448 case XML_EXTERNAL_PARAMETER_ENTITY: 1449 return(xmlLoadExternalEntity((char *) entity->URI, 1450 (char *) entity->ExternalID, ctxt)); 1451 case XML_INTERNAL_GENERAL_ENTITY: 1452 xmlErrInternal(ctxt, 1453 "Internal entity %s without content !\n", 1454 entity->name); 1455 break; 1456 case XML_INTERNAL_PARAMETER_ENTITY: 1457 xmlErrInternal(ctxt, 1458 "Internal parameter entity %s without content !\n", 1459 entity->name); 1460 break; 1461 case XML_INTERNAL_PREDEFINED_ENTITY: 1462 xmlErrInternal(ctxt, 1463 "Predefined entity %s without content !\n", 1464 entity->name); 1465 break; 1466 } 1467 return(NULL); 1468 } 1469 input = xmlNewInputStream(ctxt); 1470 if (input == NULL) { 1471 return(NULL); 1472 } 1473 if (entity->URI != NULL) 1474 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1475 input->base = entity->content; 1476 if (entity->length == 0) 1477 entity->length = xmlStrlen(entity->content); 1478 input->cur = entity->content; 1479 input->length = entity->length; 1480 input->end = &entity->content[input->length]; 1481 return(input); 1482 } 1483 1484 /** 1485 * xmlNewStringInputStream: 1486 * @ctxt: an XML parser context 1487 * @buffer: an memory buffer 1488 * 1489 * Create a new input stream based on a memory buffer. 1490 * Returns the new input stream 1491 */ 1492 xmlParserInputPtr 1493 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1494 xmlParserInputPtr input; 1495 1496 if (buffer == NULL) { 1497 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1498 NULL); 1499 return(NULL); 1500 } 1501 if (xmlParserDebugEntities) 1502 xmlGenericError(xmlGenericErrorContext, 1503 "new fixed input: %.30s\n", buffer); 1504 input = xmlNewInputStream(ctxt); 1505 if (input == NULL) { 1506 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1507 return(NULL); 1508 } 1509 input->base = buffer; 1510 input->cur = buffer; 1511 input->length = xmlStrlen(buffer); 1512 input->end = &buffer[input->length]; 1513 return(input); 1514 } 1515 1516 /** 1517 * xmlNewInputFromFile: 1518 * @ctxt: an XML parser context 1519 * @filename: the filename to use as entity 1520 * 1521 * Create a new input stream based on a file or an URL. 1522 * 1523 * Returns the new input stream or NULL in case of error 1524 */ 1525 xmlParserInputPtr 1526 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1527 xmlParserInputBufferPtr buf; 1528 xmlParserInputPtr inputStream; 1529 char *directory = NULL; 1530 xmlChar *URI = NULL; 1531 1532 if (xmlParserDebugEntities) 1533 xmlGenericError(xmlGenericErrorContext, 1534 "new input from file: %s\n", filename); 1535 if (ctxt == NULL) return(NULL); 1536 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1537 if (buf == NULL) { 1538 if (filename == NULL) 1539 __xmlLoaderErr(ctxt, 1540 "failed to load external entity: NULL filename \n", 1541 NULL); 1542 else 1543 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1544 (const char *) filename); 1545 return(NULL); 1546 } 1547 1548 inputStream = xmlNewInputStream(ctxt); 1549 if (inputStream == NULL) 1550 return(NULL); 1551 1552 inputStream->buf = buf; 1553 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1554 if (inputStream == NULL) 1555 return(NULL); 1556 1557 if (inputStream->filename == NULL) 1558 URI = xmlStrdup((xmlChar *) filename); 1559 else 1560 URI = xmlStrdup((xmlChar *) inputStream->filename); 1561 directory = xmlParserGetDirectory((const char *) URI); 1562 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1563 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1564 if (URI != NULL) xmlFree((char *) URI); 1565 inputStream->directory = directory; 1566 1567 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1568 if ((ctxt->directory == NULL) && (directory != NULL)) 1569 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1570 return(inputStream); 1571 } 1572 1573 /************************************************************************ 1574 * * 1575 * Commodity functions to handle parser contexts * 1576 * * 1577 ************************************************************************/ 1578 1579 /** 1580 * xmlInitParserCtxt: 1581 * @ctxt: an XML parser context 1582 * 1583 * Initialize a parser context 1584 * 1585 * Returns 0 in case of success and -1 in case of error 1586 */ 1587 1588 int 1589 xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1590 { 1591 xmlParserInputPtr input; 1592 1593 if(ctxt==NULL) { 1594 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1595 return(-1); 1596 } 1597 1598 xmlDefaultSAXHandlerInit(); 1599 1600 if (ctxt->dict == NULL) 1601 ctxt->dict = xmlDictCreate(); 1602 if (ctxt->dict == NULL) { 1603 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1604 return(-1); 1605 } 1606 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1607 1608 if (ctxt->sax == NULL) 1609 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1610 if (ctxt->sax == NULL) { 1611 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1612 return(-1); 1613 } 1614 else 1615 xmlSAXVersion(ctxt->sax, 2); 1616 1617 ctxt->maxatts = 0; 1618 ctxt->atts = NULL; 1619 /* Allocate the Input stack */ 1620 if (ctxt->inputTab == NULL) { 1621 ctxt->inputTab = (xmlParserInputPtr *) 1622 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1623 ctxt->inputMax = 5; 1624 } 1625 if (ctxt->inputTab == NULL) { 1626 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1627 ctxt->inputNr = 0; 1628 ctxt->inputMax = 0; 1629 ctxt->input = NULL; 1630 return(-1); 1631 } 1632 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1633 xmlFreeInputStream(input); 1634 } 1635 ctxt->inputNr = 0; 1636 ctxt->input = NULL; 1637 1638 ctxt->version = NULL; 1639 ctxt->encoding = NULL; 1640 ctxt->standalone = -1; 1641 ctxt->hasExternalSubset = 0; 1642 ctxt->hasPErefs = 0; 1643 ctxt->html = 0; 1644 ctxt->external = 0; 1645 ctxt->instate = XML_PARSER_START; 1646 ctxt->token = 0; 1647 ctxt->directory = NULL; 1648 1649 /* Allocate the Node stack */ 1650 if (ctxt->nodeTab == NULL) { 1651 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1652 ctxt->nodeMax = 10; 1653 } 1654 if (ctxt->nodeTab == NULL) { 1655 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1656 ctxt->nodeNr = 0; 1657 ctxt->nodeMax = 0; 1658 ctxt->node = NULL; 1659 ctxt->inputNr = 0; 1660 ctxt->inputMax = 0; 1661 ctxt->input = NULL; 1662 return(-1); 1663 } 1664 ctxt->nodeNr = 0; 1665 ctxt->node = NULL; 1666 1667 /* Allocate the Name stack */ 1668 if (ctxt->nameTab == NULL) { 1669 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1670 ctxt->nameMax = 10; 1671 } 1672 if (ctxt->nameTab == NULL) { 1673 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1674 ctxt->nodeNr = 0; 1675 ctxt->nodeMax = 0; 1676 ctxt->node = NULL; 1677 ctxt->inputNr = 0; 1678 ctxt->inputMax = 0; 1679 ctxt->input = NULL; 1680 ctxt->nameNr = 0; 1681 ctxt->nameMax = 0; 1682 ctxt->name = NULL; 1683 return(-1); 1684 } 1685 ctxt->nameNr = 0; 1686 ctxt->name = NULL; 1687 1688 /* Allocate the space stack */ 1689 if (ctxt->spaceTab == NULL) { 1690 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1691 ctxt->spaceMax = 10; 1692 } 1693 if (ctxt->spaceTab == NULL) { 1694 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1695 ctxt->nodeNr = 0; 1696 ctxt->nodeMax = 0; 1697 ctxt->node = NULL; 1698 ctxt->inputNr = 0; 1699 ctxt->inputMax = 0; 1700 ctxt->input = NULL; 1701 ctxt->nameNr = 0; 1702 ctxt->nameMax = 0; 1703 ctxt->name = NULL; 1704 ctxt->spaceNr = 0; 1705 ctxt->spaceMax = 0; 1706 ctxt->space = NULL; 1707 return(-1); 1708 } 1709 ctxt->spaceNr = 1; 1710 ctxt->spaceMax = 10; 1711 ctxt->spaceTab[0] = -1; 1712 ctxt->space = &ctxt->spaceTab[0]; 1713 ctxt->userData = ctxt; 1714 ctxt->myDoc = NULL; 1715 ctxt->wellFormed = 1; 1716 ctxt->nsWellFormed = 1; 1717 ctxt->valid = 1; 1718 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1719 if (ctxt->loadsubset) { 1720 ctxt->options |= XML_PARSE_DTDLOAD; 1721 } 1722 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1723 ctxt->pedantic = xmlPedanticParserDefaultValue; 1724 if (ctxt->pedantic) { 1725 ctxt->options |= XML_PARSE_PEDANTIC; 1726 } 1727 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1728 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1729 if (ctxt->keepBlanks == 0) { 1730 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1731 ctxt->options |= XML_PARSE_NOBLANKS; 1732 } 1733 1734 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1735 ctxt->vctxt.userData = ctxt; 1736 ctxt->vctxt.error = xmlParserValidityError; 1737 ctxt->vctxt.warning = xmlParserValidityWarning; 1738 if (ctxt->validate) { 1739 if (xmlGetWarningsDefaultValue == 0) 1740 ctxt->vctxt.warning = NULL; 1741 else 1742 ctxt->vctxt.warning = xmlParserValidityWarning; 1743 ctxt->vctxt.nodeMax = 0; 1744 ctxt->options |= XML_PARSE_DTDVALID; 1745 } 1746 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1747 if (ctxt->replaceEntities) { 1748 ctxt->options |= XML_PARSE_NOENT; 1749 } 1750 ctxt->record_info = 0; 1751 ctxt->nbChars = 0; 1752 ctxt->checkIndex = 0; 1753 ctxt->inSubset = 0; 1754 ctxt->errNo = XML_ERR_OK; 1755 ctxt->depth = 0; 1756 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1757 ctxt->catalogs = NULL; 1758 ctxt->nbentities = 0; 1759 ctxt->sizeentities = 0; 1760 ctxt->sizeentcopy = 0; 1761 ctxt->input_id = 1; 1762 xmlInitNodeInfoSeq(&ctxt->node_seq); 1763 return(0); 1764 } 1765 1766 /** 1767 * xmlFreeParserCtxt: 1768 * @ctxt: an XML parser context 1769 * 1770 * Free all the memory used by a parser context. However the parsed 1771 * document in ctxt->myDoc is not freed. 1772 */ 1773 1774 void 1775 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1776 { 1777 xmlParserInputPtr input; 1778 1779 if (ctxt == NULL) return; 1780 1781 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1782 xmlFreeInputStream(input); 1783 } 1784 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1785 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1786 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1787 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1788 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1789 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1790 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1791 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1792 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1793 #ifdef LIBXML_SAX1_ENABLED 1794 if ((ctxt->sax != NULL) && 1795 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1796 #else 1797 if (ctxt->sax != NULL) 1798 #endif /* LIBXML_SAX1_ENABLED */ 1799 xmlFree(ctxt->sax); 1800 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1801 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1802 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1803 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1804 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1805 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1806 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1807 if (ctxt->attsDefault != NULL) 1808 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 1809 if (ctxt->attsSpecial != NULL) 1810 xmlHashFree(ctxt->attsSpecial, NULL); 1811 if (ctxt->freeElems != NULL) { 1812 xmlNodePtr cur, next; 1813 1814 cur = ctxt->freeElems; 1815 while (cur != NULL) { 1816 next = cur->next; 1817 xmlFree(cur); 1818 cur = next; 1819 } 1820 } 1821 if (ctxt->freeAttrs != NULL) { 1822 xmlAttrPtr cur, next; 1823 1824 cur = ctxt->freeAttrs; 1825 while (cur != NULL) { 1826 next = cur->next; 1827 xmlFree(cur); 1828 cur = next; 1829 } 1830 } 1831 /* 1832 * cleanup the error strings 1833 */ 1834 if (ctxt->lastError.message != NULL) 1835 xmlFree(ctxt->lastError.message); 1836 if (ctxt->lastError.file != NULL) 1837 xmlFree(ctxt->lastError.file); 1838 if (ctxt->lastError.str1 != NULL) 1839 xmlFree(ctxt->lastError.str1); 1840 if (ctxt->lastError.str2 != NULL) 1841 xmlFree(ctxt->lastError.str2); 1842 if (ctxt->lastError.str3 != NULL) 1843 xmlFree(ctxt->lastError.str3); 1844 1845 #ifdef LIBXML_CATALOG_ENABLED 1846 if (ctxt->catalogs != NULL) 1847 xmlCatalogFreeLocal(ctxt->catalogs); 1848 #endif 1849 xmlFree(ctxt); 1850 } 1851 1852 /** 1853 * xmlNewParserCtxt: 1854 * 1855 * Allocate and initialize a new parser context. 1856 * 1857 * Returns the xmlParserCtxtPtr or NULL 1858 */ 1859 1860 xmlParserCtxtPtr 1861 xmlNewParserCtxt(void) 1862 { 1863 xmlParserCtxtPtr ctxt; 1864 1865 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1866 if (ctxt == NULL) { 1867 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1868 return(NULL); 1869 } 1870 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1871 if (xmlInitParserCtxt(ctxt) < 0) { 1872 xmlFreeParserCtxt(ctxt); 1873 return(NULL); 1874 } 1875 return(ctxt); 1876 } 1877 1878 /************************************************************************ 1879 * * 1880 * Handling of node informations * 1881 * * 1882 ************************************************************************/ 1883 1884 /** 1885 * xmlClearParserCtxt: 1886 * @ctxt: an XML parser context 1887 * 1888 * Clear (release owned resources) and reinitialize a parser context 1889 */ 1890 1891 void 1892 xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1893 { 1894 if (ctxt==NULL) 1895 return; 1896 xmlClearNodeInfoSeq(&ctxt->node_seq); 1897 xmlCtxtReset(ctxt); 1898 } 1899 1900 1901 /** 1902 * xmlParserFindNodeInfo: 1903 * @ctx: an XML parser context 1904 * @node: an XML node within the tree 1905 * 1906 * Find the parser node info struct for a given node 1907 * 1908 * Returns an xmlParserNodeInfo block pointer or NULL 1909 */ 1910 const xmlParserNodeInfo * 1911 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1912 { 1913 unsigned long pos; 1914 1915 if ((ctx == NULL) || (node == NULL)) 1916 return (NULL); 1917 /* Find position where node should be at */ 1918 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1919 if (pos < ctx->node_seq.length 1920 && ctx->node_seq.buffer[pos].node == node) 1921 return &ctx->node_seq.buffer[pos]; 1922 else 1923 return NULL; 1924 } 1925 1926 1927 /** 1928 * xmlInitNodeInfoSeq: 1929 * @seq: a node info sequence pointer 1930 * 1931 * -- Initialize (set to initial state) node info sequence 1932 */ 1933 void 1934 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1935 { 1936 if (seq == NULL) 1937 return; 1938 seq->length = 0; 1939 seq->maximum = 0; 1940 seq->buffer = NULL; 1941 } 1942 1943 /** 1944 * xmlClearNodeInfoSeq: 1945 * @seq: a node info sequence pointer 1946 * 1947 * -- Clear (release memory and reinitialize) node 1948 * info sequence 1949 */ 1950 void 1951 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1952 { 1953 if (seq == NULL) 1954 return; 1955 if (seq->buffer != NULL) 1956 xmlFree(seq->buffer); 1957 xmlInitNodeInfoSeq(seq); 1958 } 1959 1960 /** 1961 * xmlParserFindNodeInfoIndex: 1962 * @seq: a node info sequence pointer 1963 * @node: an XML node pointer 1964 * 1965 * 1966 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1967 * the given node is or should be at in a sorted sequence 1968 * 1969 * Returns a long indicating the position of the record 1970 */ 1971 unsigned long 1972 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1973 const xmlNodePtr node) 1974 { 1975 unsigned long upper, lower, middle; 1976 int found = 0; 1977 1978 if ((seq == NULL) || (node == NULL)) 1979 return ((unsigned long) -1); 1980 1981 /* Do a binary search for the key */ 1982 lower = 1; 1983 upper = seq->length; 1984 middle = 0; 1985 while (lower <= upper && !found) { 1986 middle = lower + (upper - lower) / 2; 1987 if (node == seq->buffer[middle - 1].node) 1988 found = 1; 1989 else if (node < seq->buffer[middle - 1].node) 1990 upper = middle - 1; 1991 else 1992 lower = middle + 1; 1993 } 1994 1995 /* Return position */ 1996 if (middle == 0 || seq->buffer[middle - 1].node < node) 1997 return middle; 1998 else 1999 return middle - 1; 2000 } 2001 2002 2003 /** 2004 * xmlParserAddNodeInfo: 2005 * @ctxt: an XML parser context 2006 * @info: a node info sequence pointer 2007 * 2008 * Insert node info record into the sorted sequence 2009 */ 2010 void 2011 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 2012 const xmlParserNodeInfoPtr info) 2013 { 2014 unsigned long pos; 2015 2016 if ((ctxt == NULL) || (info == NULL)) return; 2017 2018 /* Find pos and check to see if node is already in the sequence */ 2019 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2020 info->node); 2021 2022 if ((pos < ctxt->node_seq.length) && 2023 (ctxt->node_seq.buffer != NULL) && 2024 (ctxt->node_seq.buffer[pos].node == info->node)) { 2025 ctxt->node_seq.buffer[pos] = *info; 2026 } 2027 2028 /* Otherwise, we need to add new node to buffer */ 2029 else { 2030 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || 2031 (ctxt->node_seq.buffer == NULL)) { 2032 xmlParserNodeInfo *tmp_buffer; 2033 unsigned int byte_size; 2034 2035 if (ctxt->node_seq.maximum == 0) 2036 ctxt->node_seq.maximum = 2; 2037 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2038 (2 * ctxt->node_seq.maximum)); 2039 2040 if (ctxt->node_seq.buffer == NULL) 2041 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2042 else 2043 tmp_buffer = 2044 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2045 byte_size); 2046 2047 if (tmp_buffer == NULL) { 2048 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2049 return; 2050 } 2051 ctxt->node_seq.buffer = tmp_buffer; 2052 ctxt->node_seq.maximum *= 2; 2053 } 2054 2055 /* If position is not at end, move elements out of the way */ 2056 if (pos != ctxt->node_seq.length) { 2057 unsigned long i; 2058 2059 for (i = ctxt->node_seq.length; i > pos; i--) 2060 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2061 } 2062 2063 /* Copy element and increase length */ 2064 ctxt->node_seq.buffer[pos] = *info; 2065 ctxt->node_seq.length++; 2066 } 2067 } 2068 2069 /************************************************************************ 2070 * * 2071 * Defaults settings * 2072 * * 2073 ************************************************************************/ 2074 /** 2075 * xmlPedanticParserDefault: 2076 * @val: int 0 or 1 2077 * 2078 * Set and return the previous value for enabling pedantic warnings. 2079 * 2080 * Returns the last value for 0 for no substitution, 1 for substitution. 2081 */ 2082 2083 int 2084 xmlPedanticParserDefault(int val) { 2085 int old = xmlPedanticParserDefaultValue; 2086 2087 xmlPedanticParserDefaultValue = val; 2088 return(old); 2089 } 2090 2091 /** 2092 * xmlLineNumbersDefault: 2093 * @val: int 0 or 1 2094 * 2095 * Set and return the previous value for enabling line numbers in elements 2096 * contents. This may break on old application and is turned off by default. 2097 * 2098 * Returns the last value for 0 for no substitution, 1 for substitution. 2099 */ 2100 2101 int 2102 xmlLineNumbersDefault(int val) { 2103 int old = xmlLineNumbersDefaultValue; 2104 2105 xmlLineNumbersDefaultValue = val; 2106 return(old); 2107 } 2108 2109 /** 2110 * xmlSubstituteEntitiesDefault: 2111 * @val: int 0 or 1 2112 * 2113 * Set and return the previous value for default entity support. 2114 * Initially the parser always keep entity references instead of substituting 2115 * entity values in the output. This function has to be used to change the 2116 * default parser behavior 2117 * SAX::substituteEntities() has to be used for changing that on a file by 2118 * file basis. 2119 * 2120 * Returns the last value for 0 for no substitution, 1 for substitution. 2121 */ 2122 2123 int 2124 xmlSubstituteEntitiesDefault(int val) { 2125 int old = xmlSubstituteEntitiesDefaultValue; 2126 2127 xmlSubstituteEntitiesDefaultValue = val; 2128 return(old); 2129 } 2130 2131 /** 2132 * xmlKeepBlanksDefault: 2133 * @val: int 0 or 1 2134 * 2135 * Set and return the previous value for default blanks text nodes support. 2136 * The 1.x version of the parser used an heuristic to try to detect 2137 * ignorable white spaces. As a result the SAX callback was generating 2138 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2139 * using the DOM output text nodes containing those blanks were not generated. 2140 * The 2.x and later version will switch to the XML standard way and 2141 * ignorableWhitespace() are only generated when running the parser in 2142 * validating mode and when the current element doesn't allow CDATA or 2143 * mixed content. 2144 * This function is provided as a way to force the standard behavior 2145 * on 1.X libs and to switch back to the old mode for compatibility when 2146 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2147 * by using xmlIsBlankNode() commodity function to detect the "empty" 2148 * nodes generated. 2149 * This value also affect autogeneration of indentation when saving code 2150 * if blanks sections are kept, indentation is not generated. 2151 * 2152 * Returns the last value for 0 for no substitution, 1 for substitution. 2153 */ 2154 2155 int 2156 xmlKeepBlanksDefault(int val) { 2157 int old = xmlKeepBlanksDefaultValue; 2158 2159 xmlKeepBlanksDefaultValue = val; 2160 if (!val) xmlIndentTreeOutput = 1; 2161 return(old); 2162 } 2163 2164 #define bottom_parserInternals 2165 #include "elfgcchack.h" 2166