1 /* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10 #define IN_LIBXML 11 #include "libxml.h" 12 13 #if defined(_WIN32) 14 #define XML_DIR_SEP '\\' 15 #else 16 #define XML_DIR_SEP '/' 17 #endif 18 19 #include <string.h> 20 #include <ctype.h> 21 #include <stdlib.h> 22 23 #include <libxml/xmlmemory.h> 24 #include <libxml/tree.h> 25 #include <libxml/parser.h> 26 #include <libxml/parserInternals.h> 27 #include <libxml/valid.h> 28 #include <libxml/entities.h> 29 #include <libxml/xmlerror.h> 30 #include <libxml/encoding.h> 31 #include <libxml/valid.h> 32 #include <libxml/xmlIO.h> 33 #include <libxml/uri.h> 34 #include <libxml/dict.h> 35 #include <libxml/SAX.h> 36 #ifdef LIBXML_CATALOG_ENABLED 37 #include <libxml/catalog.h> 38 #endif 39 #include <libxml/globals.h> 40 #include <libxml/chvalid.h> 41 42 #define CUR(ctxt) ctxt->input->cur 43 #define END(ctxt) ctxt->input->end 44 #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) 45 46 #include "buf.h" 47 #include "enc.h" 48 49 /* 50 * Various global defaults for parsing 51 */ 52 53 /** 54 * xmlCheckVersion: 55 * @version: the include version number 56 * 57 * check the compiled lib version against the include one. 
58 * This can warn or immediately kill the application 59 */ 60 void 61 xmlCheckVersion(int version) { 62 int myversion = (int) LIBXML_VERSION; 63 64 xmlInitParser(); 65 66 if ((myversion / 10000) != (version / 10000)) { 67 xmlGenericError(xmlGenericErrorContext, 68 "Fatal: program compiled against libxml %d using libxml %d\n", 69 (version / 10000), (myversion / 10000)); 70 fprintf(stderr, 71 "Fatal: program compiled against libxml %d using libxml %d\n", 72 (version / 10000), (myversion / 10000)); 73 } 74 if ((myversion / 100) < (version / 100)) { 75 xmlGenericError(xmlGenericErrorContext, 76 "Warning: program compiled against libxml %d using older %d\n", 77 (version / 100), (myversion / 100)); 78 } 79 } 80 81 82 /************************************************************************ 83 * * 84 * Some factorized error routines * 85 * * 86 ************************************************************************/ 87 88 89 /** 90 * xmlErrMemory: 91 * @ctxt: an XML parser context 92 * @extra: extra information 93 * 94 * Handle a redefinition of attribute error 95 */ 96 void 97 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 98 { 99 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 100 (ctxt->instate == XML_PARSER_EOF)) 101 return; 102 if (ctxt != NULL) { 103 ctxt->errNo = XML_ERR_NO_MEMORY; 104 ctxt->instate = XML_PARSER_EOF; 105 ctxt->disableSAX = 1; 106 } 107 if (extra) 108 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 109 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 110 NULL, NULL, 0, 0, 111 "Memory allocation failed : %s\n", extra); 112 else 113 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 114 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 115 NULL, NULL, 0, 0, "Memory allocation failed\n"); 116 } 117 118 /** 119 * __xmlErrEncoding: 120 * @ctxt: an XML parser context 121 * @xmlerr: the error number 122 * @msg: the error message 123 * @str1: an string info 124 * @str2: an string info 125 * 126 * Handle an encoding 
error 127 */ 128 void 129 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 130 const char *msg, const xmlChar * str1, const xmlChar * str2) 131 { 132 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 133 (ctxt->instate == XML_PARSER_EOF)) 134 return; 135 if (ctxt != NULL) 136 ctxt->errNo = xmlerr; 137 __xmlRaiseError(NULL, NULL, NULL, 138 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 139 NULL, 0, (const char *) str1, (const char *) str2, 140 NULL, 0, 0, msg, str1, str2); 141 if (ctxt != NULL) { 142 ctxt->wellFormed = 0; 143 if (ctxt->recovery == 0) 144 ctxt->disableSAX = 1; 145 } 146 } 147 148 /** 149 * xmlErrInternal: 150 * @ctxt: an XML parser context 151 * @msg: the error message 152 * @str: error information 153 * 154 * Handle an internal error 155 */ 156 static void LIBXML_ATTR_FORMAT(2,0) 157 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 158 { 159 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 160 (ctxt->instate == XML_PARSER_EOF)) 161 return; 162 if (ctxt != NULL) 163 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 164 __xmlRaiseError(NULL, NULL, NULL, 165 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 166 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 167 0, 0, msg, str); 168 if (ctxt != NULL) { 169 ctxt->wellFormed = 0; 170 if (ctxt->recovery == 0) 171 ctxt->disableSAX = 1; 172 } 173 } 174 175 /** 176 * xmlErrEncodingInt: 177 * @ctxt: an XML parser context 178 * @error: the error number 179 * @msg: the error message 180 * @val: an integer value 181 * 182 * n encoding error 183 */ 184 static void LIBXML_ATTR_FORMAT(3,0) 185 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 186 const char *msg, int val) 187 { 188 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 189 (ctxt->instate == XML_PARSER_EOF)) 190 return; 191 if (ctxt != NULL) 192 ctxt->errNo = error; 193 __xmlRaiseError(NULL, NULL, NULL, 194 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 195 NULL, 0, NULL, NULL, NULL, val, 0, 
msg, val); 196 if (ctxt != NULL) { 197 ctxt->wellFormed = 0; 198 if (ctxt->recovery == 0) 199 ctxt->disableSAX = 1; 200 } 201 } 202 203 /** 204 * xmlIsLetter: 205 * @c: an unicode character (int) 206 * 207 * Check whether the character is allowed by the production 208 * [84] Letter ::= BaseChar | Ideographic 209 * 210 * Returns 0 if not, non-zero otherwise 211 */ 212 int 213 xmlIsLetter(int c) { 214 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 215 } 216 217 /************************************************************************ 218 * * 219 * Input handling functions for progressive parsing * 220 * * 221 ************************************************************************/ 222 223 /* #define DEBUG_INPUT */ 224 /* #define DEBUG_STACK */ 225 /* #define DEBUG_PUSH */ 226 227 228 /* we need to keep enough input to show errors in context */ 229 #define LINE_LEN 80 230 231 #ifdef DEBUG_INPUT 232 #define CHECK_BUFFER(in) check_buffer(in) 233 234 static 235 void check_buffer(xmlParserInputPtr in) { 236 if (in->base != xmlBufContent(in->buf->buffer)) { 237 xmlGenericError(xmlGenericErrorContext, 238 "xmlParserInput: base mismatch problem\n"); 239 } 240 if (in->cur < in->base) { 241 xmlGenericError(xmlGenericErrorContext, 242 "xmlParserInput: cur < base problem\n"); 243 } 244 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 245 xmlGenericError(xmlGenericErrorContext, 246 "xmlParserInput: cur > base + use problem\n"); 247 } 248 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 249 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 250 xmlBufUse(in->buf->buffer)); 251 } 252 253 #else 254 #define CHECK_BUFFER(in) 255 #endif 256 257 258 /** 259 * xmlParserInputRead: 260 * @in: an XML parser input 261 * @len: an indicative size for the lookahead 262 * 263 * This function was internal and is deprecated. 264 * 265 * Returns -1 as this is an error to use it. 
266 */ 267 int 268 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 269 return(-1); 270 } 271 272 /** 273 * xmlParserInputGrow: 274 * @in: an XML parser input 275 * @len: an indicative size for the lookahead 276 * 277 * This function increase the input for the parser. It tries to 278 * preserve pointers to the input buffer, and keep already read data 279 * 280 * Returns the amount of char read, or -1 in case of error, 0 indicate the 281 * end of this entity 282 */ 283 int 284 xmlParserInputGrow(xmlParserInputPtr in, int len) { 285 int ret; 286 size_t indx; 287 288 if ((in == NULL) || (len < 0)) return(-1); 289 #ifdef DEBUG_INPUT 290 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 291 #endif 292 if (in->buf == NULL) return(-1); 293 if (in->base == NULL) return(-1); 294 if (in->cur == NULL) return(-1); 295 if (in->buf->buffer == NULL) return(-1); 296 297 CHECK_BUFFER(in); 298 299 indx = in->cur - in->base; 300 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 301 302 CHECK_BUFFER(in); 303 304 return(0); 305 } 306 if (in->buf->readcallback != NULL) { 307 ret = xmlParserInputBufferGrow(in->buf, len); 308 } else 309 return(0); 310 311 in->base = xmlBufContent(in->buf->buffer); 312 in->cur = in->base + indx; 313 in->end = xmlBufEnd(in->buf->buffer); 314 315 CHECK_BUFFER(in); 316 317 return(ret); 318 } 319 320 /** 321 * xmlParserInputShrink: 322 * @in: an XML parser input 323 * 324 * This function removes used input for the parser. 
325 */ 326 void 327 xmlParserInputShrink(xmlParserInputPtr in) { 328 size_t used; 329 size_t ret; 330 331 #ifdef DEBUG_INPUT 332 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 333 #endif 334 if (in == NULL) return; 335 if (in->buf == NULL) return; 336 if (in->base == NULL) return; 337 if (in->cur == NULL) return; 338 if (in->buf->buffer == NULL) return; 339 340 CHECK_BUFFER(in); 341 342 used = in->cur - in->base; 343 /* 344 * Do not shrink on large buffers whose only a tiny fraction 345 * was consumed 346 */ 347 if (used > INPUT_CHUNK) { 348 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 349 if (ret > 0) { 350 used -= ret; 351 in->consumed += ret; 352 } 353 } 354 355 if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) { 356 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 357 } 358 359 in->base = xmlBufContent(in->buf->buffer); 360 in->cur = in->base + used; 361 in->end = xmlBufEnd(in->buf->buffer); 362 363 CHECK_BUFFER(in); 364 } 365 366 /************************************************************************ 367 * * 368 * UTF8 character input and related functions * 369 * * 370 ************************************************************************/ 371 372 /** 373 * xmlNextChar: 374 * @ctxt: the XML parser context 375 * 376 * Skip to the next char input char. 
 *
 * Advances ctxt->input->cur by one character and updates the line and
 * column counters of the current input. With the UTF-8 charset a
 * character may span 1 to 4 bytes and the sequence is validated while
 * skipping; with any other charset exactly one byte is skipped.
 *
 * The function returns without doing anything when @ctxt is NULL, has no
 * input, or is in the XML_PARSER_EOF state, and without advancing when
 * the current byte is 0 and no more input could be obtained.
 *
 * On an invalid UTF-8 sequence an XML_ERR_INVALID_CHAR error is raised,
 * ctxt->charset is switched to XML_CHAR_ENCODING_8859_1 and a single
 * byte is skipped.
 */

void
xmlNextChar(xmlParserCtxtPtr ctxt)
{
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
        (ctxt->input == NULL))
        return;

    /* cur beyond end means the input pointers are corrupted: stop. */
    if (!(VALID_CTXT(ctxt))) {
        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
        xmlStopParser(ctxt);
        return;
    }

    /* Sitting on a 0 byte: only go on if more input could be obtained. */
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
        return;
    }

    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        const unsigned char *cur;
        unsigned char c;

        /*
         *   2.11 End-of-Line Handling
         *   the literal two-character sequence "#xD#xA" or a standalone
         *   literal #xD, an XML processor must pass to the application
         *   the single character #xA.
         */
        /*
         * Line/column accounting happens once per character, before the
         * sequence is validated.
         */
        if (*(ctxt->input->cur) == '\n') {
            ctxt->input->line++; ctxt->input->col = 1;
        } else
            ctxt->input->col++;

        /*
         * We are supposed to handle UTF8, check it's valid
         * From rfc2044: encoding of the Unicode values on UTF-8:
         *
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
         * 0000 0000-0000 007F   0xxxxxxx
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
         *
         * Check for the 0x110000 limit too
         */
        cur = ctxt->input->cur;

        c = *cur;
        if (c & 0x80) {
            if (c == 0xC0)
                goto encoding_error;
            /*
             * A 0 byte where a continuation byte is expected may just be
             * the end of the data read so far: try to get more input and
             * reload cur, since xmlParserInputGrow re-derives
             * ctxt->input->cur from the buffer.
             */
            if (cur[1] == 0) {
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
            if ((cur[1] & 0xc0) != 0x80)
                goto encoding_error;
            if ((c & 0xe0) == 0xe0) {
                unsigned int val;

                if (cur[2] == 0) {
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    cur = ctxt->input->cur;
                }
                if ((cur[2] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xf0) == 0xf0) {
                    if (cur[3] == 0) {
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        cur = ctxt->input->cur;
                    }
                    if (((c & 0xf8) != 0xf0) ||
                        ((cur[3] & 0xc0) != 0x80))
                        goto encoding_error;
                    /* 4-byte code */
                    ctxt->input->cur += 4;
                    val = (cur[0] & 0x7) << 18;
                    val |= (cur[1] & 0x3f) << 12;
                    val |= (cur[2] & 0x3f) << 6;
                    val |= cur[3] & 0x3f;
                } else {
                    /* 3-byte code */
                    ctxt->input->cur += 3;
                    val = (cur[0] & 0xf) << 12;
                    val |= (cur[1] & 0x3f) << 6;
                    val |= cur[2] & 0x3f;
                }
                /*
                 * Range check for 3- and 4-byte codes only: surrogates,
                 * 0xFFFE/0xFFFF and values from 0x110000 are reported,
                 * but the input position has already been advanced.
                 */
                if (((val > 0xd7ff) && (val < 0xe000)) ||
                    ((val > 0xfffd) && (val < 0x10000)) ||
                    (val >= 0x110000)) {
                    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                      "Char 0x%X out of allowed range\n",
                                      val);
                }
            } else
                /* 2-byte code */
                ctxt->input->cur += 2;
        } else
            /* 1-byte code */
            ctxt->input->cur++;
    } else {
        /*
         * Assume it's a fixed length encoding (1) with
         * a compatible encoding for the ASCII set, since
         * XML constructs only use < 128 chars
         */

        if (*(ctxt->input->cur) == '\n') {
            ctxt->input->line++; ctxt->input->col = 1;
        } else
            ctxt->input->col++;
        ctxt->input->cur++;
    }
    /* Landed on a 0 byte: try to fetch more data for the next call. */
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    return;
encoding_error:
    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    /*
     * The offending bytes are only dumped when at least 4 bytes remain
     * between cur and end.
     */
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
        (ctxt->input->end - ctxt->input->cur < 4)) {
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n",
                     NULL, NULL);
    } else {
        char buffer[150];

        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        ctxt->input->cur[0], ctxt->input->cur[1],
                        ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
    }
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    ctxt->input->cur++;
    return;
}

/**
 * xmlCurrentChar:
 * @ctxt:  the XML parser context
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer. Implement the end of line normalization:
 * 2.11 End-of-Line Handling
 * Wherever an external parsed entity or the literal entity value
 * of an internal parsed entity contains either the literal two-character
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
 * must pass to the application the single character #xA.
 * This behavior can conveniently be produced by normalizing all
 * line breaks to #xA on input, before parsing.)
 *
 * Note that the end of line normalization has a side effect: when the
 * current byte is #xD and the next one is #xA, ctxt->input->cur is
 * advanced onto the #xA.
 *
 * Returns the current char value and its length in @len. 0 is returned
 *         when @ctxt, @len or the input is NULL, when the parser is in
 *         the XML_PARSER_EOF state, and (with @len set to 0) when an
 *         invalid sequence is found less than 4 bytes before the end of
 *         the input.
 */

int
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
    if (ctxt->instate == XML_PARSER_EOF)
        return(0);

    /* Fast path: printable ASCII needs neither decoding nor EOL handling. */
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
            *len = 1;
            return((int) *ctxt->input->cur);
    }
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        /*
         * We are supposed to handle UTF8, check it's valid
         * From rfc2044: encoding of the Unicode values on UTF-8:
         *
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
         * 0000 0000-0000 007F   0xxxxxxx
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
         *
         * Check for the 0x110000 limit too
         */
        const unsigned char *cur = ctxt->input->cur;
        unsigned char c;
        unsigned int val;

        c = *cur;
        if (c & 0x80) {
            /* A lead byte needs bit 0x40 set; 0xC0 is rejected as well. */
            if (((c & 0x40) == 0) || (c == 0xC0))
                goto encoding_error;
            /*
             * A 0 byte in place of a continuation byte may be the end of
             * the data read so far: grow the input and reload cur, which
             * xmlParserInputGrow re-derives from the buffer.
             */
            if (cur[1] == 0) {
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
            if ((cur[1] & 0xc0) != 0x80)
                goto encoding_error;
            if ((c & 0xe0) == 0xe0) {
                if (cur[2] == 0) {
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    cur = ctxt->input->cur;
                }
                if ((cur[2] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xf0) == 0xf0) {
                    if (cur[3] == 0) {
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        cur = ctxt->input->cur;
                    }
                    if (((c & 0xf8) != 0xf0) ||
                        ((cur[3] & 0xc0) != 0x80))
                        goto encoding_error;
                    /* 4-byte code */
                    *len = 4;
                    val = (cur[0] & 0x7) << 18;
                    val |= (cur[1] & 0x3f) << 12;
                    val |= (cur[2] & 0x3f) << 6;
                    val |= cur[3] & 0x3f;
                    /* overlong form: the value fits in fewer bytes */
                    if (val < 0x10000)
                        goto encoding_error;
                } else {
                  /* 3-byte code */
                    *len = 3;
                    val = (cur[0] & 0xf) << 12;
                    val |= (cur[1] & 0x3f) << 6;
                    val |= cur[2] & 0x3f;
                    /* overlong form */
                    if (val < 0x800)
                        goto encoding_error;
                }
            } else {
              /* 2-byte code */
                *len = 2;
                val = (cur[0] & 0x1f) << 6;
                val |= cur[1] & 0x3f;
                /* overlong form */
                if (val < 0x80)
                    goto encoding_error;
            }
            /* Reported, but the decoded value is still returned. */
            if (!IS_CHAR(val)) {
                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                  "Char 0x%X out of allowed range\n", val);
            }
            return(val);
        } else {
            /* 1-byte code */
            *len = 1;
            if (*ctxt->input->cur == 0)
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
            /* A 0 byte located before the end of the input is real data. */
            if ((*ctxt->input->cur == 0) &&
                (ctxt->input->end > ctxt->input->cur)) {
                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                  "Char 0x0 out of allowed range\n", 0);
            }
            /* #xD#xA and a lone #xD are both returned as #xA. */
            if (*ctxt->input->cur == 0xD) {
                if (ctxt->input->cur[1] == 0xA) {
                    ctxt->input->cur++;
                }
                return(0xA);
            }
            return((int) *ctxt->input->cur);
        }
    }
    /*
     * Assume it's a fixed length encoding (1) with
     * a compatible encoding for the ASCII set, since
     * XML constructs only use < 128 chars
     */
    *len = 1;
    if (*ctxt->input->cur == 0xD) {
        if (ctxt->input->cur[1] == 0xA) {
            ctxt->input->cur++;
        }
        return(0xA);
    }
    return((int) *ctxt->input->cur);
encoding_error:
    /*
     * An encoding problem may arise from a truncated input buffer
     * splitting a character in the middle. In that case do not raise
     * an error but return 0 to indicate an end of stream problem
     */
    if (ctxt->input->end - ctxt->input->cur < 4) {
        *len = 0;
        return(0);
    }

    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    {
        char buffer[150];

        snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        ctxt->input->cur[0], ctxt->input->cur[1],
                        ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
    }
    /* From here on the input is read as single bytes. */
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    *len = 1;
    return((int) *ctxt->input->cur);
}

/**
 * xmlStringCurrentChar:
 * @ctxt:  the XML parser context
 * @cur:  pointer to the beginning of the char
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer.
 *
 * The string is decoded as UTF-8 when @ctxt is NULL or its charset is
 * UTF-8; otherwise the first byte is returned as is. Unlike
 * xmlCurrentChar, no input growing, no end of line normalization and no
 * overlong-form check is done here.
 *
 * Returns the current char value and its length; 0 when @cur or @len is
 *         NULL.
 */

int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
{
    if ((len == NULL) || (cur == NULL)) return(0);
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
        /*
         * We are supposed to handle UTF8, check it's valid
         * From rfc2044: encoding of the Unicode values on UTF-8:
         *
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
         * 0000 0000-0000 007F   0xxxxxxx
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
         *
         * Check for the 0x110000 limit too
         */
        unsigned char c;
        unsigned int val;

        c = *cur;
        if (c & 0x80) {
            if ((cur[1] & 0xc0) != 0x80)
                goto encoding_error;
            if ((c & 0xe0) == 0xe0) {

                if ((cur[2] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xf0) == 0xf0) {
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
                        goto encoding_error;
                    /* 4-byte code */
                    *len = 4;
                    val = (cur[0] & 0x7) << 18;
                    val |= (cur[1] & 0x3f) << 12;
                    val |= (cur[2] & 0x3f) << 6;
                    val |= cur[3] & 0x3f;
                } else {
                    /* 3-byte code */
                    *len = 3;
                    val = (cur[0] & 0xf) << 12;
                    val |= (cur[1] & 0x3f) << 6;
                    val |= cur[2] & 0x3f;
                }
            } else {
                /* 2-byte code */
                *len = 2;
                val = (cur[0] & 0x1f) << 6;
                val |= cur[1] & 0x3f;
            }
            /* Reported, but the decoded value is still returned. */
            if (!IS_CHAR(val)) {
                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                  "Char 0x%X out of allowed range\n", val);
            }
            return (val);
        } else {
            /* 1-byte code */
            *len = 1;
            return ((int) *cur);
        }
    }
    /*
     * Assume it's a fixed length encoding (1) with
     * a compatible encoding for the ASCII set, since
     * XML constructs only use < 128 chars
     */
    *len = 1;
    return ((int) *cur);
encoding_error:

    /*
     * An encoding problem may arise from a truncated input buffer
     * splitting a character in the middle. In that case do not raise
     * an error but return 0 to indicate an end of stream problem
     */
    /*
     * NOTE(review): this test and the byte dump below look at
     * ctxt->input->cur, not at the @cur string being decoded; confirm
     * that this is intended.
     */
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
        (ctxt->input->end - ctxt->input->cur < 4)) {
        *len = 0;
        return(0);
    }
    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    /*
     * NOTE(review): despite the comment above, ctxt->charset is left
     * unchanged on this path.
     */
    {
        char buffer[150];

        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        ctxt->input->cur[0], ctxt->input->cur[1],
                        ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
    }
    *len = 1;
    return ((int) *cur);
}

/**
 * xmlCopyCharMultiByte:
 * @out:  pointer to an array of xmlChar
 * @val:  the char value
 *
 * append the char value in the array, encoded as UTF-8 when @val is
 * 0x80 or above. Up to 4 bytes are written; there is no size argument,
 * so the room available in @out is not checked here.
 *
 * Returns the number of xmlChar written, 0 if @out is NULL or @val is
 *         0x110000 or above (an error is raised in the latter case).
 */
int
xmlCopyCharMultiByte(xmlChar *out, int val) {
    if (out == NULL) return(0);
    /*
     * We are supposed to handle UTF8, check it's valid
     * From rfc2044: encoding of the Unicode values on UTF-8:
     *
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
     * 0000 0000-0000 007F   0xxxxxxx
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
     */
    if  (val >= 0x80) {
        xmlChar *savedout = out;
        int bits;
        /*
         * Emit the lead byte; bits is the shift of the first
         * continuation byte still to be written.
         */
        if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
        else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
        else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
        else {
            xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
                    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
                    val);
            return(0);
        }
        /* Continuation bytes carry 6 bits each, most significant first. */
        for ( ; bits >= 0; bits-= 6)
            *out++= ((val >> bits) & 0x3F) | 0x80 ;
        return (out - savedout);
    }
    *out = (xmlChar) val;
    return 1;
}

/**
 * xmlCopyChar:
 * @len:  Ignored, compatibility
 * @out:  pointer to an array of xmlChar
 * @val:  the char value
 *
 * append the char value in the array; values of 0x80 and above are
 * handed over to xmlCopyCharMultiByte.
 *
 * Returns the number of xmlChar written
 */

int
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
    if (out == NULL) return(0);
    /* the len parameter is ignored */
    if  (val >= 0x80) {
        return(xmlCopyCharMultiByte (out, val));
    }
    *out = (xmlChar) val;
    return 1;
}

/************************************************************************
 *                                                                      *
 *              Commodity functions to switch encodings                 *
 *                                                                      *
 ************************************************************************/

/* Worker shared by the public switching functions, defined below. */
static int
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                          xmlCharEncodingHandlerPtr handler, int len);
/**
 * xmlSwitchEncoding:
 * @ctxt:  the parser context
 * @enc:  the encoding value (number)
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * UTF-8 (and "none", which is assumed to be UTF-8) needs no handler.
 * For the other encodings a handler is looked up and installed on the
 * current input; when no handler is available only ASCII and
 * ISO-8859-1 are accepted, anything else stops the parser.
 *
 * Returns 0 in case of success, -1 otherwise
 */
int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
{
    xmlCharEncodingHandlerPtr handler;
    /*
     * Number of bytes handed to xmlSwitchInputEncodingInt for the
     * conversion of the first line, -1 when no value was selected.
     */
    int len = -1;
    int ret;

    if (ctxt == NULL) return(-1);
    switch (enc) {
        case XML_CHAR_ENCODING_ERROR:
            __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
                           "encoding unknown\n", NULL, NULL);
            return(-1);
        case XML_CHAR_ENCODING_NONE:
            /* let's assume it's UTF-8 without the XML decl */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;
            return(0);
        case XML_CHAR_ENCODING_UTF8:
            /* default encoding, no conversion should be needed */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;

            /*
             * Errata on XML-1.0 June 20 2001
             * Specific handling of the Byte Order Mark for
             * UTF-8
             */
            /*
             * NOTE(review): unlike the UTF-16 case below,
             * ctxt->input->cur is not checked against NULL here.
             */
            if ((ctxt->input != NULL) &&
                (ctxt->input->cur[0] == 0xEF) &&
                (ctxt->input->cur[1] == 0xBB) &&
                (ctxt->input->cur[2] == 0xBF)) {
                ctxt->input->cur += 3;
            }
            return(0);
    case XML_CHAR_ENCODING_UTF16LE:
    case XML_CHAR_ENCODING_UTF16BE:
        /*The raw input characters are encoded
         *in UTF-16. As we expect this function
         *to be called after xmlCharEncInFunc, we expect
         *ctxt->input->cur to contain UTF-8 encoded characters.
         *So the raw UTF16 Byte Order Mark
         *has also been converted into
         *an UTF-8 BOM. Let's skip that BOM.
         */
        if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
            (ctxt->input->cur[0] == 0xEF) &&
            (ctxt->input->cur[1] == 0xBB) &&
            (ctxt->input->cur[2] == 0xBF)) {
            ctxt->input->cur += 3;
        }
        len = 90;
        break;
    case XML_CHAR_ENCODING_UCS2:
        len = 90;
        break;
    case XML_CHAR_ENCODING_UCS4BE:
    case XML_CHAR_ENCODING_UCS4LE:
    case XML_CHAR_ENCODING_UCS4_2143:
    case XML_CHAR_ENCODING_UCS4_3412:
        len = 180;
        break;
    case XML_CHAR_ENCODING_EBCDIC:
    case XML_CHAR_ENCODING_8859_1:
    case XML_CHAR_ENCODING_8859_2:
    case XML_CHAR_ENCODING_8859_3:
    case XML_CHAR_ENCODING_8859_4:
    case XML_CHAR_ENCODING_8859_5:
    case XML_CHAR_ENCODING_8859_6:
    case XML_CHAR_ENCODING_8859_7:
    case XML_CHAR_ENCODING_8859_8:
    case XML_CHAR_ENCODING_8859_9:
    case XML_CHAR_ENCODING_ASCII:
    case XML_CHAR_ENCODING_2022_JP:
    case XML_CHAR_ENCODING_SHIFT_JIS:
    case XML_CHAR_ENCODING_EUC_JP:
        len = 45;
        break;
    }
    handler = xmlGetCharEncodingHandler(enc);
    if (handler == NULL) {
        /*
         * Default handlers.
         */
        switch (enc) {
            case XML_CHAR_ENCODING_ASCII:
                /* default encoding, no conversion should be needed */
                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                return(0);
            case XML_CHAR_ENCODING_8859_1:
                /*
                 * On the only input of the stack, remember the input's
                 * encoding name in the context if none is recorded yet.
                 */
                if ((ctxt->inputNr == 1) &&
                    (ctxt->encoding == NULL) &&
                    (ctxt->input != NULL) &&
                    (ctxt->input->encoding != NULL)) {
                    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                }
                ctxt->charset = enc;
                return(0);
            default:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                        "encoding not supported: %s\n",
                        BAD_CAST xmlGetCharEncodingName(enc), NULL);
                /*
                 * TODO: We could recover from errors in external entities
                 * if we didn't stop the parser. But most callers of this
                 * function don't check the return value.
                 */
                xmlStopParser(ctxt);
                return(-1);
        }
    }
    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
        /*
         * on encoding conversion errors, stop the parser
         */
        xmlStopParser(ctxt);
        ctxt->errNo = XML_I18N_CONV_FAILED;
    }
    return(ret);
}

/**
 * xmlSwitchInputEncodingInt:
 * @ctxt:  the parser context
 * @input:  the input stream
 * @handler:  the encoding handler
 * @len:  the number of bytes to convert for the first line or -1
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * The handler is installed on the buffer of @input. Content already
 * present in that buffer becomes the raw (undecoded) buffer and is
 * converted into a newly created buffer: as much as possible for HTML
 * contexts, otherwise through xmlCharEncFirstLineInput with @len.
 *
 * NOTE(review): @ctxt is dereferenced without a NULL check as soon as
 * @input has a buffer.
 *
 * Returns 0 in case of success, -1 otherwise
 */
static int
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                          xmlCharEncodingHandlerPtr handler, int len)
{
    int nbchars;

    if (handler == NULL)
        return (-1);
    if (input == NULL)
        return (-1);
    if (input->buf != NULL) {
        ctxt->charset = XML_CHAR_ENCODING_UTF8;

        if (input->buf->encoder != NULL) {
            /*
             * Check in case the auto encoding detection triggered
             * in already.
             */
            if (input->buf->encoder == handler)
                return (0);

            /*
             * "UTF-16" can be used for both LE and BE
             if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
             BAD_CAST "UTF-16", 6)) &&
             (!xmlStrncmp(BAD_CAST handler->name,
             BAD_CAST "UTF-16", 6))) {
             return(0);
             }
             */

            /*
             * Note: this is a bit dangerous, but that's what it
             * takes to use nearly compatible signature for different
             * encodings.
             *
             * FIXME: Encoders might buffer partial byte sequences, so
             * this probably can't work. We should return an error and
             * make sure that callers never try to switch the encoding
             * twice.
             */
            xmlCharEncCloseFunc(input->buf->encoder);
            input->buf->encoder = handler;
            return (0);
        }
        input->buf->encoder = handler;

        /*
         * Is there already some content down the pipe to convert ?
         */
        if (xmlBufIsEmpty(input->buf->buffer) == 0) {
            int processed;
            unsigned int use;

            /*
             * Specific handling of the Byte Order Mark for
             * UTF-16
             */
            if ((handler->name != NULL) &&
                (!strcmp(handler->name, "UTF-16LE") ||
                 !strcmp(handler->name, "UTF-16")) &&
                (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
                input->cur += 2;
            }
            if ((handler->name != NULL) &&
                (!strcmp(handler->name, "UTF-16BE")) &&
                (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
                input->cur += 2;
            }
            /*
             * Errata on XML-1.0 June 20 2001
             * Specific handling of the Byte Order Mark for
             * UTF-8
             */
            if ((handler->name != NULL) &&
                (!strcmp(handler->name, "UTF-8")) &&
                (input->cur[0] == 0xEF) &&
                (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
                input->cur += 3;
            }

            /*
             * Shrink the current input buffer.
             * Move it as the raw buffer and create a new input buffer
             */
            /*
             * NOTE(review): the result of xmlBufCreate() is not checked
             * before it is used below.
             */
            processed = input->cur - input->base;
            xmlBufShrink(input->buf->buffer, processed);
            input->buf->raw = input->buf->buffer;
            input->buf->buffer = xmlBufCreate();
            input->buf->rawconsumed = processed;
            /* raw size before conversion, to compute what was consumed */
            use = xmlBufUse(input->buf->raw);

            if (ctxt->html) {
                /*
                 * convert as much as possible of the buffer
                 */
                nbchars = xmlCharEncInput(input->buf, 1);
            } else {
                /*
                 * convert just enough to get
                 * '<?xml version="1.0" encoding="xxx"?>'
                 * parsed with the autodetected encoding
                 * into the parser reading buffer.
                 */
                nbchars = xmlCharEncFirstLineInput(input->buf, len);
            }
            /* Point the input at the freshly converted buffer. */
            xmlBufResetInput(input->buf->buffer, input);
            if (nbchars < 0) {
                xmlErrInternal(ctxt,
                               "switching encoding: encoder error\n",
                               NULL);
                return (-1);
            }
            input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
        }
        return (0);
    } else {
        xmlErrInternal(ctxt,
                "static memory buffer doesn't support encoding\n", NULL);
        /*
         * Callers assume that the input buffer takes ownership of the
         * encoding handler. xmlCharEncCloseFunc frees unregistered
         * handlers and avoids a memory leak.
         */
        xmlCharEncCloseFunc(handler);
        return (-1);
    }
}

/**
 * xmlSwitchInputEncoding:
 * @ctxt:  the parser context
 * @input:  the input stream
 * @handler:  the encoding handler
 *
 * DEPRECATED: Use xmlSwitchToEncoding
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
1152 * 1153 * Returns 0 in case of success, -1 otherwise 1154 */ 1155 int 1156 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1157 xmlCharEncodingHandlerPtr handler) { 1158 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1159 } 1160 1161 /** 1162 * xmlSwitchToEncoding: 1163 * @ctxt: the parser context 1164 * @handler: the encoding handler 1165 * 1166 * change the input functions when discovering the character encoding 1167 * of a given entity. 1168 * 1169 * Returns 0 in case of success, -1 otherwise 1170 */ 1171 int 1172 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1173 { 1174 if (ctxt == NULL) 1175 return(-1); 1176 return(xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, -1)); 1177 } 1178 1179 /************************************************************************ 1180 * * 1181 * Commodity functions to handle entities processing * 1182 * * 1183 ************************************************************************/ 1184 1185 /** 1186 * xmlFreeInputStream: 1187 * @input: an xmlParserInputPtr 1188 * 1189 * Free up an input stream. 1190 */ 1191 void 1192 xmlFreeInputStream(xmlParserInputPtr input) { 1193 if (input == NULL) return; 1194 1195 if (input->filename != NULL) xmlFree((char *) input->filename); 1196 if (input->directory != NULL) xmlFree((char *) input->directory); 1197 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1198 if (input->version != NULL) xmlFree((char *) input->version); 1199 if ((input->free != NULL) && (input->base != NULL)) 1200 input->free((xmlChar *) input->base); 1201 if (input->buf != NULL) 1202 xmlFreeParserInputBuffer(input->buf); 1203 xmlFree(input); 1204 } 1205 1206 /** 1207 * xmlNewInputStream: 1208 * @ctxt: an XML parser context 1209 * 1210 * Create a new input stream structure. 
1211 * 1212 * Returns the new input stream or NULL 1213 */ 1214 xmlParserInputPtr 1215 xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1216 xmlParserInputPtr input; 1217 1218 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1219 if (input == NULL) { 1220 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1221 return(NULL); 1222 } 1223 memset(input, 0, sizeof(xmlParserInput)); 1224 input->line = 1; 1225 input->col = 1; 1226 input->standalone = -1; 1227 1228 /* 1229 * If the context is NULL the id cannot be initialized, but that 1230 * should not happen while parsing which is the situation where 1231 * the id is actually needed. 1232 */ 1233 if (ctxt != NULL) 1234 input->id = ctxt->input_id++; 1235 1236 return(input); 1237 } 1238 1239 /** 1240 * xmlNewIOInputStream: 1241 * @ctxt: an XML parser context 1242 * @input: an I/O Input 1243 * @enc: the charset encoding if known 1244 * 1245 * Create a new input stream structure encapsulating the @input into 1246 * a stream suitable for the parser. 
1247 * 1248 * Returns the new input stream or NULL 1249 */ 1250 xmlParserInputPtr 1251 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1252 xmlCharEncoding enc) { 1253 xmlParserInputPtr inputStream; 1254 1255 if (input == NULL) return(NULL); 1256 if (xmlParserDebugEntities) 1257 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1258 inputStream = xmlNewInputStream(ctxt); 1259 if (inputStream == NULL) { 1260 return(NULL); 1261 } 1262 inputStream->filename = NULL; 1263 inputStream->buf = input; 1264 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1265 1266 if (enc != XML_CHAR_ENCODING_NONE) { 1267 xmlSwitchEncoding(ctxt, enc); 1268 } 1269 1270 return(inputStream); 1271 } 1272 1273 /** 1274 * xmlNewEntityInputStream: 1275 * @ctxt: an XML parser context 1276 * @entity: an Entity pointer 1277 * 1278 * Create a new input stream based on an xmlEntityPtr 1279 * 1280 * Returns the new input stream or NULL 1281 */ 1282 xmlParserInputPtr 1283 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1284 xmlParserInputPtr input; 1285 1286 if (entity == NULL) { 1287 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1288 NULL); 1289 return(NULL); 1290 } 1291 if (xmlParserDebugEntities) 1292 xmlGenericError(xmlGenericErrorContext, 1293 "new input from entity: %s\n", entity->name); 1294 if (entity->content == NULL) { 1295 switch (entity->etype) { 1296 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1297 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1298 entity->name); 1299 break; 1300 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1301 case XML_EXTERNAL_PARAMETER_ENTITY: 1302 return(xmlLoadExternalEntity((char *) entity->URI, 1303 (char *) entity->ExternalID, ctxt)); 1304 case XML_INTERNAL_GENERAL_ENTITY: 1305 xmlErrInternal(ctxt, 1306 "Internal entity %s without content !\n", 1307 entity->name); 1308 break; 1309 case XML_INTERNAL_PARAMETER_ENTITY: 1310 xmlErrInternal(ctxt, 1311 "Internal parameter entity %s 
without content !\n", 1312 entity->name); 1313 break; 1314 case XML_INTERNAL_PREDEFINED_ENTITY: 1315 xmlErrInternal(ctxt, 1316 "Predefined entity %s without content !\n", 1317 entity->name); 1318 break; 1319 } 1320 return(NULL); 1321 } 1322 input = xmlNewInputStream(ctxt); 1323 if (input == NULL) { 1324 return(NULL); 1325 } 1326 if (entity->URI != NULL) 1327 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1328 input->base = entity->content; 1329 if (entity->length == 0) 1330 entity->length = xmlStrlen(entity->content); 1331 input->cur = entity->content; 1332 input->length = entity->length; 1333 input->end = &entity->content[input->length]; 1334 return(input); 1335 } 1336 1337 /** 1338 * xmlNewStringInputStream: 1339 * @ctxt: an XML parser context 1340 * @buffer: an memory buffer 1341 * 1342 * Create a new input stream based on a memory buffer. 1343 * Returns the new input stream 1344 */ 1345 xmlParserInputPtr 1346 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1347 xmlParserInputPtr input; 1348 1349 if (buffer == NULL) { 1350 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1351 NULL); 1352 return(NULL); 1353 } 1354 if (xmlParserDebugEntities) 1355 xmlGenericError(xmlGenericErrorContext, 1356 "new fixed input: %.30s\n", buffer); 1357 input = xmlNewInputStream(ctxt); 1358 if (input == NULL) { 1359 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1360 return(NULL); 1361 } 1362 input->base = buffer; 1363 input->cur = buffer; 1364 input->length = xmlStrlen(buffer); 1365 input->end = &buffer[input->length]; 1366 return(input); 1367 } 1368 1369 /** 1370 * xmlNewInputFromFile: 1371 * @ctxt: an XML parser context 1372 * @filename: the filename to use as entity 1373 * 1374 * Create a new input stream based on a file or an URL. 
1375 * 1376 * Returns the new input stream or NULL in case of error 1377 */ 1378 xmlParserInputPtr 1379 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1380 xmlParserInputBufferPtr buf; 1381 xmlParserInputPtr inputStream; 1382 char *directory = NULL; 1383 xmlChar *URI = NULL; 1384 1385 if (xmlParserDebugEntities) 1386 xmlGenericError(xmlGenericErrorContext, 1387 "new input from file: %s\n", filename); 1388 if (ctxt == NULL) return(NULL); 1389 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1390 if (buf == NULL) { 1391 if (filename == NULL) 1392 __xmlLoaderErr(ctxt, 1393 "failed to load external entity: NULL filename \n", 1394 NULL); 1395 else 1396 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1397 (const char *) filename); 1398 return(NULL); 1399 } 1400 1401 inputStream = xmlNewInputStream(ctxt); 1402 if (inputStream == NULL) { 1403 xmlFreeParserInputBuffer(buf); 1404 return(NULL); 1405 } 1406 1407 inputStream->buf = buf; 1408 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1409 if (inputStream == NULL) 1410 return(NULL); 1411 1412 if (inputStream->filename == NULL) 1413 URI = xmlStrdup((xmlChar *) filename); 1414 else 1415 URI = xmlStrdup((xmlChar *) inputStream->filename); 1416 directory = xmlParserGetDirectory((const char *) URI); 1417 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1418 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1419 if (URI != NULL) xmlFree((char *) URI); 1420 inputStream->directory = directory; 1421 1422 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1423 if ((ctxt->directory == NULL) && (directory != NULL)) 1424 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1425 return(inputStream); 1426 } 1427 1428 /************************************************************************ 1429 * * 1430 * Commodity functions to handle parser contexts * 1431 * * 1432 
************************************************************************/ 1433 1434 /** 1435 * xmlInitParserCtxt: 1436 * @ctxt: an XML parser context 1437 * 1438 * Initialize a parser context 1439 * 1440 * Returns 0 in case of success and -1 in case of error 1441 */ 1442 1443 int 1444 xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1445 { 1446 xmlParserInputPtr input; 1447 1448 if(ctxt==NULL) { 1449 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1450 return(-1); 1451 } 1452 1453 xmlInitParser(); 1454 1455 if (ctxt->dict == NULL) 1456 ctxt->dict = xmlDictCreate(); 1457 if (ctxt->dict == NULL) { 1458 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1459 return(-1); 1460 } 1461 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1462 1463 if (ctxt->sax == NULL) 1464 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1465 if (ctxt->sax == NULL) { 1466 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1467 return(-1); 1468 } 1469 else 1470 xmlSAXVersion(ctxt->sax, 2); 1471 1472 ctxt->maxatts = 0; 1473 ctxt->atts = NULL; 1474 /* Allocate the Input stack */ 1475 if (ctxt->inputTab == NULL) { 1476 ctxt->inputTab = (xmlParserInputPtr *) 1477 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1478 ctxt->inputMax = 5; 1479 } 1480 if (ctxt->inputTab == NULL) { 1481 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1482 ctxt->inputNr = 0; 1483 ctxt->inputMax = 0; 1484 ctxt->input = NULL; 1485 return(-1); 1486 } 1487 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1488 xmlFreeInputStream(input); 1489 } 1490 ctxt->inputNr = 0; 1491 ctxt->input = NULL; 1492 1493 ctxt->version = NULL; 1494 ctxt->encoding = NULL; 1495 ctxt->standalone = -1; 1496 ctxt->hasExternalSubset = 0; 1497 ctxt->hasPErefs = 0; 1498 ctxt->html = 0; 1499 ctxt->external = 0; 1500 ctxt->instate = XML_PARSER_START; 1501 ctxt->token = 0; 1502 ctxt->directory = NULL; 1503 1504 /* Allocate the Node stack */ 1505 if (ctxt->nodeTab == NULL) { 1506 ctxt->nodeTab = 
(xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1507 ctxt->nodeMax = 10; 1508 } 1509 if (ctxt->nodeTab == NULL) { 1510 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1511 ctxt->nodeNr = 0; 1512 ctxt->nodeMax = 0; 1513 ctxt->node = NULL; 1514 ctxt->inputNr = 0; 1515 ctxt->inputMax = 0; 1516 ctxt->input = NULL; 1517 return(-1); 1518 } 1519 ctxt->nodeNr = 0; 1520 ctxt->node = NULL; 1521 1522 /* Allocate the Name stack */ 1523 if (ctxt->nameTab == NULL) { 1524 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1525 ctxt->nameMax = 10; 1526 } 1527 if (ctxt->nameTab == NULL) { 1528 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1529 ctxt->nodeNr = 0; 1530 ctxt->nodeMax = 0; 1531 ctxt->node = NULL; 1532 ctxt->inputNr = 0; 1533 ctxt->inputMax = 0; 1534 ctxt->input = NULL; 1535 ctxt->nameNr = 0; 1536 ctxt->nameMax = 0; 1537 ctxt->name = NULL; 1538 return(-1); 1539 } 1540 ctxt->nameNr = 0; 1541 ctxt->name = NULL; 1542 1543 /* Allocate the space stack */ 1544 if (ctxt->spaceTab == NULL) { 1545 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1546 ctxt->spaceMax = 10; 1547 } 1548 if (ctxt->spaceTab == NULL) { 1549 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1550 ctxt->nodeNr = 0; 1551 ctxt->nodeMax = 0; 1552 ctxt->node = NULL; 1553 ctxt->inputNr = 0; 1554 ctxt->inputMax = 0; 1555 ctxt->input = NULL; 1556 ctxt->nameNr = 0; 1557 ctxt->nameMax = 0; 1558 ctxt->name = NULL; 1559 ctxt->spaceNr = 0; 1560 ctxt->spaceMax = 0; 1561 ctxt->space = NULL; 1562 return(-1); 1563 } 1564 ctxt->spaceNr = 1; 1565 ctxt->spaceMax = 10; 1566 ctxt->spaceTab[0] = -1; 1567 ctxt->space = &ctxt->spaceTab[0]; 1568 ctxt->userData = ctxt; 1569 ctxt->myDoc = NULL; 1570 ctxt->wellFormed = 1; 1571 ctxt->nsWellFormed = 1; 1572 ctxt->valid = 1; 1573 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1574 if (ctxt->loadsubset) { 1575 ctxt->options |= XML_PARSE_DTDLOAD; 1576 } 1577 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1578 ctxt->pedantic = 
xmlPedanticParserDefaultValue; 1579 if (ctxt->pedantic) { 1580 ctxt->options |= XML_PARSE_PEDANTIC; 1581 } 1582 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1583 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1584 if (ctxt->keepBlanks == 0) { 1585 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1586 ctxt->options |= XML_PARSE_NOBLANKS; 1587 } 1588 1589 ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT; 1590 ctxt->vctxt.userData = ctxt; 1591 ctxt->vctxt.error = xmlParserValidityError; 1592 ctxt->vctxt.warning = xmlParserValidityWarning; 1593 if (ctxt->validate) { 1594 if (xmlGetWarningsDefaultValue == 0) 1595 ctxt->vctxt.warning = NULL; 1596 else 1597 ctxt->vctxt.warning = xmlParserValidityWarning; 1598 ctxt->vctxt.nodeMax = 0; 1599 ctxt->options |= XML_PARSE_DTDVALID; 1600 } 1601 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1602 if (ctxt->replaceEntities) { 1603 ctxt->options |= XML_PARSE_NOENT; 1604 } 1605 ctxt->record_info = 0; 1606 ctxt->checkIndex = 0; 1607 ctxt->inSubset = 0; 1608 ctxt->errNo = XML_ERR_OK; 1609 ctxt->depth = 0; 1610 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1611 ctxt->catalogs = NULL; 1612 ctxt->nbentities = 0; 1613 ctxt->sizeentities = 0; 1614 ctxt->sizeentcopy = 0; 1615 ctxt->input_id = 1; 1616 xmlInitNodeInfoSeq(&ctxt->node_seq); 1617 return(0); 1618 } 1619 1620 /** 1621 * xmlFreeParserCtxt: 1622 * @ctxt: an XML parser context 1623 * 1624 * Free all the memory used by a parser context. However the parsed 1625 * document in ctxt->myDoc is not freed. 
1626 */ 1627 1628 void 1629 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1630 { 1631 xmlParserInputPtr input; 1632 1633 if (ctxt == NULL) return; 1634 1635 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1636 xmlFreeInputStream(input); 1637 } 1638 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1639 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1640 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1641 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1642 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1643 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1644 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1645 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1646 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1647 #ifdef LIBXML_SAX1_ENABLED 1648 if ((ctxt->sax != NULL) && 1649 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1650 #else 1651 if (ctxt->sax != NULL) 1652 #endif /* LIBXML_SAX1_ENABLED */ 1653 xmlFree(ctxt->sax); 1654 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1655 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1656 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1657 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1658 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1659 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1660 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1661 if (ctxt->attsDefault != NULL) 1662 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 1663 if (ctxt->attsSpecial != NULL) 1664 xmlHashFree(ctxt->attsSpecial, NULL); 1665 if (ctxt->freeElems != NULL) { 1666 xmlNodePtr cur, next; 1667 1668 cur = ctxt->freeElems; 1669 while (cur != NULL) { 1670 next = cur->next; 1671 xmlFree(cur); 1672 cur = next; 1673 } 1674 } 1675 if (ctxt->freeAttrs != NULL) { 1676 xmlAttrPtr cur, next; 1677 1678 cur = ctxt->freeAttrs; 1679 while (cur != NULL) { 1680 
next = cur->next; 1681 xmlFree(cur); 1682 cur = next; 1683 } 1684 } 1685 /* 1686 * cleanup the error strings 1687 */ 1688 if (ctxt->lastError.message != NULL) 1689 xmlFree(ctxt->lastError.message); 1690 if (ctxt->lastError.file != NULL) 1691 xmlFree(ctxt->lastError.file); 1692 if (ctxt->lastError.str1 != NULL) 1693 xmlFree(ctxt->lastError.str1); 1694 if (ctxt->lastError.str2 != NULL) 1695 xmlFree(ctxt->lastError.str2); 1696 if (ctxt->lastError.str3 != NULL) 1697 xmlFree(ctxt->lastError.str3); 1698 1699 #ifdef LIBXML_CATALOG_ENABLED 1700 if (ctxt->catalogs != NULL) 1701 xmlCatalogFreeLocal(ctxt->catalogs); 1702 #endif 1703 xmlFree(ctxt); 1704 } 1705 1706 /** 1707 * xmlNewParserCtxt: 1708 * 1709 * Allocate and initialize a new parser context. 1710 * 1711 * Returns the xmlParserCtxtPtr or NULL 1712 */ 1713 1714 xmlParserCtxtPtr 1715 xmlNewParserCtxt(void) 1716 { 1717 xmlParserCtxtPtr ctxt; 1718 1719 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1720 if (ctxt == NULL) { 1721 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1722 return(NULL); 1723 } 1724 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1725 if (xmlInitParserCtxt(ctxt) < 0) { 1726 xmlFreeParserCtxt(ctxt); 1727 return(NULL); 1728 } 1729 return(ctxt); 1730 } 1731 1732 /************************************************************************ 1733 * * 1734 * Handling of node information * 1735 * * 1736 ************************************************************************/ 1737 1738 /** 1739 * xmlClearParserCtxt: 1740 * @ctxt: an XML parser context 1741 * 1742 * Clear (release owned resources) and reinitialize a parser context 1743 */ 1744 1745 void 1746 xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1747 { 1748 if (ctxt==NULL) 1749 return; 1750 xmlClearNodeInfoSeq(&ctxt->node_seq); 1751 xmlCtxtReset(ctxt); 1752 } 1753 1754 1755 /** 1756 * xmlParserFindNodeInfo: 1757 * @ctx: an XML parser context 1758 * @node: an XML node within the tree 1759 * 1760 * Find the parser node info struct for a given 
node 1761 * 1762 * Returns an xmlParserNodeInfo block pointer or NULL 1763 */ 1764 const xmlParserNodeInfo * 1765 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1766 { 1767 unsigned long pos; 1768 1769 if ((ctx == NULL) || (node == NULL)) 1770 return (NULL); 1771 /* Find position where node should be at */ 1772 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1773 if (pos < ctx->node_seq.length 1774 && ctx->node_seq.buffer[pos].node == node) 1775 return &ctx->node_seq.buffer[pos]; 1776 else 1777 return NULL; 1778 } 1779 1780 1781 /** 1782 * xmlInitNodeInfoSeq: 1783 * @seq: a node info sequence pointer 1784 * 1785 * -- Initialize (set to initial state) node info sequence 1786 */ 1787 void 1788 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1789 { 1790 if (seq == NULL) 1791 return; 1792 seq->length = 0; 1793 seq->maximum = 0; 1794 seq->buffer = NULL; 1795 } 1796 1797 /** 1798 * xmlClearNodeInfoSeq: 1799 * @seq: a node info sequence pointer 1800 * 1801 * -- Clear (release memory and reinitialize) node 1802 * info sequence 1803 */ 1804 void 1805 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1806 { 1807 if (seq == NULL) 1808 return; 1809 if (seq->buffer != NULL) 1810 xmlFree(seq->buffer); 1811 xmlInitNodeInfoSeq(seq); 1812 } 1813 1814 /** 1815 * xmlParserFindNodeInfoIndex: 1816 * @seq: a node info sequence pointer 1817 * @node: an XML node pointer 1818 * 1819 * 1820 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1821 * the given node is or should be at in a sorted sequence 1822 * 1823 * Returns a long indicating the position of the record 1824 */ 1825 unsigned long 1826 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1827 const xmlNodePtr node) 1828 { 1829 unsigned long upper, lower, middle; 1830 int found = 0; 1831 1832 if ((seq == NULL) || (node == NULL)) 1833 return ((unsigned long) -1); 1834 1835 /* Do a binary search for the key */ 1836 lower = 1; 1837 upper = seq->length; 1838 middle = 0; 
1839 while (lower <= upper && !found) { 1840 middle = lower + (upper - lower) / 2; 1841 if (node == seq->buffer[middle - 1].node) 1842 found = 1; 1843 else if (node < seq->buffer[middle - 1].node) 1844 upper = middle - 1; 1845 else 1846 lower = middle + 1; 1847 } 1848 1849 /* Return position */ 1850 if (middle == 0 || seq->buffer[middle - 1].node < node) 1851 return middle; 1852 else 1853 return middle - 1; 1854 } 1855 1856 1857 /** 1858 * xmlParserAddNodeInfo: 1859 * @ctxt: an XML parser context 1860 * @info: a node info sequence pointer 1861 * 1862 * Insert node info record into the sorted sequence 1863 */ 1864 void 1865 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 1866 const xmlParserNodeInfoPtr info) 1867 { 1868 unsigned long pos; 1869 1870 if ((ctxt == NULL) || (info == NULL)) return; 1871 1872 /* Find pos and check to see if node is already in the sequence */ 1873 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 1874 info->node); 1875 1876 if ((pos < ctxt->node_seq.length) && 1877 (ctxt->node_seq.buffer != NULL) && 1878 (ctxt->node_seq.buffer[pos].node == info->node)) { 1879 ctxt->node_seq.buffer[pos] = *info; 1880 } 1881 1882 /* Otherwise, we need to add new node to buffer */ 1883 else { 1884 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || 1885 (ctxt->node_seq.buffer == NULL)) { 1886 xmlParserNodeInfo *tmp_buffer; 1887 unsigned int byte_size; 1888 1889 if (ctxt->node_seq.maximum == 0) 1890 ctxt->node_seq.maximum = 2; 1891 byte_size = (sizeof(*ctxt->node_seq.buffer) * 1892 (2 * ctxt->node_seq.maximum)); 1893 1894 if (ctxt->node_seq.buffer == NULL) 1895 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 1896 else 1897 tmp_buffer = 1898 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 1899 byte_size); 1900 1901 if (tmp_buffer == NULL) { 1902 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 1903 return; 1904 } 1905 ctxt->node_seq.buffer = tmp_buffer; 1906 ctxt->node_seq.maximum *= 2; 1907 } 1908 1909 /* If position is not 
at end, move elements out of the way */ 1910 if (pos != ctxt->node_seq.length) { 1911 unsigned long i; 1912 1913 for (i = ctxt->node_seq.length; i > pos; i--) 1914 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 1915 } 1916 1917 /* Copy element and increase length */ 1918 ctxt->node_seq.buffer[pos] = *info; 1919 ctxt->node_seq.length++; 1920 } 1921 } 1922 1923 /************************************************************************ 1924 * * 1925 * Defaults settings * 1926 * * 1927 ************************************************************************/ 1928 /** 1929 * xmlPedanticParserDefault: 1930 * @val: int 0 or 1 1931 * 1932 * Set and return the previous value for enabling pedantic warnings. 1933 * 1934 * Returns the last value for 0 for no substitution, 1 for substitution. 1935 */ 1936 1937 int 1938 xmlPedanticParserDefault(int val) { 1939 int old = xmlPedanticParserDefaultValue; 1940 1941 xmlPedanticParserDefaultValue = val; 1942 return(old); 1943 } 1944 1945 /** 1946 * xmlLineNumbersDefault: 1947 * @val: int 0 or 1 1948 * 1949 * Set and return the previous value for enabling line numbers in elements 1950 * contents. This may break on old application and is turned off by default. 1951 * 1952 * Returns the last value for 0 for no substitution, 1 for substitution. 1953 */ 1954 1955 int 1956 xmlLineNumbersDefault(int val) { 1957 int old = xmlLineNumbersDefaultValue; 1958 1959 xmlLineNumbersDefaultValue = val; 1960 return(old); 1961 } 1962 1963 /** 1964 * xmlSubstituteEntitiesDefault: 1965 * @val: int 0 or 1 1966 * 1967 * Set and return the previous value for default entity support. 1968 * Initially the parser always keep entity references instead of substituting 1969 * entity values in the output. This function has to be used to change the 1970 * default parser behavior 1971 * SAX::substituteEntities() has to be used for changing that on a file by 1972 * file basis. 1973 * 1974 * Returns the last value for 0 for no substitution, 1 for substitution. 
1975 */ 1976 1977 int 1978 xmlSubstituteEntitiesDefault(int val) { 1979 int old = xmlSubstituteEntitiesDefaultValue; 1980 1981 xmlSubstituteEntitiesDefaultValue = val; 1982 return(old); 1983 } 1984 1985 /** 1986 * xmlKeepBlanksDefault: 1987 * @val: int 0 or 1 1988 * 1989 * Set and return the previous value for default blanks text nodes support. 1990 * The 1.x version of the parser used an heuristic to try to detect 1991 * ignorable white spaces. As a result the SAX callback was generating 1992 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 1993 * using the DOM output text nodes containing those blanks were not generated. 1994 * The 2.x and later version will switch to the XML standard way and 1995 * ignorableWhitespace() are only generated when running the parser in 1996 * validating mode and when the current element doesn't allow CDATA or 1997 * mixed content. 1998 * This function is provided as a way to force the standard behavior 1999 * on 1.X libs and to switch back to the old mode for compatibility when 2000 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2001 * by using xmlIsBlankNode() commodity function to detect the "empty" 2002 * nodes generated. 2003 * This value also affect autogeneration of indentation when saving code 2004 * if blanks sections are kept, indentation is not generated. 2005 * 2006 * Returns the last value for 0 for no substitution, 1 for substitution. 2007 */ 2008 2009 int 2010 xmlKeepBlanksDefault(int val) { 2011 int old = xmlKeepBlanksDefaultValue; 2012 2013 xmlKeepBlanksDefaultValue = val; 2014 if (!val) xmlIndentTreeOutput = 1; 2015 return(old); 2016 } 2017 2018