/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */

#define IN_LIBXML
#include "libxml.h"

#include <string.h>
#include <limits.h>

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef LIBXML_ICONV_ENABLED
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#endif
#include <libxml/encoding.h>
#include <libxml/xmlmemory.h>
#ifdef LIBXML_HTML_ENABLED
#include <libxml/HTMLparser.h>
#endif
#include <libxml/globals.h>
#include <libxml/xmlerror.h>

#include "buf.h"
#include "enc.h"

/* Cached handlers for the two UTF-16 byte orders; both start out unset. */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/* One entry of the alias table: @alias is an alternate spelling of @name. */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/* Dynamically grown alias table: storage, entries in use, entries allocated. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Byte-order flag consulted by the UTF-16 converters below; defaults to 1.
 * NOTE(review): presumably corrected at library initialisation on
 * big-endian hosts -- that code is not in this part of the file, confirm.
 */
static int xmlLittleEndian = 1;

/**
 * xmlEncodingErrMemory:
 * @extra:  extra information
 *
 * Handle an out of memory condition by raising XML_ERR_NO_MEMORY in the
 * XML_FROM_I18N domain.
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}

/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message, used as the format string
 * @val:  a string forwarded to __xmlRaiseError() both as an extra string
 *        and as the argument consumed by @msg
 *
 * Handle an encoding error: raised as XML_ERR_FATAL in the XML_FROM_I18N
 * domain.
 */
static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}

#ifdef LIBXML_ICU_ENABLED
/*
 * openIcuConverter:
 * @name:  the ICU converter name to open
 * @toUnicode:  non-zero to install the "stop" callback on the to-Unicode
 *              side, zero to install it on the from-Unicode side
 *
 * Allocates a uconv_t, opens the ICU converter @name plus a "UTF-8"
 * converter, and points both pivot cursors at the start of the pivot
 * buffer.
 *
 * Returns the new converter pair, or NULL if the allocation or any ICU
 * call fails (everything acquired so far is released).
 */
static uconv_t*
openIcuConverter(const char* name, int toUnicode)
{
  UErrorCode status = U_ZERO_ERROR;
  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
  if (conv == NULL)
    return NULL;

  conv->pivot_source = conv->pivot_buf;
  conv->pivot_target = conv->pivot_buf;

  conv->uconv = ucnv_open(name, &status);
  if (U_FAILURE(status))
    goto error;

  /* Make the converter stop on bad input instead of substituting. */
  status = U_ZERO_ERROR;
  if (toUnicode) {
    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
                        NULL, NULL, NULL, &status);
  }
  else {
    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
                        NULL, NULL, NULL, &status);
  }
  if (U_FAILURE(status))
    goto error;

  status = U_ZERO_ERROR;
  conv->utf8 = ucnv_open("UTF-8", &status);
  if (U_SUCCESS(status))
    return conv;

error:
  /* conv->uconv has always been assigned by the time we get here. */
  if (conv->uconv)
    ucnv_close(conv->uconv);
  xmlFree(conv);
  return NULL;
}

/*
 * closeIcuConverter:
 * @conv:  a converter pair from openIcuConverter(), or NULL
 *
 * Closes both ICU converters and frees @conv. A NULL @conv is ignored.
 */
static void
closeIcuConverter(uconv_t *conv)
{
  if (conv != NULL) {
    ucnv_close(conv->uconv);
    ucnv_close(conv->utf8);
    xmlFree(conv);
  }
}
#endif /* LIBXML_ICU_ENABLED */
/************************************************************************
 *									*
 *		Conversions To/From UTF8 encoding			*
 *									*
 ************************************************************************/

/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII maps byte-for-byte onto UTF-8, so the whole
 * output buffer can be used: conversion stops at whichever of @in or
 * @out is exhausted first.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlen
 *     is NULL or if a byte >= 0x80 is met. A NULL @in is treated as an
 *     initialisation call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On a -1 caused by a non-ASCII byte both lengths describe the valid
 * prefix that was converted before it.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    int limit;
    int done;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    /* One output byte per input byte: the smaller buffer bounds the copy. */
    limit = (*inlen < *outlen) ? *inlen : *outlen;

    for (done = 0; done < limit; done++) {
        if (in[done] >= 0x80) {
            *outlen = done;
            *inlen = done;
            return(-1);
        }
        out[done] = in[done];
    }

    /* A negative length never enters the loop, leaving done == 0. */
    *outlen = done;
    *inlen = done;
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Only single-byte sequences can be represented in ASCII, so every
 * complete multi-byte sequence -- well formed or not -- is a transcoding
 * failure. A multi-byte sequence cut off by the end of @in is not an
 * error: conversion stops in front of it so the caller can supply the
 * remaining bytes later.
 *
 * Returns the number of bytes written, -2 if the transcoding fails, or -1
 *     if @out, @outlen or @inlen is NULL. A NULL @in is an initialisation
 *     call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned char* const outstart = out;
    unsigned char* outend;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned int lead = *in;
        int trailing;

        if (lead < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) lead;
            in++;
            continue;
        }

        /* Stray continuation byte, or a lead byte UTF-8 never uses. */
        if ((lead < 0xC0) || (lead >= 0xF8)) {
            ret = -2;
            break;
        }

        if (lead < 0xE0)
            trailing = 1;
        else if (lead < 0xF0)
            trailing = 2;
        else
            trailing = 3;

        /* Sequence split across calls: stop in front of it, no error. */
        if (inend - in - 1 < trailing)
            break;

        /*
         * A complete multi-byte sequence is either a code point above
         * 0x7F, an overlong form, or malformed: none of them is ASCII.
         */
        ret = -2;
        break;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    if (ret < 0)
        return(ret);
    return(*outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied; every other byte
 * becomes a two-byte UTF-8 sequence. Conversion stops at the end of @in
 * or in front of the first character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* const outstart = out;
    const unsigned char* const base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) c;
        } else {
            /* Compare remaining room, never form a pointer below @out. */
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}

/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Identity "conversion" used for UTF-8 handling: copies as many bytes as
 * both buffers allow, without looking at character boundaries.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or if the smaller of the two lengths is negative. A NULL
 *     @inb is an initialisation call: both lengths are set to 0 and 0 is
 *     returned.
 * The values of *outlen and *inlenb after a successful return are both
 * the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* Copy the smaller of "bytes available" and "room available". */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}


#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Only U+0000..U+00FF can be represented. Code points above that range,
 * stray or invalid continuation bytes, and overlong encodings (a
 * multi-byte form of a value that has a shorter encoding) are transcoding
 * failures. A sequence cut off by the end of @in is not an error:
 * conversion stops in front of it.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialisation call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned char* const outstart = out;
    unsigned char* outend;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned int c = in[0];
        int trailing;
        int i;

        if (c < 0x80) {
            trailing = 0;
        } else if ((c < 0xC0) || (c >= 0xF8)) {
            /* trailing byte in leading position, or not a UTF-8 lead */
            failed = 1;
            break;
        } else if (c < 0xE0) {
            c &= 0x1F;
            trailing = 1;
        } else if (c < 0xF0) {
            c &= 0x0F;
            trailing = 2;
        } else {
            c &= 0x07;
            trailing = 3;
        }

        /* Sequence split across calls: stop in front of it, no error. */
        if (inend - in - 1 < trailing)
            break;

        for (i = 1; i <= trailing; i++) {
            if ((in[i] & 0xC0) != 0x80)
                break;
            c = (c << 6) | (in[i] & 0x3F);
        }

        /*
         * Reject bad continuation bytes, values outside Latin 1, and
         * overlong forms: a Latin 1 value is either one byte (< 0x80) or
         * exactly two bytes (0x80..0xFF).
         */
        if ((i <= trailing) || (c > 0xFF) || (trailing > 1) ||
            ((trailing == 1) && (c < 0x80))) {
            failed = 1;
            break;
        }

        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        in += trailing + 1;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    if (failed)
        return(-2);
    return(*outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so @inb
 * needs no particular alignment and the result does not depend on the
 * byte order of the host.
 *
 * A character is only written when its whole UTF-8 sequence fits in
 * @out. A trailing odd byte, or a high surrogate whose partner has not
 * arrived yet, is left unconsumed for the next call.
 *
 * Returns the number of bytes written, -2 if the transcoding fails (a
 *     high surrogate not followed by a low surrogate), or -1 if @out,
 *     @outlen or @inlenb is NULL. A NULL @inb is an initialisation call:
 *     both lengths are set to 0 and 0 is returned.
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* const outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    int nbytes;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* Only whole 16-bit units are looked at; negative means empty. */
    nbytes = (*inlenb > 0) ? (*inlenb - (*inlenb % 2)) : 0;
    inend = in + nbytes;
    outend = out + *outlen;

    while (in < inend) {
        const unsigned char* next = in + 2;
        unsigned int c = in[0] | ((unsigned int) in[1] << 8);
        int need;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            unsigned int d;

            if (next >= inend)           /* pair split across calls */
                break;
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                failed = 1;
                break;
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is now a single code point: emit it only if it fits whole. */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = (unsigned char) c;
        } else if (need == 2) {
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else if (need == 3) {
            *out++ = (unsigned char) (0xE0 | (c >> 12));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else {
            *out++ = (unsigned char) (0xF0 | (c >> 18));
            *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    if (failed)
        return(-2);
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the byte
 * order of the host. No byte order mark is produced.
 *
 * A sequence cut off by the end of @in, or a character that no longer
 * fits in @outb, is left unconsumed for the next call.
 * NOTE: overlong forms and UTF-8 encoded surrogates are not rejected.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (invalid lead or continuation byte, or a value above U+10FFFF), or
 *     -1 if @outb, @outlen or @inlen is NULL. A NULL @in is an
 *     initialisation call: both lengths are set to 0 and 0 is returned.
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned char* out = outb;
    unsigned char* outend;
    int failed = 0;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + ((*inlen > 0) ? *inlen : 0);
    /* Only whole 16-bit units can be written. */
    outend = outb + ((*outlen > 0) ? (*outlen - (*outlen % 2)) : 0);

    while (in < inend) {
        unsigned int c = in[0];
        int trailing;
        int i;

        if (c < 0x80) {
            trailing = 0;
        } else if ((c < 0xC0) || (c >= 0xF8)) {
            /* trailing byte in leading position, or not a UTF-8 lead */
            failed = 1;
            break;
        } else if (c < 0xE0) {
            c &= 0x1F;
            trailing = 1;
        } else if (c < 0xF0) {
            c &= 0x0F;
            trailing = 2;
        } else {
            c &= 0x07;
            trailing = 3;
        }

        /* Sequence split across calls: stop in front of it, no error. */
        if (inend - in - 1 < trailing)
            break;

        for (i = 1; i <= trailing; i++) {
            if ((in[i] & 0xC0) != 0x80)
                break;
            c = (c << 6) | (in[i] & 0x3F);
        }
        if ((i <= trailing) || (c >= 0x110000)) {
            failed = 1;
            break;
        }

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        }
        in += trailing + 1;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    if (failed)
        return(-2);
    return(*outlen);
}

/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. The output is little-endian: a NULL @in is the
 * initialisation call and writes the bytes FF FE (the UTF-16LE byte
 * order mark) when @outb has room for them; any other call is forwarded
 * to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -2 if the transcoding failed, or
 *     -1 if @outb, @outlen or @inlen is NULL.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so @inb
 * needs no particular alignment and the result does not depend on the
 * byte order of the host.
 *
 * A character is only written when its whole UTF-8 sequence fits in
 * @out, so the output never ends in the middle of a character. A
 * trailing odd byte, or a high surrogate whose partner has not arrived
 * yet, is left unconsumed for the next call.
 *
 * Returns the number of bytes written, -2 if the transcoding fails (a
 *     high surrogate not followed by a low surrogate), or -1 if @out,
 *     @outlen or @inlenb is NULL. A NULL @inb is an initialisation call:
 *     both lengths are set to 0 and 0 is returned.
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* const outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    int nbytes;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* Only whole 16-bit units are looked at; negative means empty. */
    nbytes = (*inlenb > 0) ? (*inlenb - (*inlenb % 2)) : 0;
    inend = in + nbytes;
    outend = out + *outlen;

    while (in < inend) {
        const unsigned char* next = in + 2;
        unsigned int c = ((unsigned int) in[0] << 8) | in[1];
        int need;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            unsigned int d;

            if (next >= inend)           /* pair split across calls */
                break;
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                failed = 1;
                break;
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is now a single code point: emit it only if it fits whole. */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = (unsigned char) c;
        } else if (need == 2) {
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else if (need == 3) {
            *out++ = (unsigned char) (0xE0 | (c >> 12));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else {
            *out++ = (unsigned char) (0xF0 | (c >> 18));
            *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    if (failed)
        return(-2);
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the byte
 * order of the host. No byte order mark is produced.
 *
 * A sequence cut off by the end of @in, or a character that no longer
 * fits in @outb, is left unconsumed for the next call.
 * NOTE: overlong forms and UTF-8 encoded surrogates are not rejected.
 *
 * Returns the number of bytes written, -2 if the transcoding failed
 *     (invalid lead or continuation byte, or a value above U+10FFFF), or
 *     -1 if @outb, @outlen or @inlen is NULL. A NULL @in is an
 *     initialisation call: both lengths are set to 0 and 0 is returned.
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced,
 * on the error path as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    const unsigned char* const instart = in;
    const unsigned char* inend;
    unsigned char* out = outb;
    unsigned char* outend;
    int failed = 0;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + ((*inlen > 0) ? *inlen : 0);
    /* Only whole 16-bit units can be written. */
    outend = outb + ((*outlen > 0) ? (*outlen - (*outlen % 2)) : 0);

    while (in < inend) {
        unsigned int c = in[0];
        int trailing;
        int i;

        if (c < 0x80) {
            trailing = 0;
        } else if ((c < 0xC0) || (c >= 0xF8)) {
            /* trailing byte in leading position, or not a UTF-8 lead */
            failed = 1;
            break;
        } else if (c < 0xE0) {
            c &= 0x1F;
            trailing = 1;
        } else if (c < 0xF0) {
            c &= 0x0F;
            trailing = 2;
        } else {
            c &= 0x07;
            trailing = 3;
        }

        /* Sequence split across calls: stop in front of it, no error. */
        if (inend - in - 1 < trailing)
            break;

        for (i = 1; i <= trailing; i++) {
            if ((in[i] & 0xC0) != 0x80)
                break;
            c = (c << 6) | (in[i] & 0x3F);
        }
        if ((i <= trailing) || (c >= 0x110000)) {
            failed = 1;
            break;
        }

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        }
        in += trailing + 1;
    }
    /* Byte counts, matching UTF8ToUTF16LE, on every exit path. */
    *outlen = out - outb;
    *inlen = in - instart;
    if (failed)
        return(-2);
    return(*outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */

930 */ 931 xmlCharEncoding 932 xmlDetectCharEncoding(const unsigned char* in, int len) 933 { 934 if (in == NULL) 935 return(XML_CHAR_ENCODING_NONE); 936 if (len >= 4) { 937 if ((in[0] == 0x00) && (in[1] == 0x00) && 938 (in[2] == 0x00) && (in[3] == 0x3C)) 939 return(XML_CHAR_ENCODING_UCS4BE); 940 if ((in[0] == 0x3C) && (in[1] == 0x00) && 941 (in[2] == 0x00) && (in[3] == 0x00)) 942 return(XML_CHAR_ENCODING_UCS4LE); 943 if ((in[0] == 0x00) && (in[1] == 0x00) && 944 (in[2] == 0x3C) && (in[3] == 0x00)) 945 return(XML_CHAR_ENCODING_UCS4_2143); 946 if ((in[0] == 0x00) && (in[1] == 0x3C) && 947 (in[2] == 0x00) && (in[3] == 0x00)) 948 return(XML_CHAR_ENCODING_UCS4_3412); 949 if ((in[0] == 0x4C) && (in[1] == 0x6F) && 950 (in[2] == 0xA7) && (in[3] == 0x94)) 951 return(XML_CHAR_ENCODING_EBCDIC); 952 if ((in[0] == 0x3C) && (in[1] == 0x3F) && 953 (in[2] == 0x78) && (in[3] == 0x6D)) 954 return(XML_CHAR_ENCODING_UTF8); 955 /* 956 * Although not part of the recommendation, we also 957 * attempt an "auto-recognition" of UTF-16LE and 958 * UTF-16BE encodings. 
959 */ 960 if ((in[0] == 0x3C) && (in[1] == 0x00) && 961 (in[2] == 0x3F) && (in[3] == 0x00)) 962 return(XML_CHAR_ENCODING_UTF16LE); 963 if ((in[0] == 0x00) && (in[1] == 0x3C) && 964 (in[2] == 0x00) && (in[3] == 0x3F)) 965 return(XML_CHAR_ENCODING_UTF16BE); 966 } 967 if (len >= 3) { 968 /* 969 * Errata on XML-1.0 June 20 2001 970 * We now allow an UTF8 encoded BOM 971 */ 972 if ((in[0] == 0xEF) && (in[1] == 0xBB) && 973 (in[2] == 0xBF)) 974 return(XML_CHAR_ENCODING_UTF8); 975 } 976 /* For UTF-16 we can recognize by the BOM */ 977 if (len >= 2) { 978 if ((in[0] == 0xFE) && (in[1] == 0xFF)) 979 return(XML_CHAR_ENCODING_UTF16BE); 980 if ((in[0] == 0xFF) && (in[1] == 0xFE)) 981 return(XML_CHAR_ENCODING_UTF16LE); 982 } 983 return(XML_CHAR_ENCODING_NONE); 984 } 985 986 /** 987 * xmlCleanupEncodingAliases: 988 * 989 * Unregisters all aliases 990 */ 991 void 992 xmlCleanupEncodingAliases(void) { 993 int i; 994 995 if (xmlCharEncodingAliases == NULL) 996 return; 997 998 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 999 if (xmlCharEncodingAliases[i].name != NULL) 1000 xmlFree((char *) xmlCharEncodingAliases[i].name); 1001 if (xmlCharEncodingAliases[i].alias != NULL) 1002 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1003 } 1004 xmlCharEncodingAliasesNb = 0; 1005 xmlCharEncodingAliasesMax = 0; 1006 xmlFree(xmlCharEncodingAliases); 1007 xmlCharEncodingAliases = NULL; 1008 } 1009 1010 /** 1011 * xmlGetEncodingAlias: 1012 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1013 * 1014 * Lookup an encoding name for the given alias. 
1015 * 1016 * Returns NULL if not found, otherwise the original name 1017 */ 1018 const char * 1019 xmlGetEncodingAlias(const char *alias) { 1020 int i; 1021 char upper[100]; 1022 1023 if (alias == NULL) 1024 return(NULL); 1025 1026 if (xmlCharEncodingAliases == NULL) 1027 return(NULL); 1028 1029 for (i = 0;i < 99;i++) { 1030 upper[i] = toupper(alias[i]); 1031 if (upper[i] == 0) break; 1032 } 1033 upper[i] = 0; 1034 1035 /* 1036 * Walk down the list looking for a definition of the alias 1037 */ 1038 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1039 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1040 return(xmlCharEncodingAliases[i].name); 1041 } 1042 } 1043 return(NULL); 1044 } 1045 1046 /** 1047 * xmlAddEncodingAlias: 1048 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1049 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1050 * 1051 * Registers an alias @alias for an encoding named @name. Existing alias 1052 * will be overwritten. 1053 * 1054 * Returns 0 in case of success, -1 in case of error 1055 */ 1056 int 1057 xmlAddEncodingAlias(const char *name, const char *alias) { 1058 int i; 1059 char upper[100]; 1060 1061 if ((name == NULL) || (alias == NULL)) 1062 return(-1); 1063 1064 for (i = 0;i < 99;i++) { 1065 upper[i] = toupper(alias[i]); 1066 if (upper[i] == 0) break; 1067 } 1068 upper[i] = 0; 1069 1070 if (xmlCharEncodingAliases == NULL) { 1071 xmlCharEncodingAliasesNb = 0; 1072 xmlCharEncodingAliasesMax = 20; 1073 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1074 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1075 if (xmlCharEncodingAliases == NULL) 1076 return(-1); 1077 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1078 xmlCharEncodingAliasesMax *= 2; 1079 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1080 xmlRealloc(xmlCharEncodingAliases, 1081 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1082 } 1083 /* 1084 * Walk down the list looking 
for a definition of the alias 1085 */ 1086 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1087 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1088 /* 1089 * Replace the definition. 1090 */ 1091 xmlFree((char *) xmlCharEncodingAliases[i].name); 1092 xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1093 return(0); 1094 } 1095 } 1096 /* 1097 * Add the definition 1098 */ 1099 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1100 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1101 xmlCharEncodingAliasesNb++; 1102 return(0); 1103 } 1104 1105 /** 1106 * xmlDelEncodingAlias: 1107 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1108 * 1109 * Unregisters an encoding alias @alias 1110 * 1111 * Returns 0 in case of success, -1 in case of error 1112 */ 1113 int 1114 xmlDelEncodingAlias(const char *alias) { 1115 int i; 1116 1117 if (alias == NULL) 1118 return(-1); 1119 1120 if (xmlCharEncodingAliases == NULL) 1121 return(-1); 1122 /* 1123 * Walk down the list looking for a definition of the alias 1124 */ 1125 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1126 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1127 xmlFree((char *) xmlCharEncodingAliases[i].name); 1128 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1129 xmlCharEncodingAliasesNb--; 1130 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1131 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1132 return(0); 1133 } 1134 } 1135 return(-1); 1136 } 1137 1138 /** 1139 * xmlParseCharEncoding: 1140 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1141 * 1142 * Compare the string to the encoding schemes already known. Note 1143 * that the comparison is case insensitive accordingly to the section 1144 * [XML] 4.3.3 Character Encoding in Entities. 1145 * 1146 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1147 * if not recognized. 
1148 */ 1149 xmlCharEncoding 1150 xmlParseCharEncoding(const char* name) 1151 { 1152 const char *alias; 1153 char upper[500]; 1154 int i; 1155 1156 if (name == NULL) 1157 return(XML_CHAR_ENCODING_NONE); 1158 1159 /* 1160 * Do the alias resolution 1161 */ 1162 alias = xmlGetEncodingAlias(name); 1163 if (alias != NULL) 1164 name = alias; 1165 1166 for (i = 0;i < 499;i++) { 1167 upper[i] = toupper(name[i]); 1168 if (upper[i] == 0) break; 1169 } 1170 upper[i] = 0; 1171 1172 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1173 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1174 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1175 1176 /* 1177 * NOTE: if we were able to parse this, the endianness of UTF16 is 1178 * already found and in use 1179 */ 1180 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1181 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1182 1183 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1184 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1185 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1186 1187 /* 1188 * NOTE: if we were able to parse this, the endianness of UCS4 is 1189 * already found and in use 1190 */ 1191 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1192 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1193 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1194 1195 1196 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1197 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1198 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1199 1200 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1201 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1202 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1203 1204 if (!strcmp(upper, "ISO-8859-3")) 
return(XML_CHAR_ENCODING_8859_3); 1205 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1206 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1207 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1208 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1209 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1210 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1211 1212 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1213 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1214 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1215 1216 #ifdef DEBUG_ENCODING 1217 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1218 #endif 1219 return(XML_CHAR_ENCODING_ERROR); 1220 } 1221 1222 /** 1223 * xmlGetCharEncodingName: 1224 * @enc: the encoding 1225 * 1226 * The "canonical" name for XML encoding. 1227 * C.f. http://www.w3.org/TR/REC-xml#charencoding 1228 * Section 4.3.3 Character Encoding in Entities 1229 * 1230 * Returns the canonical name for the given encoding 1231 */ 1232 1233 const char* 1234 xmlGetCharEncodingName(xmlCharEncoding enc) { 1235 switch (enc) { 1236 case XML_CHAR_ENCODING_ERROR: 1237 return(NULL); 1238 case XML_CHAR_ENCODING_NONE: 1239 return(NULL); 1240 case XML_CHAR_ENCODING_UTF8: 1241 return("UTF-8"); 1242 case XML_CHAR_ENCODING_UTF16LE: 1243 return("UTF-16"); 1244 case XML_CHAR_ENCODING_UTF16BE: 1245 return("UTF-16"); 1246 case XML_CHAR_ENCODING_EBCDIC: 1247 return("EBCDIC"); 1248 case XML_CHAR_ENCODING_UCS4LE: 1249 return("ISO-10646-UCS-4"); 1250 case XML_CHAR_ENCODING_UCS4BE: 1251 return("ISO-10646-UCS-4"); 1252 case XML_CHAR_ENCODING_UCS4_2143: 1253 return("ISO-10646-UCS-4"); 1254 case XML_CHAR_ENCODING_UCS4_3412: 1255 return("ISO-10646-UCS-4"); 1256 case XML_CHAR_ENCODING_UCS2: 1257 return("ISO-10646-UCS-2"); 1258 case XML_CHAR_ENCODING_8859_1: 1259 
return("ISO-8859-1"); 1260 case XML_CHAR_ENCODING_8859_2: 1261 return("ISO-8859-2"); 1262 case XML_CHAR_ENCODING_8859_3: 1263 return("ISO-8859-3"); 1264 case XML_CHAR_ENCODING_8859_4: 1265 return("ISO-8859-4"); 1266 case XML_CHAR_ENCODING_8859_5: 1267 return("ISO-8859-5"); 1268 case XML_CHAR_ENCODING_8859_6: 1269 return("ISO-8859-6"); 1270 case XML_CHAR_ENCODING_8859_7: 1271 return("ISO-8859-7"); 1272 case XML_CHAR_ENCODING_8859_8: 1273 return("ISO-8859-8"); 1274 case XML_CHAR_ENCODING_8859_9: 1275 return("ISO-8859-9"); 1276 case XML_CHAR_ENCODING_2022_JP: 1277 return("ISO-2022-JP"); 1278 case XML_CHAR_ENCODING_SHIFT_JIS: 1279 return("Shift-JIS"); 1280 case XML_CHAR_ENCODING_EUC_JP: 1281 return("EUC-JP"); 1282 case XML_CHAR_ENCODING_ASCII: 1283 return(NULL); 1284 } 1285 return(NULL); 1286 } 1287 1288 /************************************************************************ 1289 * * 1290 * Char encoding handlers * 1291 * * 1292 ************************************************************************/ 1293 1294 1295 /* the size should be growable, but it's not a big deal ... */ 1296 #define MAX_ENCODING_HANDLERS 50 1297 static xmlCharEncodingHandlerPtr *handlers = NULL; 1298 static int nbCharEncodingHandler = 0; 1299 1300 /* 1301 * The default is UTF-8 for XML, that's also the default used for the 1302 * parser internals, so the default encoding handler is NULL 1303 */ 1304 1305 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1306 1307 /** 1308 * xmlNewCharEncodingHandler: 1309 * @name: the encoding name, in UTF-8 format (ASCII actually) 1310 * @input: the xmlCharEncodingInputFunc to read that encoding 1311 * @output: the xmlCharEncodingOutputFunc to write that encoding 1312 * 1313 * Create and registers an xmlCharEncodingHandler. 1314 * 1315 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 
1316 */ 1317 xmlCharEncodingHandlerPtr 1318 xmlNewCharEncodingHandler(const char *name, 1319 xmlCharEncodingInputFunc input, 1320 xmlCharEncodingOutputFunc output) { 1321 xmlCharEncodingHandlerPtr handler; 1322 const char *alias; 1323 char upper[500]; 1324 int i; 1325 char *up = NULL; 1326 1327 /* 1328 * Do the alias resolution 1329 */ 1330 alias = xmlGetEncodingAlias(name); 1331 if (alias != NULL) 1332 name = alias; 1333 1334 /* 1335 * Keep only the uppercase version of the encoding. 1336 */ 1337 if (name == NULL) { 1338 xmlEncodingErr(XML_I18N_NO_NAME, 1339 "xmlNewCharEncodingHandler : no name !\n", NULL); 1340 return(NULL); 1341 } 1342 for (i = 0;i < 499;i++) { 1343 upper[i] = toupper(name[i]); 1344 if (upper[i] == 0) break; 1345 } 1346 upper[i] = 0; 1347 up = xmlMemStrdup(upper); 1348 if (up == NULL) { 1349 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1350 return(NULL); 1351 } 1352 1353 /* 1354 * allocate and fill-up an handler block. 1355 */ 1356 handler = (xmlCharEncodingHandlerPtr) 1357 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1358 if (handler == NULL) { 1359 xmlFree(up); 1360 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1361 return(NULL); 1362 } 1363 memset(handler, 0, sizeof(xmlCharEncodingHandler)); 1364 handler->input = input; 1365 handler->output = output; 1366 handler->name = up; 1367 1368 #ifdef LIBXML_ICONV_ENABLED 1369 handler->iconv_in = NULL; 1370 handler->iconv_out = NULL; 1371 #endif 1372 #ifdef LIBXML_ICU_ENABLED 1373 handler->uconv_in = NULL; 1374 handler->uconv_out = NULL; 1375 #endif 1376 1377 /* 1378 * registers and returns the handler. 
1379 */ 1380 xmlRegisterCharEncodingHandler(handler); 1381 #ifdef DEBUG_ENCODING 1382 xmlGenericError(xmlGenericErrorContext, 1383 "Registered encoding handler for %s\n", name); 1384 #endif 1385 return(handler); 1386 } 1387 1388 /** 1389 * xmlInitCharEncodingHandlers: 1390 * 1391 * Initialize the char encoding support, it registers the default 1392 * encoding supported. 1393 * NOTE: while public, this function usually doesn't need to be called 1394 * in normal processing. 1395 */ 1396 void 1397 xmlInitCharEncodingHandlers(void) { 1398 unsigned short int tst = 0x1234; 1399 unsigned char *ptr = (unsigned char *) &tst; 1400 1401 if (handlers != NULL) return; 1402 1403 handlers = (xmlCharEncodingHandlerPtr *) 1404 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1405 1406 if (*ptr == 0x12) xmlLittleEndian = 0; 1407 else if (*ptr == 0x34) xmlLittleEndian = 1; 1408 else { 1409 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1410 "Odd problem at endianness detection\n", NULL); 1411 } 1412 1413 if (handlers == NULL) { 1414 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1415 return; 1416 } 1417 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1418 #ifdef LIBXML_OUTPUT_ENABLED 1419 xmlUTF16LEHandler = 1420 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1421 xmlUTF16BEHandler = 1422 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1423 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1424 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1425 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1426 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1427 #ifdef LIBXML_HTML_ENABLED 1428 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1429 #endif 1430 #else 1431 xmlUTF16LEHandler = 1432 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1433 xmlUTF16BEHandler = 1434 xmlNewCharEncodingHandler("UTF-16BE", 
UTF16BEToUTF8, NULL); 1435 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1436 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1437 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1438 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1439 #endif /* LIBXML_OUTPUT_ENABLED */ 1440 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 1441 #ifdef LIBXML_ISO8859X_ENABLED 1442 xmlRegisterCharEncodingHandlersISO8859x (); 1443 #endif 1444 #endif 1445 1446 } 1447 1448 /** 1449 * xmlCleanupCharEncodingHandlers: 1450 * 1451 * Cleanup the memory allocated for the char encoding support, it 1452 * unregisters all the encoding handlers and the aliases. 1453 */ 1454 void 1455 xmlCleanupCharEncodingHandlers(void) { 1456 xmlCleanupEncodingAliases(); 1457 1458 if (handlers == NULL) return; 1459 1460 for (;nbCharEncodingHandler > 0;) { 1461 nbCharEncodingHandler--; 1462 if (handlers[nbCharEncodingHandler] != NULL) { 1463 if (handlers[nbCharEncodingHandler]->name != NULL) 1464 xmlFree(handlers[nbCharEncodingHandler]->name); 1465 xmlFree(handlers[nbCharEncodingHandler]); 1466 } 1467 } 1468 xmlFree(handlers); 1469 handlers = NULL; 1470 nbCharEncodingHandler = 0; 1471 xmlDefaultCharEncodingHandler = NULL; 1472 } 1473 1474 /** 1475 * xmlRegisterCharEncodingHandler: 1476 * @handler: the xmlCharEncodingHandlerPtr handler block 1477 * 1478 * Register the char encoding handler, surprising, isn't it ? 
1479 */ 1480 void 1481 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1482 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1483 if ((handler == NULL) || (handlers == NULL)) { 1484 xmlEncodingErr(XML_I18N_NO_HANDLER, 1485 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1486 return; 1487 } 1488 1489 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1490 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1491 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1492 "MAX_ENCODING_HANDLERS"); 1493 return; 1494 } 1495 handlers[nbCharEncodingHandler++] = handler; 1496 } 1497 1498 /** 1499 * xmlGetCharEncodingHandler: 1500 * @enc: an xmlCharEncoding value. 1501 * 1502 * Search in the registered set the handler able to read/write that encoding. 1503 * 1504 * Returns the handler or NULL if not found 1505 */ 1506 xmlCharEncodingHandlerPtr 1507 xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1508 xmlCharEncodingHandlerPtr handler; 1509 1510 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1511 switch (enc) { 1512 case XML_CHAR_ENCODING_ERROR: 1513 return(NULL); 1514 case XML_CHAR_ENCODING_NONE: 1515 return(NULL); 1516 case XML_CHAR_ENCODING_UTF8: 1517 return(NULL); 1518 case XML_CHAR_ENCODING_UTF16LE: 1519 return(xmlUTF16LEHandler); 1520 case XML_CHAR_ENCODING_UTF16BE: 1521 return(xmlUTF16BEHandler); 1522 case XML_CHAR_ENCODING_EBCDIC: 1523 handler = xmlFindCharEncodingHandler("EBCDIC"); 1524 if (handler != NULL) return(handler); 1525 handler = xmlFindCharEncodingHandler("ebcdic"); 1526 if (handler != NULL) return(handler); 1527 handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1528 if (handler != NULL) return(handler); 1529 handler = xmlFindCharEncodingHandler("IBM-037"); 1530 if (handler != NULL) return(handler); 1531 break; 1532 case XML_CHAR_ENCODING_UCS4BE: 1533 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1534 if (handler != NULL) return(handler); 1535 handler = xmlFindCharEncodingHandler("UCS-4"); 1536 
if (handler != NULL) return(handler); 1537 handler = xmlFindCharEncodingHandler("UCS4"); 1538 if (handler != NULL) return(handler); 1539 break; 1540 case XML_CHAR_ENCODING_UCS4LE: 1541 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1542 if (handler != NULL) return(handler); 1543 handler = xmlFindCharEncodingHandler("UCS-4"); 1544 if (handler != NULL) return(handler); 1545 handler = xmlFindCharEncodingHandler("UCS4"); 1546 if (handler != NULL) return(handler); 1547 break; 1548 case XML_CHAR_ENCODING_UCS4_2143: 1549 break; 1550 case XML_CHAR_ENCODING_UCS4_3412: 1551 break; 1552 case XML_CHAR_ENCODING_UCS2: 1553 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1554 if (handler != NULL) return(handler); 1555 handler = xmlFindCharEncodingHandler("UCS-2"); 1556 if (handler != NULL) return(handler); 1557 handler = xmlFindCharEncodingHandler("UCS2"); 1558 if (handler != NULL) return(handler); 1559 break; 1560 1561 /* 1562 * We used to keep ISO Latin encodings native in the 1563 * generated data. This led to so many problems that 1564 * this has been removed. 
One can still change this 1565 * back by registering no-ops encoders for those 1566 */ 1567 case XML_CHAR_ENCODING_8859_1: 1568 handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1569 if (handler != NULL) return(handler); 1570 break; 1571 case XML_CHAR_ENCODING_8859_2: 1572 handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1573 if (handler != NULL) return(handler); 1574 break; 1575 case XML_CHAR_ENCODING_8859_3: 1576 handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1577 if (handler != NULL) return(handler); 1578 break; 1579 case XML_CHAR_ENCODING_8859_4: 1580 handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1581 if (handler != NULL) return(handler); 1582 break; 1583 case XML_CHAR_ENCODING_8859_5: 1584 handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1585 if (handler != NULL) return(handler); 1586 break; 1587 case XML_CHAR_ENCODING_8859_6: 1588 handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1589 if (handler != NULL) return(handler); 1590 break; 1591 case XML_CHAR_ENCODING_8859_7: 1592 handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1593 if (handler != NULL) return(handler); 1594 break; 1595 case XML_CHAR_ENCODING_8859_8: 1596 handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1597 if (handler != NULL) return(handler); 1598 break; 1599 case XML_CHAR_ENCODING_8859_9: 1600 handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1601 if (handler != NULL) return(handler); 1602 break; 1603 1604 1605 case XML_CHAR_ENCODING_2022_JP: 1606 handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1607 if (handler != NULL) return(handler); 1608 break; 1609 case XML_CHAR_ENCODING_SHIFT_JIS: 1610 handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1611 if (handler != NULL) return(handler); 1612 handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1613 if (handler != NULL) return(handler); 1614 handler = xmlFindCharEncodingHandler("Shift_JIS"); 1615 if (handler != NULL) return(handler); 1616 break; 1617 case XML_CHAR_ENCODING_EUC_JP: 1618 handler = 
xmlFindCharEncodingHandler("EUC-JP"); 1619 if (handler != NULL) return(handler); 1620 break; 1621 default: 1622 break; 1623 } 1624 1625 #ifdef DEBUG_ENCODING 1626 xmlGenericError(xmlGenericErrorContext, 1627 "No handler found for encoding %d\n", enc); 1628 #endif 1629 return(NULL); 1630 } 1631 1632 /** 1633 * xmlFindCharEncodingHandler: 1634 * @name: a string describing the char encoding. 1635 * 1636 * Search in the registered set the handler able to read/write that encoding. 1637 * 1638 * Returns the handler or NULL if not found 1639 */ 1640 xmlCharEncodingHandlerPtr 1641 xmlFindCharEncodingHandler(const char *name) { 1642 const char *nalias; 1643 const char *norig; 1644 xmlCharEncoding alias; 1645 #ifdef LIBXML_ICONV_ENABLED 1646 xmlCharEncodingHandlerPtr enc; 1647 iconv_t icv_in, icv_out; 1648 #endif /* LIBXML_ICONV_ENABLED */ 1649 #ifdef LIBXML_ICU_ENABLED 1650 xmlCharEncodingHandlerPtr encu; 1651 uconv_t *ucv_in, *ucv_out; 1652 #endif /* LIBXML_ICU_ENABLED */ 1653 char upper[100]; 1654 int i; 1655 1656 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1657 if (name == NULL) return(xmlDefaultCharEncodingHandler); 1658 if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1659 1660 /* 1661 * Do the alias resolution 1662 */ 1663 norig = name; 1664 nalias = xmlGetEncodingAlias(name); 1665 if (nalias != NULL) 1666 name = nalias; 1667 1668 /* 1669 * Check first for directly registered encoding names 1670 */ 1671 for (i = 0;i < 99;i++) { 1672 upper[i] = toupper(name[i]); 1673 if (upper[i] == 0) break; 1674 } 1675 upper[i] = 0; 1676 1677 if (handlers != NULL) { 1678 for (i = 0;i < nbCharEncodingHandler; i++) { 1679 if (!strcmp(upper, handlers[i]->name)) { 1680 #ifdef DEBUG_ENCODING 1681 xmlGenericError(xmlGenericErrorContext, 1682 "Found registered handler for encoding %s\n", name); 1683 #endif 1684 return(handlers[i]); 1685 } 1686 } 1687 } 1688 1689 #ifdef LIBXML_ICONV_ENABLED 1690 /* check whether iconv can handle this */ 1691 icv_in = iconv_open("UTF-8", 
name); 1692 icv_out = iconv_open(name, "UTF-8"); 1693 if (icv_in == (iconv_t) -1) { 1694 icv_in = iconv_open("UTF-8", upper); 1695 } 1696 if (icv_out == (iconv_t) -1) { 1697 icv_out = iconv_open(upper, "UTF-8"); 1698 } 1699 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1700 enc = (xmlCharEncodingHandlerPtr) 1701 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1702 if (enc == NULL) { 1703 iconv_close(icv_in); 1704 iconv_close(icv_out); 1705 return(NULL); 1706 } 1707 memset(enc, 0, sizeof(xmlCharEncodingHandler)); 1708 enc->name = xmlMemStrdup(name); 1709 enc->input = NULL; 1710 enc->output = NULL; 1711 enc->iconv_in = icv_in; 1712 enc->iconv_out = icv_out; 1713 #ifdef DEBUG_ENCODING 1714 xmlGenericError(xmlGenericErrorContext, 1715 "Found iconv handler for encoding %s\n", name); 1716 #endif 1717 return enc; 1718 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1719 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1720 "iconv : problems with filters for '%s'\n", name); 1721 } 1722 #endif /* LIBXML_ICONV_ENABLED */ 1723 #ifdef LIBXML_ICU_ENABLED 1724 /* check whether icu can handle this */ 1725 ucv_in = openIcuConverter(name, 1); 1726 ucv_out = openIcuConverter(name, 0); 1727 if (ucv_in != NULL && ucv_out != NULL) { 1728 encu = (xmlCharEncodingHandlerPtr) 1729 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1730 if (encu == NULL) { 1731 closeIcuConverter(ucv_in); 1732 closeIcuConverter(ucv_out); 1733 return(NULL); 1734 } 1735 memset(encu, 0, sizeof(xmlCharEncodingHandler)); 1736 encu->name = xmlMemStrdup(name); 1737 encu->input = NULL; 1738 encu->output = NULL; 1739 encu->uconv_in = ucv_in; 1740 encu->uconv_out = ucv_out; 1741 #ifdef DEBUG_ENCODING 1742 xmlGenericError(xmlGenericErrorContext, 1743 "Found ICU converter handler for encoding %s\n", name); 1744 #endif 1745 return encu; 1746 } else if (ucv_in != NULL || ucv_out != NULL) { 1747 closeIcuConverter(ucv_in); 1748 closeIcuConverter(ucv_out); 1749 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1750 "ICU 
converter : problems with filters for '%s'\n", name); 1751 } 1752 #endif /* LIBXML_ICU_ENABLED */ 1753 1754 #ifdef DEBUG_ENCODING 1755 xmlGenericError(xmlGenericErrorContext, 1756 "No handler found for encoding %s\n", name); 1757 #endif 1758 1759 /* 1760 * Fallback using the canonical names 1761 */ 1762 alias = xmlParseCharEncoding(norig); 1763 if (alias != XML_CHAR_ENCODING_ERROR) { 1764 const char* canon; 1765 canon = xmlGetCharEncodingName(alias); 1766 if ((canon != NULL) && (strcmp(name, canon))) { 1767 return(xmlFindCharEncodingHandler(canon)); 1768 } 1769 } 1770 1771 /* If "none of the above", give up */ 1772 return(NULL); 1773 } 1774 1775 /************************************************************************ 1776 * * 1777 * ICONV based generic conversion functions * 1778 * * 1779 ************************************************************************/ 1780 1781 #ifdef LIBXML_ICONV_ENABLED 1782 /** 1783 * xmlIconvWrapper: 1784 * @cd: iconv converter data structure 1785 * @out: a pointer to an array of bytes to store the result 1786 * @outlen: the length of @out 1787 * @in: a pointer to an array of ISO Latin 1 chars 1788 * @inlen: the length of @in 1789 * 1790 * Returns 0 if success, or 1791 * -1 by lack of space, or 1792 * -2 if the transcoding fails (for *in is not valid utf8 string or 1793 * the result of transformation can't fit into the encoding we want), or 1794 * -3 if there the last byte can't form a single output char. 1795 * 1796 * The value of @inlen after return is the number of octets consumed 1797 * as the return value is positive, else unpredictable. 1798 * The value of @outlen after return is the number of ocetes consumed. 
1799 */ 1800 static int 1801 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1802 const unsigned char *in, int *inlen) { 1803 size_t icv_inlen, icv_outlen; 1804 const char *icv_in = (const char *) in; 1805 char *icv_out = (char *) out; 1806 int ret; 1807 1808 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1809 if (outlen != NULL) *outlen = 0; 1810 return(-1); 1811 } 1812 icv_inlen = *inlen; 1813 icv_outlen = *outlen; 1814 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1815 *inlen -= icv_inlen; 1816 *outlen -= icv_outlen; 1817 if ((icv_inlen != 0) || (ret == -1)) { 1818 #ifdef EILSEQ 1819 if (errno == EILSEQ) { 1820 return -2; 1821 } else 1822 #endif 1823 #ifdef E2BIG 1824 if (errno == E2BIG) { 1825 return -1; 1826 } else 1827 #endif 1828 #ifdef EINVAL 1829 if (errno == EINVAL) { 1830 return -3; 1831 } else 1832 #endif 1833 { 1834 return -3; 1835 } 1836 } 1837 return 0; 1838 } 1839 #endif /* LIBXML_ICONV_ENABLED */ 1840 1841 /************************************************************************ 1842 * * 1843 * ICU based generic conversion functions * 1844 * * 1845 ************************************************************************/ 1846 1847 #ifdef LIBXML_ICU_ENABLED 1848 /** 1849 * xmlUconvWrapper: 1850 * @cd: ICU uconverter data structure 1851 * @toUnicode : non-zero if toUnicode. 0 otherwise. 1852 * @out: a pointer to an array of bytes to store the result 1853 * @outlen: the length of @out 1854 * @in: a pointer to an array of ISO Latin 1 chars 1855 * @inlen: the length of @in 1856 * @flush: if true, indicates end of input 1857 * 1858 * Returns 0 if success, or 1859 * -1 by lack of space, or 1860 * -2 if the transcoding fails (for *in is not valid utf8 string or 1861 * the result of transformation can't fit into the encoding we want), or 1862 * -3 if there the last byte can't form a single output char. 
1863 * 1864 * The value of @inlen after return is the number of octets consumed 1865 * as the return value is positive, else unpredictable. 1866 * The value of @outlen after return is the number of ocetes consumed. 1867 */ 1868 static int 1869 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, 1870 const unsigned char *in, int *inlen, int flush) { 1871 const char *ucv_in = (const char *) in; 1872 char *ucv_out = (char *) out; 1873 UErrorCode err = U_ZERO_ERROR; 1874 1875 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1876 if (outlen != NULL) *outlen = 0; 1877 return(-1); 1878 } 1879 1880 if (toUnicode) { 1881 /* encoding => UTF-16 => UTF-8 */ 1882 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, 1883 &ucv_in, ucv_in + *inlen, cd->pivot_buf, 1884 &cd->pivot_source, &cd->pivot_target, 1885 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err); 1886 } else { 1887 /* UTF-8 => UTF-16 => encoding */ 1888 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, 1889 &ucv_in, ucv_in + *inlen, cd->pivot_buf, 1890 &cd->pivot_source, &cd->pivot_target, 1891 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err); 1892 } 1893 *inlen = ucv_in - (const char*) in; 1894 *outlen = ucv_out - (char *) out; 1895 if (U_SUCCESS(err)) { 1896 /* reset pivot buf if this is the last call for input (flush==TRUE) */ 1897 if (flush) 1898 cd->pivot_source = cd->pivot_target = cd->pivot_buf; 1899 return 0; 1900 } 1901 if (err == U_BUFFER_OVERFLOW_ERROR) 1902 return -1; 1903 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) 1904 return -2; 1905 return -3; 1906 } 1907 #endif /* LIBXML_ICU_ENABLED */ 1908 1909 /************************************************************************ 1910 * * 1911 * The real API used by libxml for on-the-fly conversion * 1912 * * 1913 ************************************************************************/ 1914 1915 static int 1916 xmlEncInputChunk(xmlCharEncodingHandler 
*handler, unsigned char *out, 1917 int *outlen, const unsigned char *in, int *inlen, int flush) { 1918 int ret; 1919 (void)flush; 1920 1921 if (handler->input != NULL) { 1922 ret = handler->input(out, outlen, in, inlen); 1923 } 1924 #ifdef LIBXML_ICONV_ENABLED 1925 else if (handler->iconv_in != NULL) { 1926 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen); 1927 } 1928 #endif /* LIBXML_ICONV_ENABLED */ 1929 #ifdef LIBXML_ICU_ENABLED 1930 else if (handler->uconv_in != NULL) { 1931 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen, 1932 flush); 1933 } 1934 #endif /* LIBXML_ICU_ENABLED */ 1935 else { 1936 *outlen = 0; 1937 *inlen = 0; 1938 ret = -2; 1939 } 1940 1941 return(ret); 1942 } 1943 1944 /* Returns -4 if no output function was found. */ 1945 static int 1946 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, 1947 int *outlen, const unsigned char *in, int *inlen) { 1948 int ret; 1949 1950 if (handler->output != NULL) { 1951 ret = handler->output(out, outlen, in, inlen); 1952 } 1953 #ifdef LIBXML_ICONV_ENABLED 1954 else if (handler->iconv_out != NULL) { 1955 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen); 1956 } 1957 #endif /* LIBXML_ICONV_ENABLED */ 1958 #ifdef LIBXML_ICU_ENABLED 1959 else if (handler->uconv_out != NULL) { 1960 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen, 1961 TRUE); 1962 } 1963 #endif /* LIBXML_ICU_ENABLED */ 1964 else { 1965 *outlen = 0; 1966 *inlen = 0; 1967 ret = -4; 1968 } 1969 1970 return(ret); 1971 } 1972 1973 /** 1974 * xmlCharEncFirstLineInt: 1975 * @handler: char enconding transformation data structure 1976 * @out: an xmlBuffer for the output. 1977 * @in: an xmlBuffer for the input 1978 * @len: number of bytes to convert for the first line, or -1 1979 * 1980 * Front-end for the encoding handler input function, but handle only 1981 * the very first line, i.e. limit itself to 45 chars. 
1982 * 1983 * Returns the number of byte written if success, or 1984 * -1 general error 1985 * -2 if the transcoding fails (for *in is not valid utf8 string or 1986 * the result of transformation can't fit into the encoding we want), or 1987 */ 1988 int 1989 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1990 xmlBufferPtr in, int len) { 1991 int ret; 1992 int written; 1993 int toconv; 1994 1995 if (handler == NULL) return(-1); 1996 if (out == NULL) return(-1); 1997 if (in == NULL) return(-1); 1998 1999 /* calculate space available */ 2000 written = out->size - out->use - 1; /* count '\0' */ 2001 toconv = in->use; 2002 /* 2003 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 2004 * 45 chars should be sufficient to reach the end of the encoding 2005 * declaration without going too far inside the document content. 2006 * on UTF-16 this means 90bytes, on UCS4 this means 180 2007 * The actual value depending on guessed encoding is passed as @len 2008 * if provided 2009 */ 2010 if (len >= 0) { 2011 if (toconv > len) 2012 toconv = len; 2013 } else { 2014 if (toconv > 180) 2015 toconv = 180; 2016 } 2017 if (toconv * 2 >= written) { 2018 xmlBufferGrow(out, toconv * 2); 2019 written = out->size - out->use - 1; 2020 } 2021 2022 ret = xmlEncInputChunk(handler, &out->content[out->use], &written, 2023 in->content, &toconv, 0); 2024 xmlBufferShrink(in, toconv); 2025 out->use += written; 2026 out->content[out->use] = 0; 2027 if (ret == -1) ret = -3; 2028 2029 #ifdef DEBUG_ENCODING 2030 switch (ret) { 2031 case 0: 2032 xmlGenericError(xmlGenericErrorContext, 2033 "converted %d bytes to %d bytes of input\n", 2034 toconv, written); 2035 break; 2036 case -1: 2037 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2038 toconv, written, in->use); 2039 break; 2040 case -2: 2041 xmlGenericError(xmlGenericErrorContext, 2042 "input conversion failed due to input error\n"); 2043 break; 2044 case -3: 2045 
xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2046 toconv, written, in->use); 2047 break; 2048 default: 2049 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 2050 } 2051 #endif /* DEBUG_ENCODING */ 2052 /* 2053 * Ignore when input buffer is not on a boundary 2054 */ 2055 if (ret == -3) ret = 0; 2056 if (ret == -1) ret = 0; 2057 return(ret); 2058 } 2059 2060 /** 2061 * xmlCharEncFirstLine: 2062 * @handler: char enconding transformation data structure 2063 * @out: an xmlBuffer for the output. 2064 * @in: an xmlBuffer for the input 2065 * 2066 * Front-end for the encoding handler input function, but handle only 2067 * the very first line, i.e. limit itself to 45 chars. 2068 * 2069 * Returns the number of byte written if success, or 2070 * -1 general error 2071 * -2 if the transcoding fails (for *in is not valid utf8 string or 2072 * the result of transformation can't fit into the encoding we want), or 2073 */ 2074 int 2075 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2076 xmlBufferPtr in) { 2077 return(xmlCharEncFirstLineInt(handler, out, in, -1)); 2078 } 2079 2080 /** 2081 * xmlCharEncFirstLineInput: 2082 * @input: a parser input buffer 2083 * @len: number of bytes to convert for the first line, or -1 2084 * 2085 * Front-end for the encoding handler input function, but handle only 2086 * the very first line. Point is that this is based on autodetection 2087 * of the encoding and once that first line is converted we may find 2088 * out that a different decoder is needed to process the input. 
2089 * 2090 * Returns the number of byte written if success, or 2091 * -1 general error 2092 * -2 if the transcoding fails (for *in is not valid utf8 string or 2093 * the result of transformation can't fit into the encoding we want), or 2094 */ 2095 int 2096 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) 2097 { 2098 int ret; 2099 size_t written; 2100 size_t toconv; 2101 int c_in; 2102 int c_out; 2103 xmlBufPtr in; 2104 xmlBufPtr out; 2105 2106 if ((input == NULL) || (input->encoder == NULL) || 2107 (input->buffer == NULL) || (input->raw == NULL)) 2108 return (-1); 2109 out = input->buffer; 2110 in = input->raw; 2111 2112 toconv = xmlBufUse(in); 2113 if (toconv == 0) 2114 return (0); 2115 written = xmlBufAvail(out) - 1; /* count '\0' */ 2116 /* 2117 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 2118 * 45 chars should be sufficient to reach the end of the encoding 2119 * declaration without going too far inside the document content. 2120 * on UTF-16 this means 90bytes, on UCS4 this means 180 2121 * The actual value depending on guessed encoding is passed as @len 2122 * if provided 2123 */ 2124 if (len >= 0) { 2125 if (toconv > (unsigned int) len) 2126 toconv = len; 2127 } else { 2128 if (toconv > 180) 2129 toconv = 180; 2130 } 2131 if (toconv * 2 >= written) { 2132 xmlBufGrow(out, toconv * 2); 2133 written = xmlBufAvail(out) - 1; 2134 } 2135 if (written > 360) 2136 written = 360; 2137 2138 c_in = toconv; 2139 c_out = written; 2140 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, 2141 xmlBufContent(in), &c_in, 0); 2142 xmlBufShrink(in, c_in); 2143 xmlBufAddLen(out, c_out); 2144 if (ret == -1) 2145 ret = -3; 2146 2147 switch (ret) { 2148 case 0: 2149 #ifdef DEBUG_ENCODING 2150 xmlGenericError(xmlGenericErrorContext, 2151 "converted %d bytes to %d bytes of input\n", 2152 c_in, c_out); 2153 #endif 2154 break; 2155 case -1: 2156 #ifdef DEBUG_ENCODING 2157 xmlGenericError(xmlGenericErrorContext, 2158 "converted %d bytes to %d 
bytes of input, %d left\n", 2159 c_in, c_out, (int)xmlBufUse(in)); 2160 #endif 2161 break; 2162 case -3: 2163 #ifdef DEBUG_ENCODING 2164 xmlGenericError(xmlGenericErrorContext, 2165 "converted %d bytes to %d bytes of input, %d left\n", 2166 c_in, c_out, (int)xmlBufUse(in)); 2167 #endif 2168 break; 2169 case -2: { 2170 char buf[50]; 2171 const xmlChar *content = xmlBufContent(in); 2172 2173 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2174 content[0], content[1], 2175 content[2], content[3]); 2176 buf[49] = 0; 2177 xmlEncodingErr(XML_I18N_CONV_FAILED, 2178 "input conversion failed due to input error, bytes %s\n", 2179 buf); 2180 } 2181 } 2182 /* 2183 * Ignore when input buffer is not on a boundary 2184 */ 2185 if (ret == -3) ret = 0; 2186 if (ret == -1) ret = 0; 2187 return(ret); 2188 } 2189 2190 /** 2191 * xmlCharEncInput: 2192 * @input: a parser input buffer 2193 * @flush: try to flush all the raw buffer 2194 * 2195 * Generic front-end for the encoding handler on parser input 2196 * 2197 * Returns the number of byte written if success, or 2198 * -1 general error 2199 * -2 if the transcoding fails (for *in is not valid utf8 string or 2200 * the result of transformation can't fit into the encoding we want), or 2201 */ 2202 int 2203 xmlCharEncInput(xmlParserInputBufferPtr input, int flush) 2204 { 2205 int ret; 2206 size_t written; 2207 size_t toconv; 2208 int c_in; 2209 int c_out; 2210 xmlBufPtr in; 2211 xmlBufPtr out; 2212 2213 if ((input == NULL) || (input->encoder == NULL) || 2214 (input->buffer == NULL) || (input->raw == NULL)) 2215 return (-1); 2216 out = input->buffer; 2217 in = input->raw; 2218 2219 toconv = xmlBufUse(in); 2220 if (toconv == 0) 2221 return (0); 2222 if ((toconv > 64 * 1024) && (flush == 0)) 2223 toconv = 64 * 1024; 2224 written = xmlBufAvail(out); 2225 if (written > 0) 2226 written--; /* count '\0' */ 2227 if (toconv * 2 >= written) { 2228 xmlBufGrow(out, toconv * 2); 2229 written = xmlBufAvail(out); 2230 if (written > 0) 2231 
written--; /* count '\0' */ 2232 } 2233 if ((written > 128 * 1024) && (flush == 0)) 2234 written = 128 * 1024; 2235 2236 c_in = toconv; 2237 c_out = written; 2238 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, 2239 xmlBufContent(in), &c_in, flush); 2240 xmlBufShrink(in, c_in); 2241 xmlBufAddLen(out, c_out); 2242 if (ret == -1) 2243 ret = -3; 2244 2245 switch (ret) { 2246 case 0: 2247 #ifdef DEBUG_ENCODING 2248 xmlGenericError(xmlGenericErrorContext, 2249 "converted %d bytes to %d bytes of input\n", 2250 c_in, c_out); 2251 #endif 2252 break; 2253 case -1: 2254 #ifdef DEBUG_ENCODING 2255 xmlGenericError(xmlGenericErrorContext, 2256 "converted %d bytes to %d bytes of input, %d left\n", 2257 c_in, c_out, (int)xmlBufUse(in)); 2258 #endif 2259 break; 2260 case -3: 2261 #ifdef DEBUG_ENCODING 2262 xmlGenericError(xmlGenericErrorContext, 2263 "converted %d bytes to %d bytes of input, %d left\n", 2264 c_in, c_out, (int)xmlBufUse(in)); 2265 #endif 2266 break; 2267 case -2: { 2268 char buf[50]; 2269 const xmlChar *content = xmlBufContent(in); 2270 2271 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2272 content[0], content[1], 2273 content[2], content[3]); 2274 buf[49] = 0; 2275 xmlEncodingErr(XML_I18N_CONV_FAILED, 2276 "input conversion failed due to input error, bytes %s\n", 2277 buf); 2278 } 2279 } 2280 /* 2281 * Ignore when input buffer is not on a boundary 2282 */ 2283 if (ret == -3) 2284 ret = 0; 2285 return (c_out? c_out : ret); 2286 } 2287 2288 /** 2289 * xmlCharEncInFunc: 2290 * @handler: char encoding transformation data structure 2291 * @out: an xmlBuffer for the output. 
2292 * @in: an xmlBuffer for the input 2293 * 2294 * Generic front-end for the encoding handler input function 2295 * 2296 * Returns the number of byte written if success, or 2297 * -1 general error 2298 * -2 if the transcoding fails (for *in is not valid utf8 string or 2299 * the result of transformation can't fit into the encoding we want), or 2300 */ 2301 int 2302 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2303 xmlBufferPtr in) 2304 { 2305 int ret; 2306 int written; 2307 int toconv; 2308 2309 if (handler == NULL) 2310 return (-1); 2311 if (out == NULL) 2312 return (-1); 2313 if (in == NULL) 2314 return (-1); 2315 2316 toconv = in->use; 2317 if (toconv == 0) 2318 return (0); 2319 written = out->size - out->use -1; /* count '\0' */ 2320 if (toconv * 2 >= written) { 2321 xmlBufferGrow(out, out->size + toconv * 2); 2322 written = out->size - out->use - 1; 2323 } 2324 ret = xmlEncInputChunk(handler, &out->content[out->use], &written, 2325 in->content, &toconv, 1); 2326 xmlBufferShrink(in, toconv); 2327 out->use += written; 2328 out->content[out->use] = 0; 2329 if (ret == -1) 2330 ret = -3; 2331 2332 switch (ret) { 2333 case 0: 2334 #ifdef DEBUG_ENCODING 2335 xmlGenericError(xmlGenericErrorContext, 2336 "converted %d bytes to %d bytes of input\n", 2337 toconv, written); 2338 #endif 2339 break; 2340 case -1: 2341 #ifdef DEBUG_ENCODING 2342 xmlGenericError(xmlGenericErrorContext, 2343 "converted %d bytes to %d bytes of input, %d left\n", 2344 toconv, written, in->use); 2345 #endif 2346 break; 2347 case -3: 2348 #ifdef DEBUG_ENCODING 2349 xmlGenericError(xmlGenericErrorContext, 2350 "converted %d bytes to %d bytes of input, %d left\n", 2351 toconv, written, in->use); 2352 #endif 2353 break; 2354 case -2: { 2355 char buf[50]; 2356 2357 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2358 in->content[0], in->content[1], 2359 in->content[2], in->content[3]); 2360 buf[49] = 0; 2361 xmlEncodingErr(XML_I18N_CONV_FAILED, 2362 "input conversion 
failed due to input error, bytes %s\n", 2363 buf); 2364 } 2365 } 2366 /* 2367 * Ignore when input buffer is not on a boundary 2368 */ 2369 if (ret == -3) 2370 ret = 0; 2371 return (written? written : ret); 2372 } 2373 2374 #ifdef LIBXML_OUTPUT_ENABLED 2375 /** 2376 * xmlCharEncOutput: 2377 * @output: a parser output buffer 2378 * @init: is this an initialization call without data 2379 * 2380 * Generic front-end for the encoding handler on parser output 2381 * a first call with @init == 1 has to be made first to initiate the 2382 * output in case of non-stateless encoding needing to initiate their 2383 * state or the output (like the BOM in UTF16). 2384 * In case of UTF8 sequence conversion errors for the given encoder, 2385 * the content will be automatically remapped to a CharRef sequence. 2386 * 2387 * Returns the number of byte written if success, or 2388 * -1 general error 2389 * -2 if the transcoding fails (for *in is not valid utf8 string or 2390 * the result of transformation can't fit into the encoding we want), or 2391 */ 2392 int 2393 xmlCharEncOutput(xmlOutputBufferPtr output, int init) 2394 { 2395 int ret; 2396 size_t written; 2397 size_t writtentot = 0; 2398 size_t toconv; 2399 int c_in; 2400 int c_out; 2401 xmlBufPtr in; 2402 xmlBufPtr out; 2403 2404 if ((output == NULL) || (output->encoder == NULL) || 2405 (output->buffer == NULL) || (output->conv == NULL)) 2406 return (-1); 2407 out = output->conv; 2408 in = output->buffer; 2409 2410 retry: 2411 2412 written = xmlBufAvail(out); 2413 if (written > 0) 2414 written--; /* count '\0' */ 2415 2416 /* 2417 * First specific handling of the initialization call 2418 */ 2419 if (init) { 2420 c_in = 0; 2421 c_out = written; 2422 /* TODO: Check return value. 
*/ 2423 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, 2424 NULL, &c_in); 2425 xmlBufAddLen(out, c_out); 2426 #ifdef DEBUG_ENCODING 2427 xmlGenericError(xmlGenericErrorContext, 2428 "initialized encoder\n"); 2429 #endif 2430 return(0); 2431 } 2432 2433 /* 2434 * Conversion itself. 2435 */ 2436 toconv = xmlBufUse(in); 2437 if (toconv == 0) 2438 return (0); 2439 if (toconv > 64 * 1024) 2440 toconv = 64 * 1024; 2441 if (toconv * 4 >= written) { 2442 xmlBufGrow(out, toconv * 4); 2443 written = xmlBufAvail(out) - 1; 2444 } 2445 if (written > 256 * 1024) 2446 written = 256 * 1024; 2447 2448 c_in = toconv; 2449 c_out = written; 2450 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, 2451 xmlBufContent(in), &c_in); 2452 xmlBufShrink(in, c_in); 2453 xmlBufAddLen(out, c_out); 2454 writtentot += c_out; 2455 if (ret == -1) { 2456 if (c_out > 0) { 2457 /* Can be a limitation of iconv or uconv */ 2458 goto retry; 2459 } 2460 ret = -3; 2461 } 2462 2463 if (ret >= 0) output += ret; 2464 2465 /* 2466 * Attempt to handle error cases 2467 */ 2468 switch (ret) { 2469 case 0: 2470 #ifdef DEBUG_ENCODING 2471 xmlGenericError(xmlGenericErrorContext, 2472 "converted %d bytes to %d bytes of output\n", 2473 c_in, c_out); 2474 #endif 2475 break; 2476 case -1: 2477 #ifdef DEBUG_ENCODING 2478 xmlGenericError(xmlGenericErrorContext, 2479 "output conversion failed by lack of space\n"); 2480 #endif 2481 break; 2482 case -3: 2483 #ifdef DEBUG_ENCODING 2484 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2485 c_in, c_out, (int) xmlBufUse(in)); 2486 #endif 2487 break; 2488 case -4: 2489 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2490 "xmlCharEncOutFunc: no output function !\n", NULL); 2491 ret = -1; 2492 break; 2493 case -2: { 2494 xmlChar charref[20]; 2495 int len = (int) xmlBufUse(in); 2496 xmlChar *content = xmlBufContent(in); 2497 int cur, charrefLen; 2498 2499 cur = xmlGetUTF8Char(content, &len); 2500 if (cur <= 0) 2501 break; 2502 
2503 #ifdef DEBUG_ENCODING 2504 xmlGenericError(xmlGenericErrorContext, 2505 "handling output conversion error\n"); 2506 xmlGenericError(xmlGenericErrorContext, 2507 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2508 content[0], content[1], 2509 content[2], content[3]); 2510 #endif 2511 /* 2512 * Removes the UTF8 sequence, and replace it by a charref 2513 * and continue the transcoding phase, hoping the error 2514 * did not mangle the encoder state. 2515 */ 2516 charrefLen = snprintf((char *) &charref[0], sizeof(charref), 2517 "&#%d;", cur); 2518 xmlBufShrink(in, len); 2519 xmlBufGrow(out, charrefLen * 4); 2520 c_out = xmlBufAvail(out) - 1; 2521 c_in = charrefLen; 2522 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, 2523 charref, &c_in); 2524 2525 if ((ret < 0) || (c_in != charrefLen)) { 2526 char buf[50]; 2527 2528 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2529 content[0], content[1], 2530 content[2], content[3]); 2531 buf[49] = 0; 2532 xmlEncodingErr(XML_I18N_CONV_FAILED, 2533 "output conversion failed due to conv error, bytes %s\n", 2534 buf); 2535 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE) 2536 content[0] = ' '; 2537 break; 2538 } 2539 2540 xmlBufAddLen(out, c_out); 2541 writtentot += c_out; 2542 goto retry; 2543 } 2544 } 2545 return(ret); 2546 } 2547 #endif 2548 2549 /** 2550 * xmlCharEncOutFunc: 2551 * @handler: char enconding transformation data structure 2552 * @out: an xmlBuffer for the output. 2553 * @in: an xmlBuffer for the input 2554 * 2555 * Generic front-end for the encoding handler output function 2556 * a first call with @in == NULL has to be made firs to initiate the 2557 * output in case of non-stateless encoding needing to initiate their 2558 * state or the output (like the BOM in UTF16). 2559 * In case of UTF8 sequence conversion errors for the given encoder, 2560 * the content will be automatically remapped to a CharRef sequence. 
2561 * 2562 * Returns the number of byte written if success, or 2563 * -1 general error 2564 * -2 if the transcoding fails (for *in is not valid utf8 string or 2565 * the result of transformation can't fit into the encoding we want), or 2566 */ 2567 int 2568 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2569 xmlBufferPtr in) { 2570 int ret; 2571 int written; 2572 int writtentot = 0; 2573 int toconv; 2574 int output = 0; 2575 2576 if (handler == NULL) return(-1); 2577 if (out == NULL) return(-1); 2578 2579 retry: 2580 2581 written = out->size - out->use; 2582 2583 if (written > 0) 2584 written--; /* Gennady: count '/0' */ 2585 2586 /* 2587 * First specific handling of in = NULL, i.e. the initialization call 2588 */ 2589 if (in == NULL) { 2590 toconv = 0; 2591 /* TODO: Check return value. */ 2592 xmlEncOutputChunk(handler, &out->content[out->use], &written, 2593 NULL, &toconv); 2594 out->use += written; 2595 out->content[out->use] = 0; 2596 #ifdef DEBUG_ENCODING 2597 xmlGenericError(xmlGenericErrorContext, 2598 "initialized encoder\n"); 2599 #endif 2600 return(0); 2601 } 2602 2603 /* 2604 * Conversion itself. 
2605 */ 2606 toconv = in->use; 2607 if (toconv == 0) 2608 return(0); 2609 if (toconv * 4 >= written) { 2610 xmlBufferGrow(out, toconv * 4); 2611 written = out->size - out->use - 1; 2612 } 2613 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, 2614 in->content, &toconv); 2615 xmlBufferShrink(in, toconv); 2616 out->use += written; 2617 writtentot += written; 2618 out->content[out->use] = 0; 2619 if (ret == -1) { 2620 if (written > 0) { 2621 /* Can be a limitation of iconv or uconv */ 2622 goto retry; 2623 } 2624 ret = -3; 2625 } 2626 2627 if (ret >= 0) output += ret; 2628 2629 /* 2630 * Attempt to handle error cases 2631 */ 2632 switch (ret) { 2633 case 0: 2634 #ifdef DEBUG_ENCODING 2635 xmlGenericError(xmlGenericErrorContext, 2636 "converted %d bytes to %d bytes of output\n", 2637 toconv, written); 2638 #endif 2639 break; 2640 case -1: 2641 #ifdef DEBUG_ENCODING 2642 xmlGenericError(xmlGenericErrorContext, 2643 "output conversion failed by lack of space\n"); 2644 #endif 2645 break; 2646 case -3: 2647 #ifdef DEBUG_ENCODING 2648 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2649 toconv, written, in->use); 2650 #endif 2651 break; 2652 case -4: 2653 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2654 "xmlCharEncOutFunc: no output function !\n", NULL); 2655 ret = -1; 2656 break; 2657 case -2: { 2658 xmlChar charref[20]; 2659 int len = in->use; 2660 const xmlChar *utf = (const xmlChar *) in->content; 2661 int cur, charrefLen; 2662 2663 cur = xmlGetUTF8Char(utf, &len); 2664 if (cur <= 0) 2665 break; 2666 2667 #ifdef DEBUG_ENCODING 2668 xmlGenericError(xmlGenericErrorContext, 2669 "handling output conversion error\n"); 2670 xmlGenericError(xmlGenericErrorContext, 2671 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2672 in->content[0], in->content[1], 2673 in->content[2], in->content[3]); 2674 #endif 2675 /* 2676 * Removes the UTF8 sequence, and replace it by a charref 2677 * and continue the transcoding phase, hoping the error 
2678 * did not mangle the encoder state. 2679 */ 2680 charrefLen = snprintf((char *) &charref[0], sizeof(charref), 2681 "&#%d;", cur); 2682 xmlBufferShrink(in, len); 2683 xmlBufferGrow(out, charrefLen * 4); 2684 written = out->size - out->use - 1; 2685 toconv = charrefLen; 2686 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, 2687 charref, &toconv); 2688 2689 if ((ret < 0) || (toconv != charrefLen)) { 2690 char buf[50]; 2691 2692 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2693 in->content[0], in->content[1], 2694 in->content[2], in->content[3]); 2695 buf[49] = 0; 2696 xmlEncodingErr(XML_I18N_CONV_FAILED, 2697 "output conversion failed due to conv error, bytes %s\n", 2698 buf); 2699 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2700 in->content[0] = ' '; 2701 break; 2702 } 2703 2704 out->use += written; 2705 writtentot += written; 2706 out->content[out->use] = 0; 2707 goto retry; 2708 } 2709 } 2710 return(ret); 2711 } 2712 2713 /** 2714 * xmlCharEncCloseFunc: 2715 * @handler: char enconding transformation data structure 2716 * 2717 * Generic front-end for encoding handler close function 2718 * 2719 * Returns 0 if success, or -1 in case of error 2720 */ 2721 int 2722 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2723 int ret = 0; 2724 int tofree = 0; 2725 int i, handler_in_list = 0; 2726 2727 if (handler == NULL) return(-1); 2728 if (handler->name == NULL) return(-1); 2729 if (handlers != NULL) { 2730 for (i = 0;i < nbCharEncodingHandler; i++) { 2731 if (handler == handlers[i]) { 2732 handler_in_list = 1; 2733 break; 2734 } 2735 } 2736 } 2737 #ifdef LIBXML_ICONV_ENABLED 2738 /* 2739 * Iconv handlers can be used only once, free the whole block. 2740 * and the associated icon resources. 
2741 */ 2742 if ((handler_in_list == 0) && 2743 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) { 2744 tofree = 1; 2745 if (handler->iconv_out != NULL) { 2746 if (iconv_close(handler->iconv_out)) 2747 ret = -1; 2748 handler->iconv_out = NULL; 2749 } 2750 if (handler->iconv_in != NULL) { 2751 if (iconv_close(handler->iconv_in)) 2752 ret = -1; 2753 handler->iconv_in = NULL; 2754 } 2755 } 2756 #endif /* LIBXML_ICONV_ENABLED */ 2757 #ifdef LIBXML_ICU_ENABLED 2758 if ((handler_in_list == 0) && 2759 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) { 2760 tofree = 1; 2761 if (handler->uconv_out != NULL) { 2762 closeIcuConverter(handler->uconv_out); 2763 handler->uconv_out = NULL; 2764 } 2765 if (handler->uconv_in != NULL) { 2766 closeIcuConverter(handler->uconv_in); 2767 handler->uconv_in = NULL; 2768 } 2769 } 2770 #endif 2771 if (tofree) { 2772 /* free up only dynamic handlers iconv/uconv */ 2773 if (handler->name != NULL) 2774 xmlFree(handler->name); 2775 handler->name = NULL; 2776 xmlFree(handler); 2777 } 2778 #ifdef DEBUG_ENCODING 2779 if (ret) 2780 xmlGenericError(xmlGenericErrorContext, 2781 "failed to close the encoding handler\n"); 2782 else 2783 xmlGenericError(xmlGenericErrorContext, 2784 "closed the encoding handler\n"); 2785 #endif 2786 2787 return(ret); 2788 } 2789 2790 /** 2791 * xmlByteConsumed: 2792 * @ctxt: an XML parser context 2793 * 2794 * This function provides the current index of the parser relative 2795 * to the start of the current entity. This function is computed in 2796 * bytes from the beginning starting at zero and finishing at the 2797 * size in byte of the file if parsing a file. The function is 2798 * of constant cost if the input is UTF-8 but can be costly if run 2799 * on non-UTF-8 input. 2800 * 2801 * Returns the index in bytes from the beginning of the entity or -1 2802 * in case the index could not be computed. 
2803 */ 2804 long 2805 xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2806 xmlParserInputPtr in; 2807 2808 if (ctxt == NULL) return(-1); 2809 in = ctxt->input; 2810 if (in == NULL) return(-1); 2811 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2812 unsigned int unused = 0; 2813 xmlCharEncodingHandler * handler = in->buf->encoder; 2814 /* 2815 * Encoding conversion, compute the number of unused original 2816 * bytes from the input not consumed and substract that from 2817 * the raw consumed value, this is not a cheap operation 2818 */ 2819 if (in->end - in->cur > 0) { 2820 unsigned char convbuf[32000]; 2821 const unsigned char *cur = (const unsigned char *)in->cur; 2822 int toconv = in->end - in->cur, written = 32000; 2823 2824 int ret; 2825 2826 do { 2827 toconv = in->end - cur; 2828 written = 32000; 2829 ret = xmlEncOutputChunk(handler, &convbuf[0], &written, 2830 cur, &toconv); 2831 if (ret < 0) { 2832 if (written > 0) 2833 ret = -2; 2834 else 2835 return(-1); 2836 } 2837 unused += written; 2838 cur += toconv; 2839 } while (ret == -2); 2840 } 2841 if (in->buf->rawconsumed < unused) 2842 return(-1); 2843 return(in->buf->rawconsumed - unused); 2844 } 2845 return(in->consumed + (in->cur - in->base)); 2846 } 2847 2848 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 2849 #ifdef LIBXML_ISO8859X_ENABLED 2850 2851 /** 2852 * UTF8ToISO8859x: 2853 * @out: a pointer to an array of bytes to store the result 2854 * @outlen: the length of @out 2855 * @in: a pointer to an array of UTF-8 chars 2856 * @inlen: the length of @in 2857 * @xlattable: the 2-level transcoding table 2858 * 2859 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* 2860 * block of chars out. 2861 * 2862 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 2863 * The value of @inlen after return is the number of octets consumed 2864 * as the return value is positive, else unpredictable. 
2865 * The value of @outlen after return is the number of ocetes consumed. 2866 */ 2867 static int 2868 UTF8ToISO8859x(unsigned char* out, int *outlen, 2869 const unsigned char* in, int *inlen, 2870 unsigned char const *xlattable) { 2871 const unsigned char* outstart = out; 2872 const unsigned char* inend; 2873 const unsigned char* instart = in; 2874 const unsigned char* processed = in; 2875 2876 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 2877 (xlattable == NULL)) 2878 return(-1); 2879 if (in == NULL) { 2880 /* 2881 * initialization nothing to do 2882 */ 2883 *outlen = 0; 2884 *inlen = 0; 2885 return(0); 2886 } 2887 inend = in + (*inlen); 2888 while (in < inend) { 2889 unsigned char d = *in++; 2890 if (d < 0x80) { 2891 *out++ = d; 2892 } else if (d < 0xC0) { 2893 /* trailing byte in leading position */ 2894 *outlen = out - outstart; 2895 *inlen = processed - instart; 2896 return(-2); 2897 } else if (d < 0xE0) { 2898 unsigned char c; 2899 if (!(in < inend)) { 2900 /* trailing byte not in input buffer */ 2901 *outlen = out - outstart; 2902 *inlen = processed - instart; 2903 return(-3); 2904 } 2905 c = *in++; 2906 if ((c & 0xC0) != 0x80) { 2907 /* not a trailing byte */ 2908 *outlen = out - outstart; 2909 *inlen = processed - instart; 2910 return(-2); 2911 } 2912 c = c & 0x3F; 2913 d = d & 0x1F; 2914 d = xlattable [48 + c + xlattable [d] * 64]; 2915 if (d == 0) { 2916 /* not in character set */ 2917 *outlen = out - outstart; 2918 *inlen = processed - instart; 2919 return(-2); 2920 } 2921 *out++ = d; 2922 } else if (d < 0xF0) { 2923 unsigned char c1; 2924 unsigned char c2; 2925 if (!(in < inend - 1)) { 2926 /* trailing bytes not in input buffer */ 2927 *outlen = out - outstart; 2928 *inlen = processed - instart; 2929 return(-3); 2930 } 2931 c1 = *in++; 2932 if ((c1 & 0xC0) != 0x80) { 2933 /* not a trailing byte (c1) */ 2934 *outlen = out - outstart; 2935 *inlen = processed - instart; 2936 return(-2); 2937 } 2938 c2 = *in++; 2939 if ((c2 & 0xC0) != 
0x80) { 2940 /* not a trailing byte (c2) */ 2941 *outlen = out - outstart; 2942 *inlen = processed - instart; 2943 return(-2); 2944 } 2945 c1 = c1 & 0x3F; 2946 c2 = c2 & 0x3F; 2947 d = d & 0x0F; 2948 d = xlattable [48 + c2 + xlattable [48 + c1 + 2949 xlattable [32 + d] * 64] * 64]; 2950 if (d == 0) { 2951 /* not in character set */ 2952 *outlen = out - outstart; 2953 *inlen = processed - instart; 2954 return(-2); 2955 } 2956 *out++ = d; 2957 } else { 2958 /* cannot transcode >= U+010000 */ 2959 *outlen = out - outstart; 2960 *inlen = processed - instart; 2961 return(-2); 2962 } 2963 processed = in; 2964 } 2965 *outlen = out - outstart; 2966 *inlen = processed - instart; 2967 return(*outlen); 2968 } 2969 2970 /** 2971 * ISO8859xToUTF8 2972 * @out: a pointer to an array of bytes to store the result 2973 * @outlen: the length of @out 2974 * @in: a pointer to an array of ISO Latin 1 chars 2975 * @inlen: the length of @in 2976 * 2977 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8 2978 * block of chars out. 2979 * Returns 0 if success, or -1 otherwise 2980 * The value of @inlen after return is the number of octets consumed 2981 * The value of @outlen after return is the number of ocetes produced. 
2982 */ 2983 static int 2984 ISO8859xToUTF8(unsigned char* out, int *outlen, 2985 const unsigned char* in, int *inlen, 2986 unsigned short const *unicodetable) { 2987 unsigned char* outstart = out; 2988 unsigned char* outend; 2989 const unsigned char* instart = in; 2990 const unsigned char* inend; 2991 const unsigned char* instop; 2992 unsigned int c; 2993 2994 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 2995 (in == NULL) || (unicodetable == NULL)) 2996 return(-1); 2997 outend = out + *outlen; 2998 inend = in + *inlen; 2999 instop = inend; 3000 3001 while ((in < inend) && (out < outend - 2)) { 3002 if (*in >= 0x80) { 3003 c = unicodetable [*in - 0x80]; 3004 if (c == 0) { 3005 /* undefined code point */ 3006 *outlen = out - outstart; 3007 *inlen = in - instart; 3008 return (-1); 3009 } 3010 if (c < 0x800) { 3011 *out++ = ((c >> 6) & 0x1F) | 0xC0; 3012 *out++ = (c & 0x3F) | 0x80; 3013 } else { 3014 *out++ = ((c >> 12) & 0x0F) | 0xE0; 3015 *out++ = ((c >> 6) & 0x3F) | 0x80; 3016 *out++ = (c & 0x3F) | 0x80; 3017 } 3018 ++in; 3019 } 3020 if (instop - in > outend - out) instop = in + (outend - out); 3021 while ((*in < 0x80) && (in < instop)) { 3022 *out++ = *in++; 3023 } 3024 } 3025 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3026 *out++ = *in++; 3027 } 3028 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3029 *out++ = *in++; 3030 } 3031 *outlen = out - outstart; 3032 *inlen = in - instart; 3033 return (*outlen); 3034 } 3035 3036 3037 /************************************************************************ 3038 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding * 3039 ************************************************************************/ 3040 3041 static unsigned short const xmlunicodetable_ISO8859_2 [128] = { 3042 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3043 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3044 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3045 0x0098, 
0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3046 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 3047 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 3048 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 3049 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 3050 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 3051 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 3052 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 3053 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 3054 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 3055 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 3056 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 3057 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 3058 }; 3059 3060 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 3061 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3062 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3063 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3066 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3068 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3069 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3070 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3071 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3072 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 3073 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3075 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 3076 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3077 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 3078 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3079 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3080 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 3081 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 3082 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 3083 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 3084 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3085 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 3086 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 3087 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 3088 }; 3089 3090 static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 3091 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3092 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3093 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3094 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3095 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 3096 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 3097 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 3098 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 3099 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, 3100 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3101 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 3102 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 3103 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 3104 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3105 0x0000, 0x00f1, 
0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 3106 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 3107 }; 3108 3109 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 3110 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3112 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3113 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3114 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3115 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3116 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3117 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3118 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3119 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3120 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 3121 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 3122 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 3123 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 3124 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3126 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 3127 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3129 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3132 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3134 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 3135 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 3136 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 3137 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3138 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3139 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3140 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 3141 }; 3142 3143 static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 3144 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3145 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3146 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3147 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3148 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 3149 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 3150 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 3151 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 3152 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3153 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 3154 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3155 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 3156 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3157 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 3158 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3159 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 3160 }; 3161 3162 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 3163 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 3164 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3166 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3169 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3170 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3171 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3172 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 3173 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3174 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3175 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3176 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 3177 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 3178 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 3179 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 3180 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 3181 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 3182 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3183 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 3184 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3185 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3186 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3187 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 3188 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 3189 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 3190 }; 3191 3192 static unsigned short const xmlunicodetable_ISO8859_5 [128] = { 3193 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3194 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3195 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 
0x0096, 0x0097, 3196 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3197 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 3198 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 3199 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 3200 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 3201 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 3202 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 3203 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 3204 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 3205 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 3206 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 3207 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 3208 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 3209 }; 3210 3211 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 3212 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3213 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3214 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3217 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3218 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3219 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3220 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3221 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 3222 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3223 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 3224 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3225 
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3226 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3227 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3228 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3231 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3234 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3236 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3237 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3238 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3239 }; 3240 3241 static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 3242 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3243 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3244 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3245 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3246 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 3247 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, 3248 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3249 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 3250 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 3251 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 3252 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 3253 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3254 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 3255 0x0648, 
0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 3256 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3257 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3258 }; 3259 3260 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 3261 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 3263 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3264 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3265 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3266 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3267 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3268 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3269 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3270 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3272 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3276 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 3277 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 3278 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3279 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 3280 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3281 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3282 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3283 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3284 }; 3285 3286 static unsigned short const 
xmlunicodetable_ISO8859_7 [128] = { 3287 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3288 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3289 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3290 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3291 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 3292 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 3293 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 3294 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 3295 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 3296 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 3297 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 3298 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 3299 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 3300 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 3301 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 3302 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 3303 }; 3304 3305 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 3306 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3308 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3311 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3313 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3314 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3315 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 3316 
"\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 3317 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3320 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3322 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 3323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3324 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3325 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3326 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3327 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3328 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3329 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 3330 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3331 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3332 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3333 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3335 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3336 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3337 }; 3338 3339 static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 3340 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3341 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3342 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3343 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3344 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3345 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 
0x00af, 3346 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3347 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 3348 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3349 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3350 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3351 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 3352 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 3353 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 3354 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 3355 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 3356 }; 3357 3358 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 3359 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3360 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 3361 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3364 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3366 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3367 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3368 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 3369 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 3370 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3371 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3372 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3373 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3374 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3375 
"\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 3376 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3377 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 3378 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 3383 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3385 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3386 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3387 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3388 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 3389 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3390 }; 3391 3392 static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 3393 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3394 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3395 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3396 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3397 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3398 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3399 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3400 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 3401 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3402 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3403 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3404 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 3405 0x00e0, 
0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3406 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3407 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3408 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 3409 }; 3410 3411 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 3412 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3417 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3418 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3419 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3420 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3421 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3422 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3423 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3424 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 3425 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3426 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3430 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 3433 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3434 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3435 }; 3436 3437 static unsigned short const xmlunicodetable_ISO8859_10 [128] = { 3438 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3439 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3440 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3441 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3442 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 3443 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 3444 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 3445 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 3446 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3447 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 3448 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 3449 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3450 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3451 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 3452 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 3453 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, 3454 }; 3455 3456 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 3457 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3459 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3460 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3461 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3462 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3463 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3464 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3465 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3466 
"\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 3467 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3468 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3469 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3470 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 3471 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 3472 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3474 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 3475 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 3476 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3479 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3480 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3481 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3482 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3483 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3484 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 3485 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 3486 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 3487 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 3488 }; 3489 3490 static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 3491 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3492 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3493 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3494 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3495 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 
0x0e07, 3496 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, 3497 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 3498 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 3499 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 3500 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 3501 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 3502 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 3503 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 3504 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 3505 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 3506 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 3507 }; 3508 3509 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 3510 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3512 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3513 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3514 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3515 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3516 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3517 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3518 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3519 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3521 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3522 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3523 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3524 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 3525 
"\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3526 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3527 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3528 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 3529 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3531 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3532 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3533 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3534 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 3535 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3537 }; 3538 3539 static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 3540 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3541 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3542 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3543 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3544 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 3545 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 3546 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 3547 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 3548 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 3549 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 3550 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 3551 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 3552 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 3553 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 3554 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 3555 0x0173, 0x0142, 
0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 3556 }; 3557 3558 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 3559 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3561 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3564 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3565 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3566 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3567 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3568 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 3569 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 3570 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 3576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3577 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3578 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 3579 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 3580 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 3581 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 3582 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 3583 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 3584 
"\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 3585 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3586 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3587 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3588 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3589 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3590 }; 3591 3592 static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3593 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3594 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3595 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3596 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3597 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3598 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3599 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3600 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3601 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3602 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3603 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3604 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3605 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3606 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3607 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3608 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3609 }; 3610 3611 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3612 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3614 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3616 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3617 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3618 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3619 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3620 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3621 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3622 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3627 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3633 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3635 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3636 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3642 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3643 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3646 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3647 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3648 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3649 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3650 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3651 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3652 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3653 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3654 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3655 }; 3656 3657 static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3658 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3659 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3660 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3661 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3662 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3663 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3664 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3665 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3666 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3667 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3668 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3669 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3670 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3671 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3672 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3673 0x00f8, 0x00f9, 
0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3674 }; 3675 3676 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3677 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3679 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3682 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3684 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3685 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3686 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3687 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 3688 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3695 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3697 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3698 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3699 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3700 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3701 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3702 
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3703 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3704 }; 3705 3706 static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3707 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3708 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3709 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3710 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3711 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3712 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3713 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3714 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3715 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3716 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3717 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, 3718 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3719 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3720 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3721 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3722 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3723 }; 3724 3725 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3726 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3728 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3731 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3732 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3733 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3734 
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3735 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3736 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3737 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3738 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3739 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3741 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3742 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3743 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3744 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3745 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3746 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3753 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3756 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3758 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3760 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3761 
"\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3762 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3763 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3764 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3765 }; 3766 3767 3768 /* 3769 * auto-generated functions for ISO-8859-2 .. ISO-8859-16 3770 */ 3771 3772 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3773 const unsigned char* in, int *inlen) { 3774 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3775 } 3776 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3777 const unsigned char* in, int *inlen) { 3778 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3779 } 3780 3781 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3782 const unsigned char* in, int *inlen) { 3783 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3784 } 3785 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3786 const unsigned char* in, int *inlen) { 3787 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3788 } 3789 3790 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3791 const unsigned char* in, int *inlen) { 3792 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3793 } 3794 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3795 const unsigned char* in, int *inlen) { 3796 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3797 } 3798 3799 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3800 const unsigned char* in, int *inlen) { 3801 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3802 } 3803 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3804 const unsigned char* in, int *inlen) { 3805 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3806 } 3807 3808 static 
int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3809 const unsigned char* in, int *inlen) { 3810 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3811 } 3812 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3813 const unsigned char* in, int *inlen) { 3814 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3815 } 3816 3817 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3818 const unsigned char* in, int *inlen) { 3819 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3820 } 3821 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3822 const unsigned char* in, int *inlen) { 3823 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3824 } 3825 3826 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3827 const unsigned char* in, int *inlen) { 3828 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3829 } 3830 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3831 const unsigned char* in, int *inlen) { 3832 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3833 } 3834 3835 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3836 const unsigned char* in, int *inlen) { 3837 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3838 } 3839 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3840 const unsigned char* in, int *inlen) { 3841 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3842 } 3843 3844 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 3845 const unsigned char* in, int *inlen) { 3846 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 3847 } 3848 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 3849 const unsigned char* in, int *inlen) { 3850 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 3851 } 3852 
3853 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 3854 const unsigned char* in, int *inlen) { 3855 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 3856 } 3857 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 3858 const unsigned char* in, int *inlen) { 3859 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 3860 } 3861 3862 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 3863 const unsigned char* in, int *inlen) { 3864 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 3865 } 3866 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 3867 const unsigned char* in, int *inlen) { 3868 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 3869 } 3870 3871 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 3872 const unsigned char* in, int *inlen) { 3873 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 3874 } 3875 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 3876 const unsigned char* in, int *inlen) { 3877 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 3878 } 3879 3880 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 3881 const unsigned char* in, int *inlen) { 3882 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 3883 } 3884 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 3885 const unsigned char* in, int *inlen) { 3886 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 3887 } 3888 3889 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 3890 const unsigned char* in, int *inlen) { 3891 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 3892 } 3893 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 3894 const unsigned char* in, int *inlen) { 3895 return UTF8ToISO8859x (out, outlen, in, inlen, 
xmltranscodetable_ISO8859_16); 3896 } 3897 3898 static void 3899 xmlRegisterCharEncodingHandlersISO8859x (void) { 3900 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 3901 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 3902 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 3903 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 3904 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 3905 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 3906 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 3907 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 3908 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 3909 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 3910 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 3911 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 3912 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 3913 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 3914 } 3915 3916 #endif 3917 #endif 3918 3919 #define bottom_encoding 3920 #include "elfgcchack.h" 3921