1 /* 2 * encoding.c : implements the encoding conversion functions needed for XML 3 * 4 * Related specs: 5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau 7 * [ISO-10646] UTF-8 and UTF-16 in Annexes 8 * [ISO-8859-1] ISO Latin-1 characters codes. 9 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10 * Worldwide Character Encoding -- Version 1.0", Addison- 11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12 * described in Unicode Technical Report #4. 13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14 * Information Interchange, ANSI X3.4-1986. 15 * 16 * See Copyright for the status of this software. 17 * 18 * daniel@veillard.com 19 * 20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org> 21 */ 22 23 #define IN_LIBXML 24 #include "libxml.h" 25 26 #include <string.h> 27 #include <limits.h> 28 29 #ifdef HAVE_CTYPE_H 30 #include <ctype.h> 31 #endif 32 #ifdef HAVE_STDLIB_H 33 #include <stdlib.h> 34 #endif 35 #ifdef LIBXML_ICONV_ENABLED 36 #ifdef HAVE_ERRNO_H 37 #include <errno.h> 38 #endif 39 #endif 40 #include <libxml/encoding.h> 41 #include <libxml/xmlmemory.h> 42 #ifdef LIBXML_HTML_ENABLED 43 #include <libxml/HTMLparser.h> 44 #endif 45 #include <libxml/globals.h> 46 #include <libxml/xmlerror.h> 47 48 #include "buf.h" 49 #include "enc.h" 50 51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; 52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; 53 54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; 55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; 56 struct _xmlCharEncodingAlias { 57 const char *name; 58 const char *alias; 59 }; 60 61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; 62 static int xmlCharEncodingAliasesNb = 0; 63 static int xmlCharEncodingAliasesMax = 0; 64 65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) 66 #if 0 67 #define DEBUG_ENCODING /* 
Define this to get encoding traces */ 68 #endif 69 #else 70 #ifdef LIBXML_ISO8859X_ENABLED 71 static void xmlRegisterCharEncodingHandlersISO8859x (void); 72 #endif 73 #endif 74 75 static int xmlLittleEndian = 1; 76 77 /** 78 * xmlEncodingErrMemory: 79 * @extra: extra informations 80 * 81 * Handle an out of memory condition 82 */ 83 static void 84 xmlEncodingErrMemory(const char *extra) 85 { 86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra); 87 } 88 89 /** 90 * xmlErrEncoding: 91 * @error: the error number 92 * @msg: the error message 93 * 94 * n encoding error 95 */ 96 static void LIBXML_ATTR_FORMAT(2,0) 97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) 98 { 99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, 100 XML_FROM_I18N, error, XML_ERR_FATAL, 101 NULL, 0, val, NULL, NULL, 0, 0, msg, val); 102 } 103 104 #ifdef LIBXML_ICU_ENABLED 105 static uconv_t* 106 openIcuConverter(const char* name, int toUnicode) 107 { 108 UErrorCode status = U_ZERO_ERROR; 109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); 110 if (conv == NULL) 111 return NULL; 112 113 conv->pivot_source = conv->pivot_buf; 114 conv->pivot_target = conv->pivot_buf; 115 116 conv->uconv = ucnv_open(name, &status); 117 if (U_FAILURE(status)) 118 goto error; 119 120 status = U_ZERO_ERROR; 121 if (toUnicode) { 122 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, 123 NULL, NULL, NULL, &status); 124 } 125 else { 126 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, 127 NULL, NULL, NULL, &status); 128 } 129 if (U_FAILURE(status)) 130 goto error; 131 132 status = U_ZERO_ERROR; 133 conv->utf8 = ucnv_open("UTF-8", &status); 134 if (U_SUCCESS(status)) 135 return conv; 136 137 error: 138 if (conv->uconv) 139 ucnv_close(conv->uconv); 140 xmlFree(conv); 141 return NULL; 142 } 143 144 static void 145 closeIcuConverter(uconv_t *conv) 146 { 147 if (conv != NULL) { 148 ucnv_close(conv->uconv); 149 ucnv_close(conv->utf8); 150 xmlFree(conv); 151 } 152 
} 153 #endif /* LIBXML_ICU_ENABLED */ 154 155 /************************************************************************ 156 * * 157 * Conversions To/From UTF8 encoding * 158 * * 159 ************************************************************************/ 160 161 /** 162 * asciiToUTF8: 163 * @out: a pointer to an array of bytes to store the result 164 * @outlen: the length of @out 165 * @in: a pointer to an array of ASCII chars 166 * @inlen: the length of @in 167 * 168 * Take a block of ASCII chars in and try to convert it to an UTF-8 169 * block of chars out. 170 * Returns 0 if success, or -1 otherwise 171 * The value of @inlen after return is the number of octets consumed 172 * if the return value is positive, else unpredictable. 173 * The value of @outlen after return is the number of octets consumed. 174 */ 175 static int 176 asciiToUTF8(unsigned char* out, int *outlen, 177 const unsigned char* in, int *inlen) { 178 unsigned char* outstart = out; 179 const unsigned char* base = in; 180 const unsigned char* processed = in; 181 unsigned char* outend = out + *outlen; 182 const unsigned char* inend; 183 unsigned int c; 184 185 inend = in + (*inlen); 186 while ((in < inend) && (out - outstart + 5 < *outlen)) { 187 c= *in++; 188 189 if (out >= outend) 190 break; 191 if (c < 0x80) { 192 *out++ = c; 193 } else { 194 *outlen = out - outstart; 195 *inlen = processed - base; 196 return(-1); 197 } 198 199 processed = (const unsigned char*) in; 200 } 201 *outlen = out - outstart; 202 *inlen = processed - base; 203 return(*outlen); 204 } 205 206 #ifdef LIBXML_OUTPUT_ENABLED 207 /** 208 * UTF8Toascii: 209 * @out: a pointer to an array of bytes to store the result 210 * @outlen: the length of @out 211 * @in: a pointer to an array of UTF-8 chars 212 * @inlen: the length of @in 213 * 214 * Take a block of UTF-8 chars in and try to convert it to an ASCII 215 * block of chars out. 
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is the initialization call: nothing is written and both
 * lengths are reset to 0.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8, or a character that has no ASCII form), or
 *     -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* outend;
    const unsigned char* inend;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned int d = *in;
        int trailing;

        if (d < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) d;
            in++;
            continue;
        }

        /* stray continuation byte, or a lead byte UTF-8 never uses */
        if ((d < 0xC0) || (d >= 0xF8)) {
            ret = -2;
            break;
        }

        /*
         * A multi-byte sequence split by the end of the input is left
         * unconsumed so the caller can retry with more data.
         */
        trailing = (d < 0xE0) ? 1 : ((d < 0xF0) ? 2 : 3);
        if (inend - (in + 1) < trailing)
            break;

        /*
         * A complete multi-byte sequence is either malformed, overlong,
         * or encodes a character >= 0x80: none of these exist in ASCII.
         */
        ret = -2;
        break;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return((ret < 0) ? ret : *outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops at the first character that no
 * longer fits in @out; a two-byte character is never written partially.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int outmax;
    int inmax;
    int written = 0;
    int consumed = 0;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outmax = *outlen;
    inmax = *inlen;

    /*
     * Work with indexes rather than end pointers so that zero or negative
     * lengths never produce a pointer outside the caller's buffers.
     */
    while (consumed < inmax) {
        unsigned char c = in[consumed];

        if (c < 0x80) {
            if (written >= outmax)
                break;
            out[written++] = c;
        } else {
            /* 0x80..0xFF always expands to exactly two bytes */
            if (outmax - written < 2)
                break;
            out[written++] = (unsigned char) (0xC0 | (c >> 6));
            out[written++] = (unsigned char) (0x80 | (c & 0x3F));
        }
        consumed++;
    }
    *outlen = written;
    *inlen = consumed;
    return(*outlen);
}
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion" for UTF-8: copies as many bytes as fit.
 * A NULL @inb is the initialization call and resets both lengths to 0.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the number of bytes to copy is negative.
 * On success both *outlen and *inlenb are set to the number of bytes
 * copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int ncopy;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* nothing to convert: just report empty buffers */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy whatever is the smaller of "available" and "room" */
    ncopy = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (ncopy < 0)
        return(-1);

    memcpy(out, inb, ncopy);

    *inlenb = ncopy;
    *outlen = ncopy;
    return(ncopy);
}
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is the initialization call: nothing is written and both
 * lengths are reset to 0. A multi-byte sequence cut off by the end of
 * @in is left unconsumed so the caller can retry with more data.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed or overlong UTF-8, or a character above U+00FF),
 *     or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d, minval;
    int trailing;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        /* minval is the smallest code point allowed for this length */
        if      (d < 0x80)  { c= d; trailing= 0; minval= 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        }
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; minval= 0x80; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; minval= 0x800; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; minval= 0x10000; }
        else {
            /* no chance for this in IsoLat1 */
            ret = -2;
            break;
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                break;
            c = (c << 6) | (d & 0x3F);
        }

        /*
         * Fail on a bad continuation byte (loop left early), on an
         * overlong form, and on anything Latin-1 cannot represent.
         */
        if ((trailing > 0) || (c < minval) || (c > 0xFF)) {
            ret = -2;
            break;
        }

        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so it needs
 * no particular alignment and the result does not depend on the host
 * byte order.
 *
 * Conversion stops, without error, when the next character does not fit
 * in @out or when the input ends in the middle of a code unit or of a
 * surrogate pair; the unconverted tail is reported through @inlenb.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (unpaired surrogate in the input).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* const outstart = out;
    unsigned char* const outend = out + ((*outlen > 0) ? *outlen : 0);
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int needed;
    int ret = 0;

    /* a dangling odd byte cannot form a code unit: leave it unconsumed */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + ((*inlenb > 0) ? *inlenb : 0);

    while (inend - in >= 2) {
        const unsigned char* next = in + 2;

        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        if ((c & 0xFC00) == 0xDC00) {
            /* low surrogate without a preceding high surrogate */
            ret = -2;
            break;
        }
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* pair split by end of input */
            d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
            if ((d & 0xFC00) != 0xDC00) {
                ret = -2;
                break;
            }
            next += 2;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is now a single code point; never emit it partially */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;
        if (outend - out < needed)
            break;

        if (needed == 1) {
            *out++ = (unsigned char) c;
        } else if (needed == 2) {
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else if (needed == 3) {
            *out++ = (unsigned char) (0xE0 | (c >> 12));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else {
            *out++ = (unsigned char) (0xF0 | (c >> 18));
            *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return((ret < 0) ? ret : *outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is written. The output is
 * written byte by byte, so @outb needs no particular alignment.
 *
 * A NULL @in is the initialization call: nothing is written and both
 * lengths are reset to 0. Conversion stops, without error, when the next
 * character does not fit in @outb or when the input ends in the middle
 * of a multi-byte sequence.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed or overlong UTF-8,
 *     an encoded surrogate, or a code point above U+10FFFF).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, minval;
    int trailing;
    int ret = 0;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units are ever written */
    outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);
    while (in < inend) {
        d= *in++;
        /* minval is the smallest code point allowed for this length */
        if      (d < 0x80)  { c= d; trailing= 0; minval= 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        }
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; minval= 0x80; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; minval= 0x800; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; minval= 0x10000; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                break;
            c = (c << 6) | (d & 0x3F);
        }

        /*
         * Bad continuation byte (loop left early), overlong form,
         * surrogate code point, or value beyond the Unicode range.
         */
        if ((trailing > 0) || (c < minval) ||
            ((c >= 0xD800) && (c <= 0xDFFF)) || (c >= 0x110000)) {
            ret = -2;
            break;
        }

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. The output is little-endian: a NULL @in is the
 * initialization call and writes the FF FE byte order mark when @outb
 * has room for it; every other call is forwarded to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* validate before the initialization path dereferences anything */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        /* no room for the BOM: report that nothing was written */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
#endif /* LIBXML_OUTPUT_ENABLED */
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so it needs
 * no particular alignment and the result does not depend on the host
 * byte order.
 *
 * Conversion stops, without error, when the next character does not fit
 * in @out or when the input ends in the middle of a code unit or of a
 * surrogate pair; the unconverted tail is reported through @inlenb.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (unpaired surrogate in the input).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* const outstart = out;
    unsigned char* const outend = out + ((*outlen > 0) ? *outlen : 0);
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int needed;
    int ret = 0;

    /* a dangling odd byte cannot form a code unit: leave it unconsumed */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + ((*inlenb > 0) ? *inlenb : 0);

    while (inend - in >= 2) {
        const unsigned char* next = in + 2;

        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        if ((c & 0xFC00) == 0xDC00) {
            /* low surrogate without a preceding high surrogate */
            ret = -2;
            break;
        }
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            /*
             * The low half may simply not have arrived yet: stop here
             * and let the caller come back with more input.
             */
            if (inend - next < 2)
                break;
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            if ((d & 0xFC00) != 0xDC00) {
                ret = -2;
                break;
            }
            next += 2;
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /*
         * c is now a single code point. Check the room for the whole
         * sequence first so a character is never written partially.
         */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;
        if (outend - out < needed)
            break;

        if (needed == 1) {
            *out++ = (unsigned char) c;
        } else if (needed == 2) {
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else if (needed == 3) {
            *out++ = (unsigned char) (0xE0 | (c >> 12));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else {
            *out++ = (unsigned char) (0xF0 | (c >> 18));
            *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return((ret < 0) ? ret : *outlen);
}
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is written. The output is
 * written byte by byte, so @outb needs no particular alignment.
 *
 * A NULL @in is the initialization call: nothing is written and both
 * lengths are reset to 0. Conversion stops, without error, when the next
 * character does not fit in @outb or when the input ends in the middle
 * of a multi-byte sequence.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed or overlong UTF-8,
 *     an encoded surrogate, or a code point above U+10FFFF).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced,
 * on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, minval;
    int trailing;
    int ret = 0;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units are ever written */
    outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);
    while (in < inend) {
        d= *in++;
        /* minval is the smallest code point allowed for this length */
        if      (d < 0x80)  { c= d; trailing= 0; minval= 0; }
        else if (d < 0xC0)  {
            /* trailing byte in leading position */
            ret = -2;
            break;
        }
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; minval= 0x80; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; minval= 0x800; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; minval= 0x10000; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                break;
            c = (c << 6) | (d & 0x3F);
        }

        /*
         * Bad continuation byte (loop left early), overlong form,
         * surrogate code point, or value beyond the Unicode range.
         */
        if ((trailing > 0) || (c < minval) ||
            ((c >= 0xD800) && (c <= 0xDFFF)) || (c >= 0x110000)) {
            ret = -2;
            break;
        }

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/************************************************************************
 *									*
 *		Generic encoding handling routines			*
 *									*
 ************************************************************************/
930 */ 931 xmlCharEncoding 932 xmlDetectCharEncoding(const unsigned char* in, int len) 933 { 934 if (in == NULL) 935 return(XML_CHAR_ENCODING_NONE); 936 if (len >= 4) { 937 if ((in[0] == 0x00) && (in[1] == 0x00) && 938 (in[2] == 0x00) && (in[3] == 0x3C)) 939 return(XML_CHAR_ENCODING_UCS4BE); 940 if ((in[0] == 0x3C) && (in[1] == 0x00) && 941 (in[2] == 0x00) && (in[3] == 0x00)) 942 return(XML_CHAR_ENCODING_UCS4LE); 943 if ((in[0] == 0x00) && (in[1] == 0x00) && 944 (in[2] == 0x3C) && (in[3] == 0x00)) 945 return(XML_CHAR_ENCODING_UCS4_2143); 946 if ((in[0] == 0x00) && (in[1] == 0x3C) && 947 (in[2] == 0x00) && (in[3] == 0x00)) 948 return(XML_CHAR_ENCODING_UCS4_3412); 949 if ((in[0] == 0x4C) && (in[1] == 0x6F) && 950 (in[2] == 0xA7) && (in[3] == 0x94)) 951 return(XML_CHAR_ENCODING_EBCDIC); 952 if ((in[0] == 0x3C) && (in[1] == 0x3F) && 953 (in[2] == 0x78) && (in[3] == 0x6D)) 954 return(XML_CHAR_ENCODING_UTF8); 955 /* 956 * Although not part of the recommendation, we also 957 * attempt an "auto-recognition" of UTF-16LE and 958 * UTF-16BE encodings. 
959 */ 960 if ((in[0] == 0x3C) && (in[1] == 0x00) && 961 (in[2] == 0x3F) && (in[3] == 0x00)) 962 return(XML_CHAR_ENCODING_UTF16LE); 963 if ((in[0] == 0x00) && (in[1] == 0x3C) && 964 (in[2] == 0x00) && (in[3] == 0x3F)) 965 return(XML_CHAR_ENCODING_UTF16BE); 966 } 967 if (len >= 3) { 968 /* 969 * Errata on XML-1.0 June 20 2001 970 * We now allow an UTF8 encoded BOM 971 */ 972 if ((in[0] == 0xEF) && (in[1] == 0xBB) && 973 (in[2] == 0xBF)) 974 return(XML_CHAR_ENCODING_UTF8); 975 } 976 /* For UTF-16 we can recognize by the BOM */ 977 if (len >= 2) { 978 if ((in[0] == 0xFE) && (in[1] == 0xFF)) 979 return(XML_CHAR_ENCODING_UTF16BE); 980 if ((in[0] == 0xFF) && (in[1] == 0xFE)) 981 return(XML_CHAR_ENCODING_UTF16LE); 982 } 983 return(XML_CHAR_ENCODING_NONE); 984 } 985 986 /** 987 * xmlCleanupEncodingAliases: 988 * 989 * Unregisters all aliases 990 */ 991 void 992 xmlCleanupEncodingAliases(void) { 993 int i; 994 995 if (xmlCharEncodingAliases == NULL) 996 return; 997 998 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 999 if (xmlCharEncodingAliases[i].name != NULL) 1000 xmlFree((char *) xmlCharEncodingAliases[i].name); 1001 if (xmlCharEncodingAliases[i].alias != NULL) 1002 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1003 } 1004 xmlCharEncodingAliasesNb = 0; 1005 xmlCharEncodingAliasesMax = 0; 1006 xmlFree(xmlCharEncodingAliases); 1007 xmlCharEncodingAliases = NULL; 1008 } 1009 1010 /** 1011 * xmlGetEncodingAlias: 1012 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1013 * 1014 * Lookup an encoding name for the given alias. 
1015 * 1016 * Returns NULL if not found, otherwise the original name 1017 */ 1018 const char * 1019 xmlGetEncodingAlias(const char *alias) { 1020 int i; 1021 char upper[100]; 1022 1023 if (alias == NULL) 1024 return(NULL); 1025 1026 if (xmlCharEncodingAliases == NULL) 1027 return(NULL); 1028 1029 for (i = 0;i < 99;i++) { 1030 upper[i] = toupper(alias[i]); 1031 if (upper[i] == 0) break; 1032 } 1033 upper[i] = 0; 1034 1035 /* 1036 * Walk down the list looking for a definition of the alias 1037 */ 1038 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1039 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1040 return(xmlCharEncodingAliases[i].name); 1041 } 1042 } 1043 return(NULL); 1044 } 1045 1046 /** 1047 * xmlAddEncodingAlias: 1048 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1049 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1050 * 1051 * Registers an alias @alias for an encoding named @name. Existing alias 1052 * will be overwritten. 1053 * 1054 * Returns 0 in case of success, -1 in case of error 1055 */ 1056 int 1057 xmlAddEncodingAlias(const char *name, const char *alias) { 1058 int i; 1059 char upper[100]; 1060 1061 if ((name == NULL) || (alias == NULL)) 1062 return(-1); 1063 1064 for (i = 0;i < 99;i++) { 1065 upper[i] = toupper(alias[i]); 1066 if (upper[i] == 0) break; 1067 } 1068 upper[i] = 0; 1069 1070 if (xmlCharEncodingAliases == NULL) { 1071 xmlCharEncodingAliasesNb = 0; 1072 xmlCharEncodingAliasesMax = 20; 1073 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1074 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1075 if (xmlCharEncodingAliases == NULL) 1076 return(-1); 1077 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1078 xmlCharEncodingAliasesMax *= 2; 1079 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1080 xmlRealloc(xmlCharEncodingAliases, 1081 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1082 } 1083 /* 1084 * Walk down the list looking 
for a definition of the alias 1085 */ 1086 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1087 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1088 /* 1089 * Replace the definition. 1090 */ 1091 xmlFree((char *) xmlCharEncodingAliases[i].name); 1092 xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1093 return(0); 1094 } 1095 } 1096 /* 1097 * Add the definition 1098 */ 1099 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1100 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1101 xmlCharEncodingAliasesNb++; 1102 return(0); 1103 } 1104 1105 /** 1106 * xmlDelEncodingAlias: 1107 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1108 * 1109 * Unregisters an encoding alias @alias 1110 * 1111 * Returns 0 in case of success, -1 in case of error 1112 */ 1113 int 1114 xmlDelEncodingAlias(const char *alias) { 1115 int i; 1116 1117 if (alias == NULL) 1118 return(-1); 1119 1120 if (xmlCharEncodingAliases == NULL) 1121 return(-1); 1122 /* 1123 * Walk down the list looking for a definition of the alias 1124 */ 1125 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1126 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1127 xmlFree((char *) xmlCharEncodingAliases[i].name); 1128 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1129 xmlCharEncodingAliasesNb--; 1130 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1131 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1132 return(0); 1133 } 1134 } 1135 return(-1); 1136 } 1137 1138 /** 1139 * xmlParseCharEncoding: 1140 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1141 * 1142 * Compare the string to the encoding schemes already known. Note 1143 * that the comparison is case insensitive accordingly to the section 1144 * [XML] 4.3.3 Character Encoding in Entities. 1145 * 1146 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1147 * if not recognized. 
1148 */ 1149 xmlCharEncoding 1150 xmlParseCharEncoding(const char* name) 1151 { 1152 const char *alias; 1153 char upper[500]; 1154 int i; 1155 1156 if (name == NULL) 1157 return(XML_CHAR_ENCODING_NONE); 1158 1159 /* 1160 * Do the alias resolution 1161 */ 1162 alias = xmlGetEncodingAlias(name); 1163 if (alias != NULL) 1164 name = alias; 1165 1166 for (i = 0;i < 499;i++) { 1167 upper[i] = toupper(name[i]); 1168 if (upper[i] == 0) break; 1169 } 1170 upper[i] = 0; 1171 1172 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1173 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1174 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1175 1176 /* 1177 * NOTE: if we were able to parse this, the endianness of UTF16 is 1178 * already found and in use 1179 */ 1180 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1181 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1182 1183 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1184 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1185 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1186 1187 /* 1188 * NOTE: if we were able to parse this, the endianness of UCS4 is 1189 * already found and in use 1190 */ 1191 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1192 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1193 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1194 1195 1196 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1197 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1198 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1199 1200 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1201 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1202 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1203 1204 if (!strcmp(upper, "ISO-8859-3")) 
return(XML_CHAR_ENCODING_8859_3); 1205 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1206 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1207 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1208 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1209 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1210 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1211 1212 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1213 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1214 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1215 1216 #ifdef DEBUG_ENCODING 1217 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1218 #endif 1219 return(XML_CHAR_ENCODING_ERROR); 1220 } 1221 1222 /** 1223 * xmlGetCharEncodingName: 1224 * @enc: the encoding 1225 * 1226 * The "canonical" name for XML encoding. 1227 * C.f. http://www.w3.org/TR/REC-xml#charencoding 1228 * Section 4.3.3 Character Encoding in Entities 1229 * 1230 * Returns the canonical name for the given encoding 1231 */ 1232 1233 const char* 1234 xmlGetCharEncodingName(xmlCharEncoding enc) { 1235 switch (enc) { 1236 case XML_CHAR_ENCODING_ERROR: 1237 return(NULL); 1238 case XML_CHAR_ENCODING_NONE: 1239 return(NULL); 1240 case XML_CHAR_ENCODING_UTF8: 1241 return("UTF-8"); 1242 case XML_CHAR_ENCODING_UTF16LE: 1243 return("UTF-16"); 1244 case XML_CHAR_ENCODING_UTF16BE: 1245 return("UTF-16"); 1246 case XML_CHAR_ENCODING_EBCDIC: 1247 return("EBCDIC"); 1248 case XML_CHAR_ENCODING_UCS4LE: 1249 return("ISO-10646-UCS-4"); 1250 case XML_CHAR_ENCODING_UCS4BE: 1251 return("ISO-10646-UCS-4"); 1252 case XML_CHAR_ENCODING_UCS4_2143: 1253 return("ISO-10646-UCS-4"); 1254 case XML_CHAR_ENCODING_UCS4_3412: 1255 return("ISO-10646-UCS-4"); 1256 case XML_CHAR_ENCODING_UCS2: 1257 return("ISO-10646-UCS-2"); 1258 case XML_CHAR_ENCODING_8859_1: 1259 
return("ISO-8859-1"); 1260 case XML_CHAR_ENCODING_8859_2: 1261 return("ISO-8859-2"); 1262 case XML_CHAR_ENCODING_8859_3: 1263 return("ISO-8859-3"); 1264 case XML_CHAR_ENCODING_8859_4: 1265 return("ISO-8859-4"); 1266 case XML_CHAR_ENCODING_8859_5: 1267 return("ISO-8859-5"); 1268 case XML_CHAR_ENCODING_8859_6: 1269 return("ISO-8859-6"); 1270 case XML_CHAR_ENCODING_8859_7: 1271 return("ISO-8859-7"); 1272 case XML_CHAR_ENCODING_8859_8: 1273 return("ISO-8859-8"); 1274 case XML_CHAR_ENCODING_8859_9: 1275 return("ISO-8859-9"); 1276 case XML_CHAR_ENCODING_2022_JP: 1277 return("ISO-2022-JP"); 1278 case XML_CHAR_ENCODING_SHIFT_JIS: 1279 return("Shift-JIS"); 1280 case XML_CHAR_ENCODING_EUC_JP: 1281 return("EUC-JP"); 1282 case XML_CHAR_ENCODING_ASCII: 1283 return(NULL); 1284 } 1285 return(NULL); 1286 } 1287 1288 /************************************************************************ 1289 * * 1290 * Char encoding handlers * 1291 * * 1292 ************************************************************************/ 1293 1294 1295 /* the size should be growable, but it's not a big deal ... */ 1296 #define MAX_ENCODING_HANDLERS 50 1297 static xmlCharEncodingHandlerPtr *handlers = NULL; 1298 static int nbCharEncodingHandler = 0; 1299 1300 /* 1301 * The default is UTF-8 for XML, that's also the default used for the 1302 * parser internals, so the default encoding handler is NULL 1303 */ 1304 1305 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1306 1307 /** 1308 * xmlNewCharEncodingHandler: 1309 * @name: the encoding name, in UTF-8 format (ASCII actually) 1310 * @input: the xmlCharEncodingInputFunc to read that encoding 1311 * @output: the xmlCharEncodingOutputFunc to write that encoding 1312 * 1313 * Create and registers an xmlCharEncodingHandler. 1314 * 1315 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 
1316 */ 1317 xmlCharEncodingHandlerPtr 1318 xmlNewCharEncodingHandler(const char *name, 1319 xmlCharEncodingInputFunc input, 1320 xmlCharEncodingOutputFunc output) { 1321 xmlCharEncodingHandlerPtr handler; 1322 const char *alias; 1323 char upper[500]; 1324 int i; 1325 char *up = NULL; 1326 1327 /* 1328 * Do the alias resolution 1329 */ 1330 alias = xmlGetEncodingAlias(name); 1331 if (alias != NULL) 1332 name = alias; 1333 1334 /* 1335 * Keep only the uppercase version of the encoding. 1336 */ 1337 if (name == NULL) { 1338 xmlEncodingErr(XML_I18N_NO_NAME, 1339 "xmlNewCharEncodingHandler : no name !\n", NULL); 1340 return(NULL); 1341 } 1342 for (i = 0;i < 499;i++) { 1343 upper[i] = toupper(name[i]); 1344 if (upper[i] == 0) break; 1345 } 1346 upper[i] = 0; 1347 up = xmlMemStrdup(upper); 1348 if (up == NULL) { 1349 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1350 return(NULL); 1351 } 1352 1353 /* 1354 * allocate and fill-up an handler block. 1355 */ 1356 handler = (xmlCharEncodingHandlerPtr) 1357 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1358 if (handler == NULL) { 1359 xmlFree(up); 1360 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1361 return(NULL); 1362 } 1363 memset(handler, 0, sizeof(xmlCharEncodingHandler)); 1364 handler->input = input; 1365 handler->output = output; 1366 handler->name = up; 1367 1368 #ifdef LIBXML_ICONV_ENABLED 1369 handler->iconv_in = NULL; 1370 handler->iconv_out = NULL; 1371 #endif 1372 #ifdef LIBXML_ICU_ENABLED 1373 handler->uconv_in = NULL; 1374 handler->uconv_out = NULL; 1375 #endif 1376 1377 /* 1378 * registers and returns the handler. 
1379 */ 1380 xmlRegisterCharEncodingHandler(handler); 1381 #ifdef DEBUG_ENCODING 1382 xmlGenericError(xmlGenericErrorContext, 1383 "Registered encoding handler for %s\n", name); 1384 #endif 1385 return(handler); 1386 } 1387 1388 /** 1389 * xmlInitCharEncodingHandlers: 1390 * 1391 * Initialize the char encoding support, it registers the default 1392 * encoding supported. 1393 * NOTE: while public, this function usually doesn't need to be called 1394 * in normal processing. 1395 */ 1396 void 1397 xmlInitCharEncodingHandlers(void) { 1398 unsigned short int tst = 0x1234; 1399 unsigned char *ptr = (unsigned char *) &tst; 1400 1401 if (handlers != NULL) return; 1402 1403 handlers = (xmlCharEncodingHandlerPtr *) 1404 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1405 1406 if (*ptr == 0x12) xmlLittleEndian = 0; 1407 else if (*ptr == 0x34) xmlLittleEndian = 1; 1408 else { 1409 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1410 "Odd problem at endianness detection\n", NULL); 1411 } 1412 1413 if (handlers == NULL) { 1414 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1415 return; 1416 } 1417 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1418 #ifdef LIBXML_OUTPUT_ENABLED 1419 xmlUTF16LEHandler = 1420 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1421 xmlUTF16BEHandler = 1422 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1423 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1424 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1425 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1426 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1427 #ifdef LIBXML_HTML_ENABLED 1428 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1429 #endif 1430 #else 1431 xmlUTF16LEHandler = 1432 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1433 xmlUTF16BEHandler = 1434 xmlNewCharEncodingHandler("UTF-16BE", 
UTF16BEToUTF8, NULL); 1435 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1436 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1437 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1438 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1439 #endif /* LIBXML_OUTPUT_ENABLED */ 1440 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 1441 #ifdef LIBXML_ISO8859X_ENABLED 1442 xmlRegisterCharEncodingHandlersISO8859x (); 1443 #endif 1444 #endif 1445 1446 } 1447 1448 /** 1449 * xmlCleanupCharEncodingHandlers: 1450 * 1451 * Cleanup the memory allocated for the char encoding support, it 1452 * unregisters all the encoding handlers and the aliases. 1453 */ 1454 void 1455 xmlCleanupCharEncodingHandlers(void) { 1456 xmlCleanupEncodingAliases(); 1457 1458 if (handlers == NULL) return; 1459 1460 for (;nbCharEncodingHandler > 0;) { 1461 nbCharEncodingHandler--; 1462 if (handlers[nbCharEncodingHandler] != NULL) { 1463 if (handlers[nbCharEncodingHandler]->name != NULL) 1464 xmlFree(handlers[nbCharEncodingHandler]->name); 1465 xmlFree(handlers[nbCharEncodingHandler]); 1466 } 1467 } 1468 xmlFree(handlers); 1469 handlers = NULL; 1470 nbCharEncodingHandler = 0; 1471 xmlDefaultCharEncodingHandler = NULL; 1472 } 1473 1474 /** 1475 * xmlRegisterCharEncodingHandler: 1476 * @handler: the xmlCharEncodingHandlerPtr handler block 1477 * 1478 * Register the char encoding handler, surprising, isn't it ? 
1479 */ 1480 void 1481 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1482 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1483 if ((handler == NULL) || (handlers == NULL)) { 1484 xmlEncodingErr(XML_I18N_NO_HANDLER, 1485 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1486 return; 1487 } 1488 1489 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1490 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1491 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1492 "MAX_ENCODING_HANDLERS"); 1493 return; 1494 } 1495 handlers[nbCharEncodingHandler++] = handler; 1496 } 1497 1498 /** 1499 * xmlGetCharEncodingHandler: 1500 * @enc: an xmlCharEncoding value. 1501 * 1502 * Search in the registered set the handler able to read/write that encoding. 1503 * 1504 * Returns the handler or NULL if not found 1505 */ 1506 xmlCharEncodingHandlerPtr 1507 xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1508 xmlCharEncodingHandlerPtr handler; 1509 1510 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1511 switch (enc) { 1512 case XML_CHAR_ENCODING_ERROR: 1513 return(NULL); 1514 case XML_CHAR_ENCODING_NONE: 1515 return(NULL); 1516 case XML_CHAR_ENCODING_UTF8: 1517 return(NULL); 1518 case XML_CHAR_ENCODING_UTF16LE: 1519 return(xmlUTF16LEHandler); 1520 case XML_CHAR_ENCODING_UTF16BE: 1521 return(xmlUTF16BEHandler); 1522 case XML_CHAR_ENCODING_EBCDIC: 1523 handler = xmlFindCharEncodingHandler("EBCDIC"); 1524 if (handler != NULL) return(handler); 1525 handler = xmlFindCharEncodingHandler("ebcdic"); 1526 if (handler != NULL) return(handler); 1527 handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1528 if (handler != NULL) return(handler); 1529 handler = xmlFindCharEncodingHandler("IBM-037"); 1530 if (handler != NULL) return(handler); 1531 break; 1532 case XML_CHAR_ENCODING_UCS4BE: 1533 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1534 if (handler != NULL) return(handler); 1535 handler = xmlFindCharEncodingHandler("UCS-4"); 1536 
if (handler != NULL) return(handler); 1537 handler = xmlFindCharEncodingHandler("UCS4"); 1538 if (handler != NULL) return(handler); 1539 break; 1540 case XML_CHAR_ENCODING_UCS4LE: 1541 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1542 if (handler != NULL) return(handler); 1543 handler = xmlFindCharEncodingHandler("UCS-4"); 1544 if (handler != NULL) return(handler); 1545 handler = xmlFindCharEncodingHandler("UCS4"); 1546 if (handler != NULL) return(handler); 1547 break; 1548 case XML_CHAR_ENCODING_UCS4_2143: 1549 break; 1550 case XML_CHAR_ENCODING_UCS4_3412: 1551 break; 1552 case XML_CHAR_ENCODING_UCS2: 1553 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1554 if (handler != NULL) return(handler); 1555 handler = xmlFindCharEncodingHandler("UCS-2"); 1556 if (handler != NULL) return(handler); 1557 handler = xmlFindCharEncodingHandler("UCS2"); 1558 if (handler != NULL) return(handler); 1559 break; 1560 1561 /* 1562 * We used to keep ISO Latin encodings native in the 1563 * generated data. This led to so many problems that 1564 * this has been removed. 
One can still change this 1565 * back by registering no-ops encoders for those 1566 */ 1567 case XML_CHAR_ENCODING_8859_1: 1568 handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1569 if (handler != NULL) return(handler); 1570 break; 1571 case XML_CHAR_ENCODING_8859_2: 1572 handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1573 if (handler != NULL) return(handler); 1574 break; 1575 case XML_CHAR_ENCODING_8859_3: 1576 handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1577 if (handler != NULL) return(handler); 1578 break; 1579 case XML_CHAR_ENCODING_8859_4: 1580 handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1581 if (handler != NULL) return(handler); 1582 break; 1583 case XML_CHAR_ENCODING_8859_5: 1584 handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1585 if (handler != NULL) return(handler); 1586 break; 1587 case XML_CHAR_ENCODING_8859_6: 1588 handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1589 if (handler != NULL) return(handler); 1590 break; 1591 case XML_CHAR_ENCODING_8859_7: 1592 handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1593 if (handler != NULL) return(handler); 1594 break; 1595 case XML_CHAR_ENCODING_8859_8: 1596 handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1597 if (handler != NULL) return(handler); 1598 break; 1599 case XML_CHAR_ENCODING_8859_9: 1600 handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1601 if (handler != NULL) return(handler); 1602 break; 1603 1604 1605 case XML_CHAR_ENCODING_2022_JP: 1606 handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1607 if (handler != NULL) return(handler); 1608 break; 1609 case XML_CHAR_ENCODING_SHIFT_JIS: 1610 handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1611 if (handler != NULL) return(handler); 1612 handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1613 if (handler != NULL) return(handler); 1614 handler = xmlFindCharEncodingHandler("Shift_JIS"); 1615 if (handler != NULL) return(handler); 1616 break; 1617 case XML_CHAR_ENCODING_EUC_JP: 1618 handler = 
xmlFindCharEncodingHandler("EUC-JP"); 1619 if (handler != NULL) return(handler); 1620 break; 1621 default: 1622 break; 1623 } 1624 1625 #ifdef DEBUG_ENCODING 1626 xmlGenericError(xmlGenericErrorContext, 1627 "No handler found for encoding %d\n", enc); 1628 #endif 1629 return(NULL); 1630 } 1631 1632 /** 1633 * xmlFindCharEncodingHandler: 1634 * @name: a string describing the char encoding. 1635 * 1636 * Search in the registered set the handler able to read/write that encoding. 1637 * 1638 * Returns the handler or NULL if not found 1639 */ 1640 xmlCharEncodingHandlerPtr 1641 xmlFindCharEncodingHandler(const char *name) { 1642 const char *nalias; 1643 const char *norig; 1644 xmlCharEncoding alias; 1645 #ifdef LIBXML_ICONV_ENABLED 1646 xmlCharEncodingHandlerPtr enc; 1647 iconv_t icv_in, icv_out; 1648 #endif /* LIBXML_ICONV_ENABLED */ 1649 #ifdef LIBXML_ICU_ENABLED 1650 xmlCharEncodingHandlerPtr encu; 1651 uconv_t *ucv_in, *ucv_out; 1652 #endif /* LIBXML_ICU_ENABLED */ 1653 char upper[100]; 1654 int i; 1655 1656 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1657 if (name == NULL) return(xmlDefaultCharEncodingHandler); 1658 if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1659 1660 /* 1661 * Do the alias resolution 1662 */ 1663 norig = name; 1664 nalias = xmlGetEncodingAlias(name); 1665 if (nalias != NULL) 1666 name = nalias; 1667 1668 /* 1669 * Check first for directly registered encoding names 1670 */ 1671 for (i = 0;i < 99;i++) { 1672 upper[i] = toupper(name[i]); 1673 if (upper[i] == 0) break; 1674 } 1675 upper[i] = 0; 1676 1677 if (handlers != NULL) { 1678 for (i = 0;i < nbCharEncodingHandler; i++) { 1679 if (!strcmp(upper, handlers[i]->name)) { 1680 #ifdef DEBUG_ENCODING 1681 xmlGenericError(xmlGenericErrorContext, 1682 "Found registered handler for encoding %s\n", name); 1683 #endif 1684 return(handlers[i]); 1685 } 1686 } 1687 } 1688 1689 #ifdef LIBXML_ICONV_ENABLED 1690 /* check whether iconv can handle this */ 1691 icv_in = iconv_open("UTF-8", 
name); 1692 icv_out = iconv_open(name, "UTF-8"); 1693 if (icv_in == (iconv_t) -1) { 1694 icv_in = iconv_open("UTF-8", upper); 1695 } 1696 if (icv_out == (iconv_t) -1) { 1697 icv_out = iconv_open(upper, "UTF-8"); 1698 } 1699 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1700 enc = (xmlCharEncodingHandlerPtr) 1701 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1702 if (enc == NULL) { 1703 iconv_close(icv_in); 1704 iconv_close(icv_out); 1705 return(NULL); 1706 } 1707 memset(enc, 0, sizeof(xmlCharEncodingHandler)); 1708 enc->name = xmlMemStrdup(name); 1709 enc->input = NULL; 1710 enc->output = NULL; 1711 enc->iconv_in = icv_in; 1712 enc->iconv_out = icv_out; 1713 #ifdef DEBUG_ENCODING 1714 xmlGenericError(xmlGenericErrorContext, 1715 "Found iconv handler for encoding %s\n", name); 1716 #endif 1717 return enc; 1718 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1719 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1720 "iconv : problems with filters for '%s'\n", name); 1721 } 1722 #endif /* LIBXML_ICONV_ENABLED */ 1723 #ifdef LIBXML_ICU_ENABLED 1724 /* check whether icu can handle this */ 1725 ucv_in = openIcuConverter(name, 1); 1726 ucv_out = openIcuConverter(name, 0); 1727 if (ucv_in != NULL && ucv_out != NULL) { 1728 encu = (xmlCharEncodingHandlerPtr) 1729 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1730 if (encu == NULL) { 1731 closeIcuConverter(ucv_in); 1732 closeIcuConverter(ucv_out); 1733 return(NULL); 1734 } 1735 memset(encu, 0, sizeof(xmlCharEncodingHandler)); 1736 encu->name = xmlMemStrdup(name); 1737 encu->input = NULL; 1738 encu->output = NULL; 1739 encu->uconv_in = ucv_in; 1740 encu->uconv_out = ucv_out; 1741 #ifdef DEBUG_ENCODING 1742 xmlGenericError(xmlGenericErrorContext, 1743 "Found ICU converter handler for encoding %s\n", name); 1744 #endif 1745 return encu; 1746 } else if (ucv_in != NULL || ucv_out != NULL) { 1747 closeIcuConverter(ucv_in); 1748 closeIcuConverter(ucv_out); 1749 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1750 "ICU 
converter : problems with filters for '%s'\n", name); 1751 } 1752 #endif /* LIBXML_ICU_ENABLED */ 1753 1754 #ifdef DEBUG_ENCODING 1755 xmlGenericError(xmlGenericErrorContext, 1756 "No handler found for encoding %s\n", name); 1757 #endif 1758 1759 /* 1760 * Fallback using the canonical names 1761 */ 1762 alias = xmlParseCharEncoding(norig); 1763 if (alias != XML_CHAR_ENCODING_ERROR) { 1764 const char* canon; 1765 canon = xmlGetCharEncodingName(alias); 1766 if ((canon != NULL) && (strcmp(name, canon))) { 1767 return(xmlFindCharEncodingHandler(canon)); 1768 } 1769 } 1770 1771 /* If "none of the above", give up */ 1772 return(NULL); 1773 } 1774 1775 /************************************************************************ 1776 * * 1777 * ICONV based generic conversion functions * 1778 * * 1779 ************************************************************************/ 1780 1781 #ifdef LIBXML_ICONV_ENABLED 1782 /** 1783 * xmlIconvWrapper: 1784 * @cd: iconv converter data structure 1785 * @out: a pointer to an array of bytes to store the result 1786 * @outlen: the length of @out 1787 * @in: a pointer to an array of ISO Latin 1 chars 1788 * @inlen: the length of @in 1789 * 1790 * Returns 0 if success, or 1791 * -1 by lack of space, or 1792 * -2 if the transcoding fails (for *in is not valid utf8 string or 1793 * the result of transformation can't fit into the encoding we want), or 1794 * -3 if there the last byte can't form a single output char. 1795 * 1796 * The value of @inlen after return is the number of octets consumed 1797 * as the return value is positive, else unpredictable. 1798 * The value of @outlen after return is the number of octets consumed. 
1799 */ 1800 static int 1801 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1802 const unsigned char *in, int *inlen) { 1803 size_t icv_inlen, icv_outlen; 1804 const char *icv_in = (const char *) in; 1805 char *icv_out = (char *) out; 1806 int ret; 1807 1808 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1809 if (outlen != NULL) *outlen = 0; 1810 return(-1); 1811 } 1812 icv_inlen = *inlen; 1813 icv_outlen = *outlen; 1814 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1815 *inlen -= icv_inlen; 1816 *outlen -= icv_outlen; 1817 if ((icv_inlen != 0) || (ret == -1)) { 1818 #ifdef EILSEQ 1819 if (errno == EILSEQ) { 1820 return -2; 1821 } else 1822 #endif 1823 #ifdef E2BIG 1824 if (errno == E2BIG) { 1825 return -1; 1826 } else 1827 #endif 1828 #ifdef EINVAL 1829 if (errno == EINVAL) { 1830 return -3; 1831 } else 1832 #endif 1833 { 1834 return -3; 1835 } 1836 } 1837 return 0; 1838 } 1839 #endif /* LIBXML_ICONV_ENABLED */ 1840 1841 /************************************************************************ 1842 * * 1843 * ICU based generic conversion functions * 1844 * * 1845 ************************************************************************/ 1846 1847 #ifdef LIBXML_ICU_ENABLED 1848 /** 1849 * xmlUconvWrapper: 1850 * @cd: ICU uconverter data structure 1851 * @toUnicode : non-zero if toUnicode. 0 otherwise. 1852 * @out: a pointer to an array of bytes to store the result 1853 * @outlen: the length of @out 1854 * @in: a pointer to an array of ISO Latin 1 chars 1855 * @inlen: the length of @in 1856 * @flush: if true, indicates end of input 1857 * 1858 * Returns 0 if success, or 1859 * -1 by lack of space, or 1860 * -2 if the transcoding fails (for *in is not valid utf8 string or 1861 * the result of transformation can't fit into the encoding we want), or 1862 * -3 if there the last byte can't form a single output char. 
1863 * 1864 * The value of @inlen after return is the number of octets consumed 1865 * as the return value is positive, else unpredictable. 1866 * The value of @outlen after return is the number of octets consumed. 1867 */ 1868 static int 1869 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, 1870 const unsigned char *in, int *inlen, int flush) { 1871 const char *ucv_in = (const char *) in; 1872 char *ucv_out = (char *) out; 1873 UErrorCode err = U_ZERO_ERROR; 1874 1875 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1876 if (outlen != NULL) *outlen = 0; 1877 return(-1); 1878 } 1879 1880 if (toUnicode) { 1881 /* encoding => UTF-16 => UTF-8 */ 1882 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, 1883 &ucv_in, ucv_in + *inlen, cd->pivot_buf, 1884 &cd->pivot_source, &cd->pivot_target, 1885 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err); 1886 } else { 1887 /* UTF-8 => UTF-16 => encoding */ 1888 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, 1889 &ucv_in, ucv_in + *inlen, cd->pivot_buf, 1890 &cd->pivot_source, &cd->pivot_target, 1891 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err); 1892 } 1893 *inlen = ucv_in - (const char*) in; 1894 *outlen = ucv_out - (char *) out; 1895 if (U_SUCCESS(err)) { 1896 /* reset pivot buf if this is the last call for input (flush==TRUE) */ 1897 if (flush) 1898 cd->pivot_source = cd->pivot_target = cd->pivot_buf; 1899 return 0; 1900 } 1901 if (err == U_BUFFER_OVERFLOW_ERROR) 1902 return -1; 1903 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) 1904 return -2; 1905 return -3; 1906 } 1907 #endif /* LIBXML_ICU_ENABLED */ 1908 1909 /************************************************************************ 1910 * * 1911 * The real API used by libxml for on-the-fly conversion * 1912 * * 1913 ************************************************************************/ 1914 1915 static int 1916 xmlEncInputChunk(xmlCharEncodingHandler 
*handler, unsigned char *out, 1917 int *outlen, const unsigned char *in, int *inlen, int flush) { 1918 int ret; 1919 (void)flush; 1920 1921 if (handler->input != NULL) { 1922 ret = handler->input(out, outlen, in, inlen); 1923 } 1924 #ifdef LIBXML_ICONV_ENABLED 1925 else if (handler->iconv_in != NULL) { 1926 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen); 1927 } 1928 #endif /* LIBXML_ICONV_ENABLED */ 1929 #ifdef LIBXML_ICU_ENABLED 1930 else if (handler->uconv_in != NULL) { 1931 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen, 1932 flush); 1933 } 1934 #endif /* LIBXML_ICU_ENABLED */ 1935 else { 1936 *outlen = 0; 1937 *inlen = 0; 1938 ret = -2; 1939 } 1940 1941 return(ret); 1942 } 1943 1944 /* Returns -4 if no output function was found. */ 1945 static int 1946 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, 1947 int *outlen, const unsigned char *in, int *inlen) { 1948 int ret; 1949 1950 if (handler->output != NULL) { 1951 ret = handler->output(out, outlen, in, inlen); 1952 } 1953 #ifdef LIBXML_ICONV_ENABLED 1954 else if (handler->iconv_out != NULL) { 1955 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen); 1956 } 1957 #endif /* LIBXML_ICONV_ENABLED */ 1958 #ifdef LIBXML_ICU_ENABLED 1959 else if (handler->uconv_out != NULL) { 1960 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen, 1961 TRUE); 1962 } 1963 #endif /* LIBXML_ICU_ENABLED */ 1964 else { 1965 *outlen = 0; 1966 *inlen = 0; 1967 ret = -4; 1968 } 1969 1970 return(ret); 1971 } 1972 1973 /** 1974 * xmlCharEncFirstLineInt: 1975 * @handler: char encoding transformation data structure 1976 * @out: an xmlBuffer for the output. 1977 * @in: an xmlBuffer for the input 1978 * @len: number of bytes to convert for the first line, or -1 1979 * 1980 * Front-end for the encoding handler input function, but handle only 1981 * the very first line, i.e. limit itself to 45 chars. 
1982 * 1983 * Returns the number of byte written if success, or 1984 * -1 general error 1985 * -2 if the transcoding fails (for *in is not valid utf8 string or 1986 * the result of transformation can't fit into the encoding we want), or 1987 */ 1988 int 1989 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1990 xmlBufferPtr in, int len) { 1991 int ret; 1992 int written; 1993 int toconv; 1994 1995 if (handler == NULL) return(-1); 1996 if (out == NULL) return(-1); 1997 if (in == NULL) return(-1); 1998 1999 /* calculate space available */ 2000 written = out->size - out->use - 1; /* count '\0' */ 2001 toconv = in->use; 2002 /* 2003 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 2004 * 45 chars should be sufficient to reach the end of the encoding 2005 * declaration without going too far inside the document content. 2006 * on UTF-16 this means 90bytes, on UCS4 this means 180 2007 * The actual value depending on guessed encoding is passed as @len 2008 * if provided 2009 */ 2010 if (len >= 0) { 2011 if (toconv > len) 2012 toconv = len; 2013 } else { 2014 if (toconv > 180) 2015 toconv = 180; 2016 } 2017 if (toconv * 2 >= written) { 2018 xmlBufferGrow(out, toconv * 2); 2019 written = out->size - out->use - 1; 2020 } 2021 2022 ret = xmlEncInputChunk(handler, &out->content[out->use], &written, 2023 in->content, &toconv, 0); 2024 xmlBufferShrink(in, toconv); 2025 out->use += written; 2026 out->content[out->use] = 0; 2027 if (ret == -1) ret = -3; 2028 2029 #ifdef DEBUG_ENCODING 2030 switch (ret) { 2031 case 0: 2032 xmlGenericError(xmlGenericErrorContext, 2033 "converted %d bytes to %d bytes of input\n", 2034 toconv, written); 2035 break; 2036 case -1: 2037 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2038 toconv, written, in->use); 2039 break; 2040 case -2: 2041 xmlGenericError(xmlGenericErrorContext, 2042 "input conversion failed due to input error\n"); 2043 break; 2044 case -3: 2045 
xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2046 toconv, written, in->use); 2047 break; 2048 default: 2049 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 2050 } 2051 #endif /* DEBUG_ENCODING */ 2052 /* 2053 * Ignore when input buffer is not on a boundary 2054 */ 2055 if (ret == -3) ret = 0; 2056 if (ret == -1) ret = 0; 2057 return(ret); 2058 } 2059 2060 /** 2061 * xmlCharEncFirstLine: 2062 * @handler: char encoding transformation data structure 2063 * @out: an xmlBuffer for the output. 2064 * @in: an xmlBuffer for the input 2065 * 2066 * Front-end for the encoding handler input function, but handle only 2067 * the very first line, i.e. limit itself to 45 chars. 2068 * 2069 * Returns the number of byte written if success, or 2070 * -1 general error 2071 * -2 if the transcoding fails (for *in is not valid utf8 string or 2072 * the result of transformation can't fit into the encoding we want), or 2073 */ 2074 int 2075 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2076 xmlBufferPtr in) { 2077 return(xmlCharEncFirstLineInt(handler, out, in, -1)); 2078 } 2079 2080 /** 2081 * xmlCharEncFirstLineInput: 2082 * @input: a parser input buffer 2083 * @len: number of bytes to convert for the first line, or -1 2084 * 2085 * Front-end for the encoding handler input function, but handle only 2086 * the very first line. Point is that this is based on autodetection 2087 * of the encoding and once that first line is converted we may find 2088 * out that a different decoder is needed to process the input. 
2089 * 2090 * Returns the number of byte written if success, or 2091 * -1 general error 2092 * -2 if the transcoding fails (for *in is not valid utf8 string or 2093 * the result of transformation can't fit into the encoding we want), or 2094 */ 2095 int 2096 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) 2097 { 2098 int ret; 2099 size_t written; 2100 size_t toconv; 2101 int c_in; 2102 int c_out; 2103 xmlBufPtr in; 2104 xmlBufPtr out; 2105 2106 if ((input == NULL) || (input->encoder == NULL) || 2107 (input->buffer == NULL) || (input->raw == NULL)) 2108 return (-1); 2109 out = input->buffer; 2110 in = input->raw; 2111 2112 toconv = xmlBufUse(in); 2113 if (toconv == 0) 2114 return (0); 2115 written = xmlBufAvail(out) - 1; /* count '\0' */ 2116 /* 2117 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 2118 * 45 chars should be sufficient to reach the end of the encoding 2119 * declaration without going too far inside the document content. 2120 * on UTF-16 this means 90bytes, on UCS4 this means 180 2121 * The actual value depending on guessed encoding is passed as @len 2122 * if provided 2123 */ 2124 if (len >= 0) { 2125 if (toconv > (unsigned int) len) 2126 toconv = len; 2127 } else { 2128 if (toconv > 180) 2129 toconv = 180; 2130 } 2131 if (toconv * 2 >= written) { 2132 xmlBufGrow(out, toconv * 2); 2133 written = xmlBufAvail(out) - 1; 2134 } 2135 if (written > 360) 2136 written = 360; 2137 2138 c_in = toconv; 2139 c_out = written; 2140 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, 2141 xmlBufContent(in), &c_in, 0); 2142 xmlBufShrink(in, c_in); 2143 xmlBufAddLen(out, c_out); 2144 if (ret == -1) 2145 ret = -3; 2146 2147 switch (ret) { 2148 case 0: 2149 #ifdef DEBUG_ENCODING 2150 xmlGenericError(xmlGenericErrorContext, 2151 "converted %d bytes to %d bytes of input\n", 2152 c_in, c_out); 2153 #endif 2154 break; 2155 case -1: 2156 #ifdef DEBUG_ENCODING 2157 xmlGenericError(xmlGenericErrorContext, 2158 "converted %d bytes to %d 
bytes of input, %d left\n", 2159 c_in, c_out, (int)xmlBufUse(in)); 2160 #endif 2161 break; 2162 case -3: 2163 #ifdef DEBUG_ENCODING 2164 xmlGenericError(xmlGenericErrorContext, 2165 "converted %d bytes to %d bytes of input, %d left\n", 2166 c_in, c_out, (int)xmlBufUse(in)); 2167 #endif 2168 break; 2169 case -2: { 2170 char buf[50]; 2171 const xmlChar *content = xmlBufContent(in); 2172 2173 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2174 content[0], content[1], 2175 content[2], content[3]); 2176 buf[49] = 0; 2177 xmlEncodingErr(XML_I18N_CONV_FAILED, 2178 "input conversion failed due to input error, bytes %s\n", 2179 buf); 2180 } 2181 } 2182 /* 2183 * Ignore when input buffer is not on a boundary 2184 */ 2185 if (ret == -3) ret = 0; 2186 if (ret == -1) ret = 0; 2187 return(ret); 2188 } 2189 2190 /** 2191 * xmlCharEncInput: 2192 * @input: a parser input buffer 2193 * @flush: try to flush all the raw buffer 2194 * 2195 * Generic front-end for the encoding handler on parser input 2196 * 2197 * Returns the number of byte written if success, or 2198 * -1 general error 2199 * -2 if the transcoding fails (for *in is not valid utf8 string or 2200 * the result of transformation can't fit into the encoding we want), or 2201 */ 2202 int 2203 xmlCharEncInput(xmlParserInputBufferPtr input, int flush) 2204 { 2205 int ret; 2206 size_t written; 2207 size_t toconv; 2208 int c_in; 2209 int c_out; 2210 xmlBufPtr in; 2211 xmlBufPtr out; 2212 2213 if ((input == NULL) || (input->encoder == NULL) || 2214 (input->buffer == NULL) || (input->raw == NULL)) 2215 return (-1); 2216 out = input->buffer; 2217 in = input->raw; 2218 2219 toconv = xmlBufUse(in); 2220 if (toconv == 0) 2221 return (0); 2222 if ((toconv > 64 * 1024) && (flush == 0)) 2223 toconv = 64 * 1024; 2224 written = xmlBufAvail(out); 2225 if (written > 0) 2226 written--; /* count '\0' */ 2227 if (toconv * 2 >= written) { 2228 xmlBufGrow(out, toconv * 2); 2229 written = xmlBufAvail(out); 2230 if (written > 0) 2231 
written--; /* count '\0' */ 2232 } 2233 if ((written > 128 * 1024) && (flush == 0)) 2234 written = 128 * 1024; 2235 2236 c_in = toconv; 2237 c_out = written; 2238 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, 2239 xmlBufContent(in), &c_in, flush); 2240 xmlBufShrink(in, c_in); 2241 xmlBufAddLen(out, c_out); 2242 if (ret == -1) 2243 ret = -3; 2244 2245 switch (ret) { 2246 case 0: 2247 #ifdef DEBUG_ENCODING 2248 xmlGenericError(xmlGenericErrorContext, 2249 "converted %d bytes to %d bytes of input\n", 2250 c_in, c_out); 2251 #endif 2252 break; 2253 case -1: 2254 #ifdef DEBUG_ENCODING 2255 xmlGenericError(xmlGenericErrorContext, 2256 "converted %d bytes to %d bytes of input, %d left\n", 2257 c_in, c_out, (int)xmlBufUse(in)); 2258 #endif 2259 break; 2260 case -3: 2261 #ifdef DEBUG_ENCODING 2262 xmlGenericError(xmlGenericErrorContext, 2263 "converted %d bytes to %d bytes of input, %d left\n", 2264 c_in, c_out, (int)xmlBufUse(in)); 2265 #endif 2266 break; 2267 case -2: { 2268 char buf[50]; 2269 const xmlChar *content = xmlBufContent(in); 2270 2271 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2272 content[0], content[1], 2273 content[2], content[3]); 2274 buf[49] = 0; 2275 xmlEncodingErr(XML_I18N_CONV_FAILED, 2276 "input conversion failed due to input error, bytes %s\n", 2277 buf); 2278 } 2279 } 2280 /* 2281 * Ignore when input buffer is not on a boundary 2282 */ 2283 if (ret == -3) 2284 ret = 0; 2285 return (c_out? c_out : ret); 2286 } 2287 2288 /** 2289 * xmlCharEncInFunc: 2290 * @handler: char encoding transformation data structure 2291 * @out: an xmlBuffer for the output. 
2292 * @in: an xmlBuffer for the input 2293 * 2294 * Generic front-end for the encoding handler input function 2295 * 2296 * Returns the number of byte written if success, or 2297 * -1 general error 2298 * -2 if the transcoding fails (for *in is not valid utf8 string or 2299 * the result of transformation can't fit into the encoding we want), or 2300 */ 2301 int 2302 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2303 xmlBufferPtr in) 2304 { 2305 int ret; 2306 int written; 2307 int toconv; 2308 2309 if (handler == NULL) 2310 return (-1); 2311 if (out == NULL) 2312 return (-1); 2313 if (in == NULL) 2314 return (-1); 2315 2316 toconv = in->use; 2317 if (toconv == 0) 2318 return (0); 2319 written = out->size - out->use -1; /* count '\0' */ 2320 if (toconv * 2 >= written) { 2321 xmlBufferGrow(out, out->size + toconv * 2); 2322 written = out->size - out->use - 1; 2323 } 2324 ret = xmlEncInputChunk(handler, &out->content[out->use], &written, 2325 in->content, &toconv, 1); 2326 xmlBufferShrink(in, toconv); 2327 out->use += written; 2328 out->content[out->use] = 0; 2329 if (ret == -1) 2330 ret = -3; 2331 2332 switch (ret) { 2333 case 0: 2334 #ifdef DEBUG_ENCODING 2335 xmlGenericError(xmlGenericErrorContext, 2336 "converted %d bytes to %d bytes of input\n", 2337 toconv, written); 2338 #endif 2339 break; 2340 case -1: 2341 #ifdef DEBUG_ENCODING 2342 xmlGenericError(xmlGenericErrorContext, 2343 "converted %d bytes to %d bytes of input, %d left\n", 2344 toconv, written, in->use); 2345 #endif 2346 break; 2347 case -3: 2348 #ifdef DEBUG_ENCODING 2349 xmlGenericError(xmlGenericErrorContext, 2350 "converted %d bytes to %d bytes of input, %d left\n", 2351 toconv, written, in->use); 2352 #endif 2353 break; 2354 case -2: { 2355 char buf[50]; 2356 2357 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2358 in->content[0], in->content[1], 2359 in->content[2], in->content[3]); 2360 buf[49] = 0; 2361 xmlEncodingErr(XML_I18N_CONV_FAILED, 2362 "input conversion 
failed due to input error, bytes %s\n", 2363 buf); 2364 } 2365 } 2366 /* 2367 * Ignore when input buffer is not on a boundary 2368 */ 2369 if (ret == -3) 2370 ret = 0; 2371 return (written? written : ret); 2372 } 2373 2374 #ifdef LIBXML_OUTPUT_ENABLED 2375 /** 2376 * xmlCharEncOutput: 2377 * @output: a parser output buffer 2378 * @init: is this an initialization call without data 2379 * 2380 * Generic front-end for the encoding handler on parser output 2381 * a first call with @init == 1 has to be made first to initiate the 2382 * output in case of non-stateless encoding needing to initiate their 2383 * state or the output (like the BOM in UTF16). 2384 * In case of UTF8 sequence conversion errors for the given encoder, 2385 * the content will be automatically remapped to a CharRef sequence. 2386 * 2387 * Returns the number of byte written if success, or 2388 * -1 general error 2389 * -2 if the transcoding fails (for *in is not valid utf8 string or 2390 * the result of transformation can't fit into the encoding we want), or 2391 */ 2392 int 2393 xmlCharEncOutput(xmlOutputBufferPtr output, int init) 2394 { 2395 int ret; 2396 size_t written; 2397 size_t writtentot = 0; 2398 size_t toconv; 2399 int c_in; 2400 int c_out; 2401 xmlBufPtr in; 2402 xmlBufPtr out; 2403 2404 if ((output == NULL) || (output->encoder == NULL) || 2405 (output->buffer == NULL) || (output->conv == NULL)) 2406 return (-1); 2407 out = output->conv; 2408 in = output->buffer; 2409 2410 retry: 2411 2412 written = xmlBufAvail(out); 2413 if (written > 0) 2414 written--; /* count '\0' */ 2415 2416 /* 2417 * First specific handling of the initialization call 2418 */ 2419 if (init) { 2420 c_in = 0; 2421 c_out = written; 2422 /* TODO: Check return value. 
*/ 2423 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, 2424 NULL, &c_in); 2425 xmlBufAddLen(out, c_out); 2426 #ifdef DEBUG_ENCODING 2427 xmlGenericError(xmlGenericErrorContext, 2428 "initialized encoder\n"); 2429 #endif 2430 return(0); 2431 } 2432 2433 /* 2434 * Conversion itself. 2435 */ 2436 toconv = xmlBufUse(in); 2437 if (toconv == 0) 2438 return (0); 2439 if (toconv > 64 * 1024) 2440 toconv = 64 * 1024; 2441 if (toconv * 4 >= written) { 2442 xmlBufGrow(out, toconv * 4); 2443 written = xmlBufAvail(out) - 1; 2444 } 2445 if (written > 256 * 1024) 2446 written = 256 * 1024; 2447 2448 c_in = toconv; 2449 c_out = written; 2450 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, 2451 xmlBufContent(in), &c_in); 2452 xmlBufShrink(in, c_in); 2453 xmlBufAddLen(out, c_out); 2454 writtentot += c_out; 2455 if (ret == -1) { 2456 if (c_out > 0) { 2457 /* Can be a limitation of iconv or uconv */ 2458 goto retry; 2459 } 2460 ret = -3; 2461 } 2462 2463 /* 2464 * Attempt to handle error cases 2465 */ 2466 switch (ret) { 2467 case 0: 2468 #ifdef DEBUG_ENCODING 2469 xmlGenericError(xmlGenericErrorContext, 2470 "converted %d bytes to %d bytes of output\n", 2471 c_in, c_out); 2472 #endif 2473 break; 2474 case -1: 2475 #ifdef DEBUG_ENCODING 2476 xmlGenericError(xmlGenericErrorContext, 2477 "output conversion failed by lack of space\n"); 2478 #endif 2479 break; 2480 case -3: 2481 #ifdef DEBUG_ENCODING 2482 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2483 c_in, c_out, (int) xmlBufUse(in)); 2484 #endif 2485 break; 2486 case -4: 2487 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2488 "xmlCharEncOutFunc: no output function !\n", NULL); 2489 ret = -1; 2490 break; 2491 case -2: { 2492 xmlChar charref[20]; 2493 int len = (int) xmlBufUse(in); 2494 xmlChar *content = xmlBufContent(in); 2495 int cur, charrefLen; 2496 2497 cur = xmlGetUTF8Char(content, &len); 2498 if (cur <= 0) 2499 break; 2500 2501 #ifdef DEBUG_ENCODING 2502 
xmlGenericError(xmlGenericErrorContext, 2503 "handling output conversion error\n"); 2504 xmlGenericError(xmlGenericErrorContext, 2505 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2506 content[0], content[1], 2507 content[2], content[3]); 2508 #endif 2509 /* 2510 * Removes the UTF8 sequence, and replace it by a charref 2511 * and continue the transcoding phase, hoping the error 2512 * did not mangle the encoder state. 2513 */ 2514 charrefLen = snprintf((char *) &charref[0], sizeof(charref), 2515 "&#%d;", cur); 2516 xmlBufShrink(in, len); 2517 xmlBufGrow(out, charrefLen * 4); 2518 c_out = xmlBufAvail(out) - 1; 2519 c_in = charrefLen; 2520 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, 2521 charref, &c_in); 2522 2523 if ((ret < 0) || (c_in != charrefLen)) { 2524 char buf[50]; 2525 2526 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2527 content[0], content[1], 2528 content[2], content[3]); 2529 buf[49] = 0; 2530 xmlEncodingErr(XML_I18N_CONV_FAILED, 2531 "output conversion failed due to conv error, bytes %s\n", 2532 buf); 2533 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE) 2534 content[0] = ' '; 2535 break; 2536 } 2537 2538 xmlBufAddLen(out, c_out); 2539 writtentot += c_out; 2540 goto retry; 2541 } 2542 } 2543 return(ret); 2544 } 2545 #endif 2546 2547 /** 2548 * xmlCharEncOutFunc: 2549 * @handler: char encoding transformation data structure 2550 * @out: an xmlBuffer for the output. 2551 * @in: an xmlBuffer for the input 2552 * 2553 * Generic front-end for the encoding handler output function 2554 * a first call with @in == NULL has to be made firs to initiate the 2555 * output in case of non-stateless encoding needing to initiate their 2556 * state or the output (like the BOM in UTF16). 2557 * In case of UTF8 sequence conversion errors for the given encoder, 2558 * the content will be automatically remapped to a CharRef sequence. 
2559 * 2560 * Returns the number of byte written if success, or 2561 * -1 general error 2562 * -2 if the transcoding fails (for *in is not valid utf8 string or 2563 * the result of transformation can't fit into the encoding we want), or 2564 */ 2565 int 2566 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2567 xmlBufferPtr in) { 2568 int ret; 2569 int written; 2570 int writtentot = 0; 2571 int toconv; 2572 int output = 0; 2573 2574 if (handler == NULL) return(-1); 2575 if (out == NULL) return(-1); 2576 2577 retry: 2578 2579 written = out->size - out->use; 2580 2581 if (written > 0) 2582 written--; /* Gennady: count '/0' */ 2583 2584 /* 2585 * First specific handling of in = NULL, i.e. the initialization call 2586 */ 2587 if (in == NULL) { 2588 toconv = 0; 2589 /* TODO: Check return value. */ 2590 xmlEncOutputChunk(handler, &out->content[out->use], &written, 2591 NULL, &toconv); 2592 out->use += written; 2593 out->content[out->use] = 0; 2594 #ifdef DEBUG_ENCODING 2595 xmlGenericError(xmlGenericErrorContext, 2596 "initialized encoder\n"); 2597 #endif 2598 return(0); 2599 } 2600 2601 /* 2602 * Conversion itself. 
2603 */ 2604 toconv = in->use; 2605 if (toconv == 0) 2606 return(0); 2607 if (toconv * 4 >= written) { 2608 xmlBufferGrow(out, toconv * 4); 2609 written = out->size - out->use - 1; 2610 } 2611 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, 2612 in->content, &toconv); 2613 xmlBufferShrink(in, toconv); 2614 out->use += written; 2615 writtentot += written; 2616 out->content[out->use] = 0; 2617 if (ret == -1) { 2618 if (written > 0) { 2619 /* Can be a limitation of iconv or uconv */ 2620 goto retry; 2621 } 2622 ret = -3; 2623 } 2624 2625 if (ret >= 0) output += ret; 2626 2627 /* 2628 * Attempt to handle error cases 2629 */ 2630 switch (ret) { 2631 case 0: 2632 #ifdef DEBUG_ENCODING 2633 xmlGenericError(xmlGenericErrorContext, 2634 "converted %d bytes to %d bytes of output\n", 2635 toconv, written); 2636 #endif 2637 break; 2638 case -1: 2639 #ifdef DEBUG_ENCODING 2640 xmlGenericError(xmlGenericErrorContext, 2641 "output conversion failed by lack of space\n"); 2642 #endif 2643 break; 2644 case -3: 2645 #ifdef DEBUG_ENCODING 2646 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2647 toconv, written, in->use); 2648 #endif 2649 break; 2650 case -4: 2651 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2652 "xmlCharEncOutFunc: no output function !\n", NULL); 2653 ret = -1; 2654 break; 2655 case -2: { 2656 xmlChar charref[20]; 2657 int len = in->use; 2658 const xmlChar *utf = (const xmlChar *) in->content; 2659 int cur, charrefLen; 2660 2661 cur = xmlGetUTF8Char(utf, &len); 2662 if (cur <= 0) 2663 break; 2664 2665 #ifdef DEBUG_ENCODING 2666 xmlGenericError(xmlGenericErrorContext, 2667 "handling output conversion error\n"); 2668 xmlGenericError(xmlGenericErrorContext, 2669 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2670 in->content[0], in->content[1], 2671 in->content[2], in->content[3]); 2672 #endif 2673 /* 2674 * Removes the UTF8 sequence, and replace it by a charref 2675 * and continue the transcoding phase, hoping the error 
2676 * did not mangle the encoder state. 2677 */ 2678 charrefLen = snprintf((char *) &charref[0], sizeof(charref), 2679 "&#%d;", cur); 2680 xmlBufferShrink(in, len); 2681 xmlBufferGrow(out, charrefLen * 4); 2682 written = out->size - out->use - 1; 2683 toconv = charrefLen; 2684 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, 2685 charref, &toconv); 2686 2687 if ((ret < 0) || (toconv != charrefLen)) { 2688 char buf[50]; 2689 2690 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2691 in->content[0], in->content[1], 2692 in->content[2], in->content[3]); 2693 buf[49] = 0; 2694 xmlEncodingErr(XML_I18N_CONV_FAILED, 2695 "output conversion failed due to conv error, bytes %s\n", 2696 buf); 2697 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2698 in->content[0] = ' '; 2699 break; 2700 } 2701 2702 out->use += written; 2703 writtentot += written; 2704 out->content[out->use] = 0; 2705 goto retry; 2706 } 2707 } 2708 return(ret); 2709 } 2710 2711 /** 2712 * xmlCharEncCloseFunc: 2713 * @handler: char encoding transformation data structure 2714 * 2715 * Generic front-end for encoding handler close function 2716 * 2717 * Returns 0 if success, or -1 in case of error 2718 */ 2719 int 2720 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2721 int ret = 0; 2722 int tofree = 0; 2723 int i, handler_in_list = 0; 2724 2725 if (handler == NULL) return(-1); 2726 if (handler->name == NULL) return(-1); 2727 if (handlers != NULL) { 2728 for (i = 0;i < nbCharEncodingHandler; i++) { 2729 if (handler == handlers[i]) { 2730 handler_in_list = 1; 2731 break; 2732 } 2733 } 2734 } 2735 #ifdef LIBXML_ICONV_ENABLED 2736 /* 2737 * Iconv handlers can be used only once, free the whole block. 2738 * and the associated icon resources. 
2739 */ 2740 if ((handler_in_list == 0) && 2741 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) { 2742 tofree = 1; 2743 if (handler->iconv_out != NULL) { 2744 if (iconv_close(handler->iconv_out)) 2745 ret = -1; 2746 handler->iconv_out = NULL; 2747 } 2748 if (handler->iconv_in != NULL) { 2749 if (iconv_close(handler->iconv_in)) 2750 ret = -1; 2751 handler->iconv_in = NULL; 2752 } 2753 } 2754 #endif /* LIBXML_ICONV_ENABLED */ 2755 #ifdef LIBXML_ICU_ENABLED 2756 if ((handler_in_list == 0) && 2757 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) { 2758 tofree = 1; 2759 if (handler->uconv_out != NULL) { 2760 closeIcuConverter(handler->uconv_out); 2761 handler->uconv_out = NULL; 2762 } 2763 if (handler->uconv_in != NULL) { 2764 closeIcuConverter(handler->uconv_in); 2765 handler->uconv_in = NULL; 2766 } 2767 } 2768 #endif 2769 if (tofree) { 2770 /* free up only dynamic handlers iconv/uconv */ 2771 if (handler->name != NULL) 2772 xmlFree(handler->name); 2773 handler->name = NULL; 2774 xmlFree(handler); 2775 } 2776 #ifdef DEBUG_ENCODING 2777 if (ret) 2778 xmlGenericError(xmlGenericErrorContext, 2779 "failed to close the encoding handler\n"); 2780 else 2781 xmlGenericError(xmlGenericErrorContext, 2782 "closed the encoding handler\n"); 2783 #endif 2784 2785 return(ret); 2786 } 2787 2788 /** 2789 * xmlByteConsumed: 2790 * @ctxt: an XML parser context 2791 * 2792 * This function provides the current index of the parser relative 2793 * to the start of the current entity. This function is computed in 2794 * bytes from the beginning starting at zero and finishing at the 2795 * size in byte of the file if parsing a file. The function is 2796 * of constant cost if the input is UTF-8 but can be costly if run 2797 * on non-UTF-8 input. 2798 * 2799 * Returns the index in bytes from the beginning of the entity or -1 2800 * in case the index could not be computed. 
2801 */ 2802 long 2803 xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2804 xmlParserInputPtr in; 2805 2806 if (ctxt == NULL) return(-1); 2807 in = ctxt->input; 2808 if (in == NULL) return(-1); 2809 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2810 unsigned int unused = 0; 2811 xmlCharEncodingHandler * handler = in->buf->encoder; 2812 /* 2813 * Encoding conversion, compute the number of unused original 2814 * bytes from the input not consumed and subtract that from 2815 * the raw consumed value, this is not a cheap operation 2816 */ 2817 if (in->end - in->cur > 0) { 2818 unsigned char convbuf[32000]; 2819 const unsigned char *cur = (const unsigned char *)in->cur; 2820 int toconv = in->end - in->cur, written = 32000; 2821 2822 int ret; 2823 2824 do { 2825 toconv = in->end - cur; 2826 written = 32000; 2827 ret = xmlEncOutputChunk(handler, &convbuf[0], &written, 2828 cur, &toconv); 2829 if (ret < 0) { 2830 if (written > 0) 2831 ret = -2; 2832 else 2833 return(-1); 2834 } 2835 unused += written; 2836 cur += toconv; 2837 } while (ret == -2); 2838 } 2839 if (in->buf->rawconsumed < unused) 2840 return(-1); 2841 return(in->buf->rawconsumed - unused); 2842 } 2843 return(in->consumed + (in->cur - in->base)); 2844 } 2845 2846 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 2847 #ifdef LIBXML_ISO8859X_ENABLED 2848 2849 /** 2850 * UTF8ToISO8859x: 2851 * @out: a pointer to an array of bytes to store the result 2852 * @outlen: the length of @out 2853 * @in: a pointer to an array of UTF-8 chars 2854 * @inlen: the length of @in 2855 * @xlattable: the 2-level transcoding table 2856 * 2857 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* 2858 * block of chars out. 2859 * 2860 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 2861 * The value of @inlen after return is the number of octets consumed 2862 * as the return value is positive, else unpredictable. 
2863 * The value of @outlen after return is the number of octets consumed. 2864 */ 2865 static int 2866 UTF8ToISO8859x(unsigned char* out, int *outlen, 2867 const unsigned char* in, int *inlen, 2868 unsigned char const *xlattable) { 2869 const unsigned char* outstart = out; 2870 const unsigned char* inend; 2871 const unsigned char* instart = in; 2872 const unsigned char* processed = in; 2873 2874 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 2875 (xlattable == NULL)) 2876 return(-1); 2877 if (in == NULL) { 2878 /* 2879 * initialization nothing to do 2880 */ 2881 *outlen = 0; 2882 *inlen = 0; 2883 return(0); 2884 } 2885 inend = in + (*inlen); 2886 while (in < inend) { 2887 unsigned char d = *in++; 2888 if (d < 0x80) { 2889 *out++ = d; 2890 } else if (d < 0xC0) { 2891 /* trailing byte in leading position */ 2892 *outlen = out - outstart; 2893 *inlen = processed - instart; 2894 return(-2); 2895 } else if (d < 0xE0) { 2896 unsigned char c; 2897 if (!(in < inend)) { 2898 /* trailing byte not in input buffer */ 2899 *outlen = out - outstart; 2900 *inlen = processed - instart; 2901 return(-3); 2902 } 2903 c = *in++; 2904 if ((c & 0xC0) != 0x80) { 2905 /* not a trailing byte */ 2906 *outlen = out - outstart; 2907 *inlen = processed - instart; 2908 return(-2); 2909 } 2910 c = c & 0x3F; 2911 d = d & 0x1F; 2912 d = xlattable [48 + c + xlattable [d] * 64]; 2913 if (d == 0) { 2914 /* not in character set */ 2915 *outlen = out - outstart; 2916 *inlen = processed - instart; 2917 return(-2); 2918 } 2919 *out++ = d; 2920 } else if (d < 0xF0) { 2921 unsigned char c1; 2922 unsigned char c2; 2923 if (!(in < inend - 1)) { 2924 /* trailing bytes not in input buffer */ 2925 *outlen = out - outstart; 2926 *inlen = processed - instart; 2927 return(-3); 2928 } 2929 c1 = *in++; 2930 if ((c1 & 0xC0) != 0x80) { 2931 /* not a trailing byte (c1) */ 2932 *outlen = out - outstart; 2933 *inlen = processed - instart; 2934 return(-2); 2935 } 2936 c2 = *in++; 2937 if ((c2 & 0xC0) != 
0x80) { 2938 /* not a trailing byte (c2) */ 2939 *outlen = out - outstart; 2940 *inlen = processed - instart; 2941 return(-2); 2942 } 2943 c1 = c1 & 0x3F; 2944 c2 = c2 & 0x3F; 2945 d = d & 0x0F; 2946 d = xlattable [48 + c2 + xlattable [48 + c1 + 2947 xlattable [32 + d] * 64] * 64]; 2948 if (d == 0) { 2949 /* not in character set */ 2950 *outlen = out - outstart; 2951 *inlen = processed - instart; 2952 return(-2); 2953 } 2954 *out++ = d; 2955 } else { 2956 /* cannot transcode >= U+010000 */ 2957 *outlen = out - outstart; 2958 *inlen = processed - instart; 2959 return(-2); 2960 } 2961 processed = in; 2962 } 2963 *outlen = out - outstart; 2964 *inlen = processed - instart; 2965 return(*outlen); 2966 } 2967 2968 /** 2969 * ISO8859xToUTF8 2970 * @out: a pointer to an array of bytes to store the result 2971 * @outlen: the length of @out 2972 * @in: a pointer to an array of ISO Latin 1 chars 2973 * @inlen: the length of @in 2974 * 2975 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8 2976 * block of chars out. 2977 * Returns 0 if success, or -1 otherwise 2978 * The value of @inlen after return is the number of octets consumed 2979 * The value of @outlen after return is the number of octets produced. 
2980 */ 2981 static int 2982 ISO8859xToUTF8(unsigned char* out, int *outlen, 2983 const unsigned char* in, int *inlen, 2984 unsigned short const *unicodetable) { 2985 unsigned char* outstart = out; 2986 unsigned char* outend; 2987 const unsigned char* instart = in; 2988 const unsigned char* inend; 2989 const unsigned char* instop; 2990 unsigned int c; 2991 2992 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 2993 (in == NULL) || (unicodetable == NULL)) 2994 return(-1); 2995 outend = out + *outlen; 2996 inend = in + *inlen; 2997 instop = inend; 2998 2999 while ((in < inend) && (out < outend - 2)) { 3000 if (*in >= 0x80) { 3001 c = unicodetable [*in - 0x80]; 3002 if (c == 0) { 3003 /* undefined code point */ 3004 *outlen = out - outstart; 3005 *inlen = in - instart; 3006 return (-1); 3007 } 3008 if (c < 0x800) { 3009 *out++ = ((c >> 6) & 0x1F) | 0xC0; 3010 *out++ = (c & 0x3F) | 0x80; 3011 } else { 3012 *out++ = ((c >> 12) & 0x0F) | 0xE0; 3013 *out++ = ((c >> 6) & 0x3F) | 0x80; 3014 *out++ = (c & 0x3F) | 0x80; 3015 } 3016 ++in; 3017 } 3018 if (instop - in > outend - out) instop = in + (outend - out); 3019 while ((*in < 0x80) && (in < instop)) { 3020 *out++ = *in++; 3021 } 3022 } 3023 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3024 *out++ = *in++; 3025 } 3026 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3027 *out++ = *in++; 3028 } 3029 *outlen = out - outstart; 3030 *inlen = in - instart; 3031 return (*outlen); 3032 } 3033 3034 3035 /************************************************************************ 3036 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding * 3037 ************************************************************************/ 3038 3039 static unsigned short const xmlunicodetable_ISO8859_2 [128] = { 3040 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3041 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3042 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3043 0x0098, 
0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3044 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 3045 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 3046 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 3047 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 3048 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 3049 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 3050 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 3051 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 3052 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 3053 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 3054 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 3055 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 3056 }; 3057 3058 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 3059 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3061 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3062 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3063 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3066 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3067 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3068 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3069 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3070 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 3071 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3073 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 3074 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3075 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 3076 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3077 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3078 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 3079 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 3080 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 3081 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 3082 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3083 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 3084 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 3085 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 3086 }; 3087 3088 static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 3089 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3090 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3091 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3092 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3093 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 3094 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 3095 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 3096 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 3097 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, 3098 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3099 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 3100 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 3101 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 3102 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3103 0x0000, 0x00f1, 
0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 3104 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 3105 }; 3106 3107 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 3108 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3112 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3113 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3114 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3115 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3116 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3117 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3118 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 3119 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 3120 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 3121 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 3122 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3123 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3124 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3126 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3127 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3132 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 3135 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3136 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3137 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3138 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 3139 }; 3140 3141 static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 3142 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3143 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3144 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3145 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3146 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 3147 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 3148 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 3149 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 3150 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3151 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 3152 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3153 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 3154 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3155 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 3156 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3157 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 3158 }; 3159 3160 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 3161 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3164 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3168 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3169 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3170 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 3171 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3172 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3173 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3174 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 3175 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 3176 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 3177 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 3178 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 3179 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 3180 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3181 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 3182 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3183 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3184 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3185 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 3186 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 3187 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 3188 }; 3189 3190 static unsigned short const xmlunicodetable_ISO8859_5 [128] = { 3191 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3192 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3193 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 
0x0096, 0x0097, 3194 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3195 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 3196 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 3197 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 3198 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 3199 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 3200 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 3201 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 3202 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 3203 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 3204 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 3205 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 3206 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 3207 }; 3208 3209 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 3210 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3211 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3212 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3214 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3217 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3218 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3219 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3221 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 3222 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3223 
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3224 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3225 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3226 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3229 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3231 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3234 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3236 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3237 }; 3238 3239 static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 3240 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3241 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3242 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3243 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3244 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 3245 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, 3246 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3247 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 3248 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 3249 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 3250 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 3251 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3252 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 3253 0x0648, 
0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 3254 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3255 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3256 }; 3257 3258 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 3259 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3263 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3264 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3265 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3266 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3267 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3268 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3270 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3272 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 3276 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3277 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 3278 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3279 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3280 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3281 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3282 }; 3283 3284 static unsigned short const 
xmlunicodetable_ISO8859_7 [128] = { 3285 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3286 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3287 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3288 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3289 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 3290 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 3291 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 3292 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 3293 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 3294 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 3295 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 3296 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 3297 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 3298 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 3299 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 3300 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 3301 }; 3302 3303 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 3304 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3306 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3308 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3311 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3312 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3313 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 3314 
"\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 3315 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3320 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3323 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3324 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3325 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3326 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3327 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 3328 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3329 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3330 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3331 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3335 }; 3336 3337 static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 3338 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3339 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3340 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3341 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3342 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3343 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 
0x00af, 3344 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3345 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 3346 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3347 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3348 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3349 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 3350 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 3351 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 3352 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 3353 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 3354 }; 3355 3356 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 3357 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3358 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 3359 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3364 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3365 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3366 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 3367 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 3368 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3370 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3371 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3372 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3373 
"\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 3374 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3375 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 3376 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3377 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3378 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 3381 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3385 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3386 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 3387 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3388 }; 3389 3390 static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 3391 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3392 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3393 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3394 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3395 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3396 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3397 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3398 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 3399 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3400 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3401 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3402 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 3403 0x00e0, 
0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3404 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3405 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3406 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 3407 }; 3408 3409 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 3410 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3417 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3418 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3419 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3420 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3421 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3422 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 3423 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3424 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 3425 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3428 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3432 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3433 }; 3434 3435 static unsigned short const xmlunicodetable_ISO8859_10 [128] = { 3436 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3437 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3438 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3439 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3440 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 3441 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 3442 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 3443 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 3444 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3445 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 3446 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 3447 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3448 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3449 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 3450 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 3451 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, 3452 }; 3453 3454 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 3455 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3457 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3459 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3460 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3461 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3462 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3463 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3464 
"\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 3465 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3466 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3467 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3468 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 3469 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 3470 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 3471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3472 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 3473 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 3474 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3479 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3480 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3481 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3482 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 3483 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 3484 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 3485 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 3486 }; 3487 3488 static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 3489 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3490 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3491 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3492 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3493 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 
0x0e07, 3494 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, 3495 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 3496 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 3497 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 3498 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 3499 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 3500 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 3501 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 3502 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 3503 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 3504 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 3505 }; 3506 3507 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 3508 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3510 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3512 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3513 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3514 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3515 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3516 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3517 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3521 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3522 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 3523 
"\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3524 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3525 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3526 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 3527 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3531 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3532 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 3533 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3534 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3535 }; 3536 3537 static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 3538 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3539 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3540 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3541 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3542 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 3543 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 3544 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 3545 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 3546 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 3547 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 3548 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 3549 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 3550 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 3551 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 3552 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 3553 0x0173, 0x0142, 
0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 3554 }; 3555 3556 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 3557 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3559 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3564 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3565 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3566 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 3567 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 3568 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 3574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3576 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 3577 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 3578 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 3579 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 3580 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 3581 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 3582 
"\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 3583 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3584 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3585 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3586 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3587 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3588 }; 3589 3590 static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3591 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3592 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3593 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3594 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3595 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3596 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3597 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3598 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3599 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3600 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3601 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3602 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3603 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3604 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3605 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3606 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3607 }; 3608 3609 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3610 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3612 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3614 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3617 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3618 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3619 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3620 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3621 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3622 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3625 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3631 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3633 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3635 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3636 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3640 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3641 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3644 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3645 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3646 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3647 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3648 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3649 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3650 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3651 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3652 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3653 }; 3654 3655 static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3656 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3657 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3658 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3659 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3660 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3661 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3662 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3663 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3664 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3665 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3666 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3667 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3668 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3669 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3670 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3671 0x00f8, 0x00f9, 
0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3672 }; 3673 3674 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3675 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3677 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3682 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3683 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3684 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3685 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 3686 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3695 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3696 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3697 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3698 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3699 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3700 
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3701 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3702 }; 3703 3704 static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3705 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3706 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3707 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3708 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3709 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3710 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3711 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3712 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3713 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3714 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3715 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, 3716 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3717 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3718 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3719 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3720 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3721 }; 3722 3723 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3724 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3726 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3731 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3732 
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3733 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3734 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3735 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3736 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3739 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3740 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3741 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3742 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3743 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3744 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3745 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3746 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3753 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3756 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3759 
"\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3760 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3761 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3762 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3763 }; 3764 3765 3766 /* 3767 * auto-generated functions for ISO-8859-2 .. ISO-8859-16 3768 */ 3769 3770 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3771 const unsigned char* in, int *inlen) { 3772 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3773 } 3774 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3775 const unsigned char* in, int *inlen) { 3776 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3777 } 3778 3779 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3780 const unsigned char* in, int *inlen) { 3781 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3782 } 3783 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3784 const unsigned char* in, int *inlen) { 3785 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3786 } 3787 3788 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3789 const unsigned char* in, int *inlen) { 3790 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3791 } 3792 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3793 const unsigned char* in, int *inlen) { 3794 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3795 } 3796 3797 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3798 const unsigned char* in, int *inlen) { 3799 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3800 } 3801 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3802 const unsigned char* in, int *inlen) { 3803 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3804 } 3805 3806 static 
int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3807 const unsigned char* in, int *inlen) { 3808 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3809 } 3810 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3811 const unsigned char* in, int *inlen) { 3812 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3813 } 3814 3815 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3816 const unsigned char* in, int *inlen) { 3817 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3818 } 3819 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3820 const unsigned char* in, int *inlen) { 3821 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3822 } 3823 3824 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3825 const unsigned char* in, int *inlen) { 3826 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3827 } 3828 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3829 const unsigned char* in, int *inlen) { 3830 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3831 } 3832 3833 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3834 const unsigned char* in, int *inlen) { 3835 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3836 } 3837 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3838 const unsigned char* in, int *inlen) { 3839 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3840 } 3841 3842 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 3843 const unsigned char* in, int *inlen) { 3844 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 3845 } 3846 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 3847 const unsigned char* in, int *inlen) { 3848 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 3849 } 3850 
3851 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 3852 const unsigned char* in, int *inlen) { 3853 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 3854 } 3855 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 3856 const unsigned char* in, int *inlen) { 3857 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 3858 } 3859 3860 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 3861 const unsigned char* in, int *inlen) { 3862 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 3863 } 3864 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 3865 const unsigned char* in, int *inlen) { 3866 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 3867 } 3868 3869 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 3870 const unsigned char* in, int *inlen) { 3871 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 3872 } 3873 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 3874 const unsigned char* in, int *inlen) { 3875 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 3876 } 3877 3878 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 3879 const unsigned char* in, int *inlen) { 3880 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 3881 } 3882 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 3883 const unsigned char* in, int *inlen) { 3884 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 3885 } 3886 3887 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 3888 const unsigned char* in, int *inlen) { 3889 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 3890 } 3891 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 3892 const unsigned char* in, int *inlen) { 3893 return UTF8ToISO8859x (out, outlen, in, inlen, 
xmltranscodetable_ISO8859_16); 3894 } 3895 3896 static void 3897 xmlRegisterCharEncodingHandlersISO8859x (void) { 3898 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 3899 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 3900 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 3901 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 3902 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 3903 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 3904 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 3905 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 3906 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 3907 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 3908 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 3909 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 3910 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 3911 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 3912 } 3913 3914 #endif 3915 #endif 3916 3917 #define bottom_encoding 3918 #include "elfgcchack.h" 3919