1 /* libxml2 - Library for parsing XML documents
2 * Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 *
4 * This file is not part of the GNU gettext program, but is used with
5 * GNU gettext.
6 *
7 * The original copyright notice is as follows:
8 */
9
10 /*
11 * Copyright (C) 1998-2012 Daniel Veillard. All Rights Reserved.
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this software and associated documentation files (the "Software"), to deal
15 * in the Software without restriction, including without limitation the rights
16 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 * copies of the Software, and to permit persons to whom the Software is fur-
18 * nished to do so, subject to the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
25 * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 * THE SOFTWARE.
30 *
31 * daniel@veillard.com
32 *
33 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
34 */
35
36 /*
37 * encoding.c : implements the encoding conversion functions needed for XML
38 *
39 * Related specs:
40 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
41 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
42 * [ISO-10646] UTF-8 and UTF-16 in Annexes
43 * [ISO-8859-1] ISO Latin-1 characters codes.
44 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
45 * Worldwide Character Encoding -- Version 1.0", Addison-
46 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
47 * described in Unicode Technical Report #4.
48 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
49 * Information Interchange, ANSI X3.4-1986.
50 */
51
52 #define IN_LIBXML
53 #include "libxml.h"
54
55 #include <string.h>
56 #include <limits.h>
57
58 #ifdef HAVE_CTYPE_H
59 #include <ctype.h>
60 #endif
61 #ifdef HAVE_STDLIB_H
62 #include <stdlib.h>
63 #endif
64 #ifdef LIBXML_ICONV_ENABLED
65 #ifdef HAVE_ERRNO_H
66 #include <errno.h>
67 #endif
68 #endif
69 #include <libxml/encoding.h>
70 #include <libxml/xmlmemory.h>
71 #ifdef LIBXML_HTML_ENABLED
72 #include <libxml/HTMLparser.h>
73 #endif
74 #include <libxml/globals.h>
75 #include <libxml/xmlerror.h>
76
77 #include "buf.h"
78 #include "enc.h"
79
80 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
81 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
82
83 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
84 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
85 struct _xmlCharEncodingAlias {
86 const char *name;
87 const char *alias;
88 };
89
90 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
91 static int xmlCharEncodingAliasesNb = 0;
92 static int xmlCharEncodingAliasesMax = 0;
93
94 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
95 #if 0
96 #define DEBUG_ENCODING /* Define this to get encoding traces */
97 #endif
98 #else
99 #ifdef LIBXML_ISO8859X_ENABLED
100 static void xmlRegisterCharEncodingHandlersISO8859x (void);
101 #endif
102 #endif
103
104 static int xmlLittleEndian = 1;
105
106 /**
107 * xmlEncodingErrMemory:
108 * @extra: extra informations
109 *
110 * Handle an out of memory condition
111 */
112 static void
xmlEncodingErrMemory(const char * extra)113 xmlEncodingErrMemory(const char *extra)
114 {
115 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
116 }
117
118 /**
119 * xmlErrEncoding:
120 * @error: the error number
121 * @msg: the error message
122 *
123 * n encoding error
124 */
125 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)126 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
127 {
128 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
129 XML_FROM_I18N, error, XML_ERR_FATAL,
130 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
131 }
132
133 #ifdef LIBXML_ICU_ENABLED
134 static uconv_t*
openIcuConverter(const char * name,int toUnicode)135 openIcuConverter(const char* name, int toUnicode)
136 {
137 UErrorCode status = U_ZERO_ERROR;
138 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
139 if (conv == NULL)
140 return NULL;
141
142 conv->pivot_source = conv->pivot_buf;
143 conv->pivot_target = conv->pivot_buf;
144
145 conv->uconv = ucnv_open(name, &status);
146 if (U_FAILURE(status))
147 goto error;
148
149 status = U_ZERO_ERROR;
150 if (toUnicode) {
151 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
152 NULL, NULL, NULL, &status);
153 }
154 else {
155 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
156 NULL, NULL, NULL, &status);
157 }
158 if (U_FAILURE(status))
159 goto error;
160
161 status = U_ZERO_ERROR;
162 conv->utf8 = ucnv_open("UTF-8", &status);
163 if (U_SUCCESS(status))
164 return conv;
165
166 error:
167 if (conv->uconv)
168 ucnv_close(conv->uconv);
169 xmlFree(conv);
170 return NULL;
171 }
172
173 static void
closeIcuConverter(uconv_t * conv)174 closeIcuConverter(uconv_t *conv)
175 {
176 if (conv != NULL) {
177 ucnv_close(conv->uconv);
178 ucnv_close(conv->utf8);
179 xmlFree(conv);
180 }
181 }
182 #endif /* LIBXML_ICU_ENABLED */
183
184 /************************************************************************
185 * *
186 * Conversions To/From UTF8 encoding *
187 * *
188 ************************************************************************/
189
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the size of @out, on return the number of bytes written
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  on entry the length of @in, on return the number of bytes consumed
 *
 * Copy a block of 7-bit ASCII chars to @out; ASCII bytes are already
 * valid UTF-8 so no transformation is needed.  Conversion only continues
 * while more than five bytes of @out are still unused, so the tail of the
 * output buffer is never filled.
 *
 * Returns the number of bytes written, or -1 as soon as a byte >= 0x80 is
 * met.  In both cases @outlen and @inlen describe what was converted.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int n = 0;

    /* Input and output advance in lockstep, so one index serves both. */
    while ((n < available) && (n + 5 < capacity)) {
        unsigned char ch = in[n];

        if (ch >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = n;
            *inlen = n;
            return(-1);
        }
        out[n] = ch;
        n++;
    }

    *outlen = n;
    *inlen = n;
    return(n);
}
234
235 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the size of @out, on return the number of bytes written
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  on entry the length of @in, on return the number of bytes consumed
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.  A multi-byte sequence cut off by the end of @in
 * is left unconsumed so that the caller can supply the rest later.
 *
 * Returns the number of bytes written on success, -2 if the input is
 * malformed (stray or missing continuation byte, lead byte >= 0xF8) or
 * contains a character that does not fit in ASCII, or -1 if @out,
 * @outlen or @inlen is NULL.  On -2, @outlen and @inlen describe what
 * was converted before the offending character.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + (*inlen);
    outend = out + (*outlen);

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte here used to be swallowed and a
             * bogus character emitted; report it like UTF8Toisolat1 does.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds one decoded code point */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }

    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
319 #endif /* LIBXML_OUTPUT_ENABLED */
320
321 /**
322 * isolat1ToUTF8:
323 * @out: a pointer to an array of bytes to store the result
324 * @outlen: the length of @out
325 * @in: a pointer to an array of ISO Latin 1 chars
326 * @inlen: the length of @in
327 *
328 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
329 * block of chars out.
330 * Returns the number of bytes written if success, or -1 otherwise
331 * The value of @inlen after return is the number of octets consumed
332 * if the return value is positive, else unpredictable.
333 * The value of @outlen after return is the number of octets consumed.
334 */
335 int
isolat1ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)336 isolat1ToUTF8(unsigned char* out, int *outlen,
337 const unsigned char* in, int *inlen) {
338 unsigned char* outstart = out;
339 const unsigned char* base = in;
340 unsigned char* outend;
341 const unsigned char* inend;
342 const unsigned char* instop;
343
344 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
345 return(-1);
346
347 outend = out + *outlen;
348 inend = in + (*inlen);
349 instop = inend;
350
351 while ((in < inend) && (out < outend - 1)) {
352 if (*in >= 0x80) {
353 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
354 *out++ = ((*in) & 0x3F) | 0x80;
355 ++in;
356 }
357 if ((instop - in) > (outend - out)) instop = in + (outend - out);
358 while ((in < instop) && (*in < 0x80)) {
359 *out++ = *in++;
360 }
361 }
362 if ((in < inend) && (out < outend) && (*in < 0x80)) {
363 *out++ = *in++;
364 }
365 *outlen = out - outstart;
366 *inlen = in - base;
367 return(*outlen);
368 }
369
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the size of @out, on return the number of bytes written
 * @inb:  a pointer to an array of UTF-8 chars, or NULL for the
 *        initialization call
 * @inlenb:  on entry the length of @inb in bytes, on return the number of
 *           bytes consumed
 *
 * No-op "conversion" for UTF-8: copies as many bytes as both buffers
 * allow.  The copy is a plain byte copy; it does not look at character
 * boundaries.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 * is NULL or the usable length is negative (in which case the lengths
 * are left untouched).
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* the smaller of the two lengths bounds the copy */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(count);
}
411
412
413 #ifdef LIBXML_OUTPUT_ENABLED
414 /**
415 * UTF8Toisolat1:
416 * @out: a pointer to an array of bytes to store the result
417 * @outlen: the length of @out
418 * @in: a pointer to an array of UTF-8 chars
419 * @inlen: the length of @in
420 *
421 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
422 * block of chars out.
423 *
424 * Returns the number of bytes written if success, -2 if the transcoding fails,
425 or -1 otherwise
426 * The value of @inlen after return is the number of octets consumed
427 * if the return value is positive, else unpredictable.
428 * The value of @outlen after return is the number of octets consumed.
429 */
430 int
UTF8Toisolat1(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)431 UTF8Toisolat1(unsigned char* out, int *outlen,
432 const unsigned char* in, int *inlen) {
433 const unsigned char* processed = in;
434 const unsigned char* outend;
435 const unsigned char* outstart = out;
436 const unsigned char* instart = in;
437 const unsigned char* inend;
438 unsigned int c, d;
439 int trailing;
440
441 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
442 if (in == NULL) {
443 /*
444 * initialization nothing to do
445 */
446 *outlen = 0;
447 *inlen = 0;
448 return(0);
449 }
450 inend = in + (*inlen);
451 outend = out + (*outlen);
452 while (in < inend) {
453 d = *in++;
454 if (d < 0x80) { c= d; trailing= 0; }
455 else if (d < 0xC0) {
456 /* trailing byte in leading position */
457 *outlen = out - outstart;
458 *inlen = processed - instart;
459 return(-2);
460 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
461 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
462 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
463 else {
464 /* no chance for this in IsoLat1 */
465 *outlen = out - outstart;
466 *inlen = processed - instart;
467 return(-2);
468 }
469
470 if (inend - in < trailing) {
471 break;
472 }
473
474 for ( ; trailing; trailing--) {
475 if (in >= inend)
476 break;
477 if (((d= *in++) & 0xC0) != 0x80) {
478 *outlen = out - outstart;
479 *inlen = processed - instart;
480 return(-2);
481 }
482 c <<= 6;
483 c |= d & 0x3F;
484 }
485
486 /* assertion: c is a single UTF-4 value */
487 if (c <= 0xFF) {
488 if (out >= outend)
489 break;
490 *out++ = c;
491 } else {
492 /* no chance for this in IsoLat1 */
493 *outlen = out - outstart;
494 *inlen = processed - instart;
495 return(-2);
496 }
497 processed = in;
498 }
499 *outlen = out - outstart;
500 *inlen = processed - instart;
501 return(*outlen);
502 }
503 #endif /* LIBXML_OUTPUT_ENABLED */
504
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the size of @out, on return the number of bytes written
 * @inb:  a pointer to an array of UTF-16LE chars passed as a byte array
 * @inlenb:  on entry the length of @inb in bytes, on return the number of
 *           bytes consumed
 *
 * Take a block of UTF-16LE code units in and convert it to an UTF-8
 * block of chars out.  The input is read byte by byte (low byte first),
 * so @inb needs no particular alignment and the result does not depend
 * on the byte order of the host.
 *
 * A trailing odd byte is ignored.  A high surrogate whose low surrogate
 * has not arrived yet is left unconsumed.  Conversion only continues
 * while more than five bytes of @out are still unused, which guarantees
 * room for the longest (four byte) UTF-8 sequence.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 * followed by a low surrogate.  On -2, @outlen and @inlenb describe what
 * was converted before the offending unit.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    /* only whole 16-bit units can be decoded */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* high surrogate */
            if (in >= inend) {
                /* the low surrogate is not available yet */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /*
         * c is now a single code point; the loop condition reserved
         * enough room for its UTF-8 form.
         */
        if (c < 0x80) {
            *out++ = c;
            bits = -6;
        } else if (c < 0x800) {
            *out++ = ((c >> 6) & 0x1F) | 0xC0;
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = ((c >> 12) & 0x0F) | 0xE0;
            bits = 6;
        } else {
            *out++ = ((c >> 18) & 0x07) | 0xF0;
            bits = 12;
        }

        for ( ; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }

    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
592
593 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the size of @outb in bytes, on return the number of
 *           bytes written
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  on entry the length of @in, on return the number of bytes consumed
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  No byte order mark is produced.  The output is
 * written byte by byte (low byte first), so @outb needs no particular
 * alignment and the result does not depend on the byte order of the host.
 *
 * Conversion stops, without an error, at a sequence cut off by the end
 * of @in, when @outb is full, or at a value above 0x10FFFF.
 *
 * Returns the number of bytes written, -2 if the input is malformed
 * (stray or missing continuation byte, lead byte >= 0xF8), or -1 if
 * @outb, @outlen or @inlen is NULL.  On -2, @outlen and @inlen describe
 * what was converted before the offending sequence.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + *inlen;
    /* only whole 16-bit units are written, so ignore a trailing odd byte */
    outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte here used to be swallowed and a
             * bogus character emitted; report it like UTF8Toisolat1 does.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds one decoded value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. four bytes */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the UTF-16 range: stop without consuming it */
            break;
        }
        processed = in;
    }

    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
702
703 /**
704 * UTF8ToUTF16:
705 * @outb: a pointer to an array of bytes to store the result
706 * @outlen: the length of @outb
707 * @in: a pointer to an array of UTF-8 chars
708 * @inlen: the length of @in
709 *
710 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
711 * block of chars out.
712 *
713 * Returns the number of bytes written, or -1 if lack of space, or -2
714 * if the transcoding failed.
715 */
716 static int
UTF8ToUTF16(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)717 UTF8ToUTF16(unsigned char* outb, int *outlen,
718 const unsigned char* in, int *inlen)
719 {
720 if (in == NULL) {
721 /*
722 * initialization, add the Byte Order Mark for UTF-16LE
723 */
724 if (*outlen >= 2) {
725 outb[0] = 0xFF;
726 outb[1] = 0xFE;
727 *outlen = 2;
728 *inlen = 0;
729 #ifdef DEBUG_ENCODING
730 xmlGenericError(xmlGenericErrorContext,
731 "Added FFFE Byte Order Mark\n");
732 #endif
733 return(2);
734 }
735 *outlen = 0;
736 *inlen = 0;
737 return(0);
738 }
739 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
740 }
741 #endif /* LIBXML_OUTPUT_ENABLED */
742
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the size of @out, on return the number of bytes written
 * @inb:  a pointer to an array of UTF-16BE chars passed as a byte array
 * @inlenb:  on entry the length of @inb in bytes, on return the number of
 *           bytes consumed
 *
 * Take a block of UTF-16BE code units in and convert it to an UTF-8
 * block of chars out.  The input is read byte by byte (high byte first),
 * so @inb needs no particular alignment and the result does not depend
 * on the byte order of the host.
 *
 * A trailing odd byte is ignored.  A high surrogate whose low surrogate
 * has not arrived yet is left unconsumed, as in UTF16LEToUTF8().  A
 * character is only written when its complete UTF-8 form fits in @out,
 * so the output never ends in a truncated sequence.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 * followed by a low surrogate.  On -2, @outlen and @inlenb describe what
 * was converted before the offending unit.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned char lead;
    int bits, needed;

    /* only whole 16-bit units can be decoded */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* high surrogate */
            if (in >= inend) {
                /* the low surrogate is not available yet */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* c is now a single code point: work out its UTF-8 length */
        if (c < 0x80) {
            lead = (unsigned char) c;
            needed = 1;
        } else if (c < 0x800) {
            lead = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            needed = 2;
        } else if (c < 0x10000) {
            lead = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            needed = 3;
        } else {
            lead = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            needed = 4;
        }

        /*
         * Check the space for the whole sequence up front; writing a
         * lead byte and then running out of room would leave a broken
         * sequence behind while the input is reported as consumed.
         */
        if (outend - out < needed)
            break;

        *out++ = lead;
        for (bits = (needed - 2) * 6; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }

    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
834
835 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the size of @outb in bytes, on return the number of
 *           bytes written
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  on entry the length of @in, on return the number of bytes consumed
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.  No byte order mark is produced.  The output is
 * written byte by byte (high byte first), so @outb needs no particular
 * alignment and the result does not depend on the byte order of the host.
 *
 * Conversion stops, without an error, at a sequence cut off by the end
 * of @in, when @outb is full, or at a value above 0x10FFFF.
 *
 * Returns the number of bytes written, -2 if the input is malformed
 * (stray or missing continuation byte, lead byte >= 0xF8), or -1 if
 * @outb, @outlen or @inlen is NULL.  On -2, @outlen (in bytes) and
 * @inlen describe what was converted before the offending sequence.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + *inlen;
    /* only whole 16-bit units are written, so ignore a trailing odd byte */
    outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte here used to be swallowed and a
             * bogus character emitted; report it like UTF8Toisolat1 does.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds one decoded value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. four bytes */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the UTF-16 range: stop without consuming it */
            break;
        }
        processed = in;
    }

    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* the length is reported in bytes here as well, not in 16-bit units */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
941 #endif /* LIBXML_OUTPUT_ENABLED */
942
943 /************************************************************************
944 * *
945 * Generic encoding handling routines *
946 * *
947 ************************************************************************/
948
949 /**
950 * xmlDetectCharEncoding:
951 * @in: a pointer to the first bytes of the XML entity, must be at least
952 * 2 bytes long (at least 4 if encoding is UTF4 variant).
953 * @len: pointer to the length of the buffer
954 *
955 * Guess the encoding of the entity using the first bytes of the entity content
956 * according to the non-normative appendix F of the XML-1.0 recommendation.
957 *
958 * Returns one of the XML_CHAR_ENCODING_... values.
959 */
960 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)961 xmlDetectCharEncoding(const unsigned char* in, int len)
962 {
963 if (in == NULL)
964 return(XML_CHAR_ENCODING_NONE);
965 if (len >= 4) {
966 if ((in[0] == 0x00) && (in[1] == 0x00) &&
967 (in[2] == 0x00) && (in[3] == 0x3C))
968 return(XML_CHAR_ENCODING_UCS4BE);
969 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
970 (in[2] == 0x00) && (in[3] == 0x00))
971 return(XML_CHAR_ENCODING_UCS4LE);
972 if ((in[0] == 0x00) && (in[1] == 0x00) &&
973 (in[2] == 0x3C) && (in[3] == 0x00))
974 return(XML_CHAR_ENCODING_UCS4_2143);
975 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
976 (in[2] == 0x00) && (in[3] == 0x00))
977 return(XML_CHAR_ENCODING_UCS4_3412);
978 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
979 (in[2] == 0xA7) && (in[3] == 0x94))
980 return(XML_CHAR_ENCODING_EBCDIC);
981 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
982 (in[2] == 0x78) && (in[3] == 0x6D))
983 return(XML_CHAR_ENCODING_UTF8);
984 /*
985 * Although not part of the recommendation, we also
986 * attempt an "auto-recognition" of UTF-16LE and
987 * UTF-16BE encodings.
988 */
989 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
990 (in[2] == 0x3F) && (in[3] == 0x00))
991 return(XML_CHAR_ENCODING_UTF16LE);
992 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
993 (in[2] == 0x00) && (in[3] == 0x3F))
994 return(XML_CHAR_ENCODING_UTF16BE);
995 }
996 if (len >= 3) {
997 /*
998 * Errata on XML-1.0 June 20 2001
999 * We now allow an UTF8 encoded BOM
1000 */
1001 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
1002 (in[2] == 0xBF))
1003 return(XML_CHAR_ENCODING_UTF8);
1004 }
1005 /* For UTF-16 we can recognize by the BOM */
1006 if (len >= 2) {
1007 if ((in[0] == 0xFE) && (in[1] == 0xFF))
1008 return(XML_CHAR_ENCODING_UTF16BE);
1009 if ((in[0] == 0xFF) && (in[1] == 0xFE))
1010 return(XML_CHAR_ENCODING_UTF16LE);
1011 }
1012 return(XML_CHAR_ENCODING_NONE);
1013 }
1014
1015 /**
1016 * xmlCleanupEncodingAliases:
1017 *
1018 * Unregisters all aliases
1019 */
1020 void
xmlCleanupEncodingAliases(void)1021 xmlCleanupEncodingAliases(void) {
1022 int i;
1023
1024 if (xmlCharEncodingAliases == NULL)
1025 return;
1026
1027 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1028 if (xmlCharEncodingAliases[i].name != NULL)
1029 xmlFree((char *) xmlCharEncodingAliases[i].name);
1030 if (xmlCharEncodingAliases[i].alias != NULL)
1031 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1032 }
1033 xmlCharEncodingAliasesNb = 0;
1034 xmlCharEncodingAliasesMax = 0;
1035 xmlFree(xmlCharEncodingAliases);
1036 xmlCharEncodingAliases = NULL;
1037 }
1038
1039 /**
1040 * xmlGetEncodingAlias:
1041 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1042 *
1043 * Lookup an encoding name for the given alias.
1044 *
1045 * Returns NULL if not found, otherwise the original name
1046 */
1047 const char *
xmlGetEncodingAlias(const char * alias)1048 xmlGetEncodingAlias(const char *alias) {
1049 int i;
1050 char upper[100];
1051
1052 if (alias == NULL)
1053 return(NULL);
1054
1055 if (xmlCharEncodingAliases == NULL)
1056 return(NULL);
1057
1058 for (i = 0;i < 99;i++) {
1059 upper[i] = toupper(alias[i]);
1060 if (upper[i] == 0) break;
1061 }
1062 upper[i] = 0;
1063
1064 /*
1065 * Walk down the list looking for a definition of the alias
1066 */
1067 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1068 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1069 return(xmlCharEncodingAliases[i].name);
1070 }
1071 }
1072 return(NULL);
1073 }
1074
1075 /**
1076 * xmlAddEncodingAlias:
1077 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1078 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1079 *
1080 * Registers an alias @alias for an encoding named @name. Existing alias
1081 * will be overwritten.
1082 *
1083 * Returns 0 in case of success, -1 in case of error
1084 */
1085 int
xmlAddEncodingAlias(const char * name,const char * alias)1086 xmlAddEncodingAlias(const char *name, const char *alias) {
1087 int i;
1088 char upper[100];
1089
1090 if ((name == NULL) || (alias == NULL))
1091 return(-1);
1092
1093 for (i = 0;i < 99;i++) {
1094 upper[i] = toupper(alias[i]);
1095 if (upper[i] == 0) break;
1096 }
1097 upper[i] = 0;
1098
1099 if (xmlCharEncodingAliases == NULL) {
1100 xmlCharEncodingAliasesNb = 0;
1101 xmlCharEncodingAliasesMax = 20;
1102 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1103 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1104 if (xmlCharEncodingAliases == NULL)
1105 return(-1);
1106 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1107 xmlCharEncodingAliasesMax *= 2;
1108 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1109 xmlRealloc(xmlCharEncodingAliases,
1110 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1111 }
1112 /*
1113 * Walk down the list looking for a definition of the alias
1114 */
1115 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1116 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1117 /*
1118 * Replace the definition.
1119 */
1120 xmlFree((char *) xmlCharEncodingAliases[i].name);
1121 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1122 return(0);
1123 }
1124 }
1125 /*
1126 * Add the definition
1127 */
1128 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1129 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1130 xmlCharEncodingAliasesNb++;
1131 return(0);
1132 }
1133
1134 /**
1135 * xmlDelEncodingAlias:
1136 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1137 *
1138 * Unregisters an encoding alias @alias
1139 *
1140 * Returns 0 in case of success, -1 in case of error
1141 */
1142 int
xmlDelEncodingAlias(const char * alias)1143 xmlDelEncodingAlias(const char *alias) {
1144 int i;
1145
1146 if (alias == NULL)
1147 return(-1);
1148
1149 if (xmlCharEncodingAliases == NULL)
1150 return(-1);
1151 /*
1152 * Walk down the list looking for a definition of the alias
1153 */
1154 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1155 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1156 xmlFree((char *) xmlCharEncodingAliases[i].name);
1157 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1158 xmlCharEncodingAliasesNb--;
1159 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1160 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1161 return(0);
1162 }
1163 }
1164 return(-1);
1165 }
1166
1167 /**
1168 * xmlParseCharEncoding:
1169 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1170 *
1171 * Compare the string to the encoding schemes already known. Note
1172 * that the comparison is case insensitive accordingly to the section
1173 * [XML] 4.3.3 Character Encoding in Entities.
1174 *
1175 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1176 * if not recognized.
1177 */
1178 xmlCharEncoding
xmlParseCharEncoding(const char * name)1179 xmlParseCharEncoding(const char* name)
1180 {
1181 const char *alias;
1182 char upper[500];
1183 int i;
1184
1185 if (name == NULL)
1186 return(XML_CHAR_ENCODING_NONE);
1187
1188 /*
1189 * Do the alias resolution
1190 */
1191 alias = xmlGetEncodingAlias(name);
1192 if (alias != NULL)
1193 name = alias;
1194
1195 for (i = 0;i < 499;i++) {
1196 upper[i] = toupper(name[i]);
1197 if (upper[i] == 0) break;
1198 }
1199 upper[i] = 0;
1200
1201 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1202 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1203 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1204
1205 /*
1206 * NOTE: if we were able to parse this, the endianness of UTF16 is
1207 * already found and in use
1208 */
1209 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1210 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1211
1212 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1213 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1214 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1215
1216 /*
1217 * NOTE: if we were able to parse this, the endianness of UCS4 is
1218 * already found and in use
1219 */
1220 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1221 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1222 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1223
1224
1225 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1226 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1227 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1228
1229 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1230 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1231 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1232
1233 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1234 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1235 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1236 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1237 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1238 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1239 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1240
1241 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1242 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1243 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1244
1245 #ifdef DEBUG_ENCODING
1246 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1247 #endif
1248 return(XML_CHAR_ENCODING_ERROR);
1249 }
1250
1251 /**
1252 * xmlGetCharEncodingName:
1253 * @enc: the encoding
1254 *
1255 * The "canonical" name for XML encoding.
1256 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1257 * Section 4.3.3 Character Encoding in Entities
1258 *
1259 * Returns the canonical name for the given encoding
1260 */
1261
1262 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1263 xmlGetCharEncodingName(xmlCharEncoding enc) {
1264 switch (enc) {
1265 case XML_CHAR_ENCODING_ERROR:
1266 return(NULL);
1267 case XML_CHAR_ENCODING_NONE:
1268 return(NULL);
1269 case XML_CHAR_ENCODING_UTF8:
1270 return("UTF-8");
1271 case XML_CHAR_ENCODING_UTF16LE:
1272 return("UTF-16");
1273 case XML_CHAR_ENCODING_UTF16BE:
1274 return("UTF-16");
1275 case XML_CHAR_ENCODING_EBCDIC:
1276 return("EBCDIC");
1277 case XML_CHAR_ENCODING_UCS4LE:
1278 return("ISO-10646-UCS-4");
1279 case XML_CHAR_ENCODING_UCS4BE:
1280 return("ISO-10646-UCS-4");
1281 case XML_CHAR_ENCODING_UCS4_2143:
1282 return("ISO-10646-UCS-4");
1283 case XML_CHAR_ENCODING_UCS4_3412:
1284 return("ISO-10646-UCS-4");
1285 case XML_CHAR_ENCODING_UCS2:
1286 return("ISO-10646-UCS-2");
1287 case XML_CHAR_ENCODING_8859_1:
1288 return("ISO-8859-1");
1289 case XML_CHAR_ENCODING_8859_2:
1290 return("ISO-8859-2");
1291 case XML_CHAR_ENCODING_8859_3:
1292 return("ISO-8859-3");
1293 case XML_CHAR_ENCODING_8859_4:
1294 return("ISO-8859-4");
1295 case XML_CHAR_ENCODING_8859_5:
1296 return("ISO-8859-5");
1297 case XML_CHAR_ENCODING_8859_6:
1298 return("ISO-8859-6");
1299 case XML_CHAR_ENCODING_8859_7:
1300 return("ISO-8859-7");
1301 case XML_CHAR_ENCODING_8859_8:
1302 return("ISO-8859-8");
1303 case XML_CHAR_ENCODING_8859_9:
1304 return("ISO-8859-9");
1305 case XML_CHAR_ENCODING_2022_JP:
1306 return("ISO-2022-JP");
1307 case XML_CHAR_ENCODING_SHIFT_JIS:
1308 return("Shift-JIS");
1309 case XML_CHAR_ENCODING_EUC_JP:
1310 return("EUC-JP");
1311 case XML_CHAR_ENCODING_ASCII:
1312 return(NULL);
1313 }
1314 return(NULL);
1315 }
1316
1317 /************************************************************************
1318 * *
1319 * Char encoding handlers *
1320 * *
1321 ************************************************************************/
1322
1323
/*
 * Registry of encoding handlers: `handlers` is an array allocated by
 * xmlInitCharEncodingHandlers() with room for MAX_ENCODING_HANDLERS
 * entries, and `nbCharEncodingHandler` counts the slots in use.
 * xmlRegisterCharEncodingHandler() refuses new entries once the
 * array is full.
 */
/* the size should be growable, but it's not a big deal ... */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * xmlFindCharEncodingHandler() returns this value when it is given a
 * NULL or empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1335
1336 /**
1337 * xmlNewCharEncodingHandler:
1338 * @name: the encoding name, in UTF-8 format (ASCII actually)
1339 * @input: the xmlCharEncodingInputFunc to read that encoding
1340 * @output: the xmlCharEncodingOutputFunc to write that encoding
1341 *
1342 * Create and registers an xmlCharEncodingHandler.
1343 *
1344 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1345 */
1346 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1347 xmlNewCharEncodingHandler(const char *name,
1348 xmlCharEncodingInputFunc input,
1349 xmlCharEncodingOutputFunc output) {
1350 xmlCharEncodingHandlerPtr handler;
1351 const char *alias;
1352 char upper[500];
1353 int i;
1354 char *up = NULL;
1355
1356 /*
1357 * Do the alias resolution
1358 */
1359 alias = xmlGetEncodingAlias(name);
1360 if (alias != NULL)
1361 name = alias;
1362
1363 /*
1364 * Keep only the uppercase version of the encoding.
1365 */
1366 if (name == NULL) {
1367 xmlEncodingErr(XML_I18N_NO_NAME,
1368 "xmlNewCharEncodingHandler : no name !\n", NULL);
1369 return(NULL);
1370 }
1371 for (i = 0;i < 499;i++) {
1372 upper[i] = toupper(name[i]);
1373 if (upper[i] == 0) break;
1374 }
1375 upper[i] = 0;
1376 up = xmlMemStrdup(upper);
1377 if (up == NULL) {
1378 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1379 return(NULL);
1380 }
1381
1382 /*
1383 * allocate and fill-up an handler block.
1384 */
1385 handler = (xmlCharEncodingHandlerPtr)
1386 xmlMalloc(sizeof(xmlCharEncodingHandler));
1387 if (handler == NULL) {
1388 xmlFree(up);
1389 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1390 return(NULL);
1391 }
1392 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1393 handler->input = input;
1394 handler->output = output;
1395 handler->name = up;
1396
1397 #ifdef LIBXML_ICONV_ENABLED
1398 handler->iconv_in = NULL;
1399 handler->iconv_out = NULL;
1400 #endif
1401 #ifdef LIBXML_ICU_ENABLED
1402 handler->uconv_in = NULL;
1403 handler->uconv_out = NULL;
1404 #endif
1405
1406 /*
1407 * registers and returns the handler.
1408 */
1409 xmlRegisterCharEncodingHandler(handler);
1410 #ifdef DEBUG_ENCODING
1411 xmlGenericError(xmlGenericErrorContext,
1412 "Registered encoding handler for %s\n", name);
1413 #endif
1414 return(handler);
1415 }
1416
1417 /**
1418 * xmlInitCharEncodingHandlers:
1419 *
1420 * Initialize the char encoding support, it registers the default
1421 * encoding supported.
1422 * NOTE: while public, this function usually doesn't need to be called
1423 * in normal processing.
1424 */
1425 void
xmlInitCharEncodingHandlers(void)1426 xmlInitCharEncodingHandlers(void) {
1427 unsigned short int tst = 0x1234;
1428 unsigned char *ptr = (unsigned char *) &tst;
1429
1430 if (handlers != NULL) return;
1431
1432 handlers = (xmlCharEncodingHandlerPtr *)
1433 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1434
1435 if (*ptr == 0x12) xmlLittleEndian = 0;
1436 else if (*ptr == 0x34) xmlLittleEndian = 1;
1437 else {
1438 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1439 "Odd problem at endianness detection\n", NULL);
1440 }
1441
1442 if (handlers == NULL) {
1443 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1444 return;
1445 }
1446 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1447 #ifdef LIBXML_OUTPUT_ENABLED
1448 xmlUTF16LEHandler =
1449 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1450 xmlUTF16BEHandler =
1451 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1452 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1453 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1454 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1455 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1456 #ifdef LIBXML_HTML_ENABLED
1457 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1458 #endif
1459 #else
1460 xmlUTF16LEHandler =
1461 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1462 xmlUTF16BEHandler =
1463 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1464 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1465 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1466 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1467 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1468 #endif /* LIBXML_OUTPUT_ENABLED */
1469 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1470 #ifdef LIBXML_ISO8859X_ENABLED
1471 xmlRegisterCharEncodingHandlersISO8859x ();
1472 #endif
1473 #endif
1474
1475 }
1476
1477 /**
1478 * xmlCleanupCharEncodingHandlers:
1479 *
1480 * Cleanup the memory allocated for the char encoding support, it
1481 * unregisters all the encoding handlers and the aliases.
1482 */
1483 void
xmlCleanupCharEncodingHandlers(void)1484 xmlCleanupCharEncodingHandlers(void) {
1485 xmlCleanupEncodingAliases();
1486
1487 if (handlers == NULL) return;
1488
1489 for (;nbCharEncodingHandler > 0;) {
1490 nbCharEncodingHandler--;
1491 if (handlers[nbCharEncodingHandler] != NULL) {
1492 if (handlers[nbCharEncodingHandler]->name != NULL)
1493 xmlFree(handlers[nbCharEncodingHandler]->name);
1494 xmlFree(handlers[nbCharEncodingHandler]);
1495 }
1496 }
1497 xmlFree(handlers);
1498 handlers = NULL;
1499 nbCharEncodingHandler = 0;
1500 xmlDefaultCharEncodingHandler = NULL;
1501 }
1502
1503 /**
1504 * xmlRegisterCharEncodingHandler:
1505 * @handler: the xmlCharEncodingHandlerPtr handler block
1506 *
1507 * Register the char encoding handler, surprising, isn't it ?
1508 */
1509 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1510 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1511 if (handlers == NULL) xmlInitCharEncodingHandlers();
1512 if ((handler == NULL) || (handlers == NULL)) {
1513 xmlEncodingErr(XML_I18N_NO_HANDLER,
1514 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1515 return;
1516 }
1517
1518 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1519 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1520 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1521 "MAX_ENCODING_HANDLERS");
1522 return;
1523 }
1524 handlers[nbCharEncodingHandler++] = handler;
1525 }
1526
1527 /**
1528 * xmlGetCharEncodingHandler:
1529 * @enc: an xmlCharEncoding value.
1530 *
1531 * Search in the registered set the handler able to read/write that encoding.
1532 *
1533 * Returns the handler or NULL if not found
1534 */
1535 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1536 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1537 xmlCharEncodingHandlerPtr handler;
1538
1539 if (handlers == NULL) xmlInitCharEncodingHandlers();
1540 switch (enc) {
1541 case XML_CHAR_ENCODING_ERROR:
1542 return(NULL);
1543 case XML_CHAR_ENCODING_NONE:
1544 return(NULL);
1545 case XML_CHAR_ENCODING_UTF8:
1546 return(NULL);
1547 case XML_CHAR_ENCODING_UTF16LE:
1548 return(xmlUTF16LEHandler);
1549 case XML_CHAR_ENCODING_UTF16BE:
1550 return(xmlUTF16BEHandler);
1551 case XML_CHAR_ENCODING_EBCDIC:
1552 handler = xmlFindCharEncodingHandler("EBCDIC");
1553 if (handler != NULL) return(handler);
1554 handler = xmlFindCharEncodingHandler("ebcdic");
1555 if (handler != NULL) return(handler);
1556 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1557 if (handler != NULL) return(handler);
1558 handler = xmlFindCharEncodingHandler("IBM-037");
1559 if (handler != NULL) return(handler);
1560 break;
1561 case XML_CHAR_ENCODING_UCS4BE:
1562 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1563 if (handler != NULL) return(handler);
1564 handler = xmlFindCharEncodingHandler("UCS-4");
1565 if (handler != NULL) return(handler);
1566 handler = xmlFindCharEncodingHandler("UCS4");
1567 if (handler != NULL) return(handler);
1568 break;
1569 case XML_CHAR_ENCODING_UCS4LE:
1570 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571 if (handler != NULL) return(handler);
1572 handler = xmlFindCharEncodingHandler("UCS-4");
1573 if (handler != NULL) return(handler);
1574 handler = xmlFindCharEncodingHandler("UCS4");
1575 if (handler != NULL) return(handler);
1576 break;
1577 case XML_CHAR_ENCODING_UCS4_2143:
1578 break;
1579 case XML_CHAR_ENCODING_UCS4_3412:
1580 break;
1581 case XML_CHAR_ENCODING_UCS2:
1582 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1583 if (handler != NULL) return(handler);
1584 handler = xmlFindCharEncodingHandler("UCS-2");
1585 if (handler != NULL) return(handler);
1586 handler = xmlFindCharEncodingHandler("UCS2");
1587 if (handler != NULL) return(handler);
1588 break;
1589
1590 /*
1591 * We used to keep ISO Latin encodings native in the
1592 * generated data. This led to so many problems that
1593 * this has been removed. One can still change this
1594 * back by registering no-ops encoders for those
1595 */
1596 case XML_CHAR_ENCODING_8859_1:
1597 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1598 if (handler != NULL) return(handler);
1599 break;
1600 case XML_CHAR_ENCODING_8859_2:
1601 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1602 if (handler != NULL) return(handler);
1603 break;
1604 case XML_CHAR_ENCODING_8859_3:
1605 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1606 if (handler != NULL) return(handler);
1607 break;
1608 case XML_CHAR_ENCODING_8859_4:
1609 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1610 if (handler != NULL) return(handler);
1611 break;
1612 case XML_CHAR_ENCODING_8859_5:
1613 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1614 if (handler != NULL) return(handler);
1615 break;
1616 case XML_CHAR_ENCODING_8859_6:
1617 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1618 if (handler != NULL) return(handler);
1619 break;
1620 case XML_CHAR_ENCODING_8859_7:
1621 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1622 if (handler != NULL) return(handler);
1623 break;
1624 case XML_CHAR_ENCODING_8859_8:
1625 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1626 if (handler != NULL) return(handler);
1627 break;
1628 case XML_CHAR_ENCODING_8859_9:
1629 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1630 if (handler != NULL) return(handler);
1631 break;
1632
1633
1634 case XML_CHAR_ENCODING_2022_JP:
1635 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1636 if (handler != NULL) return(handler);
1637 break;
1638 case XML_CHAR_ENCODING_SHIFT_JIS:
1639 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1640 if (handler != NULL) return(handler);
1641 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1642 if (handler != NULL) return(handler);
1643 handler = xmlFindCharEncodingHandler("Shift_JIS");
1644 if (handler != NULL) return(handler);
1645 break;
1646 case XML_CHAR_ENCODING_EUC_JP:
1647 handler = xmlFindCharEncodingHandler("EUC-JP");
1648 if (handler != NULL) return(handler);
1649 break;
1650 default:
1651 break;
1652 }
1653
1654 #ifdef DEBUG_ENCODING
1655 xmlGenericError(xmlGenericErrorContext,
1656 "No handler found for encoding %d\n", enc);
1657 #endif
1658 return(NULL);
1659 }
1660
1661 /**
1662 * xmlFindCharEncodingHandler:
1663 * @name: a string describing the char encoding.
1664 *
1665 * Search in the registered set the handler able to read/write that encoding.
1666 *
1667 * Returns the handler or NULL if not found
1668 */
1669 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1670 xmlFindCharEncodingHandler(const char *name) {
1671 const char *nalias;
1672 const char *norig;
1673 xmlCharEncoding alias;
1674 #ifdef LIBXML_ICONV_ENABLED
1675 xmlCharEncodingHandlerPtr enc;
1676 iconv_t icv_in, icv_out;
1677 #endif /* LIBXML_ICONV_ENABLED */
1678 #ifdef LIBXML_ICU_ENABLED
1679 xmlCharEncodingHandlerPtr encu;
1680 uconv_t *ucv_in, *ucv_out;
1681 #endif /* LIBXML_ICU_ENABLED */
1682 char upper[100];
1683 int i;
1684
1685 if (handlers == NULL) xmlInitCharEncodingHandlers();
1686 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1687 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1688
1689 /*
1690 * Do the alias resolution
1691 */
1692 norig = name;
1693 nalias = xmlGetEncodingAlias(name);
1694 if (nalias != NULL)
1695 name = nalias;
1696
1697 /*
1698 * Check first for directly registered encoding names
1699 */
1700 for (i = 0;i < 99;i++) {
1701 upper[i] = toupper(name[i]);
1702 if (upper[i] == 0) break;
1703 }
1704 upper[i] = 0;
1705
1706 if (handlers != NULL) {
1707 for (i = 0;i < nbCharEncodingHandler; i++) {
1708 if (!strcmp(upper, handlers[i]->name)) {
1709 #ifdef DEBUG_ENCODING
1710 xmlGenericError(xmlGenericErrorContext,
1711 "Found registered handler for encoding %s\n", name);
1712 #endif
1713 return(handlers[i]);
1714 }
1715 }
1716 }
1717
1718 #ifdef LIBXML_ICONV_ENABLED
1719 /* check whether iconv can handle this */
1720 icv_in = iconv_open("UTF-8", name);
1721 icv_out = iconv_open(name, "UTF-8");
1722 if (icv_in == (iconv_t) -1) {
1723 icv_in = iconv_open("UTF-8", upper);
1724 }
1725 if (icv_out == (iconv_t) -1) {
1726 icv_out = iconv_open(upper, "UTF-8");
1727 }
1728 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1729 enc = (xmlCharEncodingHandlerPtr)
1730 xmlMalloc(sizeof(xmlCharEncodingHandler));
1731 if (enc == NULL) {
1732 iconv_close(icv_in);
1733 iconv_close(icv_out);
1734 return(NULL);
1735 }
1736 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1737 enc->name = xmlMemStrdup(name);
1738 enc->input = NULL;
1739 enc->output = NULL;
1740 enc->iconv_in = icv_in;
1741 enc->iconv_out = icv_out;
1742 #ifdef DEBUG_ENCODING
1743 xmlGenericError(xmlGenericErrorContext,
1744 "Found iconv handler for encoding %s\n", name);
1745 #endif
1746 return enc;
1747 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1748 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1749 "iconv : problems with filters for '%s'\n", name);
1750 }
1751 #endif /* LIBXML_ICONV_ENABLED */
1752 #ifdef LIBXML_ICU_ENABLED
1753 /* check whether icu can handle this */
1754 ucv_in = openIcuConverter(name, 1);
1755 ucv_out = openIcuConverter(name, 0);
1756 if (ucv_in != NULL && ucv_out != NULL) {
1757 encu = (xmlCharEncodingHandlerPtr)
1758 xmlMalloc(sizeof(xmlCharEncodingHandler));
1759 if (encu == NULL) {
1760 closeIcuConverter(ucv_in);
1761 closeIcuConverter(ucv_out);
1762 return(NULL);
1763 }
1764 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1765 encu->name = xmlMemStrdup(name);
1766 encu->input = NULL;
1767 encu->output = NULL;
1768 encu->uconv_in = ucv_in;
1769 encu->uconv_out = ucv_out;
1770 #ifdef DEBUG_ENCODING
1771 xmlGenericError(xmlGenericErrorContext,
1772 "Found ICU converter handler for encoding %s\n", name);
1773 #endif
1774 return encu;
1775 } else if (ucv_in != NULL || ucv_out != NULL) {
1776 closeIcuConverter(ucv_in);
1777 closeIcuConverter(ucv_out);
1778 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1779 "ICU converter : problems with filters for '%s'\n", name);
1780 }
1781 #endif /* LIBXML_ICU_ENABLED */
1782
1783 #ifdef DEBUG_ENCODING
1784 xmlGenericError(xmlGenericErrorContext,
1785 "No handler found for encoding %s\n", name);
1786 #endif
1787
1788 /*
1789 * Fallback using the canonical names
1790 */
1791 alias = xmlParseCharEncoding(norig);
1792 if (alias != XML_CHAR_ENCODING_ERROR) {
1793 const char* canon;
1794 canon = xmlGetCharEncodingName(alias);
1795 if ((canon != NULL) && (strcmp(name, canon))) {
1796 return(xmlFindCharEncodingHandler(canon));
1797 }
1798 }
1799
1800 /* If "none of the above", give up */
1801 return(NULL);
1802 }
1803
1804 /************************************************************************
1805 * *
1806 * ICONV based generic conversion functions *
1807 * *
1808 ************************************************************************/
1809
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes, in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step from @in to @out.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * If any of @out, @outlen, @in or @inlen is NULL, -1 is returned and
 * *@outlen is set to 0 when @outlen is available.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;    /* iconv() returns size_t, (size_t) -1 on error */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen,
                &icv_out, &icv_outlen);
    /* iconv() leaves the unused byte counts behind: turn them into used. */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ)
            return(-2);
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return(-1);
#endif
        /* EINVAL (incomplete trailing sequence) and anything else */
        return(-3);
    }
    return(0);
}
#endif /* LIBXML_ICONV_ENABLED */
1869
1870 /************************************************************************
1871 * *
1872 * ICU based generic conversion functions *
1873 * *
1874 ************************************************************************/
1875
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd:  ICU uconverter data structure
 * @toUnicode:  non-zero to convert from the encoding of @cd to UTF-8,
 *              0 to convert from UTF-8 to that encoding
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 * @flush:  if true, indicates end of input
 *
 * Converts through the pivot buffer held in @cd with ucnv_convertEx().
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets written.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen, int flush) {
    const char *ucv_in = (const char *) in;
    char *ucv_out = (char *) out;
    UErrorCode err = U_ZERO_ERROR;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }

    /*
     * toUnicode:  encoding => UTF-16 => UTF-8   (target utf8, source uconv)
     * otherwise:  UTF-8 => UTF-16 => encoding   (target uconv, source utf8)
     */
    ucnv_convertEx(toUnicode ? cd->utf8 : cd->uconv,
                   toUnicode ? cd->uconv : cd->utf8,
                   &ucv_out, ucv_out + *outlen,
                   &ucv_in, ucv_in + *inlen, cd->pivot_buf,
                   &cd->pivot_source, &cd->pivot_target,
                   cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);

    /* The cursors moved past what was consumed / written. */
    *inlen = ucv_in - (const char*) in;
    *outlen = ucv_out - (char *) out;

    if (U_SUCCESS(err)) {
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
        if (flush)
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
        return 0;
    }

    switch (err) {
        case U_BUFFER_OVERFLOW_ERROR:
            return -1;
        case U_INVALID_CHAR_FOUND:
        case U_ILLEGAL_CHAR_FOUND:
            return -2;
        default:
            return -3;
    }
}
#endif /* LIBXML_ICU_ENABLED */
1937
1938 /************************************************************************
1939 * *
1940 * The real API used by libxml for on-the-fly conversion *
1941 * *
1942 ************************************************************************/
1943
1944 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1945 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1946 int *outlen, const unsigned char *in, int *inlen, int flush) {
1947 int ret;
1948 (void)flush;
1949
1950 if (handler->input != NULL) {
1951 ret = handler->input(out, outlen, in, inlen);
1952 }
1953 #ifdef LIBXML_ICONV_ENABLED
1954 else if (handler->iconv_in != NULL) {
1955 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1956 }
1957 #endif /* LIBXML_ICONV_ENABLED */
1958 #ifdef LIBXML_ICU_ENABLED
1959 else if (handler->uconv_in != NULL) {
1960 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1961 flush);
1962 }
1963 #endif /* LIBXML_ICU_ENABLED */
1964 else {
1965 *outlen = 0;
1966 *inlen = 0;
1967 ret = -2;
1968 }
1969
1970 return(ret);
1971 }
1972
1973 /* Returns -4 if no output function was found. */
1974 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1975 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1976 int *outlen, const unsigned char *in, int *inlen) {
1977 int ret;
1978
1979 if (handler->output != NULL) {
1980 ret = handler->output(out, outlen, in, inlen);
1981 }
1982 #ifdef LIBXML_ICONV_ENABLED
1983 else if (handler->iconv_out != NULL) {
1984 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1985 }
1986 #endif /* LIBXML_ICONV_ENABLED */
1987 #ifdef LIBXML_ICU_ENABLED
1988 else if (handler->uconv_out != NULL) {
1989 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
1990 TRUE);
1991 }
1992 #endif /* LIBXML_ICU_ENABLED */
1993 else {
1994 *outlen = 0;
1995 *inlen = 0;
1996 ret = -4;
1997 }
1998
1999 return(ret);
2000 }
2001
2002 /**
2003 * xmlCharEncFirstLineInt:
2004 * @handler: char enconding transformation data structure
2005 * @out: an xmlBuffer for the output.
2006 * @in: an xmlBuffer for the input
2007 * @len: number of bytes to convert for the first line, or -1
2008 *
2009 * Front-end for the encoding handler input function, but handle only
2010 * the very first line, i.e. limit itself to 45 chars.
2011 *
2012 * Returns the number of byte written if success, or
2013 * -1 general error
2014 * -2 if the transcoding fails (for *in is not valid utf8 string or
2015 * the result of transformation can't fit into the encoding we want), or
2016 */
2017 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2018 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2019 xmlBufferPtr in, int len) {
2020 int ret;
2021 int written;
2022 int toconv;
2023
2024 if (handler == NULL) return(-1);
2025 if (out == NULL) return(-1);
2026 if (in == NULL) return(-1);
2027
2028 /* calculate space available */
2029 written = out->size - out->use - 1; /* count '\0' */
2030 toconv = in->use;
2031 /*
2032 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2033 * 45 chars should be sufficient to reach the end of the encoding
2034 * declaration without going too far inside the document content.
2035 * on UTF-16 this means 90bytes, on UCS4 this means 180
2036 * The actual value depending on guessed encoding is passed as @len
2037 * if provided
2038 */
2039 if (len >= 0) {
2040 if (toconv > len)
2041 toconv = len;
2042 } else {
2043 if (toconv > 180)
2044 toconv = 180;
2045 }
2046 if (toconv * 2 >= written) {
2047 xmlBufferGrow(out, toconv * 2);
2048 written = out->size - out->use - 1;
2049 }
2050
2051 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2052 in->content, &toconv, 0);
2053 xmlBufferShrink(in, toconv);
2054 out->use += written;
2055 out->content[out->use] = 0;
2056 if (ret == -1) ret = -3;
2057
2058 #ifdef DEBUG_ENCODING
2059 switch (ret) {
2060 case 0:
2061 xmlGenericError(xmlGenericErrorContext,
2062 "converted %d bytes to %d bytes of input\n",
2063 toconv, written);
2064 break;
2065 case -1:
2066 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2067 toconv, written, in->use);
2068 break;
2069 case -2:
2070 xmlGenericError(xmlGenericErrorContext,
2071 "input conversion failed due to input error\n");
2072 break;
2073 case -3:
2074 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2075 toconv, written, in->use);
2076 break;
2077 default:
2078 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2079 }
2080 #endif /* DEBUG_ENCODING */
2081 /*
2082 * Ignore when input buffer is not on a boundary
2083 */
2084 if (ret == -3) ret = 0;
2085 if (ret == -1) ret = 0;
2086 return(ret);
2087 }
2088
2089 /**
2090 * xmlCharEncFirstLine:
2091 * @handler: char enconding transformation data structure
2092 * @out: an xmlBuffer for the output.
2093 * @in: an xmlBuffer for the input
2094 *
2095 * Front-end for the encoding handler input function, but handle only
2096 * the very first line, i.e. limit itself to 45 chars.
2097 *
2098 * Returns the number of byte written if success, or
2099 * -1 general error
2100 * -2 if the transcoding fails (for *in is not valid utf8 string or
2101 * the result of transformation can't fit into the encoding we want), or
2102 */
2103 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2104 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2105 xmlBufferPtr in) {
2106 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2107 }
2108
2109 /**
2110 * xmlCharEncFirstLineInput:
2111 * @input: a parser input buffer
2112 * @len: number of bytes to convert for the first line, or -1
2113 *
2114 * Front-end for the encoding handler input function, but handle only
2115 * the very first line. Point is that this is based on autodetection
2116 * of the encoding and once that first line is converted we may find
2117 * out that a different decoder is needed to process the input.
2118 *
2119 * Returns the number of byte written if success, or
2120 * -1 general error
2121 * -2 if the transcoding fails (for *in is not valid utf8 string or
2122 * the result of transformation can't fit into the encoding we want), or
2123 */
2124 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2125 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2126 {
2127 int ret;
2128 size_t written;
2129 size_t toconv;
2130 int c_in;
2131 int c_out;
2132 xmlBufPtr in;
2133 xmlBufPtr out;
2134
2135 if ((input == NULL) || (input->encoder == NULL) ||
2136 (input->buffer == NULL) || (input->raw == NULL))
2137 return (-1);
2138 out = input->buffer;
2139 in = input->raw;
2140
2141 toconv = xmlBufUse(in);
2142 if (toconv == 0)
2143 return (0);
2144 written = xmlBufAvail(out) - 1; /* count '\0' */
2145 /*
2146 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2147 * 45 chars should be sufficient to reach the end of the encoding
2148 * declaration without going too far inside the document content.
2149 * on UTF-16 this means 90bytes, on UCS4 this means 180
2150 * The actual value depending on guessed encoding is passed as @len
2151 * if provided
2152 */
2153 if (len >= 0) {
2154 if (toconv > (unsigned int) len)
2155 toconv = len;
2156 } else {
2157 if (toconv > 180)
2158 toconv = 180;
2159 }
2160 if (toconv * 2 >= written) {
2161 xmlBufGrow(out, toconv * 2);
2162 written = xmlBufAvail(out) - 1;
2163 }
2164 if (written > 360)
2165 written = 360;
2166
2167 c_in = toconv;
2168 c_out = written;
2169 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2170 xmlBufContent(in), &c_in, 0);
2171 xmlBufShrink(in, c_in);
2172 xmlBufAddLen(out, c_out);
2173 if (ret == -1)
2174 ret = -3;
2175
2176 switch (ret) {
2177 case 0:
2178 #ifdef DEBUG_ENCODING
2179 xmlGenericError(xmlGenericErrorContext,
2180 "converted %d bytes to %d bytes of input\n",
2181 c_in, c_out);
2182 #endif
2183 break;
2184 case -1:
2185 #ifdef DEBUG_ENCODING
2186 xmlGenericError(xmlGenericErrorContext,
2187 "converted %d bytes to %d bytes of input, %d left\n",
2188 c_in, c_out, (int)xmlBufUse(in));
2189 #endif
2190 break;
2191 case -3:
2192 #ifdef DEBUG_ENCODING
2193 xmlGenericError(xmlGenericErrorContext,
2194 "converted %d bytes to %d bytes of input, %d left\n",
2195 c_in, c_out, (int)xmlBufUse(in));
2196 #endif
2197 break;
2198 case -2: {
2199 char buf[50];
2200 const xmlChar *content = xmlBufContent(in);
2201
2202 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2203 content[0], content[1],
2204 content[2], content[3]);
2205 buf[49] = 0;
2206 xmlEncodingErr(XML_I18N_CONV_FAILED,
2207 "input conversion failed due to input error, bytes %s\n",
2208 buf);
2209 }
2210 }
2211 /*
2212 * Ignore when input buffer is not on a boundary
2213 */
2214 if (ret == -3) ret = 0;
2215 if (ret == -1) ret = 0;
2216 return(ret);
2217 }
2218
2219 /**
2220 * xmlCharEncInput:
2221 * @input: a parser input buffer
2222 * @flush: try to flush all the raw buffer
2223 *
2224 * Generic front-end for the encoding handler on parser input
2225 *
2226 * Returns the number of byte written if success, or
2227 * -1 general error
2228 * -2 if the transcoding fails (for *in is not valid utf8 string or
2229 * the result of transformation can't fit into the encoding we want), or
2230 */
2231 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2232 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2233 {
2234 int ret;
2235 size_t written;
2236 size_t toconv;
2237 int c_in;
2238 int c_out;
2239 xmlBufPtr in;
2240 xmlBufPtr out;
2241
2242 if ((input == NULL) || (input->encoder == NULL) ||
2243 (input->buffer == NULL) || (input->raw == NULL))
2244 return (-1);
2245 out = input->buffer;
2246 in = input->raw;
2247
2248 toconv = xmlBufUse(in);
2249 if (toconv == 0)
2250 return (0);
2251 if ((toconv > 64 * 1024) && (flush == 0))
2252 toconv = 64 * 1024;
2253 written = xmlBufAvail(out);
2254 if (written > 0)
2255 written--; /* count '\0' */
2256 if (toconv * 2 >= written) {
2257 xmlBufGrow(out, toconv * 2);
2258 written = xmlBufAvail(out);
2259 if (written > 0)
2260 written--; /* count '\0' */
2261 }
2262 if ((written > 128 * 1024) && (flush == 0))
2263 written = 128 * 1024;
2264
2265 c_in = toconv;
2266 c_out = written;
2267 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2268 xmlBufContent(in), &c_in, flush);
2269 xmlBufShrink(in, c_in);
2270 xmlBufAddLen(out, c_out);
2271 if (ret == -1)
2272 ret = -3;
2273
2274 switch (ret) {
2275 case 0:
2276 #ifdef DEBUG_ENCODING
2277 xmlGenericError(xmlGenericErrorContext,
2278 "converted %d bytes to %d bytes of input\n",
2279 c_in, c_out);
2280 #endif
2281 break;
2282 case -1:
2283 #ifdef DEBUG_ENCODING
2284 xmlGenericError(xmlGenericErrorContext,
2285 "converted %d bytes to %d bytes of input, %d left\n",
2286 c_in, c_out, (int)xmlBufUse(in));
2287 #endif
2288 break;
2289 case -3:
2290 #ifdef DEBUG_ENCODING
2291 xmlGenericError(xmlGenericErrorContext,
2292 "converted %d bytes to %d bytes of input, %d left\n",
2293 c_in, c_out, (int)xmlBufUse(in));
2294 #endif
2295 break;
2296 case -2: {
2297 char buf[50];
2298 const xmlChar *content = xmlBufContent(in);
2299
2300 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2301 content[0], content[1],
2302 content[2], content[3]);
2303 buf[49] = 0;
2304 xmlEncodingErr(XML_I18N_CONV_FAILED,
2305 "input conversion failed due to input error, bytes %s\n",
2306 buf);
2307 }
2308 }
2309 /*
2310 * Ignore when input buffer is not on a boundary
2311 */
2312 if (ret == -3)
2313 ret = 0;
2314 return (c_out? c_out : ret);
2315 }
2316
2317 /**
2318 * xmlCharEncInFunc:
2319 * @handler: char encoding transformation data structure
2320 * @out: an xmlBuffer for the output.
2321 * @in: an xmlBuffer for the input
2322 *
2323 * Generic front-end for the encoding handler input function
2324 *
2325 * Returns the number of byte written if success, or
2326 * -1 general error
2327 * -2 if the transcoding fails (for *in is not valid utf8 string or
2328 * the result of transformation can't fit into the encoding we want), or
2329 */
2330 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2331 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2332 xmlBufferPtr in)
2333 {
2334 int ret;
2335 int written;
2336 int toconv;
2337
2338 if (handler == NULL)
2339 return (-1);
2340 if (out == NULL)
2341 return (-1);
2342 if (in == NULL)
2343 return (-1);
2344
2345 toconv = in->use;
2346 if (toconv == 0)
2347 return (0);
2348 written = out->size - out->use -1; /* count '\0' */
2349 if (toconv * 2 >= written) {
2350 xmlBufferGrow(out, out->size + toconv * 2);
2351 written = out->size - out->use - 1;
2352 }
2353 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2354 in->content, &toconv, 1);
2355 xmlBufferShrink(in, toconv);
2356 out->use += written;
2357 out->content[out->use] = 0;
2358 if (ret == -1)
2359 ret = -3;
2360
2361 switch (ret) {
2362 case 0:
2363 #ifdef DEBUG_ENCODING
2364 xmlGenericError(xmlGenericErrorContext,
2365 "converted %d bytes to %d bytes of input\n",
2366 toconv, written);
2367 #endif
2368 break;
2369 case -1:
2370 #ifdef DEBUG_ENCODING
2371 xmlGenericError(xmlGenericErrorContext,
2372 "converted %d bytes to %d bytes of input, %d left\n",
2373 toconv, written, in->use);
2374 #endif
2375 break;
2376 case -3:
2377 #ifdef DEBUG_ENCODING
2378 xmlGenericError(xmlGenericErrorContext,
2379 "converted %d bytes to %d bytes of input, %d left\n",
2380 toconv, written, in->use);
2381 #endif
2382 break;
2383 case -2: {
2384 char buf[50];
2385
2386 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2387 in->content[0], in->content[1],
2388 in->content[2], in->content[3]);
2389 buf[49] = 0;
2390 xmlEncodingErr(XML_I18N_CONV_FAILED,
2391 "input conversion failed due to input error, bytes %s\n",
2392 buf);
2393 }
2394 }
2395 /*
2396 * Ignore when input buffer is not on a boundary
2397 */
2398 if (ret == -3)
2399 ret = 0;
2400 return (written? written : ret);
2401 }
2402
2403 #ifdef LIBXML_OUTPUT_ENABLED
2404 /**
2405 * xmlCharEncOutput:
2406 * @output: a parser output buffer
2407 * @init: is this an initialization call without data
2408 *
2409 * Generic front-end for the encoding handler on parser output
2410 * a first call with @init == 1 has to be made first to initiate the
2411 * output in case of non-stateless encoding needing to initiate their
2412 * state or the output (like the BOM in UTF16).
2413 * In case of UTF8 sequence conversion errors for the given encoder,
2414 * the content will be automatically remapped to a CharRef sequence.
2415 *
2416 * Returns the number of byte written if success, or
2417 * -1 general error
2418 * -2 if the transcoding fails (for *in is not valid utf8 string or
2419 * the result of transformation can't fit into the encoding we want), or
2420 */
2421 int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2422 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2423 {
2424 int ret;
2425 size_t written;
2426 size_t writtentot = 0;
2427 size_t toconv;
2428 int c_in;
2429 int c_out;
2430 xmlBufPtr in;
2431 xmlBufPtr out;
2432
2433 if ((output == NULL) || (output->encoder == NULL) ||
2434 (output->buffer == NULL) || (output->conv == NULL))
2435 return (-1);
2436 out = output->conv;
2437 in = output->buffer;
2438
2439 retry:
2440
2441 written = xmlBufAvail(out);
2442 if (written > 0)
2443 written--; /* count '\0' */
2444
2445 /*
2446 * First specific handling of the initialization call
2447 */
2448 if (init) {
2449 c_in = 0;
2450 c_out = written;
2451 /* TODO: Check return value. */
2452 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2453 NULL, &c_in);
2454 xmlBufAddLen(out, c_out);
2455 #ifdef DEBUG_ENCODING
2456 xmlGenericError(xmlGenericErrorContext,
2457 "initialized encoder\n");
2458 #endif
2459 return(0);
2460 }
2461
2462 /*
2463 * Conversion itself.
2464 */
2465 toconv = xmlBufUse(in);
2466 if (toconv == 0)
2467 return (0);
2468 if (toconv > 64 * 1024)
2469 toconv = 64 * 1024;
2470 if (toconv * 4 >= written) {
2471 xmlBufGrow(out, toconv * 4);
2472 written = xmlBufAvail(out) - 1;
2473 }
2474 if (written > 256 * 1024)
2475 written = 256 * 1024;
2476
2477 c_in = toconv;
2478 c_out = written;
2479 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2480 xmlBufContent(in), &c_in);
2481 xmlBufShrink(in, c_in);
2482 xmlBufAddLen(out, c_out);
2483 writtentot += c_out;
2484 if (ret == -1) {
2485 if (c_out > 0) {
2486 /* Can be a limitation of iconv or uconv */
2487 goto retry;
2488 }
2489 ret = -3;
2490 }
2491
2492 /*
2493 * Attempt to handle error cases
2494 */
2495 switch (ret) {
2496 case 0:
2497 #ifdef DEBUG_ENCODING
2498 xmlGenericError(xmlGenericErrorContext,
2499 "converted %d bytes to %d bytes of output\n",
2500 c_in, c_out);
2501 #endif
2502 break;
2503 case -1:
2504 #ifdef DEBUG_ENCODING
2505 xmlGenericError(xmlGenericErrorContext,
2506 "output conversion failed by lack of space\n");
2507 #endif
2508 break;
2509 case -3:
2510 #ifdef DEBUG_ENCODING
2511 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2512 c_in, c_out, (int) xmlBufUse(in));
2513 #endif
2514 break;
2515 case -4:
2516 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2517 "xmlCharEncOutFunc: no output function !\n", NULL);
2518 ret = -1;
2519 break;
2520 case -2: {
2521 xmlChar charref[20];
2522 int len = (int) xmlBufUse(in);
2523 xmlChar *content = xmlBufContent(in);
2524 int cur, charrefLen;
2525
2526 cur = xmlGetUTF8Char(content, &len);
2527 if (cur <= 0)
2528 break;
2529
2530 #ifdef DEBUG_ENCODING
2531 xmlGenericError(xmlGenericErrorContext,
2532 "handling output conversion error\n");
2533 xmlGenericError(xmlGenericErrorContext,
2534 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2535 content[0], content[1],
2536 content[2], content[3]);
2537 #endif
2538 /*
2539 * Removes the UTF8 sequence, and replace it by a charref
2540 * and continue the transcoding phase, hoping the error
2541 * did not mangle the encoder state.
2542 */
2543 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2544 "&#%d;", cur);
2545 xmlBufShrink(in, len);
2546 xmlBufGrow(out, charrefLen * 4);
2547 c_out = xmlBufAvail(out) - 1;
2548 c_in = charrefLen;
2549 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2550 charref, &c_in);
2551
2552 if ((ret < 0) || (c_in != charrefLen)) {
2553 char buf[50];
2554
2555 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2556 content[0], content[1],
2557 content[2], content[3]);
2558 buf[49] = 0;
2559 xmlEncodingErr(XML_I18N_CONV_FAILED,
2560 "output conversion failed due to conv error, bytes %s\n",
2561 buf);
2562 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2563 content[0] = ' ';
2564 break;
2565 }
2566
2567 xmlBufAddLen(out, c_out);
2568 writtentot += c_out;
2569 goto retry;
2570 }
2571 }
2572 return(ret);
2573 }
2574 #endif
2575
2576 /**
2577 * xmlCharEncOutFunc:
2578 * @handler: char enconding transformation data structure
2579 * @out: an xmlBuffer for the output.
2580 * @in: an xmlBuffer for the input
2581 *
2582 * Generic front-end for the encoding handler output function
2583 * a first call with @in == NULL has to be made firs to initiate the
2584 * output in case of non-stateless encoding needing to initiate their
2585 * state or the output (like the BOM in UTF16).
2586 * In case of UTF8 sequence conversion errors for the given encoder,
2587 * the content will be automatically remapped to a CharRef sequence.
2588 *
2589 * Returns the number of byte written if success, or
2590 * -1 general error
2591 * -2 if the transcoding fails (for *in is not valid utf8 string or
2592 * the result of transformation can't fit into the encoding we want), or
2593 */
2594 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2595 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2596 xmlBufferPtr in) {
2597 int ret;
2598 int written;
2599 int writtentot = 0;
2600 int toconv;
2601 int output = 0;
2602
2603 if (handler == NULL) return(-1);
2604 if (out == NULL) return(-1);
2605
2606 retry:
2607
2608 written = out->size - out->use;
2609
2610 if (written > 0)
2611 written--; /* Gennady: count '/0' */
2612
2613 /*
2614 * First specific handling of in = NULL, i.e. the initialization call
2615 */
2616 if (in == NULL) {
2617 toconv = 0;
2618 /* TODO: Check return value. */
2619 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2620 NULL, &toconv);
2621 out->use += written;
2622 out->content[out->use] = 0;
2623 #ifdef DEBUG_ENCODING
2624 xmlGenericError(xmlGenericErrorContext,
2625 "initialized encoder\n");
2626 #endif
2627 return(0);
2628 }
2629
2630 /*
2631 * Conversion itself.
2632 */
2633 toconv = in->use;
2634 if (toconv == 0)
2635 return(0);
2636 if (toconv * 4 >= written) {
2637 xmlBufferGrow(out, toconv * 4);
2638 written = out->size - out->use - 1;
2639 }
2640 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2641 in->content, &toconv);
2642 xmlBufferShrink(in, toconv);
2643 out->use += written;
2644 writtentot += written;
2645 out->content[out->use] = 0;
2646 if (ret == -1) {
2647 if (written > 0) {
2648 /* Can be a limitation of iconv or uconv */
2649 goto retry;
2650 }
2651 ret = -3;
2652 }
2653
2654 if (ret >= 0) output += ret;
2655
2656 /*
2657 * Attempt to handle error cases
2658 */
2659 switch (ret) {
2660 case 0:
2661 #ifdef DEBUG_ENCODING
2662 xmlGenericError(xmlGenericErrorContext,
2663 "converted %d bytes to %d bytes of output\n",
2664 toconv, written);
2665 #endif
2666 break;
2667 case -1:
2668 #ifdef DEBUG_ENCODING
2669 xmlGenericError(xmlGenericErrorContext,
2670 "output conversion failed by lack of space\n");
2671 #endif
2672 break;
2673 case -3:
2674 #ifdef DEBUG_ENCODING
2675 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2676 toconv, written, in->use);
2677 #endif
2678 break;
2679 case -4:
2680 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2681 "xmlCharEncOutFunc: no output function !\n", NULL);
2682 ret = -1;
2683 break;
2684 case -2: {
2685 xmlChar charref[20];
2686 int len = in->use;
2687 const xmlChar *utf = (const xmlChar *) in->content;
2688 int cur, charrefLen;
2689
2690 cur = xmlGetUTF8Char(utf, &len);
2691 if (cur <= 0)
2692 break;
2693
2694 #ifdef DEBUG_ENCODING
2695 xmlGenericError(xmlGenericErrorContext,
2696 "handling output conversion error\n");
2697 xmlGenericError(xmlGenericErrorContext,
2698 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2699 in->content[0], in->content[1],
2700 in->content[2], in->content[3]);
2701 #endif
2702 /*
2703 * Removes the UTF8 sequence, and replace it by a charref
2704 * and continue the transcoding phase, hoping the error
2705 * did not mangle the encoder state.
2706 */
2707 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2708 "&#%d;", cur);
2709 xmlBufferShrink(in, len);
2710 xmlBufferGrow(out, charrefLen * 4);
2711 written = out->size - out->use - 1;
2712 toconv = charrefLen;
2713 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2714 charref, &toconv);
2715
2716 if ((ret < 0) || (toconv != charrefLen)) {
2717 char buf[50];
2718
2719 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2720 in->content[0], in->content[1],
2721 in->content[2], in->content[3]);
2722 buf[49] = 0;
2723 xmlEncodingErr(XML_I18N_CONV_FAILED,
2724 "output conversion failed due to conv error, bytes %s\n",
2725 buf);
2726 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2727 in->content[0] = ' ';
2728 break;
2729 }
2730
2731 out->use += written;
2732 writtentot += written;
2733 out->content[out->use] = 0;
2734 goto retry;
2735 }
2736 }
2737 return(ret);
2738 }
2739
2740 /**
2741 * xmlCharEncCloseFunc:
2742 * @handler: char enconding transformation data structure
2743 *
2744 * Generic front-end for encoding handler close function
2745 *
2746 * Returns 0 if success, or -1 in case of error
2747 */
2748 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2749 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2750 int ret = 0;
2751 int tofree = 0;
2752 int i, handler_in_list = 0;
2753
2754 if (handler == NULL) return(-1);
2755 if (handler->name == NULL) return(-1);
2756 if (handlers != NULL) {
2757 for (i = 0;i < nbCharEncodingHandler; i++) {
2758 if (handler == handlers[i]) {
2759 handler_in_list = 1;
2760 break;
2761 }
2762 }
2763 }
2764 #ifdef LIBXML_ICONV_ENABLED
2765 /*
2766 * Iconv handlers can be used only once, free the whole block.
2767 * and the associated icon resources.
2768 */
2769 if ((handler_in_list == 0) &&
2770 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2771 tofree = 1;
2772 if (handler->iconv_out != NULL) {
2773 if (iconv_close(handler->iconv_out))
2774 ret = -1;
2775 handler->iconv_out = NULL;
2776 }
2777 if (handler->iconv_in != NULL) {
2778 if (iconv_close(handler->iconv_in))
2779 ret = -1;
2780 handler->iconv_in = NULL;
2781 }
2782 }
2783 #endif /* LIBXML_ICONV_ENABLED */
2784 #ifdef LIBXML_ICU_ENABLED
2785 if ((handler_in_list == 0) &&
2786 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2787 tofree = 1;
2788 if (handler->uconv_out != NULL) {
2789 closeIcuConverter(handler->uconv_out);
2790 handler->uconv_out = NULL;
2791 }
2792 if (handler->uconv_in != NULL) {
2793 closeIcuConverter(handler->uconv_in);
2794 handler->uconv_in = NULL;
2795 }
2796 }
2797 #endif
2798 if (tofree) {
2799 /* free up only dynamic handlers iconv/uconv */
2800 if (handler->name != NULL)
2801 xmlFree(handler->name);
2802 handler->name = NULL;
2803 xmlFree(handler);
2804 }
2805 #ifdef DEBUG_ENCODING
2806 if (ret)
2807 xmlGenericError(xmlGenericErrorContext,
2808 "failed to close the encoding handler\n");
2809 else
2810 xmlGenericError(xmlGenericErrorContext,
2811 "closed the encoding handler\n");
2812 #endif
2813
2814 return(ret);
2815 }
2816
2817 /**
2818 * xmlByteConsumed:
2819 * @ctxt: an XML parser context
2820 *
2821 * This function provides the current index of the parser relative
2822 * to the start of the current entity. This function is computed in
2823 * bytes from the beginning starting at zero and finishing at the
2824 * size in byte of the file if parsing a file. The function is
2825 * of constant cost if the input is UTF-8 but can be costly if run
2826 * on non-UTF-8 input.
2827 *
2828 * Returns the index in bytes from the beginning of the entity or -1
2829 * in case the index could not be computed.
2830 */
2831 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2832 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2833 xmlParserInputPtr in;
2834
2835 if (ctxt == NULL) return(-1);
2836 in = ctxt->input;
2837 if (in == NULL) return(-1);
2838 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2839 unsigned int unused = 0;
2840 xmlCharEncodingHandler * handler = in->buf->encoder;
2841 /*
2842 * Encoding conversion, compute the number of unused original
2843 * bytes from the input not consumed and substract that from
2844 * the raw consumed value, this is not a cheap operation
2845 */
2846 if (in->end - in->cur > 0) {
2847 unsigned char convbuf[32000];
2848 const unsigned char *cur = (const unsigned char *)in->cur;
2849 int toconv = in->end - in->cur, written = 32000;
2850
2851 int ret;
2852
2853 do {
2854 toconv = in->end - cur;
2855 written = 32000;
2856 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2857 cur, &toconv);
2858 if (ret < 0) {
2859 if (written > 0)
2860 ret = -2;
2861 else
2862 return(-1);
2863 }
2864 unused += written;
2865 cur += toconv;
2866 } while (ret == -2);
2867 }
2868 if (in->buf->rawconsumed < unused)
2869 return(-1);
2870 return(in->buf->rawconsumed - unused);
2871 }
2872 return(in->consumed + (in->cur - in->base));
2873 }
2874
2875 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2876 #ifdef LIBXML_ISO8859X_ENABLED
2877
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops without error as soon as @out is
 * full; the caller can tell from @inlen how much input remains.
 * Calling with @in == NULL is the initialization call and does nothing
 * except setting both lengths to 0.
 *
 * Layout of @xlattable as used here: bytes 0..31 are indexed by the low
 * 5 bits of a 2-byte lead byte, bytes 32..47 by the low 4 bits of a
 * 3-byte lead byte, and both give the number of a 64-byte block; the
 * blocks start at offset 48 and are indexed by the low 6 bits of a
 * trailing byte. A final value of 0 means "not in the character set".
 *
 * Returns the number of bytes written if success, or
 *     -1 if a required pointer is NULL
 *     -2 if the transcoding fails (invalid UTF-8, or a character that
 *        is not part of the target character set)
 *     -3 if the input ends in the middle of a multi-byte sequence
 * The value of @inlen after return is the number of octets consumed,
 * i.e. the length of the input that was completely converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
	return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned char d;

        /*
         * Each input character yields exactly one output byte, so one
         * check per character is enough to stay inside @out.
         */
        if (out >= outend)
            break;

        d = *in++;
        if (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
	    d = d & 0x0F;
	    d = xlattable [48 + c2 + xlattable [48 + c1 +
			xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        processed = in;
    }
    /*
     * On every exit path only completely converted characters are
     * reported: out and processed are advanced on success only.
     */
    *outlen = out - outstart;
    *inlen = processed - instart;
    if (ret < 0)
        return(ret);
    return(*outlen);
}
2996
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entries giving the code point of the bytes
 *                0x80..0xFF, 0 marking an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops without error when @out gets
 * too full to take the next character.
 *
 * Returns the number of bytes written if success, or -1 otherwise
 *     (NULL argument or input byte without a defined code point)
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
	return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * A high byte may need up to 3 output bytes, so the main loop only
     * runs while at least that much room is left. The test is written
     * on the remaining size so that no pointer before @out is formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* fast path: copy a run of ASCII, limited by the room in out */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* the bound is tested first so *in is never read past instop */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * Up to two bytes of room may be left when the loop above stops,
     * they can still take ASCII characters.
     */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3062
3063
3064 /************************************************************************
3065 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3066 ************************************************************************/
3067
/*
 * ISO-8859-2 to Unicode: one 16-bit code point for each of the 128
 * bytes 0x80..0xFF, stored at index (byte - 0x80).
 * Presumably the table handed to ISO8859xToUTF8() for this character
 * set (the call site is not in this part of the file); that function
 * treats an entry of 0 as an undefined byte.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3086
/*
 * Unicode to ISO-8859-2, in the 2-level layout that UTF8ToISO8859x()
 * indexes (presumably the table handed to it for this character set;
 * the call site is not in this part of the file):
 *   - bytes 0..31:  block number for a 2-byte UTF-8 sequence, indexed
 *                   by the low 5 bits of the lead byte
 *   - bytes 32..47: block number for a 3-byte UTF-8 sequence, indexed
 *                   by the low 4 bits of the lead byte
 *   - from byte 48: 6 blocks of 64 bytes, indexed by the low 6 bits of
 *                   a trailing byte
 * A resulting byte of 0 means the character is not in the set.
 * The string literal holds exactly 48 + 6 * 64 bytes, so the array has
 * no room for (and does not get) a terminating zero.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3116
/*
 * Forward table for ISO-8859-3: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index).
 * Seven entries are 0x0000; presumably these mark byte values with no
 * assigned character.
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the indexing and how 0x0000 entries are treated.
 */
3117 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3118 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3119 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3120 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3121 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3122 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, /* entries 0x20..0x27 */
3123 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3124 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3125 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3126 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3127 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3128 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3129 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3130 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3131 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3132 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3133 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3134 };
3135
/*
 * Reverse (Unicode -> ISO-8859-3) table: 48 index bytes followed by 7 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 2 = U+0100..U+013F, 3 = U+02C0..U+02FF,
 * 4 = U+0000..U+003F, 5 = U+0140..U+017F, 6 = U+00C0..U+00FF;
 * block 0 is all zero.
 * NOTE(review): block 4 is reached from index byte 0 and holds only 0xf0 at
 * entry 0, while entry 0x70 (byte 0xF0) of xmlunicodetable_ISO8859_3 is
 * 0x0000. This looks like a generator artifact of the 0x0000 placeholders;
 * confirm whether mapping U+0000 through this slot is intended.
 */
3136 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3137 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" /* index bytes 0..15 */
3138 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3139 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3140 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3141 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3145 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3146 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3147 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3148 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" /* block 2 */
3149 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3150 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3151 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 3 */
3153 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3154 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 4 */
3157 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 5 */
3161 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3164 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" /* block 6 */
3165 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3166 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3167 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3168 };
3169
/*
 * Forward table for ISO-8859-4: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index).
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the (byte - 0x80) indexing there.
 */
3170 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3171 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3172 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3173 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3174 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3175 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, /* entries 0x20..0x27 */
3176 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3177 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3178 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3179 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3180 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3181 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3182 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3183 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3184 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3185 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3186 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3187 };
3188
/*
 * Reverse (Unicode -> ISO-8859-4) table: 48 index bytes followed by 6 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 2 = U+0100..U+013F, 3 = U+0140..U+017F,
 * 4 = U+02C0..U+02FF, 5 = U+00C0..U+00FF; block 0 is all zero.
 */
3189 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3190 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" /* index bytes 0..15 */
3191 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3192 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3193 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3194 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3198 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3199 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3200 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3201 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" /* block 2 */
3202 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3203 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3204 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3205 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" /* block 3 */
3206 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3207 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3208 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3209 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" /* block 4 */
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3211 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" /* block 5 */
3214 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3215 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3216 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3217 };
3218
/*
 * Forward table for ISO-8859-5: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index). Most of the remaining
 * entries fall in 0x0401..0x045f.
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the (byte - 0x80) indexing there.
 */
3219 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3220 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3221 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3222 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3223 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3224 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, /* entries 0x20..0x27 */
3225 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3226 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3227 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3228 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3229 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3230 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3231 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3232 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3233 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3234 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3235 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3236 };
3237
/*
 * Reverse (Unicode -> ISO-8859-5) table: 48 index bytes followed by 6 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 2 = U+0400..U+043F, 3 = U+0440..U+047F,
 * 5 = U+2100..U+213F; block 4 is an intermediate block (reached from index
 * byte 34) whose entry 4 selects block 5; block 0 is all zero.
 */
3238 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3239 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 0..15 */
3240 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3241 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3247 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3248 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3249 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" /* block 2 */
3251 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3252 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3253 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3254 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" /* block 3 */
3255 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3256 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 4 */
3259 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 5 */
3263 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266 };
3267
/*
 * Forward table for ISO-8859-6: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index).
 * Many entries are 0x0000; presumably these mark byte values with no
 * assigned character.
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the indexing and how 0x0000 entries are treated.
 */
3268 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3269 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3270 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3271 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3272 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3273 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, /* entries 0x20..0x27 */
3274 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3275 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3276 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3277 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3278 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3279 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3280 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3281 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3282 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3283 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3284 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3285 };
3286
/*
 * Reverse (Unicode -> ISO-8859-6) table: 48 index bytes followed by 5 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 2 = U+0000..U+003F, 3 = U+0600..U+063F,
 * 4 = U+0640..U+067F; block 0 is all zero.
 * NOTE(review): block 2 is reached from index byte 0 and holds only 0xff at
 * entry 0, while entry 0x7f (byte 0xFF) of xmlunicodetable_ISO8859_6 is
 * 0x0000. This looks like a generator artifact of the 0x0000 placeholders;
 * confirm whether mapping U+0000 through this slot is intended.
 */
3287 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3288 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 0..15 */
3289 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3291 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3292 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3296 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3297 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3298 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 2 */
3300 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3302 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" /* block 3 */
3304 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3305 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3306 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3307 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" /* block 4 */
3308 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311 };
3312
/*
 * Forward table for ISO-8859-7: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index).
 * Some entries are 0x0000; presumably these mark byte values with no
 * assigned character.
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the indexing and how 0x0000 entries are treated.
 */
3313 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3314 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3315 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3316 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3317 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3318 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, /* entries 0x20..0x27 */
3319 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3320 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3321 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3322 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3323 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3324 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3325 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3326 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3327 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3328 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3329 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3330 };
3331
/*
 * Reverse (Unicode -> ISO-8859-7) table: 48 index bytes followed by 7 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 3 = U+2000..U+203F, 4 = U+0000..U+003F,
 * 5 = U+0380..U+03BF, 6 = U+03C0..U+03FF; block 2 is an intermediate block
 * (reached from index byte 34) whose entry 0 selects block 3; block 0 is
 * all zero.
 * NOTE(review): block 4 is reached from index byte 0 and holds only 0xff at
 * entry 0, while entry 0x7f (byte 0xFF) of xmlunicodetable_ISO8859_7 is
 * 0x0000. This looks like a generator artifact of the 0x0000 placeholders;
 * confirm whether mapping U+0000 through this slot is intended.
 */
3332 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3333 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" /* index bytes 0..15 */
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3335 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3336 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3337 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3341 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3342 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3343 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3344 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 2 */
3345 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 3 */
3349 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3350 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 4 */
3353 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3354 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3356 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" /* block 5 */
3357 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3358 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3359 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3360 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" /* block 6 */
3361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 };
3365
/*
 * Forward table for ISO-8859-8: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index).
 * Many entries are 0x0000; presumably these mark byte values with no
 * assigned character.
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the indexing and how 0x0000 entries are treated.
 */
3366 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3367 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3368 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3369 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3370 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3371 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, /* entries 0x20..0x27 */
3372 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3373 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3374 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3375 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3376 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3377 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3378 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3379 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3380 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3381 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3382 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3383 };
3384
/*
 * Reverse (Unicode -> ISO-8859-8) table: 48 index bytes followed by 7 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 2 = U+0000..U+003F, 3 = U+00C0..U+00FF,
 * 5 = U+2000..U+203F, 6 = U+05C0..U+05FF; block 4 is an intermediate block
 * (reached from index byte 34) whose entry 0 selects block 5; block 0 is
 * all zero.
 * NOTE(review): block 2 is reached from index byte 0 and holds only 0xff at
 * entry 0, while entry 0x7f (byte 0xFF) of xmlunicodetable_ISO8859_8 is
 * 0x0000. This looks like a generator artifact of the 0x0000 placeholders;
 * confirm whether mapping U+0000 through this slot is intended.
 */
3385 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3386 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 0..15 */
3387 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3388 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3389 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3390 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3394 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3395 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3396 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3397 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 2 */
3398 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3400 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 3 */
3402 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3405 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 4 */
3406 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" /* block 5 */
3410 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 6 */
3414 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3415 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 };
3418
/*
 * Forward table for ISO-8859-9: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index). All but six entries
 * (0x50, 0x5d, 0x5e, 0x70, 0x7d, 0x7e) equal 0x80 + index.
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the (byte - 0x80) indexing there.
 */
3419 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3420 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3421 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3422 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3423 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3424 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, /* entries 0x20..0x27 */
3425 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3426 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3427 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3428 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3429 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3430 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3431 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3432 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3433 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3434 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3435 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3436 };
3437
/*
 * Reverse (Unicode -> ISO-8859-9) table: 48 index bytes followed by 5 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 2 = U+00C0..U+00FF, 3 = U+0100..U+013F,
 * 4 = U+0140..U+017F; block 0 is all zero.
 */
3438 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3439 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 0..15 */
3440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3441 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3443 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3447 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3448 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3449 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3450 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" /* block 2 */
3451 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3452 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3453 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3454 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 3 */
3455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 4 */
3459 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3460 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462 };
3463
/*
 * Forward table for ISO-8859-10: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index).
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the (byte - 0x80) indexing there.
 */
3464 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3465 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3466 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3467 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3468 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3469 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, /* entries 0x20..0x27 */
3470 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3471 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3472 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3473 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3474 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3475 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3476 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3477 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3478 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3479 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3480 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3481 };
3482
/*
 * Reverse (Unicode -> ISO-8859-10) table: 48 index bytes followed by 7 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 2 = U+0100..U+013F, 3 = U+0140..U+017F,
 * 5 = U+2000..U+203F, 6 = U+00C0..U+00FF; block 4 is an intermediate block
 * (reached from index byte 34) whose entry 0 selects block 5; block 0 is
 * all zero.
 */
3483 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3484 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 0..15 */
3485 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3486 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3487 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3488 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3492 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3493 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3494 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3495 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" /* block 2 */
3496 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3497 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3498 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3499 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" /* block 3 */
3500 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3501 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3502 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3503 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 4 */
3504 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 5 */
3508 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" /* block 6 */
3512 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3513 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3514 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3515 };
3516
/*
 * Forward table for ISO-8859-11: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index). The remaining
 * non-zero entries fall in 0x0e01..0x0e5b.
 * Eight entries are 0x0000; presumably these mark byte values with no
 * assigned character.
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the indexing and how 0x0000 entries are treated.
 */
3517 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3518 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3519 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3520 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3521 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3522 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, /* entries 0x20..0x27 */
3523 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3524 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3525 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3526 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3527 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3528 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3529 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3530 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3531 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3532 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3533 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3534 };
3535
/*
 * Reverse (Unicode -> ISO-8859-11) table: 48 index bytes followed by 6 blocks
 * of 64 bytes, as the size expression states.
 * NOTE(review): the routine that walks this table is outside this chunk; the
 * reading below is inferred from the data and should be confirmed there.
 * Index bytes 0..31 appear to select a block from the low 5 bits of a 2-byte
 * UTF-8 lead byte, index bytes 32..47 from the low 4 bits of a 3-byte lead
 * byte, and a block is indexed by the low 6 bits of a continuation byte;
 * a 0 result means "no mapping". Under that reading the blocks cover:
 * 1 = U+0080..U+00BF, 3 = U+0E00..U+0E3F, 4 = U+0000..U+003F,
 * 5 = U+0E40..U+0E7F; block 2 is an intermediate block (reached from index
 * byte 32) whose entries 0x38 and 0x39 select blocks 3 and 5; block 0 is
 * all zero.
 * NOTE(review): block 4 is reached from index byte 0 and holds only 0xff at
 * entry 0, while entry 0x7f (byte 0xFF) of xmlunicodetable_ISO8859_11 is
 * 0x0000. This looks like a generator artifact of the 0x0000 placeholders;
 * confirm whether mapping U+0000 through this slot is intended.
 */
3536 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3537 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 0..15 */
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 16..31 */
3539 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* index bytes 32..47 */
3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 0 */
3541 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" /* block 1 */
3545 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3546 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 2 */
3549 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3552 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" /* block 3 */
3553 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3554 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3555 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3556 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" /* block 4 */
3557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" /* block 5 */
3561 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564 };
3565
/*
 * Forward table for ISO-8859-13: 128 16-bit entries. Entries 0x00..0x1f hold
 * 0x0080..0x009f unchanged and entry 0x20 holds 0x00a0, so each entry looks
 * like the Unicode code point for byte (0x80 + index).
 * NOTE(review): the routine that indexes this table is outside this chunk;
 * confirm the (byte - 0x80) indexing there.
 */
3566 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3567 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* entries 0x00..0x07 */
3568 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3569 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3570 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3571 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, /* entries 0x20..0x27 */
3572 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3573 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3574 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3575 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3576 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3577 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3578 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3579 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3580 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3581 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3582 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3583 };
3584
/*
 * xmltranscodetable_ISO8859_13: byte table that UTF8ToISO8859_13()
 * hands to UTF8ToISO8859x().  The string literal supplies exactly
 * 48 + 7 * 64 = 496 bytes (31 rows of 16), so the array is completely
 * filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by 64-byte blocks of ISO-8859-13 byte values, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x().
 */
3585 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3586 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3587 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3594 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3595 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3596 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3597 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3602 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3603 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3604 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3605 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3606 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3607 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3608 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3609 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3610 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3611 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3612 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3613 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3614 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3615 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3616 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3617 };
3618
/*
 * xmlunicodetable_ISO8859_14: 128-entry table of 16-bit values that
 * ISO8859_14ToUTF8() hands to ISO8859xToUTF8().  The first 32 entries
 * are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * ISO-8859-14 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3619 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3620 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3621 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3622 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3623 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3624 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3625 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3626 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3627 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3628 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3629 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3630 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3631 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3632 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3633 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3634 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3635 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3636 };
3637
/*
 * xmltranscodetable_ISO8859_14: byte table that UTF8ToISO8859_14()
 * hands to UTF8ToISO8859x().  The string literal supplies exactly
 * 48 + 10 * 64 = 688 bytes (43 rows of 16), so the array is completely
 * filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by 64-byte blocks of ISO-8859-14 byte values, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x().
 */
3638 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3639 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3647 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3648 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3649 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3653 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3654 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3655 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3656 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3657 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3658 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3659 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3666 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3667 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3668 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3674 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3676 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3679 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3680 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3681 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3682 };
3683
/*
 * xmlunicodetable_ISO8859_15: 128-entry table of 16-bit values that
 * ISO8859_15ToUTF8() hands to ISO8859xToUTF8().  All entries equal
 * 0x80 + index except eight: 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e,
 * 0x0152, 0x0153 and 0x0178.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * ISO-8859-15 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3684 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3685 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3686 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3687 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3688 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3689 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3690 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3691 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3692 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3693 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3694 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3695 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3696 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3697 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3698 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3699 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3700 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3701 };
3702
/*
 * xmltranscodetable_ISO8859_15: byte table that UTF8ToISO8859_15()
 * hands to UTF8ToISO8859x().  The string literal supplies exactly
 * 48 + 6 * 64 = 432 bytes (27 rows of 16), so the array is completely
 * filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by 64-byte blocks of ISO-8859-15 byte values, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x().
 */
3703 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3704 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3706 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3709 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3712 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3713 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3714 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3715 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3719 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3720 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3721 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3722 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3727 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3728 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3729 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3730 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3731 };
3732
/*
 * xmlunicodetable_ISO8859_16: 128-entry table of 16-bit values that
 * ISO8859_16ToUTF8() hands to ISO8859xToUTF8().  The first 32 entries
 * are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3733 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3734 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3735 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3736 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3737 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3738 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3739 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3740 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3741 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3742 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3743 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3744 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3745 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3746 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3747 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3748 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3749 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3750 };
3751
/*
 * xmltranscodetable_ISO8859_16: byte table that UTF8ToISO8859_16()
 * hands to UTF8ToISO8859x().  The string literal supplies exactly
 * 48 + 9 * 64 = 624 bytes (39 rows of 16), so the array is completely
 * filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed
 * by 64-byte blocks of ISO-8859-16 byte values, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x().
 */
3752 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3753 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3760 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3761 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3762 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3763 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3764 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3765 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3766 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3767 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3768 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3769 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3770 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3771 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3772 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3773 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3774 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3775 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3776 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3777 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3782 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3784 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3785 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3786 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3789 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3790 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3791 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3792 };
3793
3794
3795 /*
3796 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3797 */
3798
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3799 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3800 const unsigned char* in, int *inlen) {
3801 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3802 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3803 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3804 const unsigned char* in, int *inlen) {
3805 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3806 }
3807
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3808 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3809 const unsigned char* in, int *inlen) {
3810 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3811 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3812 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3813 const unsigned char* in, int *inlen) {
3814 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3815 }
3816
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3817 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3818 const unsigned char* in, int *inlen) {
3819 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3820 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3821 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3822 const unsigned char* in, int *inlen) {
3823 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3824 }
3825
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3826 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3827 const unsigned char* in, int *inlen) {
3828 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3829 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3830 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3831 const unsigned char* in, int *inlen) {
3832 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3833 }
3834
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3835 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3836 const unsigned char* in, int *inlen) {
3837 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3838 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3839 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3840 const unsigned char* in, int *inlen) {
3841 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3842 }
3843
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3844 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3845 const unsigned char* in, int *inlen) {
3846 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3847 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3848 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3849 const unsigned char* in, int *inlen) {
3850 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3851 }
3852
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3853 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3854 const unsigned char* in, int *inlen) {
3855 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3856 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3857 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3858 const unsigned char* in, int *inlen) {
3859 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3860 }
3861
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3862 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3863 const unsigned char* in, int *inlen) {
3864 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3865 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3866 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3867 const unsigned char* in, int *inlen) {
3868 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3869 }
3870
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3871 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3872 const unsigned char* in, int *inlen) {
3873 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3874 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3875 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3876 const unsigned char* in, int *inlen) {
3877 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3878 }
3879
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3880 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3881 const unsigned char* in, int *inlen) {
3882 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3883 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3884 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3885 const unsigned char* in, int *inlen) {
3886 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3887 }
3888
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3889 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3890 const unsigned char* in, int *inlen) {
3891 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3892 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3893 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3894 const unsigned char* in, int *inlen) {
3895 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3896 }
3897
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3898 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3899 const unsigned char* in, int *inlen) {
3900 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3901 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3902 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3903 const unsigned char* in, int *inlen) {
3904 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3905 }
3906
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3907 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3908 const unsigned char* in, int *inlen) {
3909 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3910 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3911 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3912 const unsigned char* in, int *inlen) {
3913 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3914 }
3915
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3916 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3917 const unsigned char* in, int *inlen) {
3918 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3919 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3920 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3921 const unsigned char* in, int *inlen) {
3922 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3923 }
3924
3925 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3926 xmlRegisterCharEncodingHandlersISO8859x (void) {
3927 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3928 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3929 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3930 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3931 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3932 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3933 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3934 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3935 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3936 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3937 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3938 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3939 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3940 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3941 }
3942
3943 #endif
3944 #endif
3945
3946 #define bottom_encoding
3947 #include "elfgcchack.h"
3948