xref: /reactos/sdk/lib/3rdparty/libxml2/encoding.c (revision 911153da)
1 /*
2  * encoding.c : implements the encoding conversion functions needed for XML
3  *
4  * Related specs:
5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8  * [ISO-8859-1]   ISO Latin-1 characters codes.
9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10  *                Worldwide Character Encoding -- Version 1.0", Addison-
11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12  *                described in Unicode Technical Report #4.
13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14  *                Information Interchange, ANSI X3.4-1986.
15  *
16  * See Copyright for the status of this software.
17  *
18  * daniel@veillard.com
19  *
20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21  */
22 
23 #define IN_LIBXML
24 #include "libxml.h"
25 
26 #include <string.h>
27 #include <limits.h>
28 #include <ctype.h>
29 #include <stdlib.h>
30 
31 #ifdef LIBXML_ICONV_ENABLED
32 #include <errno.h>
33 #endif
34 
35 #include <libxml/encoding.h>
36 #include <libxml/xmlmemory.h>
37 #ifdef LIBXML_HTML_ENABLED
38 #include <libxml/HTMLparser.h>
39 #endif
40 #include <libxml/globals.h>
41 #include <libxml/xmlerror.h>
42 
43 #include "buf.h"
44 #include "enc.h"
45 
46 #ifdef LIBXML_ICU_ENABLED
47 #include <unicode/ucnv.h>
48 /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
49 #define ICU_PIVOT_BUF_SIZE 1024
50 typedef struct _uconv_t uconv_t;
51 struct _uconv_t {
52   UConverter *uconv; /* for conversion between an encoding and UTF-16 */
53   UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
54   UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
55   UChar      *pivot_source;
56   UChar      *pivot_target;
57 };
58 #endif
59 
60 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
61 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
62 
/*
 * One entry of the encoding alias table: @alias is the alternative
 * spelling, @name the encoding name it stands for. Both strings are
 * released with xmlFree() by xmlCleanupEncodingAliases().
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
69 
70 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
71 static int xmlCharEncodingAliasesNb = 0;
72 static int xmlCharEncodingAliasesMax = 0;
73 
74 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
75 #if 0
76 #define DEBUG_ENCODING  /* Define this to get encoding traces */
77 #endif
78 #else
79 #ifdef LIBXML_ISO8859X_ENABLED
80 static void xmlRegisterCharEncodingHandlersISO8859x (void);
81 #endif
82 #endif
83 
/*
 * Byte-order flag: non-zero when the host is treated as little-endian.
 * Defaults to 1; any runtime detection that updates it is outside this
 * section of the file.
 */
static int xmlLittleEndian = 1;
85 
86 /**
87  * xmlEncodingErrMemory:
88  * @extra:  extra information
89  *
90  * Handle an out of memory condition
91  */
92 static void
xmlEncodingErrMemory(const char * extra)93 xmlEncodingErrMemory(const char *extra)
94 {
95     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
96 }
97 
98 /**
99  * xmlErrEncoding:
100  * @error:  the error number
101  * @msg:  the error message
102  *
103  * n encoding error
104  */
105 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)106 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
107 {
108     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
109                     XML_FROM_I18N, error, XML_ERR_FATAL,
110                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
111 }
112 
113 #ifdef LIBXML_ICU_ENABLED
114 static uconv_t*
openIcuConverter(const char * name,int toUnicode)115 openIcuConverter(const char* name, int toUnicode)
116 {
117   UErrorCode status = U_ZERO_ERROR;
118   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
119   if (conv == NULL)
120     return NULL;
121 
122   conv->pivot_source = conv->pivot_buf;
123   conv->pivot_target = conv->pivot_buf;
124 
125   conv->uconv = ucnv_open(name, &status);
126   if (U_FAILURE(status))
127     goto error;
128 
129   status = U_ZERO_ERROR;
130   if (toUnicode) {
131     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
132                         NULL, NULL, NULL, &status);
133   }
134   else {
135     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
136                         NULL, NULL, NULL, &status);
137   }
138   if (U_FAILURE(status))
139     goto error;
140 
141   status = U_ZERO_ERROR;
142   conv->utf8 = ucnv_open("UTF-8", &status);
143   if (U_SUCCESS(status))
144     return conv;
145 
146 error:
147   if (conv->uconv)
148     ucnv_close(conv->uconv);
149   xmlFree(conv);
150   return NULL;
151 }
152 
153 static void
closeIcuConverter(uconv_t * conv)154 closeIcuConverter(uconv_t *conv)
155 {
156   if (conv != NULL) {
157     ucnv_close(conv->uconv);
158     ucnv_close(conv->utf8);
159     xmlFree(conv);
160   }
161 }
162 #endif /* LIBXML_ICU_ENABLED */
163 
164 /************************************************************************
165  *									*
166  *		Conversions To/From UTF8 encoding			*
167  *									*
168  ************************************************************************/
169 
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII maps one byte to one byte, so as many bytes
 * are converted as fit in the smaller of the two buffers.
 *
 * Returns the number of bytes written if success, or -1 if @out, @outlen
 *     or @inlen is NULL or a byte outside the ASCII range is met.
 * The value of @inlen after return is the number of octets consumed
 *     (on error: the octets consumed before the offending byte).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int count;
    int i;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    /* One output byte per input byte: the smaller buffer bounds the copy. */
    count = (*inlen < *outlen) ? *inlen : *outlen;
    if (count < 0)
        count = 0;

    for (i = 0; i < count; i++) {
        if (in[i] >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = i;
            *inlen = i;
            return(-1);
        }
        out[i] = in[i];
    }

    *outlen = count;
    *inlen = count;
    return(count);
}
214 
215 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     a sequence truncated at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        /* sequence split at the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte makes the sequence malformed; it
             * must not be folded into a truncated code point.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(-2);
}
299 #endif /* LIBXML_OUTPUT_ENABLED */
300 
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, the others become a
 * two byte sequence; conversion stops at the first character that no
 * longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcEnd;
    unsigned char *dst;
    unsigned char *dstEnd;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    src = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        unsigned char ch = *src;

        if (ch < 0x80) {
            if (dstEnd - dst < 1)
                break;
            *dst++ = ch;
        } else {
            /* two bytes needed: 110000xx 10xxxxxx */
            if (dstEnd - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
349 
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit in
 * the smaller of the two buffers, without looking at their content.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the smaller of the two lengths is negative.
 *     The value of *inlenb after return is the number of octets consumed
 *     and *outlen the number of octets produced (both left untouched
 *     when -1 is returned).
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
396 
397 
398 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed sequence or character above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     a sequence truncated at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcEnd;
    const unsigned char *committed;
    unsigned char *dst;
    const unsigned char *dstEnd;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    committed = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        unsigned int lead = *src++;
        unsigned int value;
        int extra;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            value = lead & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            ret = -2;
            goto done;
        }

        /* incomplete sequence at the end of the block */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                ret = -2;
                goto done;
            }
            value = (value << 6) | (cont & 0x3F);
            extra--;
        }

        /* assertion: value is a single UCS-4 value */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            ret = -2;
            goto done;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = value;
        committed = src;
    }

done:
    *outlen = dst - out;
    *inlen = committed - in;
    if (ret < 0)
        return(ret);
    return(*outlen);
}
488 #endif /* LIBXML_OUTPUT_ENABLED */
489 
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so the
 * result does not depend on the alignment of @inb or on the byte order
 * of the host. A trailing odd byte and a surrogate pair split at the end
 * of the block are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 *     The value of *inlenb after return is the number of octets consumed
 *     and *outlen the number of octets produced. A character is only
 *     consumed when its complete UTF-8 form fits in @out.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;     /* start of the next unconsumed char */
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int inlen;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;

    /* Only whole 16-bit units are looked at. */
    inlen = *inlenb;
    if (inlen < 0)
        inlen = 0;
    inlen -= inlen % 2;
    inend = inb + inlen;

    while (inend - in >= 2) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2) {      /* handle split multi-byte characters */
                break;
            }
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (in - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        } else if (c < 0x10000) {
            if (outend - out < 3)
                break;
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        } else {
            if (outend - out < 4)
                break;
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            *out++ = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
            *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        }
        in = next;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    return(*outlen);
}
582 
583 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written byte by byte, so the result
 * does not depend on the alignment of @outb or on the byte order of the
 * host. No byte order mark is emitted.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     above 0x10FFFF).
 *     The value of *inlen after return is the number of octets consumed
 *     and *outlen the number of octets produced; a sequence truncated at
 *     the end of @in, or a character that does not fit in @outb, is left
 *     unconsumed.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = out + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* sequence split at the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte makes the sequence malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
692 
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL to initialize the
 *       output
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. When @in is NULL the call only emits the
 * little-endian byte order mark (FF FE) if at least two bytes are
 * available; otherwise the work is delegated to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or whatever UTF8ToUTF16LE() reports for the conversion
 *     itself (-2 if the transcoding failed).
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* Validate before the BOM path dereferences anything. */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        /* no room for the BOM: nothing is written */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
731 #endif /* LIBXML_OUTPUT_ENABLED */
732 
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so the
 * result does not depend on the alignment of @inb or on the byte order
 * of the host. A trailing odd byte and a surrogate pair split at the end
 * of the block are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed
 *     and *outlen the number of octets produced. A character is only
 *     consumed when its complete UTF-8 form fits in @out.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;     /* start of the next unconsumed char */
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int inlen;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;

    /* Only whole 16-bit units are looked at. */
    inlen = *inlenb;
    if (inlen < 0)
        inlen = 0;
    inlen -= inlen % 2;
    inend = inb + inlen;

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2) {      /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (in - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        } else if (c < 0x10000) {
            if (outend - out < 3)
                break;
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        } else {
            if (outend - out < 4)
                break;
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            *out++ = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
            *out++ = (unsigned char) (((c >> 6) & 0x3F) | 0x80);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        }
        in = next;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    return(*outlen);
}
825 
826 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written byte by byte, so the result
 * does not depend on the alignment of @outb or on the byte order of the
 * host. No byte order mark is emitted.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     above 0x10FFFF).
 *     The value of *inlen after return is the number of octets consumed
 *     and *outlen the number of octets (not 16-bit units) produced, on
 *     the error paths as well; a sequence truncated at the end of @in,
 *     or a character that does not fit in @outb, is left unconsumed.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = out + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* sequence split at the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte makes the sequence malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    /* byte count, like the success path */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
932 #endif /* LIBXML_OUTPUT_ENABLED */
933 
934 /************************************************************************
935  *									*
936  *		Generic encoding handling routines			*
937  *									*
938  ************************************************************************/
939 
940 /**
941  * xmlDetectCharEncoding:
942  * @in:  a pointer to the first bytes of the XML entity, must be at least
943  *       2 bytes long (at least 4 if encoding is UTF4 variant).
944  * @len:  pointer to the length of the buffer
945  *
946  * Guess the encoding of the entity using the first bytes of the entity content
947  * according to the non-normative appendix F of the XML-1.0 recommendation.
948  *
949  * Returns one of the XML_CHAR_ENCODING_... values.
950  */
951 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)952 xmlDetectCharEncoding(const unsigned char* in, int len)
953 {
954     if (in == NULL)
955         return(XML_CHAR_ENCODING_NONE);
956     if (len >= 4) {
957 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
958 	    (in[2] == 0x00) && (in[3] == 0x3C))
959 	    return(XML_CHAR_ENCODING_UCS4BE);
960 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961 	    (in[2] == 0x00) && (in[3] == 0x00))
962 	    return(XML_CHAR_ENCODING_UCS4LE);
963 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
964 	    (in[2] == 0x3C) && (in[3] == 0x00))
965 	    return(XML_CHAR_ENCODING_UCS4_2143);
966 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
967 	    (in[2] == 0x00) && (in[3] == 0x00))
968 	    return(XML_CHAR_ENCODING_UCS4_3412);
969 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
970 	    (in[2] == 0xA7) && (in[3] == 0x94))
971 	    return(XML_CHAR_ENCODING_EBCDIC);
972 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
973 	    (in[2] == 0x78) && (in[3] == 0x6D))
974 	    return(XML_CHAR_ENCODING_UTF8);
975 	/*
976 	 * Although not part of the recommendation, we also
977 	 * attempt an "auto-recognition" of UTF-16LE and
978 	 * UTF-16BE encodings.
979 	 */
980 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
981 	    (in[2] == 0x3F) && (in[3] == 0x00))
982 	    return(XML_CHAR_ENCODING_UTF16LE);
983 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
984 	    (in[2] == 0x00) && (in[3] == 0x3F))
985 	    return(XML_CHAR_ENCODING_UTF16BE);
986     }
987     if (len >= 3) {
988 	/*
989 	 * Errata on XML-1.0 June 20 2001
990 	 * We now allow an UTF8 encoded BOM
991 	 */
992 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
993 	    (in[2] == 0xBF))
994 	    return(XML_CHAR_ENCODING_UTF8);
995     }
996     /* For UTF-16 we can recognize by the BOM */
997     if (len >= 2) {
998 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
999 	    return(XML_CHAR_ENCODING_UTF16BE);
1000 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
1001 	    return(XML_CHAR_ENCODING_UTF16LE);
1002     }
1003     return(XML_CHAR_ENCODING_NONE);
1004 }
1005 
1006 /**
1007  * xmlCleanupEncodingAliases:
1008  *
1009  * Unregisters all aliases
1010  */
1011 void
xmlCleanupEncodingAliases(void)1012 xmlCleanupEncodingAliases(void) {
1013     int i;
1014 
1015     if (xmlCharEncodingAliases == NULL)
1016 	return;
1017 
1018     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1019 	if (xmlCharEncodingAliases[i].name != NULL)
1020 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1021 	if (xmlCharEncodingAliases[i].alias != NULL)
1022 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1023     }
1024     xmlCharEncodingAliasesNb = 0;
1025     xmlCharEncodingAliasesMax = 0;
1026     xmlFree(xmlCharEncodingAliases);
1027     xmlCharEncodingAliases = NULL;
1028 }
1029 
1030 /**
1031  * xmlGetEncodingAlias:
1032  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1033  *
1034  * Lookup an encoding name for the given alias.
1035  *
1036  * Returns NULL if not found, otherwise the original name
1037  */
1038 const char *
xmlGetEncodingAlias(const char * alias)1039 xmlGetEncodingAlias(const char *alias) {
1040     int i;
1041     char upper[100];
1042 
1043     if (alias == NULL)
1044 	return(NULL);
1045 
1046     if (xmlCharEncodingAliases == NULL)
1047 	return(NULL);
1048 
1049     for (i = 0;i < 99;i++) {
1050         upper[i] = toupper(alias[i]);
1051 	if (upper[i] == 0) break;
1052     }
1053     upper[i] = 0;
1054 
1055     /*
1056      * Walk down the list looking for a definition of the alias
1057      */
1058     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1059 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1060 	    return(xmlCharEncodingAliases[i].name);
1061 	}
1062     }
1063     return(NULL);
1064 }
1065 
1066 /**
1067  * xmlAddEncodingAlias:
1068  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1069  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1070  *
1071  * Registers an alias @alias for an encoding named @name. Existing alias
1072  * will be overwritten.
1073  *
1074  * Returns 0 in case of success, -1 in case of error
1075  */
1076 int
xmlAddEncodingAlias(const char * name,const char * alias)1077 xmlAddEncodingAlias(const char *name, const char *alias) {
1078     int i;
1079     char upper[100];
1080 
1081     if ((name == NULL) || (alias == NULL))
1082 	return(-1);
1083 
1084     for (i = 0;i < 99;i++) {
1085         upper[i] = toupper(alias[i]);
1086 	if (upper[i] == 0) break;
1087     }
1088     upper[i] = 0;
1089 
1090     if (xmlCharEncodingAliases == NULL) {
1091 	xmlCharEncodingAliasesNb = 0;
1092 	xmlCharEncodingAliasesMax = 20;
1093 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1094 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1095 	if (xmlCharEncodingAliases == NULL)
1096 	    return(-1);
1097     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1098 	xmlCharEncodingAliasesMax *= 2;
1099 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1100 	      xmlRealloc(xmlCharEncodingAliases,
1101 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1102     }
1103     /*
1104      * Walk down the list looking for a definition of the alias
1105      */
1106     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1107 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1108 	    /*
1109 	     * Replace the definition.
1110 	     */
1111 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1112 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1113 	    return(0);
1114 	}
1115     }
1116     /*
1117      * Add the definition
1118      */
1119     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1120     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1121     xmlCharEncodingAliasesNb++;
1122     return(0);
1123 }
1124 
1125 /**
1126  * xmlDelEncodingAlias:
1127  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1128  *
1129  * Unregisters an encoding alias @alias
1130  *
1131  * Returns 0 in case of success, -1 in case of error
1132  */
1133 int
xmlDelEncodingAlias(const char * alias)1134 xmlDelEncodingAlias(const char *alias) {
1135     int i;
1136 
1137     if (alias == NULL)
1138 	return(-1);
1139 
1140     if (xmlCharEncodingAliases == NULL)
1141 	return(-1);
1142     /*
1143      * Walk down the list looking for a definition of the alias
1144      */
1145     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1146 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1147 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1148 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1149 	    xmlCharEncodingAliasesNb--;
1150 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1151 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1152 	    return(0);
1153 	}
1154     }
1155     return(-1);
1156 }
1157 
1158 /**
1159  * xmlParseCharEncoding:
1160  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1161  *
1162  * Compare the string to the encoding schemes already known. Note
1163  * that the comparison is case insensitive accordingly to the section
1164  * [XML] 4.3.3 Character Encoding in Entities.
1165  *
1166  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1167  * if not recognized.
1168  */
1169 xmlCharEncoding
xmlParseCharEncoding(const char * name)1170 xmlParseCharEncoding(const char* name)
1171 {
1172     const char *alias;
1173     char upper[500];
1174     int i;
1175 
1176     if (name == NULL)
1177 	return(XML_CHAR_ENCODING_NONE);
1178 
1179     /*
1180      * Do the alias resolution
1181      */
1182     alias = xmlGetEncodingAlias(name);
1183     if (alias != NULL)
1184 	name = alias;
1185 
1186     for (i = 0;i < 499;i++) {
1187         upper[i] = toupper(name[i]);
1188 	if (upper[i] == 0) break;
1189     }
1190     upper[i] = 0;
1191 
1192     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1193     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1194     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1195 
1196     /*
1197      * NOTE: if we were able to parse this, the endianness of UTF16 is
1198      *       already found and in use
1199      */
1200     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1201     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1202 
1203     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1204     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1206 
1207     /*
1208      * NOTE: if we were able to parse this, the endianness of UCS4 is
1209      *       already found and in use
1210      */
1211     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1212     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1214 
1215 
1216     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1217     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1218     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1219 
1220     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1221     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1222     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1223 
1224     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1225     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1226     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1227     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1228     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1229     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1230     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1231 
1232     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1233     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1234     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1235 
1236 #ifdef DEBUG_ENCODING
1237     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1238 #endif
1239     return(XML_CHAR_ENCODING_ERROR);
1240 }
1241 
1242 /**
1243  * xmlGetCharEncodingName:
1244  * @enc:  the encoding
1245  *
1246  * The "canonical" name for XML encoding.
1247  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1248  * Section 4.3.3  Character Encoding in Entities
1249  *
1250  * Returns the canonical name for the given encoding
1251  */
1252 
1253 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1254 xmlGetCharEncodingName(xmlCharEncoding enc) {
1255     switch (enc) {
1256         case XML_CHAR_ENCODING_ERROR:
1257 	    return(NULL);
1258         case XML_CHAR_ENCODING_NONE:
1259 	    return(NULL);
1260         case XML_CHAR_ENCODING_UTF8:
1261 	    return("UTF-8");
1262         case XML_CHAR_ENCODING_UTF16LE:
1263 	    return("UTF-16");
1264         case XML_CHAR_ENCODING_UTF16BE:
1265 	    return("UTF-16");
1266         case XML_CHAR_ENCODING_EBCDIC:
1267             return("EBCDIC");
1268         case XML_CHAR_ENCODING_UCS4LE:
1269             return("ISO-10646-UCS-4");
1270         case XML_CHAR_ENCODING_UCS4BE:
1271             return("ISO-10646-UCS-4");
1272         case XML_CHAR_ENCODING_UCS4_2143:
1273             return("ISO-10646-UCS-4");
1274         case XML_CHAR_ENCODING_UCS4_3412:
1275             return("ISO-10646-UCS-4");
1276         case XML_CHAR_ENCODING_UCS2:
1277             return("ISO-10646-UCS-2");
1278         case XML_CHAR_ENCODING_8859_1:
1279 	    return("ISO-8859-1");
1280         case XML_CHAR_ENCODING_8859_2:
1281 	    return("ISO-8859-2");
1282         case XML_CHAR_ENCODING_8859_3:
1283 	    return("ISO-8859-3");
1284         case XML_CHAR_ENCODING_8859_4:
1285 	    return("ISO-8859-4");
1286         case XML_CHAR_ENCODING_8859_5:
1287 	    return("ISO-8859-5");
1288         case XML_CHAR_ENCODING_8859_6:
1289 	    return("ISO-8859-6");
1290         case XML_CHAR_ENCODING_8859_7:
1291 	    return("ISO-8859-7");
1292         case XML_CHAR_ENCODING_8859_8:
1293 	    return("ISO-8859-8");
1294         case XML_CHAR_ENCODING_8859_9:
1295 	    return("ISO-8859-9");
1296         case XML_CHAR_ENCODING_2022_JP:
1297             return("ISO-2022-JP");
1298         case XML_CHAR_ENCODING_SHIFT_JIS:
1299             return("Shift-JIS");
1300         case XML_CHAR_ENCODING_EUC_JP:
1301             return("EUC-JP");
1302 	case XML_CHAR_ENCODING_ASCII:
1303 	    return(NULL);
1304     }
1305     return(NULL);
1306 }
1307 
1308 /************************************************************************
1309  *									*
1310  *			Char encoding handlers				*
1311  *									*
1312  ************************************************************************/
1313 
1314 
1315 /* the size should be growable, but it's not a big deal ... */
1316 #define MAX_ENCODING_HANDLERS 50
1317 static xmlCharEncodingHandlerPtr *handlers = NULL;
1318 static int nbCharEncodingHandler = 0;
1319 
1320 /*
1321  * The default is UTF-8 for XML, that's also the default used for the
1322  * parser internals, so the default encoding handler is NULL
1323  */
1324 
1325 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1326 
1327 /**
1328  * xmlNewCharEncodingHandler:
1329  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1330  * @input:  the xmlCharEncodingInputFunc to read that encoding
1331  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1332  *
1333  * Create and registers an xmlCharEncodingHandler.
1334  *
1335  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1336  */
1337 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1338 xmlNewCharEncodingHandler(const char *name,
1339                           xmlCharEncodingInputFunc input,
1340                           xmlCharEncodingOutputFunc output) {
1341     xmlCharEncodingHandlerPtr handler;
1342     const char *alias;
1343     char upper[500];
1344     int i;
1345     char *up = NULL;
1346 
1347     /*
1348      * Do the alias resolution
1349      */
1350     alias = xmlGetEncodingAlias(name);
1351     if (alias != NULL)
1352 	name = alias;
1353 
1354     /*
1355      * Keep only the uppercase version of the encoding.
1356      */
1357     if (name == NULL) {
1358         xmlEncodingErr(XML_I18N_NO_NAME,
1359 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1360 	return(NULL);
1361     }
1362     for (i = 0;i < 499;i++) {
1363         upper[i] = toupper(name[i]);
1364 	if (upper[i] == 0) break;
1365     }
1366     upper[i] = 0;
1367     up = xmlMemStrdup(upper);
1368     if (up == NULL) {
1369         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1370 	return(NULL);
1371     }
1372 
1373     /*
1374      * allocate and fill-up an handler block.
1375      */
1376     handler = (xmlCharEncodingHandlerPtr)
1377               xmlMalloc(sizeof(xmlCharEncodingHandler));
1378     if (handler == NULL) {
1379         xmlFree(up);
1380         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1381 	return(NULL);
1382     }
1383     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1384     handler->input = input;
1385     handler->output = output;
1386     handler->name = up;
1387 
1388 #ifdef LIBXML_ICONV_ENABLED
1389     handler->iconv_in = NULL;
1390     handler->iconv_out = NULL;
1391 #endif
1392 #ifdef LIBXML_ICU_ENABLED
1393     handler->uconv_in = NULL;
1394     handler->uconv_out = NULL;
1395 #endif
1396 
1397     /*
1398      * registers and returns the handler.
1399      */
1400     xmlRegisterCharEncodingHandler(handler);
1401 #ifdef DEBUG_ENCODING
1402     xmlGenericError(xmlGenericErrorContext,
1403 	    "Registered encoding handler for %s\n", name);
1404 #endif
1405     return(handler);
1406 }
1407 
1408 /**
1409  * xmlInitCharEncodingHandlers:
1410  *
1411  * DEPRECATED: This function will be made private. Call xmlInitParser to
1412  * initialize the library.
1413  *
1414  * Initialize the char encoding support, it registers the default
1415  * encoding supported.
1416  * NOTE: while public, this function usually doesn't need to be called
1417  *       in normal processing.
1418  */
1419 void
xmlInitCharEncodingHandlers(void)1420 xmlInitCharEncodingHandlers(void) {
1421     unsigned short int tst = 0x1234;
1422     unsigned char *ptr = (unsigned char *) &tst;
1423 
1424     if (handlers != NULL) return;
1425 
1426     handlers = (xmlCharEncodingHandlerPtr *)
1427         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1428 
1429     if (*ptr == 0x12) xmlLittleEndian = 0;
1430     else if (*ptr == 0x34) xmlLittleEndian = 1;
1431     else {
1432         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1433 	               "Odd problem at endianness detection\n", NULL);
1434     }
1435 
1436     if (handlers == NULL) {
1437         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1438 	return;
1439     }
1440     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1441 #ifdef LIBXML_OUTPUT_ENABLED
1442     xmlUTF16LEHandler =
1443           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1444     xmlUTF16BEHandler =
1445           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1446     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1447     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1448     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1449     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1450 #ifdef LIBXML_HTML_ENABLED
1451     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1452 #endif
1453 #else
1454     xmlUTF16LEHandler =
1455           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1456     xmlUTF16BEHandler =
1457           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1458     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1459     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1460     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1461     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1462 #endif /* LIBXML_OUTPUT_ENABLED */
1463 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464 #ifdef LIBXML_ISO8859X_ENABLED
1465     xmlRegisterCharEncodingHandlersISO8859x ();
1466 #endif
1467 #endif
1468 
1469 }
1470 
1471 /**
1472  * xmlCleanupCharEncodingHandlers:
1473  *
1474  * DEPRECATED: This function will be made private. Call xmlCleanupParser
1475  * to free global state but see the warnings there. xmlCleanupParser
1476  * should be only called once at program exit. In most cases, you don't
1477  * have call cleanup functions at all.
1478  *
1479  * Cleanup the memory allocated for the char encoding support, it
1480  * unregisters all the encoding handlers and the aliases.
1481  */
1482 void
xmlCleanupCharEncodingHandlers(void)1483 xmlCleanupCharEncodingHandlers(void) {
1484     xmlCleanupEncodingAliases();
1485 
1486     if (handlers == NULL) return;
1487 
1488     for (;nbCharEncodingHandler > 0;) {
1489         nbCharEncodingHandler--;
1490 	if (handlers[nbCharEncodingHandler] != NULL) {
1491 	    if (handlers[nbCharEncodingHandler]->name != NULL)
1492 		xmlFree(handlers[nbCharEncodingHandler]->name);
1493 	    xmlFree(handlers[nbCharEncodingHandler]);
1494 	}
1495     }
1496     xmlFree(handlers);
1497     handlers = NULL;
1498     nbCharEncodingHandler = 0;
1499     xmlDefaultCharEncodingHandler = NULL;
1500 }
1501 
1502 /**
1503  * xmlRegisterCharEncodingHandler:
1504  * @handler:  the xmlCharEncodingHandlerPtr handler block
1505  *
1506  * Register the char encoding handler, surprising, isn't it ?
1507  */
1508 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1509 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1510     if (handlers == NULL) xmlInitCharEncodingHandlers();
1511     if ((handler == NULL) || (handlers == NULL)) {
1512         xmlEncodingErr(XML_I18N_NO_HANDLER,
1513 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1514         goto free_handler;
1515     }
1516 
1517     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1518         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1519 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520 	               "MAX_ENCODING_HANDLERS");
1521         goto free_handler;
1522     }
1523     handlers[nbCharEncodingHandler++] = handler;
1524     return;
1525 
1526 free_handler:
1527     if (handler != NULL) {
1528         if (handler->name != NULL) {
1529             xmlFree(handler->name);
1530         }
1531         xmlFree(handler);
1532     }
1533 }
1534 
1535 /**
1536  * xmlGetCharEncodingHandler:
1537  * @enc:  an xmlCharEncoding value.
1538  *
1539  * Search in the registered set the handler able to read/write that encoding.
1540  *
1541  * Returns the handler or NULL if not found
1542  */
1543 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1544 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1545     xmlCharEncodingHandlerPtr handler;
1546 
1547     if (handlers == NULL) xmlInitCharEncodingHandlers();
1548     switch (enc) {
1549         case XML_CHAR_ENCODING_ERROR:
1550 	    return(NULL);
1551         case XML_CHAR_ENCODING_NONE:
1552 	    return(NULL);
1553         case XML_CHAR_ENCODING_UTF8:
1554 	    return(NULL);
1555         case XML_CHAR_ENCODING_UTF16LE:
1556 	    return(xmlUTF16LEHandler);
1557         case XML_CHAR_ENCODING_UTF16BE:
1558 	    return(xmlUTF16BEHandler);
1559         case XML_CHAR_ENCODING_EBCDIC:
1560             handler = xmlFindCharEncodingHandler("EBCDIC");
1561             if (handler != NULL) return(handler);
1562             handler = xmlFindCharEncodingHandler("ebcdic");
1563             if (handler != NULL) return(handler);
1564             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1565             if (handler != NULL) return(handler);
1566             handler = xmlFindCharEncodingHandler("IBM-037");
1567             if (handler != NULL) return(handler);
1568 	    break;
1569         case XML_CHAR_ENCODING_UCS4BE:
1570             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571             if (handler != NULL) return(handler);
1572             handler = xmlFindCharEncodingHandler("UCS-4");
1573             if (handler != NULL) return(handler);
1574             handler = xmlFindCharEncodingHandler("UCS4");
1575             if (handler != NULL) return(handler);
1576 	    break;
1577         case XML_CHAR_ENCODING_UCS4LE:
1578             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579             if (handler != NULL) return(handler);
1580             handler = xmlFindCharEncodingHandler("UCS-4");
1581             if (handler != NULL) return(handler);
1582             handler = xmlFindCharEncodingHandler("UCS4");
1583             if (handler != NULL) return(handler);
1584 	    break;
1585         case XML_CHAR_ENCODING_UCS4_2143:
1586 	    break;
1587         case XML_CHAR_ENCODING_UCS4_3412:
1588 	    break;
1589         case XML_CHAR_ENCODING_UCS2:
1590             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591             if (handler != NULL) return(handler);
1592             handler = xmlFindCharEncodingHandler("UCS-2");
1593             if (handler != NULL) return(handler);
1594             handler = xmlFindCharEncodingHandler("UCS2");
1595             if (handler != NULL) return(handler);
1596 	    break;
1597 
1598 	    /*
1599 	     * We used to keep ISO Latin encodings native in the
1600 	     * generated data. This led to so many problems that
1601 	     * this has been removed. One can still change this
1602 	     * back by registering no-ops encoders for those
1603 	     */
1604         case XML_CHAR_ENCODING_8859_1:
1605 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1606 	    if (handler != NULL) return(handler);
1607 	    break;
1608         case XML_CHAR_ENCODING_8859_2:
1609 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1610 	    if (handler != NULL) return(handler);
1611 	    break;
1612         case XML_CHAR_ENCODING_8859_3:
1613 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1614 	    if (handler != NULL) return(handler);
1615 	    break;
1616         case XML_CHAR_ENCODING_8859_4:
1617 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1618 	    if (handler != NULL) return(handler);
1619 	    break;
1620         case XML_CHAR_ENCODING_8859_5:
1621 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1622 	    if (handler != NULL) return(handler);
1623 	    break;
1624         case XML_CHAR_ENCODING_8859_6:
1625 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1626 	    if (handler != NULL) return(handler);
1627 	    break;
1628         case XML_CHAR_ENCODING_8859_7:
1629 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1630 	    if (handler != NULL) return(handler);
1631 	    break;
1632         case XML_CHAR_ENCODING_8859_8:
1633 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1634 	    if (handler != NULL) return(handler);
1635 	    break;
1636         case XML_CHAR_ENCODING_8859_9:
1637 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1638 	    if (handler != NULL) return(handler);
1639 	    break;
1640 
1641 
1642         case XML_CHAR_ENCODING_2022_JP:
1643             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1644             if (handler != NULL) return(handler);
1645 	    break;
1646         case XML_CHAR_ENCODING_SHIFT_JIS:
1647             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1648             if (handler != NULL) return(handler);
1649             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1650             if (handler != NULL) return(handler);
1651             handler = xmlFindCharEncodingHandler("Shift_JIS");
1652             if (handler != NULL) return(handler);
1653 	    break;
1654         case XML_CHAR_ENCODING_EUC_JP:
1655             handler = xmlFindCharEncodingHandler("EUC-JP");
1656             if (handler != NULL) return(handler);
1657 	    break;
1658 	default:
1659 	    break;
1660     }
1661 
1662 #ifdef DEBUG_ENCODING
1663     xmlGenericError(xmlGenericErrorContext,
1664 	    "No handler found for encoding %d\n", enc);
1665 #endif
1666     return(NULL);
1667 }
1668 
1669 /**
1670  * xmlFindCharEncodingHandler:
1671  * @name:  a string describing the char encoding.
1672  *
1673  * Search in the registered set the handler able to read/write that encoding.
1674  *
1675  * Returns the handler or NULL if not found
1676  */
1677 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1678 xmlFindCharEncodingHandler(const char *name) {
1679     const char *nalias;
1680     const char *norig;
1681     xmlCharEncoding alias;
1682 #ifdef LIBXML_ICONV_ENABLED
1683     xmlCharEncodingHandlerPtr enc;
1684     iconv_t icv_in, icv_out;
1685 #endif /* LIBXML_ICONV_ENABLED */
1686 #ifdef LIBXML_ICU_ENABLED
1687     xmlCharEncodingHandlerPtr encu;
1688     uconv_t *ucv_in, *ucv_out;
1689 #endif /* LIBXML_ICU_ENABLED */
1690     char upper[100];
1691     int i;
1692 
1693     if (handlers == NULL) xmlInitCharEncodingHandlers();
1694     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1695     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1696 
1697     /*
1698      * Do the alias resolution
1699      */
1700     norig = name;
1701     nalias = xmlGetEncodingAlias(name);
1702     if (nalias != NULL)
1703 	name = nalias;
1704 
1705     /*
1706      * Check first for directly registered encoding names
1707      */
1708     for (i = 0;i < 99;i++) {
1709         upper[i] = toupper(name[i]);
1710 	if (upper[i] == 0) break;
1711     }
1712     upper[i] = 0;
1713 
1714     if (handlers != NULL) {
1715         for (i = 0;i < nbCharEncodingHandler; i++) {
1716             if (!strcmp(upper, handlers[i]->name)) {
1717 #ifdef DEBUG_ENCODING
1718                 xmlGenericError(xmlGenericErrorContext,
1719                         "Found registered handler for encoding %s\n", name);
1720 #endif
1721                 return(handlers[i]);
1722             }
1723         }
1724     }
1725 
1726 #ifdef LIBXML_ICONV_ENABLED
1727     /* check whether iconv can handle this */
1728     icv_in = iconv_open("UTF-8", name);
1729     icv_out = iconv_open(name, "UTF-8");
1730     if (icv_in == (iconv_t) -1) {
1731         icv_in = iconv_open("UTF-8", upper);
1732     }
1733     if (icv_out == (iconv_t) -1) {
1734 	icv_out = iconv_open(upper, "UTF-8");
1735     }
1736     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1737 	    enc = (xmlCharEncodingHandlerPtr)
1738 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1739 	    if (enc == NULL) {
1740 	        iconv_close(icv_in);
1741 	        iconv_close(icv_out);
1742 		return(NULL);
1743 	    }
1744             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1745 	    enc->name = xmlMemStrdup(name);
1746 	    enc->input = NULL;
1747 	    enc->output = NULL;
1748 	    enc->iconv_in = icv_in;
1749 	    enc->iconv_out = icv_out;
1750 #ifdef DEBUG_ENCODING
1751             xmlGenericError(xmlGenericErrorContext,
1752 		    "Found iconv handler for encoding %s\n", name);
1753 #endif
1754 	    return enc;
1755     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1756 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1757 		    "iconv : problems with filters for '%s'\n", name);
1758 	    if (icv_in != (iconv_t) -1)
1759 		iconv_close(icv_in);
1760 	    else
1761 		iconv_close(icv_out);
1762     }
1763 #endif /* LIBXML_ICONV_ENABLED */
1764 #ifdef LIBXML_ICU_ENABLED
1765     /* check whether icu can handle this */
1766     ucv_in = openIcuConverter(name, 1);
1767     ucv_out = openIcuConverter(name, 0);
1768     if (ucv_in != NULL && ucv_out != NULL) {
1769 	    encu = (xmlCharEncodingHandlerPtr)
1770 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1771 	    if (encu == NULL) {
1772                 closeIcuConverter(ucv_in);
1773                 closeIcuConverter(ucv_out);
1774 		return(NULL);
1775 	    }
1776             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1777 	    encu->name = xmlMemStrdup(name);
1778 	    encu->input = NULL;
1779 	    encu->output = NULL;
1780 	    encu->uconv_in = ucv_in;
1781 	    encu->uconv_out = ucv_out;
1782 #ifdef DEBUG_ENCODING
1783             xmlGenericError(xmlGenericErrorContext,
1784 		    "Found ICU converter handler for encoding %s\n", name);
1785 #endif
1786 	    return encu;
1787     } else if (ucv_in != NULL || ucv_out != NULL) {
1788             closeIcuConverter(ucv_in);
1789             closeIcuConverter(ucv_out);
1790 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1791 		    "ICU converter : problems with filters for '%s'\n", name);
1792     }
1793 #endif /* LIBXML_ICU_ENABLED */
1794 
1795 #ifdef DEBUG_ENCODING
1796     xmlGenericError(xmlGenericErrorContext,
1797 	    "No handler found for encoding %s\n", name);
1798 #endif
1799 
1800     /*
1801      * Fallback using the canonical names
1802      */
1803     alias = xmlParseCharEncoding(norig);
1804     if (alias != XML_CHAR_ENCODING_ERROR) {
1805         const char* canon;
1806         canon = xmlGetCharEncodingName(alias);
1807         if ((canon != NULL) && (strcmp(name, canon))) {
1808 	    return(xmlFindCharEncodingHandler(canon));
1809         }
1810     }
1811 
1812     /* If "none of the above", give up */
1813     return(NULL);
1814 }
1815 
1816 /************************************************************************
1817  *									*
1818  *		ICONV based generic conversion functions		*
1819  *									*
1820  ************************************************************************/
1821 
1822 #ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Run one iconv() conversion from @in to @out and translate its outcome
 * into the return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (E2BIG) or if one of the pointers is NULL, or
 *     -2 if the transcoding fails (EILSEQ: the input is not valid or
 *        the result can't be represented in the target encoding), or
 *     -3 for any other failure, e.g. an incomplete sequence at the end
 *        of the input (EINVAL).
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced
 * (0 when called with a NULL pointer).
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);
    /* iconv() leaves the unprocessed byte counts in srcLeft/dstLeft */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != (size_t) -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL and everything else */
    return -3;
}
1883 #endif /* LIBXML_ICONV_ENABLED */
1884 
1885 /************************************************************************
1886  *									*
1887  *		ICU based generic conversion functions		*
1888  *									*
1889  ************************************************************************/
1890 
1891 #ifdef LIBXML_ICU_ENABLED
1892 /**
1893  * xmlUconvWrapper:
1894  * @cd: ICU uconverter data structure
1895  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1896  * @out:  a pointer to an array of bytes to store the result
1897  * @outlen:  the length of @out
1898  * @in:  a pointer to an array of input bytes
1899  * @inlen:  the length of @in
1900  * @flush: if true, indicates end of input
1901  *
1902  * Returns 0 if success, or
1903  *     -1 by lack of space, or
1904  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1905  *        the result of transformation can't fit into the encoding we want), or
1906  *     -3 if there the last byte can't form a single output char.
1907  *
1908  * The value of @inlen after return is the number of octets consumed
1909  *     as the return value is positive, else unpredictable.
1910  * The value of @outlen after return is the number of octets produced.
1911  */
1912 static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1913 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1914                 const unsigned char *in, int *inlen, int flush) {
1915     const char *ucv_in = (const char *) in;
1916     char *ucv_out = (char *) out;
1917     UErrorCode err = U_ZERO_ERROR;
1918 
1919     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1920         if (outlen != NULL) *outlen = 0;
1921         return(-1);
1922     }
1923 
1924     if (toUnicode) {
1925         /* encoding => UTF-16 => UTF-8 */
1926         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1927                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1928                        &cd->pivot_source, &cd->pivot_target,
1929                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1930     } else {
1931         /* UTF-8 => UTF-16 => encoding */
1932         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1933                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1934                        &cd->pivot_source, &cd->pivot_target,
1935                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1936     }
1937     *inlen = ucv_in - (const char*) in;
1938     *outlen = ucv_out - (char *) out;
1939     if (U_SUCCESS(err)) {
1940         /* reset pivot buf if this is the last call for input (flush==TRUE) */
1941         if (flush)
1942             cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1943         return 0;
1944     }
1945     if (err == U_BUFFER_OVERFLOW_ERROR)
1946         return -1;
1947     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1948         return -2;
1949     return -3;
1950 }
1951 #endif /* LIBXML_ICU_ENABLED */
1952 
1953 /************************************************************************
1954  *									*
1955  *		The real API used by libxml for on-the-fly conversion	*
1956  *									*
1957  ************************************************************************/
1958 
1959 /**
1960  * xmlEncInputChunk:
1961  * @handler:  encoding handler
1962  * @out:  a pointer to an array of bytes to store the result
1963  * @outlen:  the length of @out
1964  * @in:  a pointer to an array of input bytes
1965  * @inlen:  the length of @in
1966  * @flush:  flush (ICU-related)
1967  *
1968  * Returns 0 if success, or
1969  *     -1 by lack of space, or
1970  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1971  *        the result of transformation can't fit into the encoding we want), or
1972  *     -3 if there the last byte can't form a single output char.
1973  *
1974  * The value of @inlen after return is the number of octets consumed
1975  *     as the return value is 0, else unpredictable.
1976  * The value of @outlen after return is the number of octets produced.
1977  */
1978 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1979 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1980                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1981     int ret;
1982     (void)flush;
1983 
1984     if (handler->input != NULL) {
1985         ret = handler->input(out, outlen, in, inlen);
1986         if (ret > 0)
1987            ret = 0;
1988     }
1989 #ifdef LIBXML_ICONV_ENABLED
1990     else if (handler->iconv_in != NULL) {
1991         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1992     }
1993 #endif /* LIBXML_ICONV_ENABLED */
1994 #ifdef LIBXML_ICU_ENABLED
1995     else if (handler->uconv_in != NULL) {
1996         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1997                               flush);
1998     }
1999 #endif /* LIBXML_ICU_ENABLED */
2000     else {
2001         *outlen = 0;
2002         *inlen = 0;
2003         ret = -2;
2004     }
2005 
2006     return(ret);
2007 }
2008 
2009 /**
2010  * xmlEncOutputChunk:
2011  * @handler:  encoding handler
2012  * @out:  a pointer to an array of bytes to store the result
2013  * @outlen:  the length of @out
2014  * @in:  a pointer to an array of input bytes
2015  * @inlen:  the length of @in
2016  *
2017  * Returns 0 if success, or
2018  *     -1 by lack of space, or
2019  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2020  *        the result of transformation can't fit into the encoding we want), or
2021  *     -3 if there the last byte can't form a single output char.
2022  *     -4 if no output function was found.
2023  *
2024  * The value of @inlen after return is the number of octets consumed
2025  *     as the return value is 0, else unpredictable.
2026  * The value of @outlen after return is the number of octets produced.
2027  */
2028 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2029 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030                   int *outlen, const unsigned char *in, int *inlen) {
2031     int ret;
2032 
2033     if (handler->output != NULL) {
2034         ret = handler->output(out, outlen, in, inlen);
2035         if (ret > 0)
2036            ret = 0;
2037     }
2038 #ifdef LIBXML_ICONV_ENABLED
2039     else if (handler->iconv_out != NULL) {
2040         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2041     }
2042 #endif /* LIBXML_ICONV_ENABLED */
2043 #ifdef LIBXML_ICU_ENABLED
2044     else if (handler->uconv_out != NULL) {
2045         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2046                               1);
2047     }
2048 #endif /* LIBXML_ICU_ENABLED */
2049     else {
2050         *outlen = 0;
2051         *inlen = 0;
2052         ret = -4;
2053     }
2054 
2055     return(ret);
2056 }
2057 
2058 /**
2059  * xmlCharEncFirstLineInt:
2060  * @handler:	char encoding transformation data structure
2061  * @out:  an xmlBuffer for the output.
2062  * @in:  an xmlBuffer for the input
2063  * @len:  number of bytes to convert for the first line, or -1
2064  *
2065  * Front-end for the encoding handler input function, but handle only
2066  * the very first line, i.e. limit itself to 45 chars.
2067  *
2068  * Returns the number of byte written if success, or
2069  *     -1 general error
2070  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2071  *        the result of transformation can't fit into the encoding we want), or
2072  */
2073 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2074 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2075                        xmlBufferPtr in, int len) {
2076     int ret;
2077     int written;
2078     int toconv;
2079 
2080     if (handler == NULL) return(-1);
2081     if (out == NULL) return(-1);
2082     if (in == NULL) return(-1);
2083 
2084     /* calculate space available */
2085     written = out->size - out->use - 1; /* count '\0' */
2086     toconv = in->use;
2087     /*
2088      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089      * 45 chars should be sufficient to reach the end of the encoding
2090      * declaration without going too far inside the document content.
2091      * on UTF-16 this means 90bytes, on UCS4 this means 180
2092      * The actual value depending on guessed encoding is passed as @len
2093      * if provided
2094      */
2095     if (len >= 0) {
2096         if (toconv > len)
2097             toconv = len;
2098     } else {
2099         if (toconv > 180)
2100             toconv = 180;
2101     }
2102     if (toconv * 2 >= written) {
2103         xmlBufferGrow(out, toconv * 2);
2104 	written = out->size - out->use - 1;
2105     }
2106 
2107     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2108                            in->content, &toconv, 0);
2109     xmlBufferShrink(in, toconv);
2110     out->use += written;
2111     out->content[out->use] = 0;
2112     if (ret == -1) ret = -3;
2113 
2114 #ifdef DEBUG_ENCODING
2115     switch (ret) {
2116         case 0:
2117 	    xmlGenericError(xmlGenericErrorContext,
2118 		    "converted %d bytes to %d bytes of input\n",
2119 	            toconv, written);
2120 	    break;
2121         case -1:
2122 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2123 	            toconv, written, in->use);
2124 	    break;
2125         case -2:
2126 	    xmlGenericError(xmlGenericErrorContext,
2127 		    "input conversion failed due to input error\n");
2128 	    break;
2129         case -3:
2130 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2131 	            toconv, written, in->use);
2132 	    break;
2133 	default:
2134 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2135     }
2136 #endif /* DEBUG_ENCODING */
2137     /*
2138      * Ignore when input buffer is not on a boundary
2139      */
2140     if (ret == -3) ret = 0;
2141     if (ret == -1) ret = 0;
2142     return(written ? written : ret);
2143 }
2144 
2145 /**
2146  * xmlCharEncFirstLine:
2147  * @handler:	char encoding transformation data structure
2148  * @out:  an xmlBuffer for the output.
2149  * @in:  an xmlBuffer for the input
2150  *
2151  * Front-end for the encoding handler input function, but handle only
2152  * the very first line, i.e. limit itself to 45 chars.
2153  *
2154  * Returns the number of byte written if success, or
2155  *     -1 general error
2156  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2157  *        the result of transformation can't fit into the encoding we want), or
2158  */
2159 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2160 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2161                  xmlBufferPtr in) {
2162     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2163 }
2164 
2165 /**
2166  * xmlCharEncFirstLineInput:
2167  * @input: a parser input buffer
2168  * @len:  number of bytes to convert for the first line, or -1
2169  *
2170  * Front-end for the encoding handler input function, but handle only
2171  * the very first line. Point is that this is based on autodetection
2172  * of the encoding and once that first line is converted we may find
2173  * out that a different decoder is needed to process the input.
2174  *
2175  * Returns the number of byte written if success, or
2176  *     -1 general error
2177  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2178  *        the result of transformation can't fit into the encoding we want), or
2179  */
2180 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2181 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2182 {
2183     int ret;
2184     size_t written;
2185     size_t toconv;
2186     int c_in;
2187     int c_out;
2188     xmlBufPtr in;
2189     xmlBufPtr out;
2190 
2191     if ((input == NULL) || (input->encoder == NULL) ||
2192         (input->buffer == NULL) || (input->raw == NULL))
2193         return (-1);
2194     out = input->buffer;
2195     in = input->raw;
2196 
2197     toconv = xmlBufUse(in);
2198     if (toconv == 0)
2199         return (0);
2200     written = xmlBufAvail(out);
2201     /*
2202      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2203      * 45 chars should be sufficient to reach the end of the encoding
2204      * declaration without going too far inside the document content.
2205      * on UTF-16 this means 90bytes, on UCS4 this means 180
2206      * The actual value depending on guessed encoding is passed as @len
2207      * if provided
2208      */
2209     if (len >= 0) {
2210         if (toconv > (unsigned int) len)
2211             toconv = len;
2212     } else {
2213         if (toconv > 180)
2214             toconv = 180;
2215     }
2216     if (toconv * 2 >= written) {
2217         xmlBufGrow(out, toconv * 2);
2218         written = xmlBufAvail(out);
2219     }
2220     if (written > 360)
2221         written = 360;
2222 
2223     c_in = toconv;
2224     c_out = written;
2225     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2226                            xmlBufContent(in), &c_in, 0);
2227     xmlBufShrink(in, c_in);
2228     xmlBufAddLen(out, c_out);
2229     if (ret == -1)
2230         ret = -3;
2231 
2232     switch (ret) {
2233         case 0:
2234 #ifdef DEBUG_ENCODING
2235             xmlGenericError(xmlGenericErrorContext,
2236                             "converted %d bytes to %d bytes of input\n",
2237                             c_in, c_out);
2238 #endif
2239             break;
2240         case -1:
2241 #ifdef DEBUG_ENCODING
2242             xmlGenericError(xmlGenericErrorContext,
2243                          "converted %d bytes to %d bytes of input, %d left\n",
2244                             c_in, c_out, (int)xmlBufUse(in));
2245 #endif
2246             break;
2247         case -3:
2248 #ifdef DEBUG_ENCODING
2249             xmlGenericError(xmlGenericErrorContext,
2250                         "converted %d bytes to %d bytes of input, %d left\n",
2251                             c_in, c_out, (int)xmlBufUse(in));
2252 #endif
2253             break;
2254         case -2: {
2255             char buf[50];
2256             const xmlChar *content = xmlBufContent(in);
2257 
2258 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2259 		     content[0], content[1],
2260 		     content[2], content[3]);
2261 	    buf[49] = 0;
2262 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2263 		    "input conversion failed due to input error, bytes %s\n",
2264 		           buf);
2265         }
2266     }
2267     /*
2268      * Ignore when input buffer is not on a boundary
2269      */
2270     if (ret == -3) ret = 0;
2271     if (ret == -1) ret = 0;
2272     return(c_out ? c_out : ret);
2273 }
2274 
2275 /**
2276  * xmlCharEncInput:
2277  * @input: a parser input buffer
2278  * @flush: try to flush all the raw buffer
2279  *
2280  * Generic front-end for the encoding handler on parser input
2281  *
2282  * Returns the number of byte written if success, or
2283  *     -1 general error
2284  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2285  *        the result of transformation can't fit into the encoding we want), or
2286  */
2287 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2288 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2289 {
2290     int ret;
2291     size_t written;
2292     size_t toconv;
2293     int c_in;
2294     int c_out;
2295     xmlBufPtr in;
2296     xmlBufPtr out;
2297 
2298     if ((input == NULL) || (input->encoder == NULL) ||
2299         (input->buffer == NULL) || (input->raw == NULL))
2300         return (-1);
2301     out = input->buffer;
2302     in = input->raw;
2303 
2304     toconv = xmlBufUse(in);
2305     if (toconv == 0)
2306         return (0);
2307     if ((toconv > 64 * 1024) && (flush == 0))
2308         toconv = 64 * 1024;
2309     written = xmlBufAvail(out);
2310     if (toconv * 2 >= written) {
2311         xmlBufGrow(out, toconv * 2);
2312         written = xmlBufAvail(out);
2313     }
2314     if ((written > 128 * 1024) && (flush == 0))
2315         written = 128 * 1024;
2316 
2317     c_in = toconv;
2318     c_out = written;
2319     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2320                            xmlBufContent(in), &c_in, flush);
2321     xmlBufShrink(in, c_in);
2322     xmlBufAddLen(out, c_out);
2323     if (ret == -1)
2324         ret = -3;
2325 
2326     switch (ret) {
2327         case 0:
2328 #ifdef DEBUG_ENCODING
2329             xmlGenericError(xmlGenericErrorContext,
2330                             "converted %d bytes to %d bytes of input\n",
2331                             c_in, c_out);
2332 #endif
2333             break;
2334         case -1:
2335 #ifdef DEBUG_ENCODING
2336             xmlGenericError(xmlGenericErrorContext,
2337                          "converted %d bytes to %d bytes of input, %d left\n",
2338                             c_in, c_out, (int)xmlBufUse(in));
2339 #endif
2340             break;
2341         case -3:
2342 #ifdef DEBUG_ENCODING
2343             xmlGenericError(xmlGenericErrorContext,
2344                         "converted %d bytes to %d bytes of input, %d left\n",
2345                             c_in, c_out, (int)xmlBufUse(in));
2346 #endif
2347             break;
2348         case -2: {
2349             char buf[50];
2350             const xmlChar *content = xmlBufContent(in);
2351 
2352 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353 		     content[0], content[1],
2354 		     content[2], content[3]);
2355 	    buf[49] = 0;
2356 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2357 		    "input conversion failed due to input error, bytes %s\n",
2358 		           buf);
2359         }
2360     }
2361     /*
2362      * Ignore when input buffer is not on a boundary
2363      */
2364     if (ret == -3)
2365         ret = 0;
2366     return (c_out? c_out : ret);
2367 }
2368 
2369 /**
2370  * xmlCharEncInFunc:
2371  * @handler:	char encoding transformation data structure
2372  * @out:  an xmlBuffer for the output.
2373  * @in:  an xmlBuffer for the input
2374  *
2375  * Generic front-end for the encoding handler input function
2376  *
2377  * Returns the number of byte written if success, or
2378  *     -1 general error
2379  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2380  *        the result of transformation can't fit into the encoding we want), or
2381  */
2382 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2383 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2384                  xmlBufferPtr in)
2385 {
2386     int ret;
2387     int written;
2388     int toconv;
2389 
2390     if (handler == NULL)
2391         return (-1);
2392     if (out == NULL)
2393         return (-1);
2394     if (in == NULL)
2395         return (-1);
2396 
2397     toconv = in->use;
2398     if (toconv == 0)
2399         return (0);
2400     written = out->size - out->use -1; /* count '\0' */
2401     if (toconv * 2 >= written) {
2402         xmlBufferGrow(out, out->size + toconv * 2);
2403         written = out->size - out->use - 1;
2404     }
2405     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2406                            in->content, &toconv, 1);
2407     xmlBufferShrink(in, toconv);
2408     out->use += written;
2409     out->content[out->use] = 0;
2410     if (ret == -1)
2411         ret = -3;
2412 
2413     switch (ret) {
2414         case 0:
2415 #ifdef DEBUG_ENCODING
2416             xmlGenericError(xmlGenericErrorContext,
2417                             "converted %d bytes to %d bytes of input\n",
2418                             toconv, written);
2419 #endif
2420             break;
2421         case -1:
2422 #ifdef DEBUG_ENCODING
2423             xmlGenericError(xmlGenericErrorContext,
2424                          "converted %d bytes to %d bytes of input, %d left\n",
2425                             toconv, written, in->use);
2426 #endif
2427             break;
2428         case -3:
2429 #ifdef DEBUG_ENCODING
2430             xmlGenericError(xmlGenericErrorContext,
2431                         "converted %d bytes to %d bytes of input, %d left\n",
2432                             toconv, written, in->use);
2433 #endif
2434             break;
2435         case -2: {
2436             char buf[50];
2437 
2438 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2439 		     in->content[0], in->content[1],
2440 		     in->content[2], in->content[3]);
2441 	    buf[49] = 0;
2442 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2443 		    "input conversion failed due to input error, bytes %s\n",
2444 		           buf);
2445         }
2446     }
2447     /*
2448      * Ignore when input buffer is not on a boundary
2449      */
2450     if (ret == -3)
2451         ret = 0;
2452     return (written? written : ret);
2453 }
2454 
2455 #ifdef LIBXML_OUTPUT_ENABLED
2456 /**
2457  * xmlCharEncOutput:
2458  * @output: a parser output buffer
2459  * @init: is this an initialization call without data
2460  *
2461  * Generic front-end for the encoding handler on parser output
2462  * a first call with @init == 1 has to be made first to initiate the
2463  * output in case of non-stateless encoding needing to initiate their
2464  * state or the output (like the BOM in UTF16).
2465  * In case of UTF8 sequence conversion errors for the given encoder,
2466  * the content will be automatically remapped to a CharRef sequence.
2467  *
2468  * Returns the number of byte written if success, or
2469  *     -1 general error
2470  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2471  *        the result of transformation can't fit into the encoding we want), or
2472  */
2473 int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2474 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2475 {
2476     int ret;
2477     size_t written;
2478     int writtentot = 0;
2479     size_t toconv;
2480     int c_in;
2481     int c_out;
2482     xmlBufPtr in;
2483     xmlBufPtr out;
2484 
2485     if ((output == NULL) || (output->encoder == NULL) ||
2486         (output->buffer == NULL) || (output->conv == NULL))
2487         return (-1);
2488     out = output->conv;
2489     in = output->buffer;
2490 
2491 retry:
2492 
2493     written = xmlBufAvail(out);
2494 
2495     /*
2496      * First specific handling of the initialization call
2497      */
2498     if (init) {
2499         c_in = 0;
2500         c_out = written;
2501         /* TODO: Check return value. */
2502         xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2503                           NULL, &c_in);
2504         xmlBufAddLen(out, c_out);
2505 #ifdef DEBUG_ENCODING
2506 	xmlGenericError(xmlGenericErrorContext,
2507 		"initialized encoder\n");
2508 #endif
2509         return(c_out);
2510     }
2511 
2512     /*
2513      * Conversion itself.
2514      */
2515     toconv = xmlBufUse(in);
2516     if (toconv == 0)
2517         return (writtentot);
2518     if (toconv > 64 * 1024)
2519         toconv = 64 * 1024;
2520     if (toconv * 4 >= written) {
2521         xmlBufGrow(out, toconv * 4);
2522         written = xmlBufAvail(out);
2523     }
2524     if (written > 256 * 1024)
2525         written = 256 * 1024;
2526 
2527     c_in = toconv;
2528     c_out = written;
2529     ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2530                             xmlBufContent(in), &c_in);
2531     xmlBufShrink(in, c_in);
2532     xmlBufAddLen(out, c_out);
2533     writtentot += c_out;
2534     if (ret == -1) {
2535         if (c_out > 0) {
2536             /* Can be a limitation of iconv or uconv */
2537             goto retry;
2538         }
2539         ret = -3;
2540     }
2541 
2542     /*
2543      * Attempt to handle error cases
2544      */
2545     switch (ret) {
2546         case 0:
2547 #ifdef DEBUG_ENCODING
2548 	    xmlGenericError(xmlGenericErrorContext,
2549 		    "converted %d bytes to %d bytes of output\n",
2550 	            c_in, c_out);
2551 #endif
2552 	    break;
2553         case -1:
2554 #ifdef DEBUG_ENCODING
2555 	    xmlGenericError(xmlGenericErrorContext,
2556 		    "output conversion failed by lack of space\n");
2557 #endif
2558 	    break;
2559         case -3:
2560 #ifdef DEBUG_ENCODING
2561 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2562 	            c_in, c_out, (int) xmlBufUse(in));
2563 #endif
2564 	    break;
2565         case -4:
2566             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2567                            "xmlCharEncOutFunc: no output function !\n", NULL);
2568             ret = -1;
2569             break;
2570         case -2: {
2571 	    xmlChar charref[20];
2572 	    int len = (int) xmlBufUse(in);
2573             xmlChar *content = xmlBufContent(in);
2574 	    int cur, charrefLen;
2575 
2576 	    cur = xmlGetUTF8Char(content, &len);
2577 	    if (cur <= 0)
2578                 break;
2579 
2580 #ifdef DEBUG_ENCODING
2581             xmlGenericError(xmlGenericErrorContext,
2582                     "handling output conversion error\n");
2583             xmlGenericError(xmlGenericErrorContext,
2584                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2585                     content[0], content[1],
2586                     content[2], content[3]);
2587 #endif
2588             /*
2589              * Removes the UTF8 sequence, and replace it by a charref
2590              * and continue the transcoding phase, hoping the error
2591              * did not mangle the encoder state.
2592              */
2593             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2594                              "&#%d;", cur);
2595             xmlBufShrink(in, len);
2596             xmlBufGrow(out, charrefLen * 4);
2597             c_out = xmlBufAvail(out);
2598             c_in = charrefLen;
2599             ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2600                                     charref, &c_in);
2601 
2602 	    if ((ret < 0) || (c_in != charrefLen)) {
2603 		char buf[50];
2604 
2605 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2606 			 content[0], content[1],
2607 			 content[2], content[3]);
2608 		buf[49] = 0;
2609 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2610 		    "output conversion failed due to conv error, bytes %s\n",
2611 			       buf);
2612 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2613 		    content[0] = ' ';
2614                 break;
2615 	    }
2616 
2617             xmlBufAddLen(out, c_out);
2618             writtentot += c_out;
2619             goto retry;
2620 	}
2621     }
2622     return(writtentot ? writtentot : ret);
2623 }
2624 #endif
2625 
2626 /**
2627  * xmlCharEncOutFunc:
2628  * @handler:	char encoding transformation data structure
2629  * @out:  an xmlBuffer for the output.
2630  * @in:  an xmlBuffer for the input
2631  *
2632  * Generic front-end for the encoding handler output function
2633  * a first call with @in == NULL has to be made first to initiate the
2634  * output in case of non-stateless encoding needing to initiate their
2635  * state or the output (like the BOM in UTF16).
2636  * In case of UTF8 sequence conversion errors for the given encoder,
2637  * the content will be automatically remapped to a CharRef sequence.
2638  *
2639  * Returns the number of byte written if success, or
2640  *     -1 general error
2641  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2642  *        the result of transformation can't fit into the encoding we want), or
2643  */
2644 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2645 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2646                   xmlBufferPtr in) {
2647     int ret;
2648     int written;
2649     int writtentot = 0;
2650     int toconv;
2651 
2652     if (handler == NULL) return(-1);
2653     if (out == NULL) return(-1);
2654 
2655 retry:
2656 
2657     written = out->size - out->use;
2658 
2659     if (written > 0)
2660 	written--; /* Gennady: count '/0' */
2661 
2662     /*
2663      * First specific handling of in = NULL, i.e. the initialization call
2664      */
2665     if (in == NULL) {
2666         toconv = 0;
2667         /* TODO: Check return value. */
2668         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2669                           NULL, &toconv);
2670         out->use += written;
2671         out->content[out->use] = 0;
2672 #ifdef DEBUG_ENCODING
2673 	xmlGenericError(xmlGenericErrorContext,
2674 		"initialized encoder\n");
2675 #endif
2676         return(0);
2677     }
2678 
2679     /*
2680      * Conversion itself.
2681      */
2682     toconv = in->use;
2683     if (toconv == 0)
2684 	return(0);
2685     if (toconv * 4 >= written) {
2686         xmlBufferGrow(out, toconv * 4);
2687 	written = out->size - out->use - 1;
2688     }
2689     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2690                             in->content, &toconv);
2691     xmlBufferShrink(in, toconv);
2692     out->use += written;
2693     writtentot += written;
2694     out->content[out->use] = 0;
2695     if (ret == -1) {
2696         if (written > 0) {
2697             /* Can be a limitation of iconv or uconv */
2698             goto retry;
2699         }
2700         ret = -3;
2701     }
2702 
2703     /*
2704      * Attempt to handle error cases
2705      */
2706     switch (ret) {
2707         case 0:
2708 #ifdef DEBUG_ENCODING
2709 	    xmlGenericError(xmlGenericErrorContext,
2710 		    "converted %d bytes to %d bytes of output\n",
2711 	            toconv, written);
2712 #endif
2713 	    break;
2714         case -1:
2715 #ifdef DEBUG_ENCODING
2716 	    xmlGenericError(xmlGenericErrorContext,
2717 		    "output conversion failed by lack of space\n");
2718 #endif
2719 	    break;
2720         case -3:
2721 #ifdef DEBUG_ENCODING
2722 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2723 	            toconv, written, in->use);
2724 #endif
2725 	    break;
2726         case -4:
2727 	    xmlEncodingErr(XML_I18N_NO_OUTPUT,
2728 		           "xmlCharEncOutFunc: no output function !\n", NULL);
2729 	    ret = -1;
2730             break;
2731         case -2: {
2732 	    xmlChar charref[20];
2733 	    int len = in->use;
2734 	    const xmlChar *utf = (const xmlChar *) in->content;
2735 	    int cur, charrefLen;
2736 
2737 	    cur = xmlGetUTF8Char(utf, &len);
2738 	    if (cur <= 0)
2739                 break;
2740 
2741 #ifdef DEBUG_ENCODING
2742             xmlGenericError(xmlGenericErrorContext,
2743                     "handling output conversion error\n");
2744             xmlGenericError(xmlGenericErrorContext,
2745                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2746                     in->content[0], in->content[1],
2747                     in->content[2], in->content[3]);
2748 #endif
2749             /*
2750              * Removes the UTF8 sequence, and replace it by a charref
2751              * and continue the transcoding phase, hoping the error
2752              * did not mangle the encoder state.
2753              */
2754             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2755                              "&#%d;", cur);
2756             xmlBufferShrink(in, len);
2757             xmlBufferGrow(out, charrefLen * 4);
2758 	    written = out->size - out->use - 1;
2759             toconv = charrefLen;
2760             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2761                                     charref, &toconv);
2762 
2763 	    if ((ret < 0) || (toconv != charrefLen)) {
2764 		char buf[50];
2765 
2766 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2767 			 in->content[0], in->content[1],
2768 			 in->content[2], in->content[3]);
2769 		buf[49] = 0;
2770 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2771 		    "output conversion failed due to conv error, bytes %s\n",
2772 			       buf);
2773 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2774 		    in->content[0] = ' ';
2775 	        break;
2776 	    }
2777 
2778             out->use += written;
2779             writtentot += written;
2780             out->content[out->use] = 0;
2781             goto retry;
2782 	}
2783     }
2784     return(writtentot ? writtentot : ret);
2785 }
2786 
2787 /**
2788  * xmlCharEncCloseFunc:
2789  * @handler:	char encoding transformation data structure
2790  *
2791  * Generic front-end for encoding handler close function
2792  *
2793  * Returns 0 if success, or -1 in case of error
2794  */
2795 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2796 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2797     int ret = 0;
2798     int tofree = 0;
2799     int i, handler_in_list = 0;
2800 
2801     /* Avoid unused variable warning if features are disabled. */
2802     (void) handler_in_list;
2803 
2804     if (handler == NULL) return(-1);
2805     if (handler->name == NULL) return(-1);
2806     if (handlers != NULL) {
2807         for (i = 0;i < nbCharEncodingHandler; i++) {
2808             if (handler == handlers[i]) {
2809 	        handler_in_list = 1;
2810 		break;
2811 	    }
2812 	}
2813     }
2814 #ifdef LIBXML_ICONV_ENABLED
2815     /*
2816      * Iconv handlers can be used only once, free the whole block.
2817      * and the associated icon resources.
2818      */
2819     if ((handler_in_list == 0) &&
2820         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2821         tofree = 1;
2822 	if (handler->iconv_out != NULL) {
2823 	    if (iconv_close(handler->iconv_out))
2824 		ret = -1;
2825 	    handler->iconv_out = NULL;
2826 	}
2827 	if (handler->iconv_in != NULL) {
2828 	    if (iconv_close(handler->iconv_in))
2829 		ret = -1;
2830 	    handler->iconv_in = NULL;
2831 	}
2832     }
2833 #endif /* LIBXML_ICONV_ENABLED */
2834 #ifdef LIBXML_ICU_ENABLED
2835     if ((handler_in_list == 0) &&
2836         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2837         tofree = 1;
2838 	if (handler->uconv_out != NULL) {
2839 	    closeIcuConverter(handler->uconv_out);
2840 	    handler->uconv_out = NULL;
2841 	}
2842 	if (handler->uconv_in != NULL) {
2843 	    closeIcuConverter(handler->uconv_in);
2844 	    handler->uconv_in = NULL;
2845 	}
2846     }
2847 #endif
2848     if (tofree) {
2849         /* free up only dynamic handlers iconv/uconv */
2850         if (handler->name != NULL)
2851             xmlFree(handler->name);
2852         handler->name = NULL;
2853         xmlFree(handler);
2854     }
2855 #ifdef DEBUG_ENCODING
2856     if (ret)
2857         xmlGenericError(xmlGenericErrorContext,
2858 		"failed to close the encoding handler\n");
2859     else
2860         xmlGenericError(xmlGenericErrorContext,
2861 		"closed the encoding handler\n");
2862 #endif
2863 
2864     return(ret);
2865 }
2866 
2867 /**
2868  * xmlByteConsumed:
2869  * @ctxt: an XML parser context
2870  *
2871  * This function provides the current index of the parser relative
2872  * to the start of the current entity. This function is computed in
2873  * bytes from the beginning starting at zero and finishing at the
2874  * size in byte of the file if parsing a file. The function is
2875  * of constant cost if the input is UTF-8 but can be costly if run
2876  * on non-UTF-8 input.
2877  *
2878  * Returns the index in bytes from the beginning of the entity or -1
2879  *         in case the index could not be computed.
2880  */
2881 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2882 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2883     xmlParserInputPtr in;
2884 
2885     if (ctxt == NULL) return(-1);
2886     in = ctxt->input;
2887     if (in == NULL)  return(-1);
2888     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2889         unsigned int unused = 0;
2890 	xmlCharEncodingHandler * handler = in->buf->encoder;
2891         /*
2892 	 * Encoding conversion, compute the number of unused original
2893 	 * bytes from the input not consumed and subtract that from
2894 	 * the raw consumed value, this is not a cheap operation
2895 	 */
2896         if (in->end - in->cur > 0) {
2897 	    unsigned char convbuf[32000];
2898 	    const unsigned char *cur = (const unsigned char *)in->cur;
2899 	    int toconv = in->end - in->cur, written = 32000;
2900 
2901 	    int ret;
2902 
2903             do {
2904                 toconv = in->end - cur;
2905                 written = 32000;
2906                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2907                                         cur, &toconv);
2908                 if (ret < 0) {
2909                     if (written > 0)
2910                         ret = -2;
2911                     else
2912                         return(-1);
2913                 }
2914                 unused += written;
2915                 cur += toconv;
2916             } while (ret == -2);
2917 	}
2918 	if (in->buf->rawconsumed < unused)
2919 	    return(-1);
2920 	return(in->buf->rawconsumed - unused);
2921     }
2922     return(in->consumed + (in->cur - in->base));
2923 }
2924 
2925 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2926 #ifdef LIBXML_ISO8859X_ENABLED
2927 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every converted character yields exactly one output
 * byte, and conversion stops before writing past @out + @outlen.
 *
 * The table is read as follows: bytes 0..31 are indexed by the low 5 bits
 * of a 2-byte lead, bytes 32..47 by the low 4 bits of a 3-byte lead, and
 * from offset 48 on come 64-byte blocks indexed by the low 6 bits of a
 * trailing byte. A final lookup of 0 means "not in the character set".
 *
 * Returns the number of bytes written if success, -1 if an argument is
 *     NULL or @out is too small for the remaining input, -2 if the
 *     transcoding fails, or -3 if the input ends inside a multi-byte
 *     sequence.
 * Unless an argument was NULL:
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /*
         * Each iteration stores at most one byte, so one check per
         * character is enough to stay inside the output buffer.
         */
        if (out >= outend) {
            ret = -1;
            goto done;
        }

        d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        /* Only fully converted characters count as consumed. */
        processed = in;
    }
    ret = (int) (out - outstart);

done:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(ret);
}
3046 
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  code points for the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged. Conversion
 * stops early, without error, when @out has no room for the next char.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or an undefined byte is met
 * Unless an argument was NULL:
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Keep at least 3 free output bytes while in this loop, the longest
     * sequence emitted for a table entry. The room is compared as a
     * difference so that no pointer before @out is ever formed.
     */
    while ((in < inend) && ((outend - out) > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Limit the ASCII run to what still fits in the output. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Check the bound first so *in is never read past the input. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two trailing ASCII bytes may still fit in the reserved room. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3112 
3113 
3114 /************************************************************************
3115  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3116  ************************************************************************/
3117 
/*
 * 128 16-bit values for ISO-8859-2, one per byte 0x80..0xFF in order.
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0x0000 as an undefined
 * byte -- confirm against the wrapper that passes this table.
 */
3118 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3119     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3120     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3121     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3122     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3123     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3124     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3125     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3126     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3127     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3128     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3129     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3130     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3131     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3132     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3133     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3134     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3135 };
3136 
/*
 * Byte table for ISO-8859-2 sized as 48 index bytes plus 6 blocks of 64.
 * NOTE(review): presumably handed to UTF8ToISO8859x as @xlattable, which
 * reads bytes 0..31 by 2-byte lead, bytes 32..47 by 3-byte lead and the
 * 64-byte blocks from offset 48 by trailing byte, with 0 meaning "no
 * mapping" -- confirm against the wrapper that passes this table.
 */
3137 static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3138     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3139     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3146     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3147     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3148     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3149     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3150     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3151     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3153     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3154     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3155     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3158     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3159     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3160     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3161     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3162     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3163     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3164     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3165 };
3166 
/*
 * 128 16-bit values for ISO-8859-3, one per byte 0x80..0xFF in order.
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0x0000 as an undefined
 * byte -- confirm against the wrapper that passes this table.
 */
3167 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3168     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3169     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3170     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3171     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3172     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3173     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3174     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3175     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3176     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3177     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3178     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3179     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3180     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3181     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3182     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3183     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3184 };
3185 
/*
 * Byte table for ISO-8859-3 sized as 48 index bytes plus 7 blocks of 64.
 * NOTE(review): presumably handed to UTF8ToISO8859x as @xlattable, which
 * reads bytes 0..31 by 2-byte lead, bytes 32..47 by 3-byte lead and the
 * 64-byte blocks from offset 48 by trailing byte, with 0 meaning "no
 * mapping" -- confirm against the wrapper that passes this table.
 */
3186 static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3187     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3188     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3195     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3196     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3197     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3198     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3199     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3200     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3201     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3204     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3212     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3213     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3214     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3215     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3216     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3217     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3218 };
3219 
/*
 * 128 16-bit values for ISO-8859-4, one per byte 0x80..0xFF in order.
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0x0000 as an undefined
 * byte -- confirm against the wrapper that passes this table.
 */
3220 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3221     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3222     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3223     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3224     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3225     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3226     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3227     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3228     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3229     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3230     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3231     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3232     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3233     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3234     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3235     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3236     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3237 };
3238 
/*
 * Byte table for ISO-8859-4 sized as 48 index bytes plus 6 blocks of 64.
 * NOTE(review): presumably handed to UTF8ToISO8859x as @xlattable, which
 * reads bytes 0..31 by 2-byte lead, bytes 32..47 by 3-byte lead and the
 * 64-byte blocks from offset 48 by trailing byte, with 0 meaning "no
 * mapping" -- confirm against the wrapper that passes this table.
 */
3239 static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3240     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3241     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3248     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3249     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3250     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3251     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3252     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3253     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3254     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3255     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3256     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3257     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3258     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3259     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3260     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3261     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3264     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3265     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3266     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3267 };
3268 
/*
 * 128 16-bit values for ISO-8859-5, one per byte 0x80..0xFF in order.
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0x0000 as an undefined
 * byte -- confirm against the wrapper that passes this table.
 */
3269 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3270     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3271     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3272     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3273     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3274     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3275     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3276     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3277     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3278     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3279     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3280     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3281     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3282     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3283     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3284     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3285     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3286 };
3287 
/*
 * Byte table for ISO-8859-5 sized as 48 index bytes plus 6 blocks of 64.
 * NOTE(review): presumably handed to UTF8ToISO8859x as @xlattable, which
 * reads bytes 0..31 by 2-byte lead, bytes 32..47 by 3-byte lead and the
 * 64-byte blocks from offset 48 by trailing byte, with 0 meaning "no
 * mapping" -- confirm against the wrapper that passes this table.
 */
3288 static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3289     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3297     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3298     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3299     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3301     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3302     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3303     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3304     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3305     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3306     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316 };
3317 
/*
 * 128 16-bit values for ISO-8859-6, one per byte 0x80..0xFF in order.
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0x0000 as an undefined
 * byte -- confirm against the wrapper that passes this table.
 */
3318 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3319     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3320     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3321     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3322     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3323     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3324     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3325     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3326     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3327     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3328     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3329     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3330     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3331     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3332     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3333     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3334     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3335 };
3336 
/*
 * Byte table for ISO-8859-6 sized as 48 index bytes plus 5 blocks of 64.
 * NOTE(review): presumably handed to UTF8ToISO8859x as @xlattable, which
 * reads bytes 0..31 by 2-byte lead, bytes 32..47 by 3-byte lead and the
 * 64-byte blocks from offset 48 by trailing byte, with 0 meaning "no
 * mapping" -- confirm against the wrapper that passes this table.
 */
3337 static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3338     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3340     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3346     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3347     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3348     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3354     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3355     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3356     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3357     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3358     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 };
3362 
/*
 * 128 16-bit values for ISO-8859-7, one per byte 0x80..0xFF in order.
 * NOTE(review): presumably handed to ISO8859xToUTF8 as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0x0000 as an undefined
 * byte -- confirm against the wrapper that passes this table.
 */
3363 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3364     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3365     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3366     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3367     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3368     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3369     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3370     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3371     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3372     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3373     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3374     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3375     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3376     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3377     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3378     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3379     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3380 };
3381 
/*
 * Unicode to ISO-8859-7 lookup table: 48 index bytes followed by seven
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3415 
/*
 * ISO-8859-8 (Hebrew) to Unicode table: 128 entries, one per high byte.
 * The first 32 entries are the identity values 0x0080..0x009f, so entry i
 * appears to describe byte (0x80 + i); 0x0000 marks a byte with no mapping.
 * NOTE(review): presumably passed to ISO8859xToUTF8 like the ISO-8859-2
 * table is -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
3434 
/*
 * Unicode to ISO-8859-8 lookup table: 48 index bytes followed by seven
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3468 
/*
 * ISO-8859-9 (Latin-5, Turkish) to Unicode table: 128 entries, one per
 * high byte. The first 32 entries are the identity values 0x0080..0x009f,
 * so entry i appears to describe byte (0x80 + i). Every entry in this
 * table is non-zero.
 * NOTE(review): presumably passed to ISO8859xToUTF8 like the ISO-8859-2
 * table is -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3487 
/*
 * Unicode to ISO-8859-9 lookup table: 48 index bytes followed by five
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3513 
/*
 * ISO-8859-10 (Latin-6, Nordic) to Unicode table: 128 entries, one per
 * high byte. The first 32 entries are the identity values 0x0080..0x009f,
 * so entry i appears to describe byte (0x80 + i). Every entry in this
 * table is non-zero.
 * NOTE(review): presumably passed to ISO8859xToUTF8 like the ISO-8859-2
 * table is -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3532 
/*
 * Unicode to ISO-8859-10 lookup table: 48 index bytes followed by seven
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3566 
/*
 * ISO-8859-11 (Thai) to Unicode table: 128 entries, one per high byte.
 * The first 32 entries are the identity values 0x0080..0x009f, so entry i
 * appears to describe byte (0x80 + i); 0x0000 marks a byte with no mapping.
 * NOTE(review): presumably passed to ISO8859xToUTF8 like the ISO-8859-2
 * table is -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3585 
/*
 * Unicode to ISO-8859-11 lookup table: 48 index bytes followed by six
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3615 
/*
 * ISO-8859-13 (Latin-7, Baltic) to Unicode table: 128 entries, one per
 * high byte. The first 32 entries are the identity values 0x0080..0x009f,
 * so entry i appears to describe byte (0x80 + i). Every entry in this
 * table is non-zero.
 * NOTE(review): presumably passed to ISO8859xToUTF8 like the ISO-8859-2
 * table is -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3634 
/*
 * Unicode to ISO-8859-13 lookup table: 48 index bytes followed by seven
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3668 
/*
 * ISO-8859-14 (Latin-8, Celtic) to Unicode table: 128 entries, one per
 * high byte. The first 32 entries are the identity values 0x0080..0x009f,
 * so entry i appears to describe byte (0x80 + i). Every entry in this
 * table is non-zero.
 * NOTE(review): presumably passed to ISO8859xToUTF8 like the ISO-8859-2
 * table is -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3687 
/*
 * Unicode to ISO-8859-14 lookup table: 48 index bytes followed by ten
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3733 
/*
 * ISO-8859-15 (Latin-9) to Unicode table: 128 entries, one per high byte.
 * The first 32 entries are the identity values 0x0080..0x009f, so entry i
 * appears to describe byte (0x80 + i). Every entry in this table is
 * non-zero.
 * NOTE(review): presumably passed to ISO8859xToUTF8 like the ISO-8859-2
 * table is -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3752 
/*
 * Unicode to ISO-8859-15 lookup table: 48 index bytes followed by six
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3782 
/*
 * ISO-8859-16 (Latin-10) to Unicode table: 128 entries, one per high byte.
 * The first 32 entries are the identity values 0x0080..0x009f, so entry i
 * appears to describe byte (0x80 + i). Every entry in this table is
 * non-zero.
 * NOTE(review): presumably passed to ISO8859xToUTF8 like the ISO-8859-2
 * table is -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3801 
/*
 * Unicode to ISO-8859-16 lookup table: 48 index bytes followed by nine
 * 64-byte blocks (see the declared size). A result byte of 0x00 means
 * "no mapping".
 * NOTE(review): the layout is assumed to be the one UTF8ToISO8859x expects
 * (bytes 0..31 select a block for 2-byte UTF-8 lead bytes, bytes 32..47
 * select a second-level block for 3-byte lead bytes) -- confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3843 
3844 
3845 /*
3846  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3847  */
3848 
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3849 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3850     const unsigned char* in, int *inlen) {
3851     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3852 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3853 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3854     const unsigned char* in, int *inlen) {
3855     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3856 }
3857 
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3858 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3859     const unsigned char* in, int *inlen) {
3860     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3861 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3862 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3863     const unsigned char* in, int *inlen) {
3864     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3865 }
3866 
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3867 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3868     const unsigned char* in, int *inlen) {
3869     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3870 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3871 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3872     const unsigned char* in, int *inlen) {
3873     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3874 }
3875 
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3876 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3877     const unsigned char* in, int *inlen) {
3878     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3879 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3880 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3881     const unsigned char* in, int *inlen) {
3882     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3883 }
3884 
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3885 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3886     const unsigned char* in, int *inlen) {
3887     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3888 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3889 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3890     const unsigned char* in, int *inlen) {
3891     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3892 }
3893 
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3894 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3895     const unsigned char* in, int *inlen) {
3896     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3897 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3898 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3899     const unsigned char* in, int *inlen) {
3900     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3901 }
3902 
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3903 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3904     const unsigned char* in, int *inlen) {
3905     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3906 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3907 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3908     const unsigned char* in, int *inlen) {
3909     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3910 }
3911 
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3912 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3913     const unsigned char* in, int *inlen) {
3914     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3915 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3916 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3917     const unsigned char* in, int *inlen) {
3918     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3919 }
3920 
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3921 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3922     const unsigned char* in, int *inlen) {
3923     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3924 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3925 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3926     const unsigned char* in, int *inlen) {
3927     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3928 }
3929 
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3930 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3931     const unsigned char* in, int *inlen) {
3932     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3933 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3934 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3935     const unsigned char* in, int *inlen) {
3936     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3937 }
3938 
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3939 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3940     const unsigned char* in, int *inlen) {
3941     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3942 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3943 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3944     const unsigned char* in, int *inlen) {
3945     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3946 }
3947 
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3948 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3949     const unsigned char* in, int *inlen) {
3950     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3951 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3952 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3953     const unsigned char* in, int *inlen) {
3954     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3955 }
3956 
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3957 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3958     const unsigned char* in, int *inlen) {
3959     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3960 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3961 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3962     const unsigned char* in, int *inlen) {
3963     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3964 }
3965 
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3966 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3967     const unsigned char* in, int *inlen) {
3968     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3969 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3970 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3971     const unsigned char* in, int *inlen) {
3972     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3973 }
3974 
3975 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3976 xmlRegisterCharEncodingHandlersISO8859x (void) {
3977     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3978     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3979     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3980     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3981     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3982     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3983     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3984     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3985     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3986     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3987     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3988     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3989     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3990     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3991 }
3992 
3993 #endif
3994 #endif
3995 
3996