1 /* libxml2 - Library for parsing XML documents
2  * Copyright (C) 2006-2019 Free Software Foundation, Inc.
3  *
4  * This file is not part of the GNU gettext program, but is used with
5  * GNU gettext.
6  *
7  * The original copyright notice is as follows:
8  */
9 
10 /*
11  * Copyright (C) 1998-2012 Daniel Veillard.  All Rights Reserved.
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this software and associated documentation files (the "Software"), to deal
15  * in the Software without restriction, including without limitation the rights
16  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17  * copies of the Software, and to permit persons to whom the Software is fur-
18  * nished to do so, subject to the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
25  * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29  * THE SOFTWARE.
30  *
31  * daniel@veillard.com
32  *
33  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
34  */
35 
36 /*
37  * encoding.c : implements the encoding conversion functions needed for XML
38  *
39  * Related specs:
40  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
41  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
42  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
43  * [ISO-8859-1]   ISO Latin-1 characters codes.
44  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
45  *                Worldwide Character Encoding -- Version 1.0", Addison-
46  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
47  *                described in Unicode Technical Report #4.
48  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
49  *                Information Interchange, ANSI X3.4-1986.
50  */
51 
52 #define IN_LIBXML
53 #include "libxml.h"
54 
55 #include <string.h>
56 #include <limits.h>
57 
58 #ifdef HAVE_CTYPE_H
59 #include <ctype.h>
60 #endif
61 #ifdef HAVE_STDLIB_H
62 #include <stdlib.h>
63 #endif
64 #ifdef LIBXML_ICONV_ENABLED
65 #ifdef HAVE_ERRNO_H
66 #include <errno.h>
67 #endif
68 #endif
69 #include <libxml/encoding.h>
70 #include <libxml/xmlmemory.h>
71 #ifdef LIBXML_HTML_ENABLED
72 #include <libxml/HTMLparser.h>
73 #endif
74 #include <libxml/globals.h>
75 #include <libxml/xmlerror.h>
76 
77 #include "buf.h"
78 #include "enc.h"
79 
/*
 * Handler slots for the UTF-16LE and UTF-16BE encodings. Both start out
 * NULL; where they are assigned is not visible in this part of the file.
 */
80 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
81 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
82 
/*
 * One entry of the encoding alias table: an encoding name paired with an
 * alias. xmlCleanupEncodingAliases() releases both strings with xmlFree(),
 * so the table owns them.
 */
83 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
84 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
85 struct _xmlCharEncodingAlias {
86     const char *name;
87     const char *alias;
88 };
89 
/*
 * The alias table itself (NULL until something allocates it), the number
 * of entries currently in use, and a second counter.
 * NOTE(review): xmlCharEncodingAliasesMax is presumably the allocated
 * capacity of the array - confirm against the registration code.
 */
90 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
91 static int xmlCharEncodingAliasesNb = 0;
92 static int xmlCharEncodingAliasesMax = 0;
93 
/*
 * With iconv or ICU enabled, DEBUG_ENCODING can be switched on by turning
 * the "#if 0" below into "#if 1". Without either library, and when
 * LIBXML_ISO8859X_ENABLED is defined, the ISO-8859-x registration routine
 * is forward-declared here.
 */
94 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
95 #if 0
96 #define DEBUG_ENCODING  /* Define this to get encoding traces */
97 #endif
98 #else
99 #ifdef LIBXML_ISO8859X_ENABLED
100 static void xmlRegisterCharEncodingHandlersISO8859x (void);
101 #endif
102 #endif
103 
/*
 * Host byte-order flag: non-zero means the host is treated as
 * little-endian. Defaults to 1.
 * NOTE(review): presumably probed at initialization - not visible here.
 */
104 static int xmlLittleEndian = 1;
105 
106 /**
107  * xmlEncodingErrMemory:
108  * @extra:  extra informations
109  *
110  * Handle an out of memory condition
111  */
112 static void
xmlEncodingErrMemory(const char * extra)113 xmlEncodingErrMemory(const char *extra)
114 {
115     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
116 }
117 
118 /**
119  * xmlErrEncoding:
120  * @error:  the error number
121  * @msg:  the error message
122  *
123  * n encoding error
124  */
125 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)126 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
127 {
128     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
129                     XML_FROM_I18N, error, XML_ERR_FATAL,
130                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
131 }
132 
133 #ifdef LIBXML_ICU_ENABLED
134 static uconv_t*
openIcuConverter(const char * name,int toUnicode)135 openIcuConverter(const char* name, int toUnicode)
136 {
137   UErrorCode status = U_ZERO_ERROR;
138   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
139   if (conv == NULL)
140     return NULL;
141 
142   conv->pivot_source = conv->pivot_buf;
143   conv->pivot_target = conv->pivot_buf;
144 
145   conv->uconv = ucnv_open(name, &status);
146   if (U_FAILURE(status))
147     goto error;
148 
149   status = U_ZERO_ERROR;
150   if (toUnicode) {
151     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
152                         NULL, NULL, NULL, &status);
153   }
154   else {
155     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
156                         NULL, NULL, NULL, &status);
157   }
158   if (U_FAILURE(status))
159     goto error;
160 
161   status = U_ZERO_ERROR;
162   conv->utf8 = ucnv_open("UTF-8", &status);
163   if (U_SUCCESS(status))
164     return conv;
165 
166 error:
167   if (conv->uconv)
168     ucnv_close(conv->uconv);
169   xmlFree(conv);
170   return NULL;
171 }
172 
173 static void
closeIcuConverter(uconv_t * conv)174 closeIcuConverter(uconv_t *conv)
175 {
176   if (conv != NULL) {
177     ucnv_close(conv->uconv);
178     ucnv_close(conv->utf8);
179     xmlFree(conv);
180   }
181 }
182 #endif /* LIBXML_ICU_ENABLED */
183 
184 /************************************************************************
185  *									*
186  *		Conversions To/From UTF8 encoding			*
187  *									*
188  ************************************************************************/
189 
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and copy it to @out. 7-bit ASCII is
 * already valid UTF-8, so bytes are copied unchanged. Conversion only
 * proceeds while more than five bytes of headroom remain in @out, so a
 * buffer of five bytes or fewer converts nothing.
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 is met.
 * In both cases the value of @outlen after return is the number of
 * octets written and the value of @inlen is the number of octets
 * consumed (everything before the offending byte in the error case).
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int copied = 0;     /* one input byte yields exactly one output byte */
    int failed = 0;

    while ((copied < available) && (copied + 5 < capacity)) {
        unsigned char ch = in[copied];

        if (ch >= 0x80) {
            /* not representable as 7-bit ASCII */
            failed = 1;
            break;
        }
        out[copied] = ch;
        copied++;
    }

    *outlen = copied;
    *inlen = copied;
    if (failed)
        return(-1);
    return(copied);
}
234 
235 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL. A NULL @in is an initialization
 *     call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed;
 *     on -2 it covers only the characters converted before the failure.
 * The value of @outlen after return is the number of octets written.
 * A multi-byte sequence cut off by the end of @in is left unconsumed so
 * that the caller can retry once more input is available.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d; trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            c = d & 0x1F; trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F; trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07; trailing = 3;
        } else {
            /* no chance for this in Ascii */
            ret = -2;
            break;
        }

        /* sequence truncated by the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte. Fail instead of emitting the
                 * partially assembled value as if it were a character.
                 */
                ret = -2;
                break;
            }
            c = (c << 6) | (d & 0x3F);
        }
        if (ret < 0)
            break;

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            ret = -2;
            break;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    if (ret < 0)
        return(ret);
    return(*outlen);
}
319 #endif /* LIBXML_OUTPUT_ENABLED */
320 
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, every other byte
 * becomes a two-byte UTF-8 sequence. Conversion stops at the first
 * character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets written.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int room;
    int available;
    int produced = 0;
    int consumed = 0;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    room = *outlen;
    available = *inlen;

    while (consumed < available) {
        unsigned char ch = in[consumed];

        if (ch < 0x80) {
            /* plain ASCII: one byte in, one byte out */
            if (produced >= room)
                break;
            out[produced++] = ch;
        } else {
            /* 0x80..0xFF: always a two-byte sequence */
            if (room - produced < 2)
                break;
            out[produced++] = ((ch >> 6) & 0x1F) | 0xC0;
            out[produced++] = (ch & 0x3F) | 0x80;
        }
        consumed++;
    }

    *outlen = produced;
    *inlen = consumed;
    return(produced);
}
369 
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit in both
 * buffers are copied verbatim, without looking at character boundaries.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or if the number of bytes to copy would be negative.
 *     A NULL @inb is an initialization call: both lengths are set to 0
 *     and 0 is returned.
 *     The value of *outlen and *inlenb after a successful return is the
 *     number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (!out || !outlen || !inlenb)
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy the smaller of the two lengths */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
411 
412 
413 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL. A NULL @in is an initialization call:
 *     both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed;
 *     on -2 it covers only the characters converted before the failure.
 * The value of @outlen after return is the number of octets written.
 * A multi-byte sequence cut off by the end of @in is left unconsumed.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int available, room;
    int consumed = 0;
    int produced = 0;
    int status = 0;

    if (!out || !outlen || !inlen)
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    available = *inlen;
    room = *outlen;

    while (consumed < available) {
        const unsigned char *seq = in + consumed;
        unsigned int lead = seq[0];
        unsigned int value;
        int extra, k;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* stray trailing byte, or a lead byte with no chance in IsoLat1 */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else {
            value = lead & 0x07;
            extra = 3;
        }

        /* sequence truncated by the end of the input: wait for more */
        if (available - consumed - 1 < extra)
            break;

        for (k = 1; k <= extra; k++) {
            unsigned int cont = seq[k];

            if ((cont & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            value = (value << 6) | (cont & 0x3F);
        }
        if (status != 0)
            break;

        /* assertion: value is a single UCS-4 value */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (produced >= room)
            break;
        out[produced++] = (unsigned char) value;
        consumed += 1 + extra;
    }

    *outlen = produced;
    *inlen = consumed;
    return((status != 0) ? status : produced);
}
503 #endif /* LIBXML_OUTPUT_ENABLED */
504 
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte (low byte
 * first), so @inb needs no particular alignment and the result does not
 * depend on the byte order of the host. A trailing odd byte is ignored.
 * Conversion only proceeds while more than five bytes of headroom remain
 * in @out, which guarantees that a character is never written partially.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate that is not followed by a low surrogate).
 *     The value of *inlenb after return is the number of octets consumed,
 *     the value of *outlen is the number of octets written.
 *     A high surrogate cut off by the end of the input is left
 *     unconsumed so that the caller can retry with more data.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int avail;
    int bits;
    int ret = 0;

    /* only whole 16-bit units are converted; a negative length is empty */
    avail = *inlenb;
    if (avail < 0)
        avail = 0;
    avail -= avail % 2;
    inend = inb + avail;

    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* pair split by the end of the input: wait for more */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                ret = -2;
                break;
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /*
         * assertion: c is a single UCS-4 value. The loop condition
         * reserved more room than the longest (4 byte) sequence needs.
         */
        if      (c <    0x80) {  *out++ = (unsigned char) c;                  bits = -6; }
        else if (c <   0x800) {  *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);  bits =  0; }
        else if (c < 0x10000) {  *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);  bits =  6; }
        else                  {  *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);  bits = 12; }

        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    if (ret < 0)
        return(ret);
    return(*outlen);
}
592 
593 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte (low byte first), so @outb needs no particular
 * alignment and the result does not depend on the byte order of the host.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     above 0x10FFFF). A NULL @in is an initialization call: both lengths
 *     are set to 0 and 0 is returned.
 *     The value of *inlen after return is the number of octets consumed,
 *     the value of *outlen is the number of octets written. Conversion
 *     stops early, without error, when the next character does not fit
 *     in @outb or when a sequence is cut off by the end of @in.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    const unsigned char *const instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;
    int outmax;
    int written = 0;
    int ret = 0;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outmax = (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d; trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            c = d & 0x1F; trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F; trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07; trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* sequence truncated by the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte. Fail instead of emitting the
                 * partially assembled value as if it were a character.
                 */
                ret = -2;
                break;
            }
            c = (c << 6) | (d & 0x3F);
        }
        if (ret < 0)
            break;

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (written + 2 > outmax)
                break;
            outb[written++] = (unsigned char) (c & 0xFF);
            outb[written++] = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (written + 4 > outmax)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            outb[written++] = (unsigned char) (hi & 0xFF);
            outb[written++] = (unsigned char) (hi >> 8);
            outb[written++] = (unsigned char) (lo & 0xFF);
            outb[written++] = (unsigned char) (lo >> 8);
        } else {
            /*
             * Beyond the Unicode range. Report the failure: stopping
             * silently would leave the caller retrying the same bytes.
             */
            ret = -2;
            break;
        }
        processed = in;
    }
    *outlen = written;
    *inlen = (int) (processed - instart);
    if (ret < 0)
        return(ret);
    return(*outlen);
}
702 
703 /**
704  * UTF8ToUTF16:
705  * @outb:  a pointer to an array of bytes to store the result
706  * @outlen:  the length of @outb
707  * @in:  a pointer to an array of UTF-8 chars
708  * @inlen:  the length of @in
709  *
710  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
711  * block of chars out.
712  *
713  * Returns the number of bytes written, or -1 if lack of space, or -2
714  *     if the transcoding failed.
715  */
716 static int
UTF8ToUTF16(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)717 UTF8ToUTF16(unsigned char* outb, int *outlen,
718             const unsigned char* in, int *inlen)
719 {
720     if (in == NULL) {
721 	/*
722 	 * initialization, add the Byte Order Mark for UTF-16LE
723 	 */
724         if (*outlen >= 2) {
725 	    outb[0] = 0xFF;
726 	    outb[1] = 0xFE;
727 	    *outlen = 2;
728 	    *inlen = 0;
729 #ifdef DEBUG_ENCODING
730             xmlGenericError(xmlGenericErrorContext,
731 		    "Added FFFE Byte Order Mark\n");
732 #endif
733 	    return(2);
734 	}
735 	*outlen = 0;
736 	*inlen = 0;
737 	return(0);
738     }
739     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
740 }
741 #endif /* LIBXML_OUTPUT_ENABLED */
742 
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte (high byte
 * first), so @inb needs no particular alignment and the result does not
 * depend on the byte order of the host. A trailing odd byte is ignored.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate that is not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed,
 *     the value of *outlen is the number of octets written.
 * Conversion stops early, without error, when the next character does
 *     not fit completely in @out or when a high surrogate is cut off by
 *     the end of the input; in both cases the character is left
 *     unconsumed so that the caller can retry.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    const unsigned char* in = inb;      /* first unconsumed input byte */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int outmax = *outlen;
    int written = 0;
    int avail;
    int needed;
    int ret = 0;

    /* only whole 16-bit units are converted; a negative length is empty */
    avail = *inlenb;
    if (avail < 0)
        avail = 0;
    avail -= avail % 2;
    inend = inb + avail;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (next >= inend) {
                /* pair split by the end of the input: wait for more */
                break;
            }
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                ret = -2;
                break;
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a truncated UTF-8 sequence */
        if (written + needed > outmax)
            break;

        switch (needed) {
            case 1:
                out[written++] = (unsigned char) c;
                break;
            case 2:
                out[written++] = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                out[written++] = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            case 3:
                out[written++] = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                out[written++] = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                out[written++] = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            default:
                out[written++] = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                out[written++] = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
                out[written++] = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                out[written++] = (unsigned char) ((c & 0x3F) | 0x80);
                break;
        }
        in = next;
    }
    *outlen = written;
    *inlenb = (int) (in - inb);
    if (ret < 0)
        return(ret);
    return(*outlen);
}
834 
835 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is written. The output is
 * stored byte by byte (high byte first), so @outb needs no particular
 * alignment and the result does not depend on the byte order of the host.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     above 0x10FFFF). A NULL @in is an initialization call: both lengths
 *     are set to 0 and 0 is returned.
 *     The value of *inlen after return is the number of octets consumed,
 *     the value of *outlen is the number of octets written, also when -2
 *     is returned. Conversion stops early, without error, when the next
 *     character does not fit in @outb or when a sequence is cut off by
 *     the end of @in.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    const unsigned char *const instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;
    int outmax;
    int written = 0;    /* always counted in bytes */
    int ret = 0;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outmax = (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d; trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            c = d & 0x1F; trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F; trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07; trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* sequence truncated by the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte. Fail instead of emitting the
                 * partially assembled value as if it were a character.
                 */
                ret = -2;
                break;
            }
            c = (c << 6) | (d & 0x3F);
        }
        if (ret < 0)
            break;

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (written + 2 > outmax)
                break;
            outb[written++] = (unsigned char) (c >> 8);
            outb[written++] = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (written + 4 > outmax)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            outb[written++] = (unsigned char) (hi >> 8);
            outb[written++] = (unsigned char) (hi & 0xFF);
            outb[written++] = (unsigned char) (lo >> 8);
            outb[written++] = (unsigned char) (lo & 0xFF);
        } else {
            /*
             * Beyond the Unicode range. Report the failure: stopping
             * silently would leave the caller retrying the same bytes.
             */
            ret = -2;
            break;
        }
        processed = in;
    }
    *outlen = written;
    *inlen = (int) (processed - instart);
    if (ret < 0)
        return(ret);
    return(*outlen);
}
941 #endif /* LIBXML_OUTPUT_ENABLED */
942 
943 /************************************************************************
944  *									*
945  *		Generic encoding handling routines			*
946  *									*
947  ************************************************************************/
948 
949 /**
950  * xmlDetectCharEncoding:
951  * @in:  a pointer to the first bytes of the XML entity, must be at least
952  *       2 bytes long (at least 4 if encoding is UTF4 variant).
953  * @len:  pointer to the length of the buffer
954  *
955  * Guess the encoding of the entity using the first bytes of the entity content
956  * according to the non-normative appendix F of the XML-1.0 recommendation.
957  *
958  * Returns one of the XML_CHAR_ENCODING_... values.
959  */
960 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)961 xmlDetectCharEncoding(const unsigned char* in, int len)
962 {
963     if (in == NULL)
964         return(XML_CHAR_ENCODING_NONE);
965     if (len >= 4) {
966 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
967 	    (in[2] == 0x00) && (in[3] == 0x3C))
968 	    return(XML_CHAR_ENCODING_UCS4BE);
969 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
970 	    (in[2] == 0x00) && (in[3] == 0x00))
971 	    return(XML_CHAR_ENCODING_UCS4LE);
972 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
973 	    (in[2] == 0x3C) && (in[3] == 0x00))
974 	    return(XML_CHAR_ENCODING_UCS4_2143);
975 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
976 	    (in[2] == 0x00) && (in[3] == 0x00))
977 	    return(XML_CHAR_ENCODING_UCS4_3412);
978 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
979 	    (in[2] == 0xA7) && (in[3] == 0x94))
980 	    return(XML_CHAR_ENCODING_EBCDIC);
981 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
982 	    (in[2] == 0x78) && (in[3] == 0x6D))
983 	    return(XML_CHAR_ENCODING_UTF8);
984 	/*
985 	 * Although not part of the recommendation, we also
986 	 * attempt an "auto-recognition" of UTF-16LE and
987 	 * UTF-16BE encodings.
988 	 */
989 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
990 	    (in[2] == 0x3F) && (in[3] == 0x00))
991 	    return(XML_CHAR_ENCODING_UTF16LE);
992 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
993 	    (in[2] == 0x00) && (in[3] == 0x3F))
994 	    return(XML_CHAR_ENCODING_UTF16BE);
995     }
996     if (len >= 3) {
997 	/*
998 	 * Errata on XML-1.0 June 20 2001
999 	 * We now allow an UTF8 encoded BOM
1000 	 */
1001 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
1002 	    (in[2] == 0xBF))
1003 	    return(XML_CHAR_ENCODING_UTF8);
1004     }
1005     /* For UTF-16 we can recognize by the BOM */
1006     if (len >= 2) {
1007 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
1008 	    return(XML_CHAR_ENCODING_UTF16BE);
1009 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
1010 	    return(XML_CHAR_ENCODING_UTF16LE);
1011     }
1012     return(XML_CHAR_ENCODING_NONE);
1013 }
1014 
1015 /**
1016  * xmlCleanupEncodingAliases:
1017  *
1018  * Unregisters all aliases
1019  */
1020 void
xmlCleanupEncodingAliases(void)1021 xmlCleanupEncodingAliases(void) {
1022     int i;
1023 
1024     if (xmlCharEncodingAliases == NULL)
1025 	return;
1026 
1027     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1028 	if (xmlCharEncodingAliases[i].name != NULL)
1029 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1030 	if (xmlCharEncodingAliases[i].alias != NULL)
1031 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1032     }
1033     xmlCharEncodingAliasesNb = 0;
1034     xmlCharEncodingAliasesMax = 0;
1035     xmlFree(xmlCharEncodingAliases);
1036     xmlCharEncodingAliases = NULL;
1037 }
1038 
1039 /**
1040  * xmlGetEncodingAlias:
1041  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1042  *
1043  * Lookup an encoding name for the given alias.
1044  *
1045  * Returns NULL if not found, otherwise the original name
1046  */
1047 const char *
xmlGetEncodingAlias(const char * alias)1048 xmlGetEncodingAlias(const char *alias) {
1049     int i;
1050     char upper[100];
1051 
1052     if (alias == NULL)
1053 	return(NULL);
1054 
1055     if (xmlCharEncodingAliases == NULL)
1056 	return(NULL);
1057 
1058     for (i = 0;i < 99;i++) {
1059         upper[i] = toupper(alias[i]);
1060 	if (upper[i] == 0) break;
1061     }
1062     upper[i] = 0;
1063 
1064     /*
1065      * Walk down the list looking for a definition of the alias
1066      */
1067     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1068 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1069 	    return(xmlCharEncodingAliases[i].name);
1070 	}
1071     }
1072     return(NULL);
1073 }
1074 
1075 /**
1076  * xmlAddEncodingAlias:
1077  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1078  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1079  *
1080  * Registers an alias @alias for an encoding named @name. Existing alias
1081  * will be overwritten.
1082  *
1083  * Returns 0 in case of success, -1 in case of error
1084  */
1085 int
xmlAddEncodingAlias(const char * name,const char * alias)1086 xmlAddEncodingAlias(const char *name, const char *alias) {
1087     int i;
1088     char upper[100];
1089 
1090     if ((name == NULL) || (alias == NULL))
1091 	return(-1);
1092 
1093     for (i = 0;i < 99;i++) {
1094         upper[i] = toupper(alias[i]);
1095 	if (upper[i] == 0) break;
1096     }
1097     upper[i] = 0;
1098 
1099     if (xmlCharEncodingAliases == NULL) {
1100 	xmlCharEncodingAliasesNb = 0;
1101 	xmlCharEncodingAliasesMax = 20;
1102 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1103 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1104 	if (xmlCharEncodingAliases == NULL)
1105 	    return(-1);
1106     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1107 	xmlCharEncodingAliasesMax *= 2;
1108 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1109 	      xmlRealloc(xmlCharEncodingAliases,
1110 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1111     }
1112     /*
1113      * Walk down the list looking for a definition of the alias
1114      */
1115     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1116 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1117 	    /*
1118 	     * Replace the definition.
1119 	     */
1120 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1121 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1122 	    return(0);
1123 	}
1124     }
1125     /*
1126      * Add the definition
1127      */
1128     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1129     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1130     xmlCharEncodingAliasesNb++;
1131     return(0);
1132 }
1133 
1134 /**
1135  * xmlDelEncodingAlias:
1136  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1137  *
1138  * Unregisters an encoding alias @alias
1139  *
1140  * Returns 0 in case of success, -1 in case of error
1141  */
1142 int
xmlDelEncodingAlias(const char * alias)1143 xmlDelEncodingAlias(const char *alias) {
1144     int i;
1145 
1146     if (alias == NULL)
1147 	return(-1);
1148 
1149     if (xmlCharEncodingAliases == NULL)
1150 	return(-1);
1151     /*
1152      * Walk down the list looking for a definition of the alias
1153      */
1154     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1155 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1156 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1157 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1158 	    xmlCharEncodingAliasesNb--;
1159 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1160 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1161 	    return(0);
1162 	}
1163     }
1164     return(-1);
1165 }
1166 
1167 /**
1168  * xmlParseCharEncoding:
1169  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1170  *
1171  * Compare the string to the encoding schemes already known. Note
1172  * that the comparison is case insensitive accordingly to the section
1173  * [XML] 4.3.3 Character Encoding in Entities.
1174  *
1175  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1176  * if not recognized.
1177  */
1178 xmlCharEncoding
xmlParseCharEncoding(const char * name)1179 xmlParseCharEncoding(const char* name)
1180 {
1181     const char *alias;
1182     char upper[500];
1183     int i;
1184 
1185     if (name == NULL)
1186 	return(XML_CHAR_ENCODING_NONE);
1187 
1188     /*
1189      * Do the alias resolution
1190      */
1191     alias = xmlGetEncodingAlias(name);
1192     if (alias != NULL)
1193 	name = alias;
1194 
1195     for (i = 0;i < 499;i++) {
1196         upper[i] = toupper(name[i]);
1197 	if (upper[i] == 0) break;
1198     }
1199     upper[i] = 0;
1200 
1201     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1202     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1203     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1204 
1205     /*
1206      * NOTE: if we were able to parse this, the endianness of UTF16 is
1207      *       already found and in use
1208      */
1209     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1210     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1211 
1212     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1213     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1214     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1215 
1216     /*
1217      * NOTE: if we were able to parse this, the endianness of UCS4 is
1218      *       already found and in use
1219      */
1220     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1221     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1222     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1223 
1224 
1225     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1226     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1227     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1228 
1229     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1230     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1231     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1232 
1233     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1234     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1235     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1236     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1237     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1238     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1239     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1240 
1241     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1242     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1243     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1244 
1245 #ifdef DEBUG_ENCODING
1246     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1247 #endif
1248     return(XML_CHAR_ENCODING_ERROR);
1249 }
1250 
1251 /**
1252  * xmlGetCharEncodingName:
1253  * @enc:  the encoding
1254  *
1255  * The "canonical" name for XML encoding.
1256  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1257  * Section 4.3.3  Character Encoding in Entities
1258  *
1259  * Returns the canonical name for the given encoding
1260  */
1261 
1262 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1263 xmlGetCharEncodingName(xmlCharEncoding enc) {
1264     switch (enc) {
1265         case XML_CHAR_ENCODING_ERROR:
1266 	    return(NULL);
1267         case XML_CHAR_ENCODING_NONE:
1268 	    return(NULL);
1269         case XML_CHAR_ENCODING_UTF8:
1270 	    return("UTF-8");
1271         case XML_CHAR_ENCODING_UTF16LE:
1272 	    return("UTF-16");
1273         case XML_CHAR_ENCODING_UTF16BE:
1274 	    return("UTF-16");
1275         case XML_CHAR_ENCODING_EBCDIC:
1276             return("EBCDIC");
1277         case XML_CHAR_ENCODING_UCS4LE:
1278             return("ISO-10646-UCS-4");
1279         case XML_CHAR_ENCODING_UCS4BE:
1280             return("ISO-10646-UCS-4");
1281         case XML_CHAR_ENCODING_UCS4_2143:
1282             return("ISO-10646-UCS-4");
1283         case XML_CHAR_ENCODING_UCS4_3412:
1284             return("ISO-10646-UCS-4");
1285         case XML_CHAR_ENCODING_UCS2:
1286             return("ISO-10646-UCS-2");
1287         case XML_CHAR_ENCODING_8859_1:
1288 	    return("ISO-8859-1");
1289         case XML_CHAR_ENCODING_8859_2:
1290 	    return("ISO-8859-2");
1291         case XML_CHAR_ENCODING_8859_3:
1292 	    return("ISO-8859-3");
1293         case XML_CHAR_ENCODING_8859_4:
1294 	    return("ISO-8859-4");
1295         case XML_CHAR_ENCODING_8859_5:
1296 	    return("ISO-8859-5");
1297         case XML_CHAR_ENCODING_8859_6:
1298 	    return("ISO-8859-6");
1299         case XML_CHAR_ENCODING_8859_7:
1300 	    return("ISO-8859-7");
1301         case XML_CHAR_ENCODING_8859_8:
1302 	    return("ISO-8859-8");
1303         case XML_CHAR_ENCODING_8859_9:
1304 	    return("ISO-8859-9");
1305         case XML_CHAR_ENCODING_2022_JP:
1306             return("ISO-2022-JP");
1307         case XML_CHAR_ENCODING_SHIFT_JIS:
1308             return("Shift-JIS");
1309         case XML_CHAR_ENCODING_EUC_JP:
1310             return("EUC-JP");
1311 	case XML_CHAR_ENCODING_ASCII:
1312 	    return(NULL);
1313     }
1314     return(NULL);
1315 }
1316 
1317 /************************************************************************
1318  *									*
1319  *			Char encoding handlers				*
1320  *									*
1321  ************************************************************************/
1322 
1323 
/*
 * Table of registered encoding handlers.
 *
 * MAX_ENCODING_HANDLERS is the fixed capacity of the table: the size
 * should be growable, but it's not a big deal ...
 * xmlRegisterCharEncodingHandler() refuses new entries once it is reached.
 *
 * handlers is allocated by xmlInitCharEncodingHandlers() with room for
 * MAX_ENCODING_HANDLERS pointers and stays NULL until then.
 * nbCharEncodingHandler is the number of slots currently in use.
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is the value xmlFindCharEncodingHandler() returns for a NULL or
 * empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1335 
1336 /**
1337  * xmlNewCharEncodingHandler:
1338  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1339  * @input:  the xmlCharEncodingInputFunc to read that encoding
1340  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1341  *
1342  * Create and registers an xmlCharEncodingHandler.
1343  *
1344  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1345  */
1346 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1347 xmlNewCharEncodingHandler(const char *name,
1348                           xmlCharEncodingInputFunc input,
1349                           xmlCharEncodingOutputFunc output) {
1350     xmlCharEncodingHandlerPtr handler;
1351     const char *alias;
1352     char upper[500];
1353     int i;
1354     char *up = NULL;
1355 
1356     /*
1357      * Do the alias resolution
1358      */
1359     alias = xmlGetEncodingAlias(name);
1360     if (alias != NULL)
1361 	name = alias;
1362 
1363     /*
1364      * Keep only the uppercase version of the encoding.
1365      */
1366     if (name == NULL) {
1367         xmlEncodingErr(XML_I18N_NO_NAME,
1368 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1369 	return(NULL);
1370     }
1371     for (i = 0;i < 499;i++) {
1372         upper[i] = toupper(name[i]);
1373 	if (upper[i] == 0) break;
1374     }
1375     upper[i] = 0;
1376     up = xmlMemStrdup(upper);
1377     if (up == NULL) {
1378         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1379 	return(NULL);
1380     }
1381 
1382     /*
1383      * allocate and fill-up an handler block.
1384      */
1385     handler = (xmlCharEncodingHandlerPtr)
1386               xmlMalloc(sizeof(xmlCharEncodingHandler));
1387     if (handler == NULL) {
1388         xmlFree(up);
1389         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1390 	return(NULL);
1391     }
1392     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1393     handler->input = input;
1394     handler->output = output;
1395     handler->name = up;
1396 
1397 #ifdef LIBXML_ICONV_ENABLED
1398     handler->iconv_in = NULL;
1399     handler->iconv_out = NULL;
1400 #endif
1401 #ifdef LIBXML_ICU_ENABLED
1402     handler->uconv_in = NULL;
1403     handler->uconv_out = NULL;
1404 #endif
1405 
1406     /*
1407      * registers and returns the handler.
1408      */
1409     xmlRegisterCharEncodingHandler(handler);
1410 #ifdef DEBUG_ENCODING
1411     xmlGenericError(xmlGenericErrorContext,
1412 	    "Registered encoding handler for %s\n", name);
1413 #endif
1414     return(handler);
1415 }
1416 
1417 /**
1418  * xmlInitCharEncodingHandlers:
1419  *
1420  * Initialize the char encoding support, it registers the default
1421  * encoding supported.
1422  * NOTE: while public, this function usually doesn't need to be called
1423  *       in normal processing.
1424  */
1425 void
xmlInitCharEncodingHandlers(void)1426 xmlInitCharEncodingHandlers(void) {
1427     unsigned short int tst = 0x1234;
1428     unsigned char *ptr = (unsigned char *) &tst;
1429 
1430     if (handlers != NULL) return;
1431 
1432     handlers = (xmlCharEncodingHandlerPtr *)
1433         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1434 
1435     if (*ptr == 0x12) xmlLittleEndian = 0;
1436     else if (*ptr == 0x34) xmlLittleEndian = 1;
1437     else {
1438         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1439 	               "Odd problem at endianness detection\n", NULL);
1440     }
1441 
1442     if (handlers == NULL) {
1443         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1444 	return;
1445     }
1446     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1447 #ifdef LIBXML_OUTPUT_ENABLED
1448     xmlUTF16LEHandler =
1449           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1450     xmlUTF16BEHandler =
1451           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1452     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1453     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1454     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1455     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1456 #ifdef LIBXML_HTML_ENABLED
1457     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1458 #endif
1459 #else
1460     xmlUTF16LEHandler =
1461           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1462     xmlUTF16BEHandler =
1463           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1464     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1465     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1466     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1467     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1468 #endif /* LIBXML_OUTPUT_ENABLED */
1469 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1470 #ifdef LIBXML_ISO8859X_ENABLED
1471     xmlRegisterCharEncodingHandlersISO8859x ();
1472 #endif
1473 #endif
1474 
1475 }
1476 
1477 /**
1478  * xmlCleanupCharEncodingHandlers:
1479  *
1480  * Cleanup the memory allocated for the char encoding support, it
1481  * unregisters all the encoding handlers and the aliases.
1482  */
1483 void
xmlCleanupCharEncodingHandlers(void)1484 xmlCleanupCharEncodingHandlers(void) {
1485     xmlCleanupEncodingAliases();
1486 
1487     if (handlers == NULL) return;
1488 
1489     for (;nbCharEncodingHandler > 0;) {
1490         nbCharEncodingHandler--;
1491 	if (handlers[nbCharEncodingHandler] != NULL) {
1492 	    if (handlers[nbCharEncodingHandler]->name != NULL)
1493 		xmlFree(handlers[nbCharEncodingHandler]->name);
1494 	    xmlFree(handlers[nbCharEncodingHandler]);
1495 	}
1496     }
1497     xmlFree(handlers);
1498     handlers = NULL;
1499     nbCharEncodingHandler = 0;
1500     xmlDefaultCharEncodingHandler = NULL;
1501 }
1502 
1503 /**
1504  * xmlRegisterCharEncodingHandler:
1505  * @handler:  the xmlCharEncodingHandlerPtr handler block
1506  *
1507  * Register the char encoding handler, surprising, isn't it ?
1508  */
1509 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1510 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1511     if (handlers == NULL) xmlInitCharEncodingHandlers();
1512     if ((handler == NULL) || (handlers == NULL)) {
1513         xmlEncodingErr(XML_I18N_NO_HANDLER,
1514 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1515 	return;
1516     }
1517 
1518     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1519         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1520 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1521 	               "MAX_ENCODING_HANDLERS");
1522 	return;
1523     }
1524     handlers[nbCharEncodingHandler++] = handler;
1525 }
1526 
1527 /**
1528  * xmlGetCharEncodingHandler:
1529  * @enc:  an xmlCharEncoding value.
1530  *
1531  * Search in the registered set the handler able to read/write that encoding.
1532  *
1533  * Returns the handler or NULL if not found
1534  */
1535 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1536 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1537     xmlCharEncodingHandlerPtr handler;
1538 
1539     if (handlers == NULL) xmlInitCharEncodingHandlers();
1540     switch (enc) {
1541         case XML_CHAR_ENCODING_ERROR:
1542 	    return(NULL);
1543         case XML_CHAR_ENCODING_NONE:
1544 	    return(NULL);
1545         case XML_CHAR_ENCODING_UTF8:
1546 	    return(NULL);
1547         case XML_CHAR_ENCODING_UTF16LE:
1548 	    return(xmlUTF16LEHandler);
1549         case XML_CHAR_ENCODING_UTF16BE:
1550 	    return(xmlUTF16BEHandler);
1551         case XML_CHAR_ENCODING_EBCDIC:
1552             handler = xmlFindCharEncodingHandler("EBCDIC");
1553             if (handler != NULL) return(handler);
1554             handler = xmlFindCharEncodingHandler("ebcdic");
1555             if (handler != NULL) return(handler);
1556             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1557             if (handler != NULL) return(handler);
1558             handler = xmlFindCharEncodingHandler("IBM-037");
1559             if (handler != NULL) return(handler);
1560 	    break;
1561         case XML_CHAR_ENCODING_UCS4BE:
1562             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1563             if (handler != NULL) return(handler);
1564             handler = xmlFindCharEncodingHandler("UCS-4");
1565             if (handler != NULL) return(handler);
1566             handler = xmlFindCharEncodingHandler("UCS4");
1567             if (handler != NULL) return(handler);
1568 	    break;
1569         case XML_CHAR_ENCODING_UCS4LE:
1570             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571             if (handler != NULL) return(handler);
1572             handler = xmlFindCharEncodingHandler("UCS-4");
1573             if (handler != NULL) return(handler);
1574             handler = xmlFindCharEncodingHandler("UCS4");
1575             if (handler != NULL) return(handler);
1576 	    break;
1577         case XML_CHAR_ENCODING_UCS4_2143:
1578 	    break;
1579         case XML_CHAR_ENCODING_UCS4_3412:
1580 	    break;
1581         case XML_CHAR_ENCODING_UCS2:
1582             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1583             if (handler != NULL) return(handler);
1584             handler = xmlFindCharEncodingHandler("UCS-2");
1585             if (handler != NULL) return(handler);
1586             handler = xmlFindCharEncodingHandler("UCS2");
1587             if (handler != NULL) return(handler);
1588 	    break;
1589 
1590 	    /*
1591 	     * We used to keep ISO Latin encodings native in the
1592 	     * generated data. This led to so many problems that
1593 	     * this has been removed. One can still change this
1594 	     * back by registering no-ops encoders for those
1595 	     */
1596         case XML_CHAR_ENCODING_8859_1:
1597 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1598 	    if (handler != NULL) return(handler);
1599 	    break;
1600         case XML_CHAR_ENCODING_8859_2:
1601 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1602 	    if (handler != NULL) return(handler);
1603 	    break;
1604         case XML_CHAR_ENCODING_8859_3:
1605 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1606 	    if (handler != NULL) return(handler);
1607 	    break;
1608         case XML_CHAR_ENCODING_8859_4:
1609 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1610 	    if (handler != NULL) return(handler);
1611 	    break;
1612         case XML_CHAR_ENCODING_8859_5:
1613 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1614 	    if (handler != NULL) return(handler);
1615 	    break;
1616         case XML_CHAR_ENCODING_8859_6:
1617 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1618 	    if (handler != NULL) return(handler);
1619 	    break;
1620         case XML_CHAR_ENCODING_8859_7:
1621 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1622 	    if (handler != NULL) return(handler);
1623 	    break;
1624         case XML_CHAR_ENCODING_8859_8:
1625 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1626 	    if (handler != NULL) return(handler);
1627 	    break;
1628         case XML_CHAR_ENCODING_8859_9:
1629 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1630 	    if (handler != NULL) return(handler);
1631 	    break;
1632 
1633 
1634         case XML_CHAR_ENCODING_2022_JP:
1635             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1636             if (handler != NULL) return(handler);
1637 	    break;
1638         case XML_CHAR_ENCODING_SHIFT_JIS:
1639             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1640             if (handler != NULL) return(handler);
1641             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1642             if (handler != NULL) return(handler);
1643             handler = xmlFindCharEncodingHandler("Shift_JIS");
1644             if (handler != NULL) return(handler);
1645 	    break;
1646         case XML_CHAR_ENCODING_EUC_JP:
1647             handler = xmlFindCharEncodingHandler("EUC-JP");
1648             if (handler != NULL) return(handler);
1649 	    break;
1650 	default:
1651 	    break;
1652     }
1653 
1654 #ifdef DEBUG_ENCODING
1655     xmlGenericError(xmlGenericErrorContext,
1656 	    "No handler found for encoding %d\n", enc);
1657 #endif
1658     return(NULL);
1659 }
1660 
1661 /**
1662  * xmlFindCharEncodingHandler:
1663  * @name:  a string describing the char encoding.
1664  *
1665  * Search in the registered set the handler able to read/write that encoding.
1666  *
1667  * Returns the handler or NULL if not found
1668  */
1669 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1670 xmlFindCharEncodingHandler(const char *name) {
1671     const char *nalias;
1672     const char *norig;
1673     xmlCharEncoding alias;
1674 #ifdef LIBXML_ICONV_ENABLED
1675     xmlCharEncodingHandlerPtr enc;
1676     iconv_t icv_in, icv_out;
1677 #endif /* LIBXML_ICONV_ENABLED */
1678 #ifdef LIBXML_ICU_ENABLED
1679     xmlCharEncodingHandlerPtr encu;
1680     uconv_t *ucv_in, *ucv_out;
1681 #endif /* LIBXML_ICU_ENABLED */
1682     char upper[100];
1683     int i;
1684 
1685     if (handlers == NULL) xmlInitCharEncodingHandlers();
1686     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1687     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1688 
1689     /*
1690      * Do the alias resolution
1691      */
1692     norig = name;
1693     nalias = xmlGetEncodingAlias(name);
1694     if (nalias != NULL)
1695 	name = nalias;
1696 
1697     /*
1698      * Check first for directly registered encoding names
1699      */
1700     for (i = 0;i < 99;i++) {
1701         upper[i] = toupper(name[i]);
1702 	if (upper[i] == 0) break;
1703     }
1704     upper[i] = 0;
1705 
1706     if (handlers != NULL) {
1707         for (i = 0;i < nbCharEncodingHandler; i++) {
1708             if (!strcmp(upper, handlers[i]->name)) {
1709 #ifdef DEBUG_ENCODING
1710                 xmlGenericError(xmlGenericErrorContext,
1711                         "Found registered handler for encoding %s\n", name);
1712 #endif
1713                 return(handlers[i]);
1714             }
1715         }
1716     }
1717 
1718 #ifdef LIBXML_ICONV_ENABLED
1719     /* check whether iconv can handle this */
1720     icv_in = iconv_open("UTF-8", name);
1721     icv_out = iconv_open(name, "UTF-8");
1722     if (icv_in == (iconv_t) -1) {
1723         icv_in = iconv_open("UTF-8", upper);
1724     }
1725     if (icv_out == (iconv_t) -1) {
1726 	icv_out = iconv_open(upper, "UTF-8");
1727     }
1728     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1729 	    enc = (xmlCharEncodingHandlerPtr)
1730 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1731 	    if (enc == NULL) {
1732 	        iconv_close(icv_in);
1733 	        iconv_close(icv_out);
1734 		return(NULL);
1735 	    }
1736             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1737 	    enc->name = xmlMemStrdup(name);
1738 	    enc->input = NULL;
1739 	    enc->output = NULL;
1740 	    enc->iconv_in = icv_in;
1741 	    enc->iconv_out = icv_out;
1742 #ifdef DEBUG_ENCODING
1743             xmlGenericError(xmlGenericErrorContext,
1744 		    "Found iconv handler for encoding %s\n", name);
1745 #endif
1746 	    return enc;
1747     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1748 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1749 		    "iconv : problems with filters for '%s'\n", name);
1750     }
1751 #endif /* LIBXML_ICONV_ENABLED */
1752 #ifdef LIBXML_ICU_ENABLED
1753     /* check whether icu can handle this */
1754     ucv_in = openIcuConverter(name, 1);
1755     ucv_out = openIcuConverter(name, 0);
1756     if (ucv_in != NULL && ucv_out != NULL) {
1757 	    encu = (xmlCharEncodingHandlerPtr)
1758 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1759 	    if (encu == NULL) {
1760                 closeIcuConverter(ucv_in);
1761                 closeIcuConverter(ucv_out);
1762 		return(NULL);
1763 	    }
1764             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1765 	    encu->name = xmlMemStrdup(name);
1766 	    encu->input = NULL;
1767 	    encu->output = NULL;
1768 	    encu->uconv_in = ucv_in;
1769 	    encu->uconv_out = ucv_out;
1770 #ifdef DEBUG_ENCODING
1771             xmlGenericError(xmlGenericErrorContext,
1772 		    "Found ICU converter handler for encoding %s\n", name);
1773 #endif
1774 	    return encu;
1775     } else if (ucv_in != NULL || ucv_out != NULL) {
1776             closeIcuConverter(ucv_in);
1777             closeIcuConverter(ucv_out);
1778 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1779 		    "ICU converter : problems with filters for '%s'\n", name);
1780     }
1781 #endif /* LIBXML_ICU_ENABLED */
1782 
1783 #ifdef DEBUG_ENCODING
1784     xmlGenericError(xmlGenericErrorContext,
1785 	    "No handler found for encoding %s\n", name);
1786 #endif
1787 
1788     /*
1789      * Fallback using the canonical names
1790      */
1791     alias = xmlParseCharEncoding(norig);
1792     if (alias != XML_CHAR_ENCODING_ERROR) {
1793         const char* canon;
1794         canon = xmlGetCharEncodingName(alias);
1795         if ((canon != NULL) && (strcmp(name, canon))) {
1796 	    return(xmlFindCharEncodingHandler(canon));
1797         }
1798     }
1799 
1800     /* If "none of the above", give up */
1801     return(NULL);
1802 }
1803 
1804 /************************************************************************
1805  *									*
1806  *		ICONV based generic conversion functions		*
1807  *									*
1808  ************************************************************************/
1809 
1810 #ifdef LIBXML_ICONV_ENABLED
1811 /**
1812  * xmlIconvWrapper:
1813  * @cd:		iconv converter data structure
1814  * @out:  a pointer to an array of bytes to store the result
1815  * @outlen:  the length of @out
1816  * @in:  a pointer to an array of ISO Latin 1 chars
1817  * @inlen:  the length of @in
1818  *
1819  * Returns 0 if success, or
1820  *     -1 by lack of space, or
1821  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1822  *        the result of transformation can't fit into the encoding we want), or
1823  *     -3 if there the last byte can't form a single output char.
1824  *
1825  * The value of @inlen after return is the number of octets consumed
1826  *     as the return value is positive, else unpredictable.
1827  * The value of @outlen after return is the number of ocetes consumed.
1828  */
1829 static int
xmlIconvWrapper(iconv_t cd,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1830 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1831                 const unsigned char *in, int *inlen) {
1832     size_t icv_inlen, icv_outlen;
1833     const char *icv_in = (const char *) in;
1834     char *icv_out = (char *) out;
1835     int ret;
1836 
1837     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1838         if (outlen != NULL) *outlen = 0;
1839         return(-1);
1840     }
1841     icv_inlen = *inlen;
1842     icv_outlen = *outlen;
1843     ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1844     *inlen -= icv_inlen;
1845     *outlen -= icv_outlen;
1846     if ((icv_inlen != 0) || (ret == -1)) {
1847 #ifdef EILSEQ
1848         if (errno == EILSEQ) {
1849             return -2;
1850         } else
1851 #endif
1852 #ifdef E2BIG
1853         if (errno == E2BIG) {
1854             return -1;
1855         } else
1856 #endif
1857 #ifdef EINVAL
1858         if (errno == EINVAL) {
1859             return -3;
1860         } else
1861 #endif
1862         {
1863             return -3;
1864         }
1865     }
1866     return 0;
1867 }
1868 #endif /* LIBXML_ICONV_ENABLED */
1869 
1870 /************************************************************************
1871  *									*
1872  *		ICU based generic conversion functions		*
1873  *									*
1874  ************************************************************************/
1875 
1876 #ifdef LIBXML_ICU_ENABLED
1877 /**
1878  * xmlUconvWrapper:
1879  * @cd: ICU uconverter data structure
1880  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1881  * @out:  a pointer to an array of bytes to store the result
1882  * @outlen:  the length of @out
1883  * @in:  a pointer to an array of ISO Latin 1 chars
1884  * @inlen:  the length of @in
1885  * @flush: if true, indicates end of input
1886  *
1887  * Returns 0 if success, or
1888  *     -1 by lack of space, or
1889  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1890  *        the result of transformation can't fit into the encoding we want), or
1891  *     -3 if there the last byte can't form a single output char.
1892  *
1893  * The value of @inlen after return is the number of octets consumed
1894  *     as the return value is positive, else unpredictable.
1895  * The value of @outlen after return is the number of ocetes consumed.
1896  */
1897 static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1898 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1899                 const unsigned char *in, int *inlen, int flush) {
1900     const char *ucv_in = (const char *) in;
1901     char *ucv_out = (char *) out;
1902     UErrorCode err = U_ZERO_ERROR;
1903 
1904     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1905         if (outlen != NULL) *outlen = 0;
1906         return(-1);
1907     }
1908 
1909     if (toUnicode) {
1910         /* encoding => UTF-16 => UTF-8 */
1911         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1912                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1913                        &cd->pivot_source, &cd->pivot_target,
1914                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1915     } else {
1916         /* UTF-8 => UTF-16 => encoding */
1917         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1918                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1919                        &cd->pivot_source, &cd->pivot_target,
1920                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1921     }
1922     *inlen = ucv_in - (const char*) in;
1923     *outlen = ucv_out - (char *) out;
1924     if (U_SUCCESS(err)) {
1925         /* reset pivot buf if this is the last call for input (flush==TRUE) */
1926         if (flush)
1927             cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1928         return 0;
1929     }
1930     if (err == U_BUFFER_OVERFLOW_ERROR)
1931         return -1;
1932     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1933         return -2;
1934     return -3;
1935 }
1936 #endif /* LIBXML_ICU_ENABLED */
1937 
1938 /************************************************************************
1939  *									*
1940  *		The real API used by libxml for on-the-fly conversion	*
1941  *									*
1942  ************************************************************************/
1943 
1944 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1945 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1946                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1947     int ret;
1948     (void)flush;
1949 
1950     if (handler->input != NULL) {
1951         ret = handler->input(out, outlen, in, inlen);
1952     }
1953 #ifdef LIBXML_ICONV_ENABLED
1954     else if (handler->iconv_in != NULL) {
1955         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1956     }
1957 #endif /* LIBXML_ICONV_ENABLED */
1958 #ifdef LIBXML_ICU_ENABLED
1959     else if (handler->uconv_in != NULL) {
1960         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1961                               flush);
1962     }
1963 #endif /* LIBXML_ICU_ENABLED */
1964     else {
1965         *outlen = 0;
1966         *inlen = 0;
1967         ret = -2;
1968     }
1969 
1970     return(ret);
1971 }
1972 
1973 /* Returns -4 if no output function was found. */
1974 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1975 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1976                   int *outlen, const unsigned char *in, int *inlen) {
1977     int ret;
1978 
1979     if (handler->output != NULL) {
1980         ret = handler->output(out, outlen, in, inlen);
1981     }
1982 #ifdef LIBXML_ICONV_ENABLED
1983     else if (handler->iconv_out != NULL) {
1984         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1985     }
1986 #endif /* LIBXML_ICONV_ENABLED */
1987 #ifdef LIBXML_ICU_ENABLED
1988     else if (handler->uconv_out != NULL) {
1989         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
1990                               TRUE);
1991     }
1992 #endif /* LIBXML_ICU_ENABLED */
1993     else {
1994         *outlen = 0;
1995         *inlen = 0;
1996         ret = -4;
1997     }
1998 
1999     return(ret);
2000 }
2001 
2002 /**
2003  * xmlCharEncFirstLineInt:
2004  * @handler:	char enconding transformation data structure
2005  * @out:  an xmlBuffer for the output.
2006  * @in:  an xmlBuffer for the input
2007  * @len:  number of bytes to convert for the first line, or -1
2008  *
2009  * Front-end for the encoding handler input function, but handle only
2010  * the very first line, i.e. limit itself to 45 chars.
2011  *
2012  * Returns the number of byte written if success, or
2013  *     -1 general error
2014  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2015  *        the result of transformation can't fit into the encoding we want), or
2016  */
2017 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2018 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2019                        xmlBufferPtr in, int len) {
2020     int ret;
2021     int written;
2022     int toconv;
2023 
2024     if (handler == NULL) return(-1);
2025     if (out == NULL) return(-1);
2026     if (in == NULL) return(-1);
2027 
2028     /* calculate space available */
2029     written = out->size - out->use - 1; /* count '\0' */
2030     toconv = in->use;
2031     /*
2032      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2033      * 45 chars should be sufficient to reach the end of the encoding
2034      * declaration without going too far inside the document content.
2035      * on UTF-16 this means 90bytes, on UCS4 this means 180
2036      * The actual value depending on guessed encoding is passed as @len
2037      * if provided
2038      */
2039     if (len >= 0) {
2040         if (toconv > len)
2041             toconv = len;
2042     } else {
2043         if (toconv > 180)
2044             toconv = 180;
2045     }
2046     if (toconv * 2 >= written) {
2047         xmlBufferGrow(out, toconv * 2);
2048 	written = out->size - out->use - 1;
2049     }
2050 
2051     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2052                            in->content, &toconv, 0);
2053     xmlBufferShrink(in, toconv);
2054     out->use += written;
2055     out->content[out->use] = 0;
2056     if (ret == -1) ret = -3;
2057 
2058 #ifdef DEBUG_ENCODING
2059     switch (ret) {
2060         case 0:
2061 	    xmlGenericError(xmlGenericErrorContext,
2062 		    "converted %d bytes to %d bytes of input\n",
2063 	            toconv, written);
2064 	    break;
2065         case -1:
2066 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2067 	            toconv, written, in->use);
2068 	    break;
2069         case -2:
2070 	    xmlGenericError(xmlGenericErrorContext,
2071 		    "input conversion failed due to input error\n");
2072 	    break;
2073         case -3:
2074 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2075 	            toconv, written, in->use);
2076 	    break;
2077 	default:
2078 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2079     }
2080 #endif /* DEBUG_ENCODING */
2081     /*
2082      * Ignore when input buffer is not on a boundary
2083      */
2084     if (ret == -3) ret = 0;
2085     if (ret == -1) ret = 0;
2086     return(ret);
2087 }
2088 
2089 /**
2090  * xmlCharEncFirstLine:
2091  * @handler:	char enconding transformation data structure
2092  * @out:  an xmlBuffer for the output.
2093  * @in:  an xmlBuffer for the input
2094  *
2095  * Front-end for the encoding handler input function, but handle only
2096  * the very first line, i.e. limit itself to 45 chars.
2097  *
2098  * Returns the number of byte written if success, or
2099  *     -1 general error
2100  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2101  *        the result of transformation can't fit into the encoding we want), or
2102  */
2103 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2104 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2105                  xmlBufferPtr in) {
2106     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2107 }
2108 
2109 /**
2110  * xmlCharEncFirstLineInput:
2111  * @input: a parser input buffer
2112  * @len:  number of bytes to convert for the first line, or -1
2113  *
2114  * Front-end for the encoding handler input function, but handle only
2115  * the very first line. Point is that this is based on autodetection
2116  * of the encoding and once that first line is converted we may find
2117  * out that a different decoder is needed to process the input.
2118  *
2119  * Returns the number of byte written if success, or
2120  *     -1 general error
2121  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2122  *        the result of transformation can't fit into the encoding we want), or
2123  */
2124 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2125 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2126 {
2127     int ret;
2128     size_t written;
2129     size_t toconv;
2130     int c_in;
2131     int c_out;
2132     xmlBufPtr in;
2133     xmlBufPtr out;
2134 
2135     if ((input == NULL) || (input->encoder == NULL) ||
2136         (input->buffer == NULL) || (input->raw == NULL))
2137         return (-1);
2138     out = input->buffer;
2139     in = input->raw;
2140 
2141     toconv = xmlBufUse(in);
2142     if (toconv == 0)
2143         return (0);
2144     written = xmlBufAvail(out) - 1; /* count '\0' */
2145     /*
2146      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2147      * 45 chars should be sufficient to reach the end of the encoding
2148      * declaration without going too far inside the document content.
2149      * on UTF-16 this means 90bytes, on UCS4 this means 180
2150      * The actual value depending on guessed encoding is passed as @len
2151      * if provided
2152      */
2153     if (len >= 0) {
2154         if (toconv > (unsigned int) len)
2155             toconv = len;
2156     } else {
2157         if (toconv > 180)
2158             toconv = 180;
2159     }
2160     if (toconv * 2 >= written) {
2161         xmlBufGrow(out, toconv * 2);
2162         written = xmlBufAvail(out) - 1;
2163     }
2164     if (written > 360)
2165         written = 360;
2166 
2167     c_in = toconv;
2168     c_out = written;
2169     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2170                            xmlBufContent(in), &c_in, 0);
2171     xmlBufShrink(in, c_in);
2172     xmlBufAddLen(out, c_out);
2173     if (ret == -1)
2174         ret = -3;
2175 
2176     switch (ret) {
2177         case 0:
2178 #ifdef DEBUG_ENCODING
2179             xmlGenericError(xmlGenericErrorContext,
2180                             "converted %d bytes to %d bytes of input\n",
2181                             c_in, c_out);
2182 #endif
2183             break;
2184         case -1:
2185 #ifdef DEBUG_ENCODING
2186             xmlGenericError(xmlGenericErrorContext,
2187                          "converted %d bytes to %d bytes of input, %d left\n",
2188                             c_in, c_out, (int)xmlBufUse(in));
2189 #endif
2190             break;
2191         case -3:
2192 #ifdef DEBUG_ENCODING
2193             xmlGenericError(xmlGenericErrorContext,
2194                         "converted %d bytes to %d bytes of input, %d left\n",
2195                             c_in, c_out, (int)xmlBufUse(in));
2196 #endif
2197             break;
2198         case -2: {
2199             char buf[50];
2200             const xmlChar *content = xmlBufContent(in);
2201 
2202 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2203 		     content[0], content[1],
2204 		     content[2], content[3]);
2205 	    buf[49] = 0;
2206 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2207 		    "input conversion failed due to input error, bytes %s\n",
2208 		           buf);
2209         }
2210     }
2211     /*
2212      * Ignore when input buffer is not on a boundary
2213      */
2214     if (ret == -3) ret = 0;
2215     if (ret == -1) ret = 0;
2216     return(ret);
2217 }
2218 
2219 /**
2220  * xmlCharEncInput:
2221  * @input: a parser input buffer
2222  * @flush: try to flush all the raw buffer
2223  *
2224  * Generic front-end for the encoding handler on parser input
2225  *
2226  * Returns the number of byte written if success, or
2227  *     -1 general error
2228  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2229  *        the result of transformation can't fit into the encoding we want), or
2230  */
2231 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2232 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2233 {
2234     int ret;
2235     size_t written;
2236     size_t toconv;
2237     int c_in;
2238     int c_out;
2239     xmlBufPtr in;
2240     xmlBufPtr out;
2241 
2242     if ((input == NULL) || (input->encoder == NULL) ||
2243         (input->buffer == NULL) || (input->raw == NULL))
2244         return (-1);
2245     out = input->buffer;
2246     in = input->raw;
2247 
2248     toconv = xmlBufUse(in);
2249     if (toconv == 0)
2250         return (0);
2251     if ((toconv > 64 * 1024) && (flush == 0))
2252         toconv = 64 * 1024;
2253     written = xmlBufAvail(out);
2254     if (written > 0)
2255         written--; /* count '\0' */
2256     if (toconv * 2 >= written) {
2257         xmlBufGrow(out, toconv * 2);
2258         written = xmlBufAvail(out);
2259         if (written > 0)
2260             written--; /* count '\0' */
2261     }
2262     if ((written > 128 * 1024) && (flush == 0))
2263         written = 128 * 1024;
2264 
2265     c_in = toconv;
2266     c_out = written;
2267     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2268                            xmlBufContent(in), &c_in, flush);
2269     xmlBufShrink(in, c_in);
2270     xmlBufAddLen(out, c_out);
2271     if (ret == -1)
2272         ret = -3;
2273 
2274     switch (ret) {
2275         case 0:
2276 #ifdef DEBUG_ENCODING
2277             xmlGenericError(xmlGenericErrorContext,
2278                             "converted %d bytes to %d bytes of input\n",
2279                             c_in, c_out);
2280 #endif
2281             break;
2282         case -1:
2283 #ifdef DEBUG_ENCODING
2284             xmlGenericError(xmlGenericErrorContext,
2285                          "converted %d bytes to %d bytes of input, %d left\n",
2286                             c_in, c_out, (int)xmlBufUse(in));
2287 #endif
2288             break;
2289         case -3:
2290 #ifdef DEBUG_ENCODING
2291             xmlGenericError(xmlGenericErrorContext,
2292                         "converted %d bytes to %d bytes of input, %d left\n",
2293                             c_in, c_out, (int)xmlBufUse(in));
2294 #endif
2295             break;
2296         case -2: {
2297             char buf[50];
2298             const xmlChar *content = xmlBufContent(in);
2299 
2300 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2301 		     content[0], content[1],
2302 		     content[2], content[3]);
2303 	    buf[49] = 0;
2304 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2305 		    "input conversion failed due to input error, bytes %s\n",
2306 		           buf);
2307         }
2308     }
2309     /*
2310      * Ignore when input buffer is not on a boundary
2311      */
2312     if (ret == -3)
2313         ret = 0;
2314     return (c_out? c_out : ret);
2315 }
2316 
2317 /**
2318  * xmlCharEncInFunc:
2319  * @handler:	char encoding transformation data structure
2320  * @out:  an xmlBuffer for the output.
2321  * @in:  an xmlBuffer for the input
2322  *
2323  * Generic front-end for the encoding handler input function
2324  *
2325  * Returns the number of byte written if success, or
2326  *     -1 general error
2327  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2328  *        the result of transformation can't fit into the encoding we want), or
2329  */
2330 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2331 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2332                  xmlBufferPtr in)
2333 {
2334     int ret;
2335     int written;
2336     int toconv;
2337 
2338     if (handler == NULL)
2339         return (-1);
2340     if (out == NULL)
2341         return (-1);
2342     if (in == NULL)
2343         return (-1);
2344 
2345     toconv = in->use;
2346     if (toconv == 0)
2347         return (0);
2348     written = out->size - out->use -1; /* count '\0' */
2349     if (toconv * 2 >= written) {
2350         xmlBufferGrow(out, out->size + toconv * 2);
2351         written = out->size - out->use - 1;
2352     }
2353     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2354                            in->content, &toconv, 1);
2355     xmlBufferShrink(in, toconv);
2356     out->use += written;
2357     out->content[out->use] = 0;
2358     if (ret == -1)
2359         ret = -3;
2360 
2361     switch (ret) {
2362         case 0:
2363 #ifdef DEBUG_ENCODING
2364             xmlGenericError(xmlGenericErrorContext,
2365                             "converted %d bytes to %d bytes of input\n",
2366                             toconv, written);
2367 #endif
2368             break;
2369         case -1:
2370 #ifdef DEBUG_ENCODING
2371             xmlGenericError(xmlGenericErrorContext,
2372                          "converted %d bytes to %d bytes of input, %d left\n",
2373                             toconv, written, in->use);
2374 #endif
2375             break;
2376         case -3:
2377 #ifdef DEBUG_ENCODING
2378             xmlGenericError(xmlGenericErrorContext,
2379                         "converted %d bytes to %d bytes of input, %d left\n",
2380                             toconv, written, in->use);
2381 #endif
2382             break;
2383         case -2: {
2384             char buf[50];
2385 
2386 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2387 		     in->content[0], in->content[1],
2388 		     in->content[2], in->content[3]);
2389 	    buf[49] = 0;
2390 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2391 		    "input conversion failed due to input error, bytes %s\n",
2392 		           buf);
2393         }
2394     }
2395     /*
2396      * Ignore when input buffer is not on a boundary
2397      */
2398     if (ret == -3)
2399         ret = 0;
2400     return (written? written : ret);
2401 }
2402 
2403 #ifdef LIBXML_OUTPUT_ENABLED
2404 /**
2405  * xmlCharEncOutput:
2406  * @output: a parser output buffer
2407  * @init: is this an initialization call without data
2408  *
2409  * Generic front-end for the encoding handler on parser output
2410  * a first call with @init == 1 has to be made first to initiate the
2411  * output in case of non-stateless encoding needing to initiate their
2412  * state or the output (like the BOM in UTF16).
2413  * In case of UTF8 sequence conversion errors for the given encoder,
2414  * the content will be automatically remapped to a CharRef sequence.
2415  *
2416  * Returns the number of byte written if success, or
2417  *     -1 general error
2418  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2419  *        the result of transformation can't fit into the encoding we want), or
2420  */
2421 int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2422 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2423 {
2424     int ret;
2425     size_t written;
2426     size_t writtentot = 0;
2427     size_t toconv;
2428     int c_in;
2429     int c_out;
2430     xmlBufPtr in;
2431     xmlBufPtr out;
2432 
2433     if ((output == NULL) || (output->encoder == NULL) ||
2434         (output->buffer == NULL) || (output->conv == NULL))
2435         return (-1);
2436     out = output->conv;
2437     in = output->buffer;
2438 
2439 retry:
2440 
2441     written = xmlBufAvail(out);
2442     if (written > 0)
2443         written--; /* count '\0' */
2444 
2445     /*
2446      * First specific handling of the initialization call
2447      */
2448     if (init) {
2449         c_in = 0;
2450         c_out = written;
2451         /* TODO: Check return value. */
2452         xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2453                           NULL, &c_in);
2454         xmlBufAddLen(out, c_out);
2455 #ifdef DEBUG_ENCODING
2456 	xmlGenericError(xmlGenericErrorContext,
2457 		"initialized encoder\n");
2458 #endif
2459         return(0);
2460     }
2461 
2462     /*
2463      * Conversion itself.
2464      */
2465     toconv = xmlBufUse(in);
2466     if (toconv == 0)
2467         return (0);
2468     if (toconv > 64 * 1024)
2469         toconv = 64 * 1024;
2470     if (toconv * 4 >= written) {
2471         xmlBufGrow(out, toconv * 4);
2472         written = xmlBufAvail(out) - 1;
2473     }
2474     if (written > 256 * 1024)
2475         written = 256 * 1024;
2476 
2477     c_in = toconv;
2478     c_out = written;
2479     ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2480                             xmlBufContent(in), &c_in);
2481     xmlBufShrink(in, c_in);
2482     xmlBufAddLen(out, c_out);
2483     writtentot += c_out;
2484     if (ret == -1) {
2485         if (c_out > 0) {
2486             /* Can be a limitation of iconv or uconv */
2487             goto retry;
2488         }
2489         ret = -3;
2490     }
2491 
2492     /*
2493      * Attempt to handle error cases
2494      */
2495     switch (ret) {
2496         case 0:
2497 #ifdef DEBUG_ENCODING
2498 	    xmlGenericError(xmlGenericErrorContext,
2499 		    "converted %d bytes to %d bytes of output\n",
2500 	            c_in, c_out);
2501 #endif
2502 	    break;
2503         case -1:
2504 #ifdef DEBUG_ENCODING
2505 	    xmlGenericError(xmlGenericErrorContext,
2506 		    "output conversion failed by lack of space\n");
2507 #endif
2508 	    break;
2509         case -3:
2510 #ifdef DEBUG_ENCODING
2511 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2512 	            c_in, c_out, (int) xmlBufUse(in));
2513 #endif
2514 	    break;
2515         case -4:
2516             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2517                            "xmlCharEncOutFunc: no output function !\n", NULL);
2518             ret = -1;
2519             break;
2520         case -2: {
2521 	    xmlChar charref[20];
2522 	    int len = (int) xmlBufUse(in);
2523             xmlChar *content = xmlBufContent(in);
2524 	    int cur, charrefLen;
2525 
2526 	    cur = xmlGetUTF8Char(content, &len);
2527 	    if (cur <= 0)
2528                 break;
2529 
2530 #ifdef DEBUG_ENCODING
2531             xmlGenericError(xmlGenericErrorContext,
2532                     "handling output conversion error\n");
2533             xmlGenericError(xmlGenericErrorContext,
2534                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2535                     content[0], content[1],
2536                     content[2], content[3]);
2537 #endif
2538             /*
2539              * Removes the UTF8 sequence, and replace it by a charref
2540              * and continue the transcoding phase, hoping the error
2541              * did not mangle the encoder state.
2542              */
2543             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2544                              "&#%d;", cur);
2545             xmlBufShrink(in, len);
2546             xmlBufGrow(out, charrefLen * 4);
2547             c_out = xmlBufAvail(out) - 1;
2548             c_in = charrefLen;
2549             ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2550                                     charref, &c_in);
2551 
2552 	    if ((ret < 0) || (c_in != charrefLen)) {
2553 		char buf[50];
2554 
2555 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2556 			 content[0], content[1],
2557 			 content[2], content[3]);
2558 		buf[49] = 0;
2559 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2560 		    "output conversion failed due to conv error, bytes %s\n",
2561 			       buf);
2562 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2563 		    content[0] = ' ';
2564                 break;
2565 	    }
2566 
2567             xmlBufAddLen(out, c_out);
2568             writtentot += c_out;
2569             goto retry;
2570 	}
2571     }
2572     return(ret);
2573 }
2574 #endif
2575 
2576 /**
2577  * xmlCharEncOutFunc:
2578  * @handler:	char enconding transformation data structure
2579  * @out:  an xmlBuffer for the output.
2580  * @in:  an xmlBuffer for the input
2581  *
2582  * Generic front-end for the encoding handler output function
2583  * a first call with @in == NULL has to be made firs to initiate the
2584  * output in case of non-stateless encoding needing to initiate their
2585  * state or the output (like the BOM in UTF16).
2586  * In case of UTF8 sequence conversion errors for the given encoder,
2587  * the content will be automatically remapped to a CharRef sequence.
2588  *
2589  * Returns the number of byte written if success, or
2590  *     -1 general error
2591  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2592  *        the result of transformation can't fit into the encoding we want), or
2593  */
2594 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2595 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2596                   xmlBufferPtr in) {
2597     int ret;
2598     int written;
2599     int writtentot = 0;
2600     int toconv;
2601     int output = 0;
2602 
2603     if (handler == NULL) return(-1);
2604     if (out == NULL) return(-1);
2605 
2606 retry:
2607 
2608     written = out->size - out->use;
2609 
2610     if (written > 0)
2611 	written--; /* Gennady: count '/0' */
2612 
2613     /*
2614      * First specific handling of in = NULL, i.e. the initialization call
2615      */
2616     if (in == NULL) {
2617         toconv = 0;
2618         /* TODO: Check return value. */
2619         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2620                           NULL, &toconv);
2621         out->use += written;
2622         out->content[out->use] = 0;
2623 #ifdef DEBUG_ENCODING
2624 	xmlGenericError(xmlGenericErrorContext,
2625 		"initialized encoder\n");
2626 #endif
2627         return(0);
2628     }
2629 
2630     /*
2631      * Conversion itself.
2632      */
2633     toconv = in->use;
2634     if (toconv == 0)
2635 	return(0);
2636     if (toconv * 4 >= written) {
2637         xmlBufferGrow(out, toconv * 4);
2638 	written = out->size - out->use - 1;
2639     }
2640     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2641                             in->content, &toconv);
2642     xmlBufferShrink(in, toconv);
2643     out->use += written;
2644     writtentot += written;
2645     out->content[out->use] = 0;
2646     if (ret == -1) {
2647         if (written > 0) {
2648             /* Can be a limitation of iconv or uconv */
2649             goto retry;
2650         }
2651         ret = -3;
2652     }
2653 
2654     if (ret >= 0) output += ret;
2655 
2656     /*
2657      * Attempt to handle error cases
2658      */
2659     switch (ret) {
2660         case 0:
2661 #ifdef DEBUG_ENCODING
2662 	    xmlGenericError(xmlGenericErrorContext,
2663 		    "converted %d bytes to %d bytes of output\n",
2664 	            toconv, written);
2665 #endif
2666 	    break;
2667         case -1:
2668 #ifdef DEBUG_ENCODING
2669 	    xmlGenericError(xmlGenericErrorContext,
2670 		    "output conversion failed by lack of space\n");
2671 #endif
2672 	    break;
2673         case -3:
2674 #ifdef DEBUG_ENCODING
2675 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2676 	            toconv, written, in->use);
2677 #endif
2678 	    break;
2679         case -4:
2680 	    xmlEncodingErr(XML_I18N_NO_OUTPUT,
2681 		           "xmlCharEncOutFunc: no output function !\n", NULL);
2682 	    ret = -1;
2683             break;
2684         case -2: {
2685 	    xmlChar charref[20];
2686 	    int len = in->use;
2687 	    const xmlChar *utf = (const xmlChar *) in->content;
2688 	    int cur, charrefLen;
2689 
2690 	    cur = xmlGetUTF8Char(utf, &len);
2691 	    if (cur <= 0)
2692                 break;
2693 
2694 #ifdef DEBUG_ENCODING
2695             xmlGenericError(xmlGenericErrorContext,
2696                     "handling output conversion error\n");
2697             xmlGenericError(xmlGenericErrorContext,
2698                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2699                     in->content[0], in->content[1],
2700                     in->content[2], in->content[3]);
2701 #endif
2702             /*
2703              * Removes the UTF8 sequence, and replace it by a charref
2704              * and continue the transcoding phase, hoping the error
2705              * did not mangle the encoder state.
2706              */
2707             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2708                              "&#%d;", cur);
2709             xmlBufferShrink(in, len);
2710             xmlBufferGrow(out, charrefLen * 4);
2711 	    written = out->size - out->use - 1;
2712             toconv = charrefLen;
2713             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2714                                     charref, &toconv);
2715 
2716 	    if ((ret < 0) || (toconv != charrefLen)) {
2717 		char buf[50];
2718 
2719 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2720 			 in->content[0], in->content[1],
2721 			 in->content[2], in->content[3]);
2722 		buf[49] = 0;
2723 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2724 		    "output conversion failed due to conv error, bytes %s\n",
2725 			       buf);
2726 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2727 		    in->content[0] = ' ';
2728 	        break;
2729 	    }
2730 
2731             out->use += written;
2732             writtentot += written;
2733             out->content[out->use] = 0;
2734             goto retry;
2735 	}
2736     }
2737     return(ret);
2738 }
2739 
2740 /**
2741  * xmlCharEncCloseFunc:
2742  * @handler:	char enconding transformation data structure
2743  *
2744  * Generic front-end for encoding handler close function
2745  *
2746  * Returns 0 if success, or -1 in case of error
2747  */
2748 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2749 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2750     int ret = 0;
2751     int tofree = 0;
2752     int i, handler_in_list = 0;
2753 
2754     if (handler == NULL) return(-1);
2755     if (handler->name == NULL) return(-1);
2756     if (handlers != NULL) {
2757         for (i = 0;i < nbCharEncodingHandler; i++) {
2758             if (handler == handlers[i]) {
2759 	        handler_in_list = 1;
2760 		break;
2761 	    }
2762 	}
2763     }
2764 #ifdef LIBXML_ICONV_ENABLED
2765     /*
2766      * Iconv handlers can be used only once, free the whole block.
2767      * and the associated icon resources.
2768      */
2769     if ((handler_in_list == 0) &&
2770         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2771         tofree = 1;
2772 	if (handler->iconv_out != NULL) {
2773 	    if (iconv_close(handler->iconv_out))
2774 		ret = -1;
2775 	    handler->iconv_out = NULL;
2776 	}
2777 	if (handler->iconv_in != NULL) {
2778 	    if (iconv_close(handler->iconv_in))
2779 		ret = -1;
2780 	    handler->iconv_in = NULL;
2781 	}
2782     }
2783 #endif /* LIBXML_ICONV_ENABLED */
2784 #ifdef LIBXML_ICU_ENABLED
2785     if ((handler_in_list == 0) &&
2786         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2787         tofree = 1;
2788 	if (handler->uconv_out != NULL) {
2789 	    closeIcuConverter(handler->uconv_out);
2790 	    handler->uconv_out = NULL;
2791 	}
2792 	if (handler->uconv_in != NULL) {
2793 	    closeIcuConverter(handler->uconv_in);
2794 	    handler->uconv_in = NULL;
2795 	}
2796     }
2797 #endif
2798     if (tofree) {
2799         /* free up only dynamic handlers iconv/uconv */
2800         if (handler->name != NULL)
2801             xmlFree(handler->name);
2802         handler->name = NULL;
2803         xmlFree(handler);
2804     }
2805 #ifdef DEBUG_ENCODING
2806     if (ret)
2807         xmlGenericError(xmlGenericErrorContext,
2808 		"failed to close the encoding handler\n");
2809     else
2810         xmlGenericError(xmlGenericErrorContext,
2811 		"closed the encoding handler\n");
2812 #endif
2813 
2814     return(ret);
2815 }
2816 
2817 /**
2818  * xmlByteConsumed:
2819  * @ctxt: an XML parser context
2820  *
2821  * This function provides the current index of the parser relative
2822  * to the start of the current entity. This function is computed in
2823  * bytes from the beginning starting at zero and finishing at the
2824  * size in byte of the file if parsing a file. The function is
2825  * of constant cost if the input is UTF-8 but can be costly if run
2826  * on non-UTF-8 input.
2827  *
2828  * Returns the index in bytes from the beginning of the entity or -1
2829  *         in case the index could not be computed.
2830  */
2831 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2832 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2833     xmlParserInputPtr in;
2834 
2835     if (ctxt == NULL) return(-1);
2836     in = ctxt->input;
2837     if (in == NULL)  return(-1);
2838     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2839         unsigned int unused = 0;
2840 	xmlCharEncodingHandler * handler = in->buf->encoder;
2841         /*
2842 	 * Encoding conversion, compute the number of unused original
2843 	 * bytes from the input not consumed and substract that from
2844 	 * the raw consumed value, this is not a cheap operation
2845 	 */
2846         if (in->end - in->cur > 0) {
2847 	    unsigned char convbuf[32000];
2848 	    const unsigned char *cur = (const unsigned char *)in->cur;
2849 	    int toconv = in->end - in->cur, written = 32000;
2850 
2851 	    int ret;
2852 
2853             do {
2854                 toconv = in->end - cur;
2855                 written = 32000;
2856                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2857                                         cur, &toconv);
2858                 if (ret < 0) {
2859                     if (written > 0)
2860                         ret = -2;
2861                     else
2862                         return(-1);
2863                 }
2864                 unused += written;
2865                 cur += toconv;
2866             } while (ret == -2);
2867 	}
2868 	if (in->buf->rawconsumed < unused)
2869 	    return(-1);
2870 	return(in->buf->rawconsumed - unused);
2871     }
2872     return(in->consumed + (in->cur - in->base));
2873 }
2874 
2875 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2876 #ifdef LIBXML_ISO8859X_ENABLED
2877 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops, without error, as soon as @out
 * is full; the caller can resume from the reported @inlen.
 *
 * @xlattable layout: 32 bytes indexed by the low 5 bits of a two-byte
 * lead, 16 bytes indexed by the low 4 bits of a three-byte lead, then
 * 64-byte blocks indexed by the low 6 bits of a trailing byte. A final
 * lookup of 0 means the character is not in the target set.
 *
 * Returns the number of bytes written to @out if success,
 *     -2 if the transcoding fails (malformed UTF-8 or a character that
 *        is not in the target character set),
 *     -3 if the input ends in the middle of a multi-byte sequence,
 *     -1 if an argument is invalid.
 * The value of @inlen after return is the number of octets consumed
 *     (only complete, successfully converted sequences are counted).
 * The value of @outlen after return is the number of octets produced.
 * Neither is modified when -1 is returned.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
	return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /*
         * Every successful branch below stores exactly one byte, so a
         * single check here keeps all the writes inside @out.
         */
        if (out >= outend)
            break;

        d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
	    d = d & 0x0F;
	    d = xlattable [48 + c2 + xlattable [48 + c1 +
			xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        /* the whole sequence was converted: commit it */
        processed = in;
    }

done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
2996 
/**
 * ISO8859xToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entries giving the code point of bytes 0x80..0xFF
 *                (index is byte - 0x80), 0 marking an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops, without error, when @out has
 * no room left; a byte >= 0x80 is only converted while at least 3 bytes
 * of room remain.
 *
 * Returns the number of bytes written to @out if success, or -1 if an
 *     argument is invalid or an undefined byte is met in the input.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 * (Neither is modified when an argument is invalid.)
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
	return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Compare the remaining room instead of computing outend - 2, which
     * would point before the buffer when *outlen is smaller than 2.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than there is room left in @out */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds test first: in may already be at the end of the input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* up to two ASCII bytes still fit in the room the main loop left over */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3062 
3063 
3064 /************************************************************************
3065  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3066  ************************************************************************/
3067 
/*
 * xmlunicodetable_ISO8859_2:
 * 128 code points for the upper half of the 8-bit set (ISO-8859-2, going
 * by the name): entry i is the code point of byte 0x80 + i. This is the
 * layout ISO8859xToUTF8() expects for its @unicodetable argument;
 * presumably a wrapper outside this part of the file passes it there.
 */
3068 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3069     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3070     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3071     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3072     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3073     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3074     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3075     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3076     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3077     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3078     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3079     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3080     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3081     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3082     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3083     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3084     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3085 };
3086 
/*
 * xmltranscodetable_ISO8859_2:
 * Two-level lookup table in the layout indexed by UTF8ToISO8859x()
 * (its @xlattable argument; presumably used for UTF-8 -> ISO-8859-2):
 *   bytes  0..31  block number for a two-byte lead (index: lead & 0x1F)
 *   bytes 32..47  block number for a three-byte lead (index: lead & 0x0F)
 *   bytes 48..    6 blocks of 64 bytes, indexed by (trailing byte & 0x3F);
 *                 a block holds either output bytes or, on the first step
 *                 of a three-byte sequence, the number of the next block.
 * A final lookup of 0 is treated as "not in character set".
 */
3087 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3088     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3089     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3090     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3091     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3093     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3094     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3096     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3097     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3098     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3099     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3100     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3101     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3102     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3103     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3104     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3105     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3106     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3108     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3109     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3110     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3111     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3112     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3113     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3114     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3115 };
3116 
/*
 * xmlunicodetable_ISO8859_3:
 * 128 code points for the upper half of the 8-bit set (ISO-8859-3, going
 * by the name): entry i is the code point of byte 0x80 + i. The 0x0000
 * entries are what ISO8859xToUTF8() rejects as "undefined code point"
 * when given this layout as @unicodetable (presumably by a wrapper
 * outside this part of the file).
 */
3117 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3118     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3119     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3120     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3121     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3122     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3123     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3124     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3125     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3126     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3127     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3128     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3129     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3130     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3131     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3132     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3133     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3134 };
3135 
/*
 * xmltranscodetable_ISO8859_3:
 * Two-level lookup table in the layout indexed by UTF8ToISO8859x()
 * (its @xlattable argument; presumably used for UTF-8 -> ISO-8859-3):
 *   bytes  0..31  block number for a two-byte lead (index: lead & 0x1F)
 *   bytes 32..47  block number for a three-byte lead (index: lead & 0x0F)
 *   bytes 48..    7 blocks of 64 bytes, indexed by (trailing byte & 0x3F);
 *                 a block holds either output bytes or, on the first step
 *                 of a three-byte sequence, the number of the next block.
 * A final lookup of 0 is treated as "not in character set".
 */
3136 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3137     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3138     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3139     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3145     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3146     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3147     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3148     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3149     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3150     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3151     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3153     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3154     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3162     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3163     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3164     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3165     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3166     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3167     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3168 };
3169 
/*
 * xmlunicodetable_ISO8859_4:
 * 128 code points for the upper half of the 8-bit set (ISO-8859-4, going
 * by the name): entry i is the code point of byte 0x80 + i. This is the
 * layout ISO8859xToUTF8() expects for its @unicodetable argument;
 * presumably a wrapper outside this part of the file passes it there.
 */
3170 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3171     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3172     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3173     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3174     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3175     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3176     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3177     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3178     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3179     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3180     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3181     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3182     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3183     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3184     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3185     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3186     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3187 };
3188 
/*
 * xmltranscodetable_ISO8859_4:
 * Two-level lookup table in the layout indexed by UTF8ToISO8859x()
 * (its @xlattable argument; presumably used for UTF-8 -> ISO-8859-4):
 *   bytes  0..31  block number for a two-byte lead (index: lead & 0x1F)
 *   bytes 32..47  block number for a three-byte lead (index: lead & 0x0F)
 *   bytes 48..    6 blocks of 64 bytes, indexed by (trailing byte & 0x3F);
 *                 a block holds either output bytes or, on the first step
 *                 of a three-byte sequence, the number of the next block.
 * A final lookup of 0 is treated as "not in character set".
 */
3189 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3190     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3191     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3198     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3199     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3200     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3201     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3202     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3203     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3204     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3205     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3206     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3207     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3208     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3209     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3210     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3211     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3214     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3215     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3216     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3217 };
3218 
/*
 * xmlunicodetable_ISO8859_5:
 * 128 code points for the upper half of the 8-bit set (ISO-8859-5, going
 * by the name): entry i is the code point of byte 0x80 + i. This is the
 * layout ISO8859xToUTF8() expects for its @unicodetable argument;
 * presumably a wrapper outside this part of the file passes it there.
 */
3219 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3220     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3221     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3222     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3223     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3224     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3225     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3226     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3227     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3228     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3229     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3230     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3231     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3232     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3233     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3234     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3235     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3236 };
3237 
/*
 * xmltranscodetable_ISO8859_5:
 * Two-level lookup table in the layout indexed by UTF8ToISO8859x()
 * (its @xlattable argument; presumably used for UTF-8 -> ISO-8859-5):
 *   bytes  0..31  block number for a two-byte lead (index: lead & 0x1F)
 *   bytes 32..47  block number for a three-byte lead (index: lead & 0x0F)
 *   bytes 48..    6 blocks of 64 bytes, indexed by (trailing byte & 0x3F);
 *                 a block holds either output bytes or, on the first step
 *                 of a three-byte sequence, the number of the next block.
 * A final lookup of 0 is treated as "not in character set".
 */
3238 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3239     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3247     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3248     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3249     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3251     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3252     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3253     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3254     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3255     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3256     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266 };
3267 
/*
 * xmlunicodetable_ISO8859_6:
 * 128 code points for the upper half of the 8-bit set (ISO-8859-6, going
 * by the name): entry i is the code point of byte 0x80 + i. The 0x0000
 * entries are what ISO8859xToUTF8() rejects as "undefined code point"
 * when given this layout as @unicodetable (presumably by a wrapper
 * outside this part of the file).
 */
3268 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3269     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3270     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3271     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3272     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3273     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3274     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3275     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3276     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3277     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3278     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3279     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3280     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3281     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3282     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3283     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3284     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3285 };
3286 
/*
 * xmltranscodetable_ISO8859_6:
 * Two-level lookup table in the layout indexed by UTF8ToISO8859x()
 * (its @xlattable argument; presumably used for UTF-8 -> ISO-8859-6):
 *   bytes  0..31  block number for a two-byte lead (index: lead & 0x1F)
 *   bytes 32..47  block number for a three-byte lead (index: lead & 0x0F)
 *   bytes 48..    5 blocks of 64 bytes, indexed by (trailing byte & 0x3F);
 *                 a block holds either output bytes or, on the first step
 *                 of a three-byte sequence, the number of the next block.
 * A final lookup of 0 is treated as "not in character set".
 */
3287 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3288     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3290     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3296     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3297     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3298     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3302     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3304     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3305     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3306     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3307     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3308     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311 };
3312 
/*
 * xmlunicodetable_ISO8859_7:
 * 128 code points for the upper half of the 8-bit set (ISO-8859-7, going
 * by the name): entry i is the code point of byte 0x80 + i. The 0x0000
 * entries are what ISO8859xToUTF8() rejects as "undefined code point"
 * when given this layout as @unicodetable (presumably by a wrapper
 * outside this part of the file).
 */
3313 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3314     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3315     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3316     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3317     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3318     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3319     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3320     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3321     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3322     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3323     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3324     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3325     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3326     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3327     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3328     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3329     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3330 };
3331 
/*
 * xmltranscodetable_ISO8859_7:
 * Two-level lookup table in the layout indexed by UTF8ToISO8859x()
 * (its @xlattable argument; presumably used for UTF-8 -> ISO-8859-7):
 *   bytes  0..31  block number for a two-byte lead (index: lead & 0x1F)
 *   bytes 32..47  block number for a three-byte lead (index: lead & 0x0F)
 *   bytes 48..    7 blocks of 64 bytes, indexed by (trailing byte & 0x3F);
 *                 a block holds either output bytes or, on the first step
 *                 of a three-byte sequence, the number of the next block.
 * A final lookup of 0 is treated as "not in character set".
 */
3332 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3333     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3334     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3341     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3342     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3343     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3344     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3350     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3354     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3356     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3357     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3358     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3359     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3360     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3361     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 };
3365 
/*
 * xmlunicodetable_ISO8859_8: 128 16-bit entries, laid out 8 per row.
 * The first 32 entries are 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-8
 * byte 0x80 + i, and 0x0000 marks a byte with no assigned character --
 * confirm against ISO8859xToUTF8, the routine the sibling tables are
 * passed to.
 */
3366 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3367     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3368     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3369     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3370     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3371     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3372     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3373     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3374     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3375     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3376     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3377     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3378     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3379     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3380     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3381     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3382     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3383 };
3384 
/*
 * xmltranscodetable_ISO8859_8: 48 + 7 * 64 = 496 bytes, written as 31 string
 * rows of 16 bytes each.  The concatenated literal exactly fills the array,
 * so no terminating NUL is stored (valid in C, would be rejected by C++).
 * NOTE(review): presumably a 48-byte index area followed by seven 64-byte
 * blocks used for the UTF-8 -> ISO-8859-8 direction, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x, which is not visible here.
 */
3385 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3386     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3388     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3394     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3395     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3396     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3397     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3398     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3400     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3403     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3405     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3410     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3411     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3415     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3416     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 };
3418 
/*
 * xmlunicodetable_ISO8859_9: 128 16-bit entries, laid out 8 per row.
 * Every entry equals 0x0080 + its index except six (0x011e, 0x0130, 0x015e,
 * 0x011f, 0x0131, 0x015f); no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-9
 * byte 0x80 + i -- confirm against ISO8859xToUTF8, the routine the sibling
 * tables are passed to.
 */
3419 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3420     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3421     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3422     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3423     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3424     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3425     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3426     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3427     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3428     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3429     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3430     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3431     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3432     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3433     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3434     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3435     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3436 };
3437 
/*
 * xmltranscodetable_ISO8859_9: 48 + 5 * 64 = 368 bytes, written as 23 string
 * rows of 16 bytes each.  The concatenated literal exactly fills the array,
 * so no terminating NUL is stored (valid in C, would be rejected by C++).
 * NOTE(review): presumably a 48-byte index area followed by five 64-byte
 * blocks used for the UTF-8 -> ISO-8859-9 direction, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x, which is not visible here.
 */
3438 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3439     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3447     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3448     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3449     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3450     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3451     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3452     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3453     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3454     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3456     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3460     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462 };
3463 
/*
 * xmlunicodetable_ISO8859_10: 128 16-bit entries, laid out 8 per row.
 * The first 32 entries are 0x0080..0x009f in ascending order; no entry is
 * 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-10
 * byte 0x80 + i -- confirm against ISO8859xToUTF8, the routine the sibling
 * tables are passed to.
 */
3464 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3465     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3466     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3467     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3468     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3469     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3470     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3471     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3472     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3473     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3474     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3475     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3476     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3477     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3478     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3479     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3480     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3481 };
3482 
/*
 * xmltranscodetable_ISO8859_10: 48 + 7 * 64 = 496 bytes, written as 31
 * string rows of 16 bytes each.  The concatenated literal exactly fills the
 * array, so no terminating NUL is stored (valid in C, would be rejected by
 * C++).
 * NOTE(review): presumably a 48-byte index area followed by seven 64-byte
 * blocks used for the UTF-8 -> ISO-8859-10 direction, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x, which is not visible here.
 */
3483 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3484     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3492     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3493     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3494     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3495     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3496     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3497     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3498     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3499     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3500     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3501     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3502     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3503     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3504     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3512     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3513     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3514     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3515 };
3516 
/*
 * xmlunicodetable_ISO8859_11: 128 16-bit entries, laid out 8 per row.
 * The first 32 entries are 0x0080..0x009f in ascending order; eight entries
 * (indices 0x5b..0x5e and 0x7c..0x7f) are 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-11
 * byte 0x80 + i, and 0x0000 marks a byte with no assigned character --
 * confirm against ISO8859xToUTF8, the routine the sibling tables are
 * passed to.
 */
3517 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3518     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3519     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3520     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3521     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3522     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3523     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3524     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3525     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3526     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3527     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3528     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3529     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3530     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3531     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3532     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3533     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3534 };
3535 
/*
 * xmltranscodetable_ISO8859_11: 48 + 6 * 64 = 432 bytes, written as 27
 * string rows of 16 bytes each.  The concatenated literal exactly fills the
 * array, so no terminating NUL is stored (valid in C, would be rejected by
 * C++).
 * NOTE(review): presumably a 48-byte index area followed by six 64-byte
 * blocks used for the UTF-8 -> ISO-8859-11 direction, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x, which is not visible here.
 */
3536 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3537     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3545     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3546     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3549     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3552     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3553     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3554     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3555     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3556     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3561     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3562     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564 };
3565 
/*
 * xmlunicodetable_ISO8859_13: 128 16-bit entries, laid out 8 per row.
 * The first 32 entries are 0x0080..0x009f in ascending order; no entry is
 * 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-13
 * byte 0x80 + i -- confirm against ISO8859xToUTF8, the routine the sibling
 * tables are passed to.
 */
3566 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3567     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3568     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3569     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3570     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3571     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3572     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3573     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3574     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3575     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3576     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3577     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3578     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3579     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3580     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3581     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3582     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3583 };
3584 
/*
 * xmltranscodetable_ISO8859_13: 48 + 7 * 64 = 496 bytes, written as 31
 * string rows of 16 bytes each.  The concatenated literal exactly fills the
 * array, so no terminating NUL is stored (valid in C, would be rejected by
 * C++).
 * NOTE(review): presumably a 48-byte index area followed by seven 64-byte
 * blocks used for the UTF-8 -> ISO-8859-13 direction, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x, which is not visible here.
 */
3585 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3586     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3587     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3594     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3595     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3596     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3597     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3602     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3603     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3604     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3605     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3606     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3607     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3608     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3609     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3610     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3611     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3612     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3613     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3614     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3615     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3616     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3617 };
3618 
/*
 * xmlunicodetable_ISO8859_14: 128 16-bit entries, laid out 8 per row.
 * The first 32 entries are 0x0080..0x009f in ascending order; no entry is
 * 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-14
 * byte 0x80 + i -- confirm against ISO8859xToUTF8, the routine the sibling
 * tables are passed to.
 */
3619 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3620     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3621     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3622     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3623     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3624     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3625     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3626     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3627     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3628     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3629     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3630     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3631     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3632     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3633     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3634     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3635     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3636 };
3637 
/*
 * xmltranscodetable_ISO8859_14: 48 + 10 * 64 = 688 bytes, written as 43
 * string rows of 16 bytes each.  The concatenated literal exactly fills the
 * array, so no terminating NUL is stored (valid in C, would be rejected by
 * C++).
 * NOTE(review): presumably a 48-byte index area followed by ten 64-byte
 * blocks used for the UTF-8 -> ISO-8859-14 direction, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x, which is not visible here.
 */
3638 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3639     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3644     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3645     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3647     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3648     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3649     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3653     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3654     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3655     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3656     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3657     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3658     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3659     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3660     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3666     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3667     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3668     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3674     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3676     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3677     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3679     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3680     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3681     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3682 };
3683 
/*
 * xmlunicodetable_ISO8859_15: 128 16-bit entries, laid out 8 per row.
 * Every entry equals 0x0080 + its index except eight (0x20ac, 0x0160,
 * 0x0161, 0x017d, 0x017e, 0x0152, 0x0153, 0x0178); no entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-15
 * byte 0x80 + i -- confirm against ISO8859xToUTF8, the routine the sibling
 * tables are passed to.
 */
3684 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3685     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3686     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3687     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3688     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3689     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3690     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3691     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3692     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3693     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3694     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3695     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3696     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3697     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3698     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3699     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3700     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3701 };
3702 
/*
 * xmltranscodetable_ISO8859_15: 48 + 6 * 64 = 432 bytes, written as 27
 * string rows of 16 bytes each.  The concatenated literal exactly fills the
 * array, so no terminating NUL is stored (valid in C, would be rejected by
 * C++).
 * NOTE(review): presumably a 48-byte index area followed by six 64-byte
 * blocks used for the UTF-8 -> ISO-8859-15 direction, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x, which is not visible here.
 */
3703 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3704     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3705     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3706     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3709     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3710     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3712     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3713     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3714     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3715     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3719     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3720     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3721     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3722     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3727     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3728     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3729     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3730     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3731 };
3732 
/*
 * xmlunicodetable_ISO8859_16: 128 16-bit entries, laid out 8 per row.
 * The first 32 entries are 0x0080..0x009f in ascending order; no entry is
 * 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-16
 * byte 0x80 + i -- confirm against ISO8859xToUTF8, the routine the sibling
 * tables are passed to.
 */
3733 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3734     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3735     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3736     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3737     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3738     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3739     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3740     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3741     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3742     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3743     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3744     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3745     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3746     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3747     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3748     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3749     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3750 };
3751 
/*
 * xmltranscodetable_ISO8859_16: 48 + 9 * 64 = 624 bytes, written as 39
 * string rows of 16 bytes each.  The concatenated literal exactly fills the
 * array, so no terminating NUL is stored (valid in C, would be rejected by
 * C++).
 * NOTE(review): presumably a 48-byte index area followed by nine 64-byte
 * blocks used for the UTF-8 -> ISO-8859-16 direction, with 0x00 meaning
 * "no mapping" -- confirm against UTF8ToISO8859x, which is not visible here.
 */
3752 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3753     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3754     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3757     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3760     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3761     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3762     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3763     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3764     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3765     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3766     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3767     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3768     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3769     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3770     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3771     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3772     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3773     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3774     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3775     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3776     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3777     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3778     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3779     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3782     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3784     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3785     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3786     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3787     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3789     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3790     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3791     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3792 };
3793 
3794 
3795 /*
3796  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3797  */
3798 
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3799 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3800     const unsigned char* in, int *inlen) {
3801     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3802 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3803 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3804     const unsigned char* in, int *inlen) {
3805     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3806 }
3807 
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3808 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3809     const unsigned char* in, int *inlen) {
3810     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3811 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3812 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3813     const unsigned char* in, int *inlen) {
3814     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3815 }
3816 
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3817 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3818     const unsigned char* in, int *inlen) {
3819     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3820 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3821 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3822     const unsigned char* in, int *inlen) {
3823     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3824 }
3825 
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3826 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3827     const unsigned char* in, int *inlen) {
3828     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3829 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3830 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3831     const unsigned char* in, int *inlen) {
3832     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3833 }
3834 
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3835 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3836     const unsigned char* in, int *inlen) {
3837     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3838 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3839 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3840     const unsigned char* in, int *inlen) {
3841     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3842 }
3843 
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3844 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3845     const unsigned char* in, int *inlen) {
3846     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3847 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3848 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3849     const unsigned char* in, int *inlen) {
3850     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3851 }
3852 
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3853 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3854     const unsigned char* in, int *inlen) {
3855     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3856 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3857 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3858     const unsigned char* in, int *inlen) {
3859     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3860 }
3861 
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3862 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3863     const unsigned char* in, int *inlen) {
3864     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3865 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3866 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3867     const unsigned char* in, int *inlen) {
3868     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3869 }
3870 
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3871 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3872     const unsigned char* in, int *inlen) {
3873     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3874 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3875 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3876     const unsigned char* in, int *inlen) {
3877     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3878 }
3879 
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3880 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3881     const unsigned char* in, int *inlen) {
3882     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3883 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3884 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3885     const unsigned char* in, int *inlen) {
3886     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3887 }
3888 
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3889 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3890     const unsigned char* in, int *inlen) {
3891     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3892 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3893 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3894     const unsigned char* in, int *inlen) {
3895     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3896 }
3897 
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3898 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3899     const unsigned char* in, int *inlen) {
3900     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3901 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3902 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3903     const unsigned char* in, int *inlen) {
3904     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3905 }
3906 
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3907 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3908     const unsigned char* in, int *inlen) {
3909     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3910 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3911 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3912     const unsigned char* in, int *inlen) {
3913     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3914 }
3915 
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3916 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3917     const unsigned char* in, int *inlen) {
3918     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3919 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3920 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3921     const unsigned char* in, int *inlen) {
3922     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3923 }
3924 
3925 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3926 xmlRegisterCharEncodingHandlersISO8859x (void) {
3927     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3928     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3929     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3930     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3931     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3932     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3933     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3934     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3935     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3936     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3937     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3938     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3939     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3940     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3941 }
3942 
3943 #endif
3944 #endif
3945 
3946 #define bottom_encoding
3947 #include "elfgcchack.h"
3948