xref: /reactos/sdk/lib/3rdparty/libxml2/encoding.c (revision 911153da)
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */

22c2c66affSColin Finck 
23c2c66affSColin Finck #define IN_LIBXML
24c2c66affSColin Finck #include "libxml.h"
25c2c66affSColin Finck 
26c2c66affSColin Finck #include <string.h>
27c2c66affSColin Finck #include <limits.h>
28c2c66affSColin Finck #include <ctype.h>
29c2c66affSColin Finck #include <stdlib.h>
30*911153daSThomas Faber 
31c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
32c2c66affSColin Finck #include <errno.h>
33c2c66affSColin Finck #endif
34*911153daSThomas Faber 
35c2c66affSColin Finck #include <libxml/encoding.h>
36c2c66affSColin Finck #include <libxml/xmlmemory.h>
37c2c66affSColin Finck #ifdef LIBXML_HTML_ENABLED
38c2c66affSColin Finck #include <libxml/HTMLparser.h>
39c2c66affSColin Finck #endif
40c2c66affSColin Finck #include <libxml/globals.h>
41c2c66affSColin Finck #include <libxml/xmlerror.h>
42c2c66affSColin Finck 
43c2c66affSColin Finck #include "buf.h"
44c2c66affSColin Finck #include "enc.h"
45c2c66affSColin Finck 
46*911153daSThomas Faber #ifdef LIBXML_ICU_ENABLED
47*911153daSThomas Faber #include <unicode/ucnv.h>
48*911153daSThomas Faber /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
49*911153daSThomas Faber #define ICU_PIVOT_BUF_SIZE 1024
50*911153daSThomas Faber typedef struct _uconv_t uconv_t;
51*911153daSThomas Faber struct _uconv_t {
52*911153daSThomas Faber   UConverter *uconv; /* for conversion between an encoding and UTF-16 */
53*911153daSThomas Faber   UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
54*911153daSThomas Faber   UChar      pivot_buf[ICU_PIVOT_BUF_SIZE];
55*911153daSThomas Faber   UChar      *pivot_source;
56*911153daSThomas Faber   UChar      *pivot_target;
57*911153daSThomas Faber };
58*911153daSThomas Faber #endif
59*911153daSThomas Faber 
60c2c66affSColin Finck static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
61c2c66affSColin Finck static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
62c2c66affSColin Finck 
/*
 * One entry of the encoding alias table: a pair of strings.
 * NOTE(review): presumably @name is the real encoding name and @alias the
 * alternative spelling that maps to it -- confirm against the alias API.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/*
 * The alias table itself, empty (NULL / 0 / 0) until something fills it.
 * NOTE(review): presumably Nb counts the entries in use and Max the
 * allocated capacity -- the code that grows the table is outside this view.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
73c2c66affSColin Finck 
/*
 * Builds without iconv and without ICU forward-declare the ISO-8859-x
 * registration routine (when that support is compiled in).  Builds with
 * either library may instead enable conversion tracing by turning the
 * "#if 0" below into "#if 1".
 */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#else
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#endif

/*
 * Host byte-order flag; defaults to 1 (little endian).
 * NOTE(review): presumably corrected at initialization time on big-endian
 * hosts -- the code that would do so is outside this view.
 */
static int xmlLittleEndian = 1;
85c2c66affSColin Finck 
86c2c66affSColin Finck /**
87c2c66affSColin Finck  * xmlEncodingErrMemory:
8840ee59d6SThomas Faber  * @extra:  extra information
89c2c66affSColin Finck  *
90c2c66affSColin Finck  * Handle an out of memory condition
91c2c66affSColin Finck  */
92c2c66affSColin Finck static void
xmlEncodingErrMemory(const char * extra)93c2c66affSColin Finck xmlEncodingErrMemory(const char *extra)
94c2c66affSColin Finck {
95c2c66affSColin Finck     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
96c2c66affSColin Finck }
97c2c66affSColin Finck 
98c2c66affSColin Finck /**
99c2c66affSColin Finck  * xmlErrEncoding:
100c2c66affSColin Finck  * @error:  the error number
101c2c66affSColin Finck  * @msg:  the error message
102c2c66affSColin Finck  *
103c2c66affSColin Finck  * n encoding error
104c2c66affSColin Finck  */
105c2c66affSColin Finck static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)106c2c66affSColin Finck xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
107c2c66affSColin Finck {
108c2c66affSColin Finck     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
109c2c66affSColin Finck                     XML_FROM_I18N, error, XML_ERR_FATAL,
110c2c66affSColin Finck                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
111c2c66affSColin Finck }
112c2c66affSColin Finck 
113c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
114c2c66affSColin Finck static uconv_t*
openIcuConverter(const char * name,int toUnicode)115c2c66affSColin Finck openIcuConverter(const char* name, int toUnicode)
116c2c66affSColin Finck {
117c2c66affSColin Finck   UErrorCode status = U_ZERO_ERROR;
118c2c66affSColin Finck   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
119c2c66affSColin Finck   if (conv == NULL)
120c2c66affSColin Finck     return NULL;
121c2c66affSColin Finck 
1225bb277a5SThomas Faber   conv->pivot_source = conv->pivot_buf;
1235bb277a5SThomas Faber   conv->pivot_target = conv->pivot_buf;
1245bb277a5SThomas Faber 
125c2c66affSColin Finck   conv->uconv = ucnv_open(name, &status);
126c2c66affSColin Finck   if (U_FAILURE(status))
127c2c66affSColin Finck     goto error;
128c2c66affSColin Finck 
129c2c66affSColin Finck   status = U_ZERO_ERROR;
130c2c66affSColin Finck   if (toUnicode) {
131c2c66affSColin Finck     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
132c2c66affSColin Finck                         NULL, NULL, NULL, &status);
133c2c66affSColin Finck   }
134c2c66affSColin Finck   else {
135c2c66affSColin Finck     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
136c2c66affSColin Finck                         NULL, NULL, NULL, &status);
137c2c66affSColin Finck   }
138c2c66affSColin Finck   if (U_FAILURE(status))
139c2c66affSColin Finck     goto error;
140c2c66affSColin Finck 
141c2c66affSColin Finck   status = U_ZERO_ERROR;
142c2c66affSColin Finck   conv->utf8 = ucnv_open("UTF-8", &status);
143c2c66affSColin Finck   if (U_SUCCESS(status))
144c2c66affSColin Finck     return conv;
145c2c66affSColin Finck 
146c2c66affSColin Finck error:
147c2c66affSColin Finck   if (conv->uconv)
148c2c66affSColin Finck     ucnv_close(conv->uconv);
149c2c66affSColin Finck   xmlFree(conv);
150c2c66affSColin Finck   return NULL;
151c2c66affSColin Finck }
152c2c66affSColin Finck 
153c2c66affSColin Finck static void
closeIcuConverter(uconv_t * conv)154c2c66affSColin Finck closeIcuConverter(uconv_t *conv)
155c2c66affSColin Finck {
156c2c66affSColin Finck   if (conv != NULL) {
157c2c66affSColin Finck     ucnv_close(conv->uconv);
158c2c66affSColin Finck     ucnv_close(conv->utf8);
159c2c66affSColin Finck     xmlFree(conv);
160c2c66affSColin Finck   }
161c2c66affSColin Finck }
162c2c66affSColin Finck #endif /* LIBXML_ICU_ENABLED */
163c2c66affSColin Finck 
164c2c66affSColin Finck /************************************************************************
165c2c66affSColin Finck  *									*
166c2c66affSColin Finck  *		Conversions To/From UTF8 encoding			*
167c2c66affSColin Finck  *									*
168c2c66affSColin Finck  ************************************************************************/
169c2c66affSColin Finck 
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.  Every ASCII char maps to exactly one output byte,
 * so conversion proceeds until either the input is exhausted or @out is
 * completely full.
 *
 * Returns the number of bytes written if success, or -1 if a byte outside
 *     the ASCII range (>= 0x80) is met.
 * The value of @inlen after return is the number of octets consumed; on
 *     error it is the offset of the offending byte.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* base = in;
    const unsigned char* inend = in + (*inlen);

    /*
     * One input byte yields one output byte, so the only space requirement
     * is a single free byte; no extra slack is reserved.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }

    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
214c2c66affSColin Finck 
215c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success (0 when @in is NULL),
 *     -2 if the transcoding fails (malformed UTF-8 or a character that
 *     has no ASCII representation), or -1 if @out, @outlen or @inlen
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed; an
 *     incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        /* sequence cut by the end of the input: keep it for the next call */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A lead byte must be followed by continuation bytes only;
             * anything else is malformed and must not be turned into a
             * made-up ASCII value.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* report only what was fully converted before the offending sequence */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
299c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
300c2c66affSColin Finck 
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied as is; bytes from 0x80
 * up become a two byte UTF-8 sequence.  Conversion stops at the first
 * character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * Space is checked as "bytes remaining" rather than by comparing
     * against outend - 1, which would point before the buffer when
     * *outlen is 0.
     */
    while (in < inend) {
        if (*in < 0x80) {
            if (out >= outend)
                break;
            *out++ = *in++;
        } else {
            if (outend - out < 2)
                break;
            *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
    }

    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
349c2c66affSColin Finck 
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, 0 when @inb is NULL, or -1 if
 *     @out, @outlen or @inlenb is NULL or the computed length is negative.
 *     The values of *@outlen and *@inlenb after a successful return are
 *     both the number of bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* never copy more than either side can hold */
    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(count);
}
396c2c66affSColin Finck 
397c2c66affSColin Finck 
398c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success (0 when @in is NULL),
 *     -2 if the transcoding fails (malformed UTF-8 or a character above
 *     0xFF), or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed; an
 *     incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in IsoLat1 */
            goto invalid;
        }

        /* sequence cut by the end of the input: keep it for the next call */
        if (inend - in < trailing)
            break;

        /*
         * The check above guarantees that all trailing bytes are available,
         * so no further end-of-input test is needed inside the loop.
         */
        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c > 0xFF) {
            /* no chance for this in IsoLat1 */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* report only what was fully converted before the offending sequence */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
488c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
489c2c66affSColin Finck 
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out.  The input is read byte by byte (low byte first), so
 * the result does not depend on the byte order or alignment rules of the
 * host.  A trailing odd byte and a high surrogate whose partner has not
 * arrived yet are left unconsumed.  Conversion stops before the first
 * character that does not fit completely into @out.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (@inb is not valid UTF-16: a high surrogate not followed by a low
 *     surrogate, or a low surrogate on its own).
 *     The value of *@inlenb after return is the number of octets consumed.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int inlen, need, bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;

    /* only whole 16 bit units are looked at */
    inlen = *inlenb;
    if (inlen < 0)
        inlen = 0;
    if ((inlen % 2) == 1)
        inlen--;
    inend = in + inlen;

    while (inend - in >= 2) {
        c = in[0] | (((unsigned int) in[1]) << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xDC00) {
            /* low surrogate without a preceding high surrogate */
            *outlen = out - outstart;
            *inlenb = in - inb;
            return(-2);
        }
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2) {      /* handle split multi-byte characters */
                break;
            }
            d = next[0] | (((unsigned int) next[1]) << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) need = 1;
        else if (c <   0x800) need = 2;
        else if (c < 0x10000) need = 3;
        else                  need = 4;

        /* never emit a partial character */
        if (outend - out < need)
            break;

        if (c < 0x80) {
            *out++ = (unsigned char) c;
        } else {
            if      (need == 2) { *out++ = ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
            else if (need == 3) { *out++ = ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
            else                { *out++ = ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

            for ( ; bits >= 0; bits -= 6)
                *out++ = ((c >> bits) & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
582c2c66affSColin Finck 
583c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is emitted.
 *
 * Conversion stops early, without error, when the output buffer cannot
 * hold the next character or when the input ends in the middle of a
 * multi-byte sequence; the unconverted tail is simply not consumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the input is not valid UTF-8 (stray or missing
 *     continuation byte, lead byte >= 0xF8, code point > 0x10FFFF).
 * On return (other than -1) *outlen holds the number of bytes written and
 *     *inlen the number of input bytes consumed by complete characters.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* Only whole 16-bit units can be stored. */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* Sequence split across input chunks: wait for the rest. */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* Every remaining byte must look like 10xxxxxx. */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /*
         * Bytes are stored individually, low byte first, so the result
         * does not depend on host endianness or on @outb alignment.
         */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        } else {
            /* beyond the Unicode range, not representable in UTF-16 */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
692c2c66affSColin Finck 
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars, or NULL to initialize
 * @inlen:  the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. The output is produced in little-endian order by
 * UTF8ToUTF16LE(). When @in is NULL the call is treated as the
 * initialization step and only the byte order mark (FF FE) is written,
 * provided @outb has room for it.
 *
 * Returns the number of bytes written, -1 if @outlen or @inlen is NULL
 *     (or whenever UTF8ToUTF16LE() returns -1), or -2 if the
 *     transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* Both lengths are dereferenced below, before the LE worker runs. */
    if ((outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if ((outb != NULL) && (*outlen >= 2)) {
        outb[0] = 0xFF;
        outb[1] = 0xFE;
        *outlen = 2;
        *inlen = 0;
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "Added FFFE Byte Order Mark\n");
#endif
        return(2);
    }

    /* No room for the BOM: report that nothing was written. */
    *outlen = 0;
    *inlen = 0;
    return(0);
}
731c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
732c2c66affSColin Finck 
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out in bytes
 * @inb:  a pointer to an array of UTF-16BE data passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of big-endian UTF-16 code units in and try to convert it
 * to an UTF-8 block of chars out. The input is read byte by byte, so
 * neither the alignment of @inb nor the endianness of the host matters.
 * A trailing odd byte in the input is ignored (not consumed).
 *
 * A character is only converted while at least six bytes remain free in
 * @out, so a very small output buffer can yield a return value of 0.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate that is not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed by
 *     completely converted characters; *outlen is the number of bytes
 *     written.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char *outstart = out;
    const unsigned char *in = inb;
    const unsigned char *processed = inb;
    const unsigned char *inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    /* Only whole 16-bit units are looked at. */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The "+ 5" keeps six bytes of head room, more than the four bytes
     * the longest UTF-8 sequence written below can take, so the stores
     * in the loop body cannot run past the end of @out.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                /* high surrogate without its low half */
                *outlen = (int) (out - outstart);
                *inlenb = (int) (processed - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* c now holds one complete code point: emit the lead byte ... */
        if      (c <    0x80) {  *out++ = (unsigned char) c;                          bits = -6; }
        else if (c <   0x800) {  *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0); bits =  0; }
        else if (c < 0x10000) {  *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0); bits =  6; }
        else                  {  *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0); bits = 12; }

        /* ... followed by its continuation bytes, six bits at a time. */
        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(*outlen);
}
825c2c66affSColin Finck 
826c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is emitted.
 *
 * Conversion stops early, without error, when the output buffer cannot
 * hold the next character or when the input ends in the middle of a
 * multi-byte sequence; the unconverted tail is simply not consumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the input is not valid UTF-8 (stray or missing
 *     continuation byte, lead byte >= 0xF8, code point > 0x10FFFF).
 * On return (other than -1) *outlen holds the number of bytes written and
 *     *inlen the number of input bytes consumed by complete characters.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* Only whole 16-bit units can be stored. */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* Sequence split across input chunks: wait for the rest. */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* Every remaining byte must look like 10xxxxxx. */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /*
         * Bytes are stored individually, high byte first, so the result
         * does not depend on host endianness or on @outb alignment.
         */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        } else {
            /* beyond the Unicode range, not representable in UTF-16 */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    /* *outlen is a byte count here as well, not a count of 16-bit units. */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
932c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
933c2c66affSColin Finck 
934c2c66affSColin Finck /************************************************************************
935c2c66affSColin Finck  *									*
936c2c66affSColin Finck  *		Generic encoding handling routines			*
937c2c66affSColin Finck  *									*
938c2c66affSColin Finck  ************************************************************************/
939c2c66affSColin Finck 
940c2c66affSColin Finck /**
941c2c66affSColin Finck  * xmlDetectCharEncoding:
942c2c66affSColin Finck  * @in:  a pointer to the first bytes of the XML entity, must be at least
943c2c66affSColin Finck  *       2 bytes long (at least 4 if encoding is UTF4 variant).
944c2c66affSColin Finck  * @len:  pointer to the length of the buffer
945c2c66affSColin Finck  *
946c2c66affSColin Finck  * Guess the encoding of the entity using the first bytes of the entity content
947c2c66affSColin Finck  * according to the non-normative appendix F of the XML-1.0 recommendation.
948c2c66affSColin Finck  *
949c2c66affSColin Finck  * Returns one of the XML_CHAR_ENCODING_... values.
950c2c66affSColin Finck  */
951c2c66affSColin Finck xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)952c2c66affSColin Finck xmlDetectCharEncoding(const unsigned char* in, int len)
953c2c66affSColin Finck {
954c2c66affSColin Finck     if (in == NULL)
955c2c66affSColin Finck         return(XML_CHAR_ENCODING_NONE);
956c2c66affSColin Finck     if (len >= 4) {
957c2c66affSColin Finck 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
958c2c66affSColin Finck 	    (in[2] == 0x00) && (in[3] == 0x3C))
959c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UCS4BE);
960c2c66affSColin Finck 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961c2c66affSColin Finck 	    (in[2] == 0x00) && (in[3] == 0x00))
962c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UCS4LE);
963c2c66affSColin Finck 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
964c2c66affSColin Finck 	    (in[2] == 0x3C) && (in[3] == 0x00))
965c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UCS4_2143);
966c2c66affSColin Finck 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
967c2c66affSColin Finck 	    (in[2] == 0x00) && (in[3] == 0x00))
968c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UCS4_3412);
969c2c66affSColin Finck 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
970c2c66affSColin Finck 	    (in[2] == 0xA7) && (in[3] == 0x94))
971c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_EBCDIC);
972c2c66affSColin Finck 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
973c2c66affSColin Finck 	    (in[2] == 0x78) && (in[3] == 0x6D))
974c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UTF8);
975c2c66affSColin Finck 	/*
976c2c66affSColin Finck 	 * Although not part of the recommendation, we also
977c2c66affSColin Finck 	 * attempt an "auto-recognition" of UTF-16LE and
978c2c66affSColin Finck 	 * UTF-16BE encodings.
979c2c66affSColin Finck 	 */
980c2c66affSColin Finck 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
981c2c66affSColin Finck 	    (in[2] == 0x3F) && (in[3] == 0x00))
982c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UTF16LE);
983c2c66affSColin Finck 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
984c2c66affSColin Finck 	    (in[2] == 0x00) && (in[3] == 0x3F))
985c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UTF16BE);
986c2c66affSColin Finck     }
987c2c66affSColin Finck     if (len >= 3) {
988c2c66affSColin Finck 	/*
989c2c66affSColin Finck 	 * Errata on XML-1.0 June 20 2001
990c2c66affSColin Finck 	 * We now allow an UTF8 encoded BOM
991c2c66affSColin Finck 	 */
992c2c66affSColin Finck 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
993c2c66affSColin Finck 	    (in[2] == 0xBF))
994c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UTF8);
995c2c66affSColin Finck     }
996c2c66affSColin Finck     /* For UTF-16 we can recognize by the BOM */
997c2c66affSColin Finck     if (len >= 2) {
998c2c66affSColin Finck 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
999c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UTF16BE);
1000c2c66affSColin Finck 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
1001c2c66affSColin Finck 	    return(XML_CHAR_ENCODING_UTF16LE);
1002c2c66affSColin Finck     }
1003c2c66affSColin Finck     return(XML_CHAR_ENCODING_NONE);
1004c2c66affSColin Finck }
1005c2c66affSColin Finck 
1006c2c66affSColin Finck /**
1007c2c66affSColin Finck  * xmlCleanupEncodingAliases:
1008c2c66affSColin Finck  *
1009c2c66affSColin Finck  * Unregisters all aliases
1010c2c66affSColin Finck  */
1011c2c66affSColin Finck void
xmlCleanupEncodingAliases(void)1012c2c66affSColin Finck xmlCleanupEncodingAliases(void) {
1013c2c66affSColin Finck     int i;
1014c2c66affSColin Finck 
1015c2c66affSColin Finck     if (xmlCharEncodingAliases == NULL)
1016c2c66affSColin Finck 	return;
1017c2c66affSColin Finck 
1018c2c66affSColin Finck     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1019c2c66affSColin Finck 	if (xmlCharEncodingAliases[i].name != NULL)
1020c2c66affSColin Finck 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1021c2c66affSColin Finck 	if (xmlCharEncodingAliases[i].alias != NULL)
1022c2c66affSColin Finck 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1023c2c66affSColin Finck     }
1024c2c66affSColin Finck     xmlCharEncodingAliasesNb = 0;
1025c2c66affSColin Finck     xmlCharEncodingAliasesMax = 0;
1026c2c66affSColin Finck     xmlFree(xmlCharEncodingAliases);
1027c2c66affSColin Finck     xmlCharEncodingAliases = NULL;
1028c2c66affSColin Finck }
1029c2c66affSColin Finck 
1030c2c66affSColin Finck /**
1031c2c66affSColin Finck  * xmlGetEncodingAlias:
1032c2c66affSColin Finck  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1033c2c66affSColin Finck  *
1034c2c66affSColin Finck  * Lookup an encoding name for the given alias.
1035c2c66affSColin Finck  *
1036c2c66affSColin Finck  * Returns NULL if not found, otherwise the original name
1037c2c66affSColin Finck  */
1038c2c66affSColin Finck const char *
xmlGetEncodingAlias(const char * alias)1039c2c66affSColin Finck xmlGetEncodingAlias(const char *alias) {
1040c2c66affSColin Finck     int i;
1041c2c66affSColin Finck     char upper[100];
1042c2c66affSColin Finck 
1043c2c66affSColin Finck     if (alias == NULL)
1044c2c66affSColin Finck 	return(NULL);
1045c2c66affSColin Finck 
1046c2c66affSColin Finck     if (xmlCharEncodingAliases == NULL)
1047c2c66affSColin Finck 	return(NULL);
1048c2c66affSColin Finck 
1049c2c66affSColin Finck     for (i = 0;i < 99;i++) {
1050c2c66affSColin Finck         upper[i] = toupper(alias[i]);
1051c2c66affSColin Finck 	if (upper[i] == 0) break;
1052c2c66affSColin Finck     }
1053c2c66affSColin Finck     upper[i] = 0;
1054c2c66affSColin Finck 
1055c2c66affSColin Finck     /*
1056c2c66affSColin Finck      * Walk down the list looking for a definition of the alias
1057c2c66affSColin Finck      */
1058c2c66affSColin Finck     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1059c2c66affSColin Finck 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1060c2c66affSColin Finck 	    return(xmlCharEncodingAliases[i].name);
1061c2c66affSColin Finck 	}
1062c2c66affSColin Finck     }
1063c2c66affSColin Finck     return(NULL);
1064c2c66affSColin Finck }
1065c2c66affSColin Finck 
1066c2c66affSColin Finck /**
1067c2c66affSColin Finck  * xmlAddEncodingAlias:
1068c2c66affSColin Finck  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1069c2c66affSColin Finck  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1070c2c66affSColin Finck  *
1071c2c66affSColin Finck  * Registers an alias @alias for an encoding named @name. Existing alias
1072c2c66affSColin Finck  * will be overwritten.
1073c2c66affSColin Finck  *
1074c2c66affSColin Finck  * Returns 0 in case of success, -1 in case of error
1075c2c66affSColin Finck  */
1076c2c66affSColin Finck int
xmlAddEncodingAlias(const char * name,const char * alias)1077c2c66affSColin Finck xmlAddEncodingAlias(const char *name, const char *alias) {
1078c2c66affSColin Finck     int i;
1079c2c66affSColin Finck     char upper[100];
1080c2c66affSColin Finck 
1081c2c66affSColin Finck     if ((name == NULL) || (alias == NULL))
1082c2c66affSColin Finck 	return(-1);
1083c2c66affSColin Finck 
1084c2c66affSColin Finck     for (i = 0;i < 99;i++) {
1085c2c66affSColin Finck         upper[i] = toupper(alias[i]);
1086c2c66affSColin Finck 	if (upper[i] == 0) break;
1087c2c66affSColin Finck     }
1088c2c66affSColin Finck     upper[i] = 0;
1089c2c66affSColin Finck 
1090c2c66affSColin Finck     if (xmlCharEncodingAliases == NULL) {
1091c2c66affSColin Finck 	xmlCharEncodingAliasesNb = 0;
1092c2c66affSColin Finck 	xmlCharEncodingAliasesMax = 20;
1093c2c66affSColin Finck 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1094c2c66affSColin Finck 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1095c2c66affSColin Finck 	if (xmlCharEncodingAliases == NULL)
1096c2c66affSColin Finck 	    return(-1);
1097c2c66affSColin Finck     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1098c2c66affSColin Finck 	xmlCharEncodingAliasesMax *= 2;
1099c2c66affSColin Finck 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1100c2c66affSColin Finck 	      xmlRealloc(xmlCharEncodingAliases,
1101c2c66affSColin Finck 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1102c2c66affSColin Finck     }
1103c2c66affSColin Finck     /*
1104c2c66affSColin Finck      * Walk down the list looking for a definition of the alias
1105c2c66affSColin Finck      */
1106c2c66affSColin Finck     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1107c2c66affSColin Finck 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1108c2c66affSColin Finck 	    /*
1109c2c66affSColin Finck 	     * Replace the definition.
1110c2c66affSColin Finck 	     */
1111c2c66affSColin Finck 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1112c2c66affSColin Finck 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1113c2c66affSColin Finck 	    return(0);
1114c2c66affSColin Finck 	}
1115c2c66affSColin Finck     }
1116c2c66affSColin Finck     /*
1117c2c66affSColin Finck      * Add the definition
1118c2c66affSColin Finck      */
1119c2c66affSColin Finck     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1120c2c66affSColin Finck     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1121c2c66affSColin Finck     xmlCharEncodingAliasesNb++;
1122c2c66affSColin Finck     return(0);
1123c2c66affSColin Finck }
1124c2c66affSColin Finck 
1125c2c66affSColin Finck /**
1126c2c66affSColin Finck  * xmlDelEncodingAlias:
1127c2c66affSColin Finck  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1128c2c66affSColin Finck  *
1129c2c66affSColin Finck  * Unregisters an encoding alias @alias
1130c2c66affSColin Finck  *
1131c2c66affSColin Finck  * Returns 0 in case of success, -1 in case of error
1132c2c66affSColin Finck  */
1133c2c66affSColin Finck int
xmlDelEncodingAlias(const char * alias)1134c2c66affSColin Finck xmlDelEncodingAlias(const char *alias) {
1135c2c66affSColin Finck     int i;
1136c2c66affSColin Finck 
1137c2c66affSColin Finck     if (alias == NULL)
1138c2c66affSColin Finck 	return(-1);
1139c2c66affSColin Finck 
1140c2c66affSColin Finck     if (xmlCharEncodingAliases == NULL)
1141c2c66affSColin Finck 	return(-1);
1142c2c66affSColin Finck     /*
1143c2c66affSColin Finck      * Walk down the list looking for a definition of the alias
1144c2c66affSColin Finck      */
1145c2c66affSColin Finck     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1146c2c66affSColin Finck 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1147c2c66affSColin Finck 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1148c2c66affSColin Finck 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1149c2c66affSColin Finck 	    xmlCharEncodingAliasesNb--;
1150c2c66affSColin Finck 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1151c2c66affSColin Finck 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1152c2c66affSColin Finck 	    return(0);
1153c2c66affSColin Finck 	}
1154c2c66affSColin Finck     }
1155c2c66affSColin Finck     return(-1);
1156c2c66affSColin Finck }
1157c2c66affSColin Finck 
1158c2c66affSColin Finck /**
1159c2c66affSColin Finck  * xmlParseCharEncoding:
1160c2c66affSColin Finck  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1161c2c66affSColin Finck  *
1162c2c66affSColin Finck  * Compare the string to the encoding schemes already known. Note
1163c2c66affSColin Finck  * that the comparison is case insensitive accordingly to the section
1164c2c66affSColin Finck  * [XML] 4.3.3 Character Encoding in Entities.
1165c2c66affSColin Finck  *
1166c2c66affSColin Finck  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1167c2c66affSColin Finck  * if not recognized.
1168c2c66affSColin Finck  */
1169c2c66affSColin Finck xmlCharEncoding
xmlParseCharEncoding(const char * name)1170c2c66affSColin Finck xmlParseCharEncoding(const char* name)
1171c2c66affSColin Finck {
1172c2c66affSColin Finck     const char *alias;
1173c2c66affSColin Finck     char upper[500];
1174c2c66affSColin Finck     int i;
1175c2c66affSColin Finck 
1176c2c66affSColin Finck     if (name == NULL)
1177c2c66affSColin Finck 	return(XML_CHAR_ENCODING_NONE);
1178c2c66affSColin Finck 
1179c2c66affSColin Finck     /*
1180c2c66affSColin Finck      * Do the alias resolution
1181c2c66affSColin Finck      */
1182c2c66affSColin Finck     alias = xmlGetEncodingAlias(name);
1183c2c66affSColin Finck     if (alias != NULL)
1184c2c66affSColin Finck 	name = alias;
1185c2c66affSColin Finck 
1186c2c66affSColin Finck     for (i = 0;i < 499;i++) {
1187c2c66affSColin Finck         upper[i] = toupper(name[i]);
1188c2c66affSColin Finck 	if (upper[i] == 0) break;
1189c2c66affSColin Finck     }
1190c2c66affSColin Finck     upper[i] = 0;
1191c2c66affSColin Finck 
1192c2c66affSColin Finck     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1193c2c66affSColin Finck     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1194c2c66affSColin Finck     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1195c2c66affSColin Finck 
1196c2c66affSColin Finck     /*
1197c2c66affSColin Finck      * NOTE: if we were able to parse this, the endianness of UTF16 is
1198c2c66affSColin Finck      *       already found and in use
1199c2c66affSColin Finck      */
1200c2c66affSColin Finck     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1201c2c66affSColin Finck     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1202c2c66affSColin Finck 
1203c2c66affSColin Finck     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1204c2c66affSColin Finck     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205c2c66affSColin Finck     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1206c2c66affSColin Finck 
1207c2c66affSColin Finck     /*
1208c2c66affSColin Finck      * NOTE: if we were able to parse this, the endianness of UCS4 is
1209c2c66affSColin Finck      *       already found and in use
1210c2c66affSColin Finck      */
1211c2c66affSColin Finck     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1212c2c66affSColin Finck     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213c2c66affSColin Finck     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1214c2c66affSColin Finck 
1215c2c66affSColin Finck 
1216c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1217c2c66affSColin Finck     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1218c2c66affSColin Finck     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1219c2c66affSColin Finck 
1220c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1221c2c66affSColin Finck     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1222c2c66affSColin Finck     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1223c2c66affSColin Finck 
1224c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1225c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1226c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1227c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1228c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1229c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1230c2c66affSColin Finck     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1231c2c66affSColin Finck 
1232c2c66affSColin Finck     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1233c2c66affSColin Finck     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1234c2c66affSColin Finck     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1235c2c66affSColin Finck 
1236c2c66affSColin Finck #ifdef DEBUG_ENCODING
1237c2c66affSColin Finck     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1238c2c66affSColin Finck #endif
1239c2c66affSColin Finck     return(XML_CHAR_ENCODING_ERROR);
1240c2c66affSColin Finck }
1241c2c66affSColin Finck 
1242c2c66affSColin Finck /**
1243c2c66affSColin Finck  * xmlGetCharEncodingName:
1244c2c66affSColin Finck  * @enc:  the encoding
1245c2c66affSColin Finck  *
1246c2c66affSColin Finck  * The "canonical" name for XML encoding.
1247c2c66affSColin Finck  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1248c2c66affSColin Finck  * Section 4.3.3  Character Encoding in Entities
1249c2c66affSColin Finck  *
1250c2c66affSColin Finck  * Returns the canonical name for the given encoding
1251c2c66affSColin Finck  */
1252c2c66affSColin Finck 
1253c2c66affSColin Finck const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1254c2c66affSColin Finck xmlGetCharEncodingName(xmlCharEncoding enc) {
1255c2c66affSColin Finck     switch (enc) {
1256c2c66affSColin Finck         case XML_CHAR_ENCODING_ERROR:
1257c2c66affSColin Finck 	    return(NULL);
1258c2c66affSColin Finck         case XML_CHAR_ENCODING_NONE:
1259c2c66affSColin Finck 	    return(NULL);
1260c2c66affSColin Finck         case XML_CHAR_ENCODING_UTF8:
1261c2c66affSColin Finck 	    return("UTF-8");
1262c2c66affSColin Finck         case XML_CHAR_ENCODING_UTF16LE:
1263c2c66affSColin Finck 	    return("UTF-16");
1264c2c66affSColin Finck         case XML_CHAR_ENCODING_UTF16BE:
1265c2c66affSColin Finck 	    return("UTF-16");
1266c2c66affSColin Finck         case XML_CHAR_ENCODING_EBCDIC:
1267c2c66affSColin Finck             return("EBCDIC");
1268c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS4LE:
1269c2c66affSColin Finck             return("ISO-10646-UCS-4");
1270c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS4BE:
1271c2c66affSColin Finck             return("ISO-10646-UCS-4");
1272c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS4_2143:
1273c2c66affSColin Finck             return("ISO-10646-UCS-4");
1274c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS4_3412:
1275c2c66affSColin Finck             return("ISO-10646-UCS-4");
1276c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS2:
1277c2c66affSColin Finck             return("ISO-10646-UCS-2");
1278c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_1:
1279c2c66affSColin Finck 	    return("ISO-8859-1");
1280c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_2:
1281c2c66affSColin Finck 	    return("ISO-8859-2");
1282c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_3:
1283c2c66affSColin Finck 	    return("ISO-8859-3");
1284c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_4:
1285c2c66affSColin Finck 	    return("ISO-8859-4");
1286c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_5:
1287c2c66affSColin Finck 	    return("ISO-8859-5");
1288c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_6:
1289c2c66affSColin Finck 	    return("ISO-8859-6");
1290c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_7:
1291c2c66affSColin Finck 	    return("ISO-8859-7");
1292c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_8:
1293c2c66affSColin Finck 	    return("ISO-8859-8");
1294c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_9:
1295c2c66affSColin Finck 	    return("ISO-8859-9");
1296c2c66affSColin Finck         case XML_CHAR_ENCODING_2022_JP:
1297c2c66affSColin Finck             return("ISO-2022-JP");
1298c2c66affSColin Finck         case XML_CHAR_ENCODING_SHIFT_JIS:
1299c2c66affSColin Finck             return("Shift-JIS");
1300c2c66affSColin Finck         case XML_CHAR_ENCODING_EUC_JP:
1301c2c66affSColin Finck             return("EUC-JP");
1302c2c66affSColin Finck 	case XML_CHAR_ENCODING_ASCII:
1303c2c66affSColin Finck 	    return(NULL);
1304c2c66affSColin Finck     }
1305c2c66affSColin Finck     return(NULL);
1306c2c66affSColin Finck }
1307c2c66affSColin Finck 
1308c2c66affSColin Finck /************************************************************************
1309c2c66affSColin Finck  *									*
1310c2c66affSColin Finck  *			Char encoding handlers				*
1311c2c66affSColin Finck  *									*
1312c2c66affSColin Finck  ************************************************************************/
1313c2c66affSColin Finck 
1314c2c66affSColin Finck 
1315c2c66affSColin Finck /* the size should be growable, but it's not a big deal ... */
1316c2c66affSColin Finck #define MAX_ENCODING_HANDLERS 50
1317c2c66affSColin Finck static xmlCharEncodingHandlerPtr *handlers = NULL;
1318c2c66affSColin Finck static int nbCharEncodingHandler = 0;
1319c2c66affSColin Finck 
1320c2c66affSColin Finck /*
1321c2c66affSColin Finck  * The default is UTF-8 for XML, that's also the default used for the
1322c2c66affSColin Finck  * parser internals, so the default encoding handler is NULL
1323c2c66affSColin Finck  */
1324c2c66affSColin Finck 
1325c2c66affSColin Finck static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1326c2c66affSColin Finck 
1327c2c66affSColin Finck /**
1328c2c66affSColin Finck  * xmlNewCharEncodingHandler:
1329c2c66affSColin Finck  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1330c2c66affSColin Finck  * @input:  the xmlCharEncodingInputFunc to read that encoding
1331c2c66affSColin Finck  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1332c2c66affSColin Finck  *
1333c2c66affSColin Finck  * Create and registers an xmlCharEncodingHandler.
1334c2c66affSColin Finck  *
1335c2c66affSColin Finck  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1336c2c66affSColin Finck  */
1337c2c66affSColin Finck xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1338c2c66affSColin Finck xmlNewCharEncodingHandler(const char *name,
1339c2c66affSColin Finck                           xmlCharEncodingInputFunc input,
1340c2c66affSColin Finck                           xmlCharEncodingOutputFunc output) {
1341c2c66affSColin Finck     xmlCharEncodingHandlerPtr handler;
1342c2c66affSColin Finck     const char *alias;
1343c2c66affSColin Finck     char upper[500];
1344c2c66affSColin Finck     int i;
1345c2c66affSColin Finck     char *up = NULL;
1346c2c66affSColin Finck 
1347c2c66affSColin Finck     /*
1348c2c66affSColin Finck      * Do the alias resolution
1349c2c66affSColin Finck      */
1350c2c66affSColin Finck     alias = xmlGetEncodingAlias(name);
1351c2c66affSColin Finck     if (alias != NULL)
1352c2c66affSColin Finck 	name = alias;
1353c2c66affSColin Finck 
1354c2c66affSColin Finck     /*
1355c2c66affSColin Finck      * Keep only the uppercase version of the encoding.
1356c2c66affSColin Finck      */
1357c2c66affSColin Finck     if (name == NULL) {
1358c2c66affSColin Finck         xmlEncodingErr(XML_I18N_NO_NAME,
1359c2c66affSColin Finck 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1360c2c66affSColin Finck 	return(NULL);
1361c2c66affSColin Finck     }
1362c2c66affSColin Finck     for (i = 0;i < 499;i++) {
1363c2c66affSColin Finck         upper[i] = toupper(name[i]);
1364c2c66affSColin Finck 	if (upper[i] == 0) break;
1365c2c66affSColin Finck     }
1366c2c66affSColin Finck     upper[i] = 0;
1367c2c66affSColin Finck     up = xmlMemStrdup(upper);
1368c2c66affSColin Finck     if (up == NULL) {
1369c2c66affSColin Finck         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1370c2c66affSColin Finck 	return(NULL);
1371c2c66affSColin Finck     }
1372c2c66affSColin Finck 
1373c2c66affSColin Finck     /*
1374c2c66affSColin Finck      * allocate and fill-up an handler block.
1375c2c66affSColin Finck      */
1376c2c66affSColin Finck     handler = (xmlCharEncodingHandlerPtr)
1377c2c66affSColin Finck               xmlMalloc(sizeof(xmlCharEncodingHandler));
1378c2c66affSColin Finck     if (handler == NULL) {
1379c2c66affSColin Finck         xmlFree(up);
1380c2c66affSColin Finck         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1381c2c66affSColin Finck 	return(NULL);
1382c2c66affSColin Finck     }
1383c2c66affSColin Finck     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1384c2c66affSColin Finck     handler->input = input;
1385c2c66affSColin Finck     handler->output = output;
1386c2c66affSColin Finck     handler->name = up;
1387c2c66affSColin Finck 
1388c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
1389c2c66affSColin Finck     handler->iconv_in = NULL;
1390c2c66affSColin Finck     handler->iconv_out = NULL;
1391c2c66affSColin Finck #endif
1392c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
1393c2c66affSColin Finck     handler->uconv_in = NULL;
1394c2c66affSColin Finck     handler->uconv_out = NULL;
1395c2c66affSColin Finck #endif
1396c2c66affSColin Finck 
1397c2c66affSColin Finck     /*
1398c2c66affSColin Finck      * registers and returns the handler.
1399c2c66affSColin Finck      */
1400c2c66affSColin Finck     xmlRegisterCharEncodingHandler(handler);
1401c2c66affSColin Finck #ifdef DEBUG_ENCODING
1402c2c66affSColin Finck     xmlGenericError(xmlGenericErrorContext,
1403c2c66affSColin Finck 	    "Registered encoding handler for %s\n", name);
1404c2c66affSColin Finck #endif
1405c2c66affSColin Finck     return(handler);
1406c2c66affSColin Finck }
1407c2c66affSColin Finck 
1408c2c66affSColin Finck /**
1409c2c66affSColin Finck  * xmlInitCharEncodingHandlers:
1410c2c66affSColin Finck  *
1411*911153daSThomas Faber  * DEPRECATED: This function will be made private. Call xmlInitParser to
1412*911153daSThomas Faber  * initialize the library.
1413*911153daSThomas Faber  *
1414c2c66affSColin Finck  * Initialize the char encoding support, it registers the default
1415c2c66affSColin Finck  * encoding supported.
1416c2c66affSColin Finck  * NOTE: while public, this function usually doesn't need to be called
1417c2c66affSColin Finck  *       in normal processing.
1418c2c66affSColin Finck  */
1419c2c66affSColin Finck void
xmlInitCharEncodingHandlers(void)1420c2c66affSColin Finck xmlInitCharEncodingHandlers(void) {
1421c2c66affSColin Finck     unsigned short int tst = 0x1234;
1422c2c66affSColin Finck     unsigned char *ptr = (unsigned char *) &tst;
1423c2c66affSColin Finck 
1424c2c66affSColin Finck     if (handlers != NULL) return;
1425c2c66affSColin Finck 
1426c2c66affSColin Finck     handlers = (xmlCharEncodingHandlerPtr *)
1427c2c66affSColin Finck         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1428c2c66affSColin Finck 
1429c2c66affSColin Finck     if (*ptr == 0x12) xmlLittleEndian = 0;
1430c2c66affSColin Finck     else if (*ptr == 0x34) xmlLittleEndian = 1;
1431c2c66affSColin Finck     else {
1432c2c66affSColin Finck         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1433c2c66affSColin Finck 	               "Odd problem at endianness detection\n", NULL);
1434c2c66affSColin Finck     }
1435c2c66affSColin Finck 
1436c2c66affSColin Finck     if (handlers == NULL) {
1437c2c66affSColin Finck         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1438c2c66affSColin Finck 	return;
1439c2c66affSColin Finck     }
1440c2c66affSColin Finck     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1441c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
1442c2c66affSColin Finck     xmlUTF16LEHandler =
1443c2c66affSColin Finck           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1444c2c66affSColin Finck     xmlUTF16BEHandler =
1445c2c66affSColin Finck           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1446c2c66affSColin Finck     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1447c2c66affSColin Finck     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1448c2c66affSColin Finck     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1449c2c66affSColin Finck     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1450c2c66affSColin Finck #ifdef LIBXML_HTML_ENABLED
1451c2c66affSColin Finck     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1452c2c66affSColin Finck #endif
1453c2c66affSColin Finck #else
1454c2c66affSColin Finck     xmlUTF16LEHandler =
1455c2c66affSColin Finck           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1456c2c66affSColin Finck     xmlUTF16BEHandler =
1457c2c66affSColin Finck           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1458c2c66affSColin Finck     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1459c2c66affSColin Finck     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1460c2c66affSColin Finck     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1461c2c66affSColin Finck     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1462c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
1463c2c66affSColin Finck #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464c2c66affSColin Finck #ifdef LIBXML_ISO8859X_ENABLED
1465c2c66affSColin Finck     xmlRegisterCharEncodingHandlersISO8859x ();
1466c2c66affSColin Finck #endif
1467c2c66affSColin Finck #endif
1468c2c66affSColin Finck 
1469c2c66affSColin Finck }
1470c2c66affSColin Finck 
1471c2c66affSColin Finck /**
1472c2c66affSColin Finck  * xmlCleanupCharEncodingHandlers:
1473c2c66affSColin Finck  *
1474*911153daSThomas Faber  * DEPRECATED: This function will be made private. Call xmlCleanupParser
1475*911153daSThomas Faber  * to free global state but see the warnings there. xmlCleanupParser
1476*911153daSThomas Faber  * should be only called once at program exit. In most cases, you don't
1477*911153daSThomas Faber  * have call cleanup functions at all.
1478*911153daSThomas Faber  *
1479c2c66affSColin Finck  * Cleanup the memory allocated for the char encoding support, it
1480c2c66affSColin Finck  * unregisters all the encoding handlers and the aliases.
1481c2c66affSColin Finck  */
1482c2c66affSColin Finck void
xmlCleanupCharEncodingHandlers(void)1483c2c66affSColin Finck xmlCleanupCharEncodingHandlers(void) {
1484c2c66affSColin Finck     xmlCleanupEncodingAliases();
1485c2c66affSColin Finck 
1486c2c66affSColin Finck     if (handlers == NULL) return;
1487c2c66affSColin Finck 
1488c2c66affSColin Finck     for (;nbCharEncodingHandler > 0;) {
1489c2c66affSColin Finck         nbCharEncodingHandler--;
1490c2c66affSColin Finck 	if (handlers[nbCharEncodingHandler] != NULL) {
1491c2c66affSColin Finck 	    if (handlers[nbCharEncodingHandler]->name != NULL)
1492c2c66affSColin Finck 		xmlFree(handlers[nbCharEncodingHandler]->name);
1493c2c66affSColin Finck 	    xmlFree(handlers[nbCharEncodingHandler]);
1494c2c66affSColin Finck 	}
1495c2c66affSColin Finck     }
1496c2c66affSColin Finck     xmlFree(handlers);
1497c2c66affSColin Finck     handlers = NULL;
1498c2c66affSColin Finck     nbCharEncodingHandler = 0;
1499c2c66affSColin Finck     xmlDefaultCharEncodingHandler = NULL;
1500c2c66affSColin Finck }
1501c2c66affSColin Finck 
1502c2c66affSColin Finck /**
1503c2c66affSColin Finck  * xmlRegisterCharEncodingHandler:
1504c2c66affSColin Finck  * @handler:  the xmlCharEncodingHandlerPtr handler block
1505c2c66affSColin Finck  *
1506c2c66affSColin Finck  * Register the char encoding handler, surprising, isn't it ?
1507c2c66affSColin Finck  */
1508c2c66affSColin Finck void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1509c2c66affSColin Finck xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1510c2c66affSColin Finck     if (handlers == NULL) xmlInitCharEncodingHandlers();
1511c2c66affSColin Finck     if ((handler == NULL) || (handlers == NULL)) {
1512c2c66affSColin Finck         xmlEncodingErr(XML_I18N_NO_HANDLER,
1513c2c66affSColin Finck 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
151440ee59d6SThomas Faber         goto free_handler;
1515c2c66affSColin Finck     }
1516c2c66affSColin Finck 
1517c2c66affSColin Finck     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1518c2c66affSColin Finck         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1519c2c66affSColin Finck 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520c2c66affSColin Finck 	               "MAX_ENCODING_HANDLERS");
152140ee59d6SThomas Faber         goto free_handler;
1522c2c66affSColin Finck     }
1523c2c66affSColin Finck     handlers[nbCharEncodingHandler++] = handler;
152440ee59d6SThomas Faber     return;
152540ee59d6SThomas Faber 
152640ee59d6SThomas Faber free_handler:
152740ee59d6SThomas Faber     if (handler != NULL) {
152840ee59d6SThomas Faber         if (handler->name != NULL) {
152940ee59d6SThomas Faber             xmlFree(handler->name);
153040ee59d6SThomas Faber         }
153140ee59d6SThomas Faber         xmlFree(handler);
153240ee59d6SThomas Faber     }
1533c2c66affSColin Finck }
1534c2c66affSColin Finck 
1535c2c66affSColin Finck /**
1536c2c66affSColin Finck  * xmlGetCharEncodingHandler:
1537c2c66affSColin Finck  * @enc:  an xmlCharEncoding value.
1538c2c66affSColin Finck  *
1539c2c66affSColin Finck  * Search in the registered set the handler able to read/write that encoding.
1540c2c66affSColin Finck  *
1541c2c66affSColin Finck  * Returns the handler or NULL if not found
1542c2c66affSColin Finck  */
1543c2c66affSColin Finck xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1544c2c66affSColin Finck xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1545c2c66affSColin Finck     xmlCharEncodingHandlerPtr handler;
1546c2c66affSColin Finck 
1547c2c66affSColin Finck     if (handlers == NULL) xmlInitCharEncodingHandlers();
1548c2c66affSColin Finck     switch (enc) {
1549c2c66affSColin Finck         case XML_CHAR_ENCODING_ERROR:
1550c2c66affSColin Finck 	    return(NULL);
1551c2c66affSColin Finck         case XML_CHAR_ENCODING_NONE:
1552c2c66affSColin Finck 	    return(NULL);
1553c2c66affSColin Finck         case XML_CHAR_ENCODING_UTF8:
1554c2c66affSColin Finck 	    return(NULL);
1555c2c66affSColin Finck         case XML_CHAR_ENCODING_UTF16LE:
1556c2c66affSColin Finck 	    return(xmlUTF16LEHandler);
1557c2c66affSColin Finck         case XML_CHAR_ENCODING_UTF16BE:
1558c2c66affSColin Finck 	    return(xmlUTF16BEHandler);
1559c2c66affSColin Finck         case XML_CHAR_ENCODING_EBCDIC:
1560c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("EBCDIC");
1561c2c66affSColin Finck             if (handler != NULL) return(handler);
1562c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("ebcdic");
1563c2c66affSColin Finck             if (handler != NULL) return(handler);
1564c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1565c2c66affSColin Finck             if (handler != NULL) return(handler);
1566c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("IBM-037");
1567c2c66affSColin Finck             if (handler != NULL) return(handler);
1568c2c66affSColin Finck 	    break;
1569c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS4BE:
1570c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571c2c66affSColin Finck             if (handler != NULL) return(handler);
1572c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("UCS-4");
1573c2c66affSColin Finck             if (handler != NULL) return(handler);
1574c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("UCS4");
1575c2c66affSColin Finck             if (handler != NULL) return(handler);
1576c2c66affSColin Finck 	    break;
1577c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS4LE:
1578c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579c2c66affSColin Finck             if (handler != NULL) return(handler);
1580c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("UCS-4");
1581c2c66affSColin Finck             if (handler != NULL) return(handler);
1582c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("UCS4");
1583c2c66affSColin Finck             if (handler != NULL) return(handler);
1584c2c66affSColin Finck 	    break;
1585c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS4_2143:
1586c2c66affSColin Finck 	    break;
1587c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS4_3412:
1588c2c66affSColin Finck 	    break;
1589c2c66affSColin Finck         case XML_CHAR_ENCODING_UCS2:
1590c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591c2c66affSColin Finck             if (handler != NULL) return(handler);
1592c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("UCS-2");
1593c2c66affSColin Finck             if (handler != NULL) return(handler);
1594c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("UCS2");
1595c2c66affSColin Finck             if (handler != NULL) return(handler);
1596c2c66affSColin Finck 	    break;
1597c2c66affSColin Finck 
1598c2c66affSColin Finck 	    /*
1599c2c66affSColin Finck 	     * We used to keep ISO Latin encodings native in the
1600c2c66affSColin Finck 	     * generated data. This led to so many problems that
1601c2c66affSColin Finck 	     * this has been removed. One can still change this
1602c2c66affSColin Finck 	     * back by registering no-ops encoders for those
1603c2c66affSColin Finck 	     */
1604c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_1:
1605c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1606c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1607c2c66affSColin Finck 	    break;
1608c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_2:
1609c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1610c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1611c2c66affSColin Finck 	    break;
1612c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_3:
1613c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1614c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1615c2c66affSColin Finck 	    break;
1616c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_4:
1617c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1618c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1619c2c66affSColin Finck 	    break;
1620c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_5:
1621c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1622c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1623c2c66affSColin Finck 	    break;
1624c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_6:
1625c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1626c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1627c2c66affSColin Finck 	    break;
1628c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_7:
1629c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1630c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1631c2c66affSColin Finck 	    break;
1632c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_8:
1633c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1634c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1635c2c66affSColin Finck 	    break;
1636c2c66affSColin Finck         case XML_CHAR_ENCODING_8859_9:
1637c2c66affSColin Finck 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1638c2c66affSColin Finck 	    if (handler != NULL) return(handler);
1639c2c66affSColin Finck 	    break;
1640c2c66affSColin Finck 
1641c2c66affSColin Finck 
1642c2c66affSColin Finck         case XML_CHAR_ENCODING_2022_JP:
1643c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1644c2c66affSColin Finck             if (handler != NULL) return(handler);
1645c2c66affSColin Finck 	    break;
1646c2c66affSColin Finck         case XML_CHAR_ENCODING_SHIFT_JIS:
1647c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1648c2c66affSColin Finck             if (handler != NULL) return(handler);
1649c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1650c2c66affSColin Finck             if (handler != NULL) return(handler);
1651c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("Shift_JIS");
1652c2c66affSColin Finck             if (handler != NULL) return(handler);
1653c2c66affSColin Finck 	    break;
1654c2c66affSColin Finck         case XML_CHAR_ENCODING_EUC_JP:
1655c2c66affSColin Finck             handler = xmlFindCharEncodingHandler("EUC-JP");
1656c2c66affSColin Finck             if (handler != NULL) return(handler);
1657c2c66affSColin Finck 	    break;
1658c2c66affSColin Finck 	default:
1659c2c66affSColin Finck 	    break;
1660c2c66affSColin Finck     }
1661c2c66affSColin Finck 
1662c2c66affSColin Finck #ifdef DEBUG_ENCODING
1663c2c66affSColin Finck     xmlGenericError(xmlGenericErrorContext,
1664c2c66affSColin Finck 	    "No handler found for encoding %d\n", enc);
1665c2c66affSColin Finck #endif
1666c2c66affSColin Finck     return(NULL);
1667c2c66affSColin Finck }
1668c2c66affSColin Finck 
1669c2c66affSColin Finck /**
1670c2c66affSColin Finck  * xmlFindCharEncodingHandler:
1671c2c66affSColin Finck  * @name:  a string describing the char encoding.
1672c2c66affSColin Finck  *
1673c2c66affSColin Finck  * Search in the registered set the handler able to read/write that encoding.
1674c2c66affSColin Finck  *
1675c2c66affSColin Finck  * Returns the handler or NULL if not found
1676c2c66affSColin Finck  */
1677c2c66affSColin Finck xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1678c2c66affSColin Finck xmlFindCharEncodingHandler(const char *name) {
1679c2c66affSColin Finck     const char *nalias;
1680c2c66affSColin Finck     const char *norig;
1681c2c66affSColin Finck     xmlCharEncoding alias;
1682c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
1683c2c66affSColin Finck     xmlCharEncodingHandlerPtr enc;
1684c2c66affSColin Finck     iconv_t icv_in, icv_out;
1685c2c66affSColin Finck #endif /* LIBXML_ICONV_ENABLED */
1686c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
1687c2c66affSColin Finck     xmlCharEncodingHandlerPtr encu;
1688c2c66affSColin Finck     uconv_t *ucv_in, *ucv_out;
1689c2c66affSColin Finck #endif /* LIBXML_ICU_ENABLED */
1690c2c66affSColin Finck     char upper[100];
1691c2c66affSColin Finck     int i;
1692c2c66affSColin Finck 
1693c2c66affSColin Finck     if (handlers == NULL) xmlInitCharEncodingHandlers();
1694c2c66affSColin Finck     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1695c2c66affSColin Finck     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1696c2c66affSColin Finck 
1697c2c66affSColin Finck     /*
1698c2c66affSColin Finck      * Do the alias resolution
1699c2c66affSColin Finck      */
1700c2c66affSColin Finck     norig = name;
1701c2c66affSColin Finck     nalias = xmlGetEncodingAlias(name);
1702c2c66affSColin Finck     if (nalias != NULL)
1703c2c66affSColin Finck 	name = nalias;
1704c2c66affSColin Finck 
1705c2c66affSColin Finck     /*
1706c2c66affSColin Finck      * Check first for directly registered encoding names
1707c2c66affSColin Finck      */
1708c2c66affSColin Finck     for (i = 0;i < 99;i++) {
1709c2c66affSColin Finck         upper[i] = toupper(name[i]);
1710c2c66affSColin Finck 	if (upper[i] == 0) break;
1711c2c66affSColin Finck     }
1712c2c66affSColin Finck     upper[i] = 0;
1713c2c66affSColin Finck 
1714c2c66affSColin Finck     if (handlers != NULL) {
1715c2c66affSColin Finck         for (i = 0;i < nbCharEncodingHandler; i++) {
1716c2c66affSColin Finck             if (!strcmp(upper, handlers[i]->name)) {
1717c2c66affSColin Finck #ifdef DEBUG_ENCODING
1718c2c66affSColin Finck                 xmlGenericError(xmlGenericErrorContext,
1719c2c66affSColin Finck                         "Found registered handler for encoding %s\n", name);
1720c2c66affSColin Finck #endif
1721c2c66affSColin Finck                 return(handlers[i]);
1722c2c66affSColin Finck             }
1723c2c66affSColin Finck         }
1724c2c66affSColin Finck     }
1725c2c66affSColin Finck 
1726c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
1727c2c66affSColin Finck     /* check whether iconv can handle this */
1728c2c66affSColin Finck     icv_in = iconv_open("UTF-8", name);
1729c2c66affSColin Finck     icv_out = iconv_open(name, "UTF-8");
1730c2c66affSColin Finck     if (icv_in == (iconv_t) -1) {
1731c2c66affSColin Finck         icv_in = iconv_open("UTF-8", upper);
1732c2c66affSColin Finck     }
1733c2c66affSColin Finck     if (icv_out == (iconv_t) -1) {
1734c2c66affSColin Finck 	icv_out = iconv_open(upper, "UTF-8");
1735c2c66affSColin Finck     }
1736c2c66affSColin Finck     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1737c2c66affSColin Finck 	    enc = (xmlCharEncodingHandlerPtr)
1738c2c66affSColin Finck 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1739c2c66affSColin Finck 	    if (enc == NULL) {
1740c2c66affSColin Finck 	        iconv_close(icv_in);
1741c2c66affSColin Finck 	        iconv_close(icv_out);
1742c2c66affSColin Finck 		return(NULL);
1743c2c66affSColin Finck 	    }
1744c2c66affSColin Finck             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1745c2c66affSColin Finck 	    enc->name = xmlMemStrdup(name);
1746c2c66affSColin Finck 	    enc->input = NULL;
1747c2c66affSColin Finck 	    enc->output = NULL;
1748c2c66affSColin Finck 	    enc->iconv_in = icv_in;
1749c2c66affSColin Finck 	    enc->iconv_out = icv_out;
1750c2c66affSColin Finck #ifdef DEBUG_ENCODING
1751c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
1752c2c66affSColin Finck 		    "Found iconv handler for encoding %s\n", name);
1753c2c66affSColin Finck #endif
1754c2c66affSColin Finck 	    return enc;
1755c2c66affSColin Finck     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1756c2c66affSColin Finck 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1757c2c66affSColin Finck 		    "iconv : problems with filters for '%s'\n", name);
17588940614aSThomas Faber 	    if (icv_in != (iconv_t) -1)
17598940614aSThomas Faber 		iconv_close(icv_in);
17608940614aSThomas Faber 	    else
17618940614aSThomas Faber 		iconv_close(icv_out);
1762c2c66affSColin Finck     }
1763c2c66affSColin Finck #endif /* LIBXML_ICONV_ENABLED */
1764c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
1765c2c66affSColin Finck     /* check whether icu can handle this */
1766c2c66affSColin Finck     ucv_in = openIcuConverter(name, 1);
1767c2c66affSColin Finck     ucv_out = openIcuConverter(name, 0);
1768c2c66affSColin Finck     if (ucv_in != NULL && ucv_out != NULL) {
1769c2c66affSColin Finck 	    encu = (xmlCharEncodingHandlerPtr)
1770c2c66affSColin Finck 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1771c2c66affSColin Finck 	    if (encu == NULL) {
1772c2c66affSColin Finck                 closeIcuConverter(ucv_in);
1773c2c66affSColin Finck                 closeIcuConverter(ucv_out);
1774c2c66affSColin Finck 		return(NULL);
1775c2c66affSColin Finck 	    }
1776c2c66affSColin Finck             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1777c2c66affSColin Finck 	    encu->name = xmlMemStrdup(name);
1778c2c66affSColin Finck 	    encu->input = NULL;
1779c2c66affSColin Finck 	    encu->output = NULL;
1780c2c66affSColin Finck 	    encu->uconv_in = ucv_in;
1781c2c66affSColin Finck 	    encu->uconv_out = ucv_out;
1782c2c66affSColin Finck #ifdef DEBUG_ENCODING
1783c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
1784c2c66affSColin Finck 		    "Found ICU converter handler for encoding %s\n", name);
1785c2c66affSColin Finck #endif
1786c2c66affSColin Finck 	    return encu;
1787c2c66affSColin Finck     } else if (ucv_in != NULL || ucv_out != NULL) {
1788c2c66affSColin Finck             closeIcuConverter(ucv_in);
1789c2c66affSColin Finck             closeIcuConverter(ucv_out);
1790c2c66affSColin Finck 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1791c2c66affSColin Finck 		    "ICU converter : problems with filters for '%s'\n", name);
1792c2c66affSColin Finck     }
1793c2c66affSColin Finck #endif /* LIBXML_ICU_ENABLED */
1794c2c66affSColin Finck 
1795c2c66affSColin Finck #ifdef DEBUG_ENCODING
1796c2c66affSColin Finck     xmlGenericError(xmlGenericErrorContext,
1797c2c66affSColin Finck 	    "No handler found for encoding %s\n", name);
1798c2c66affSColin Finck #endif
1799c2c66affSColin Finck 
1800c2c66affSColin Finck     /*
1801c2c66affSColin Finck      * Fallback using the canonical names
1802c2c66affSColin Finck      */
1803c2c66affSColin Finck     alias = xmlParseCharEncoding(norig);
1804c2c66affSColin Finck     if (alias != XML_CHAR_ENCODING_ERROR) {
1805c2c66affSColin Finck         const char* canon;
1806c2c66affSColin Finck         canon = xmlGetCharEncodingName(alias);
1807c2c66affSColin Finck         if ((canon != NULL) && (strcmp(name, canon))) {
1808c2c66affSColin Finck 	    return(xmlFindCharEncodingHandler(canon));
1809c2c66affSColin Finck         }
1810c2c66affSColin Finck     }
1811c2c66affSColin Finck 
1812c2c66affSColin Finck     /* If "none of the above", give up */
1813c2c66affSColin Finck     return(NULL);
1814c2c66affSColin Finck }
1815c2c66affSColin Finck 
1816c2c66affSColin Finck /************************************************************************
1817c2c66affSColin Finck  *									*
1818c2c66affSColin Finck  *		ICONV based generic conversion functions		*
1819c2c66affSColin Finck  *									*
1820c2c66affSColin Finck  ************************************************************************/
1821c2c66affSColin Finck 
1822c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Performs one iconv() call over the supplied buffers and maps the
 * outcome onto the status codes used by the encoding layer.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (E2BIG), or if @out, @outlen, @in or @inlen
 *        is NULL, or
 *     -2 if the transcoding fails (EILSEQ: invalid input sequence or a
 *        character that can't be represented in the target encoding), or
 *     -3 if the input ends with an incomplete sequence (EINVAL) or for
 *        any other failure.
 *
 * When iconv() has been called, the value of @inlen after return is the
 * number of octets consumed and the value of @outlen is the number of
 * octets produced. If an argument is NULL, @outlen (when available) is
 * set to 0 and @inlen is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft;
    size_t dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;

    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);

    /* iconv() leaves the *remaining* sizes behind; turn them into counts. */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    /* Everything consumed and no error reported: plain success. */
    if ((srcLeft == 0) && (rc != (size_t) -1))
        return(0);

#ifdef EILSEQ
    if (errno == EILSEQ)
        return(-2);
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return(-1);
#endif
    /* EINVAL (truncated trailing sequence) and anything else end up here. */
    return(-3);
}
1883c2c66affSColin Finck #endif /* LIBXML_ICONV_ENABLED */
1884c2c66affSColin Finck 
1885c2c66affSColin Finck /************************************************************************
1886c2c66affSColin Finck  *									*
1887c2c66affSColin Finck  *		ICU based generic conversion functions		*
1888c2c66affSColin Finck  *									*
1889c2c66affSColin Finck  ************************************************************************/
1890c2c66affSColin Finck 
1891c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
1892c2c66affSColin Finck /**
1893c2c66affSColin Finck  * xmlUconvWrapper:
1894c2c66affSColin Finck  * @cd: ICU uconverter data structure
1895c2c66affSColin Finck  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1896c2c66affSColin Finck  * @out:  a pointer to an array of bytes to store the result
1897c2c66affSColin Finck  * @outlen:  the length of @out
189840ee59d6SThomas Faber  * @in:  a pointer to an array of input bytes
1899c2c66affSColin Finck  * @inlen:  the length of @in
19005bb277a5SThomas Faber  * @flush: if true, indicates end of input
1901c2c66affSColin Finck  *
1902c2c66affSColin Finck  * Returns 0 if success, or
1903c2c66affSColin Finck  *     -1 by lack of space, or
1904c2c66affSColin Finck  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1905c2c66affSColin Finck  *        the result of transformation can't fit into the encoding we want), or
1906c2c66affSColin Finck  *     -3 if there the last byte can't form a single output char.
1907c2c66affSColin Finck  *
1908c2c66affSColin Finck  * The value of @inlen after return is the number of octets consumed
1909c2c66affSColin Finck  *     as the return value is positive, else unpredictable.
191040ee59d6SThomas Faber  * The value of @outlen after return is the number of octets produced.
1911c2c66affSColin Finck  */
1912c2c66affSColin Finck static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1913c2c66affSColin Finck xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
19145bb277a5SThomas Faber                 const unsigned char *in, int *inlen, int flush) {
1915c2c66affSColin Finck     const char *ucv_in = (const char *) in;
1916c2c66affSColin Finck     char *ucv_out = (char *) out;
1917c2c66affSColin Finck     UErrorCode err = U_ZERO_ERROR;
1918c2c66affSColin Finck 
1919c2c66affSColin Finck     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1920c2c66affSColin Finck         if (outlen != NULL) *outlen = 0;
1921c2c66affSColin Finck         return(-1);
1922c2c66affSColin Finck     }
1923c2c66affSColin Finck 
1924c2c66affSColin Finck     if (toUnicode) {
1925c2c66affSColin Finck         /* encoding => UTF-16 => UTF-8 */
1926c2c66affSColin Finck         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
19275bb277a5SThomas Faber                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
19285bb277a5SThomas Faber                        &cd->pivot_source, &cd->pivot_target,
19295bb277a5SThomas Faber                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1930c2c66affSColin Finck     } else {
1931c2c66affSColin Finck         /* UTF-8 => UTF-16 => encoding */
1932c2c66affSColin Finck         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
19335bb277a5SThomas Faber                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
19345bb277a5SThomas Faber                        &cd->pivot_source, &cd->pivot_target,
19355bb277a5SThomas Faber                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1936c2c66affSColin Finck     }
1937c2c66affSColin Finck     *inlen = ucv_in - (const char*) in;
1938c2c66affSColin Finck     *outlen = ucv_out - (char *) out;
19395bb277a5SThomas Faber     if (U_SUCCESS(err)) {
19405bb277a5SThomas Faber         /* reset pivot buf if this is the last call for input (flush==TRUE) */
19415bb277a5SThomas Faber         if (flush)
19425bb277a5SThomas Faber             cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1943c2c66affSColin Finck         return 0;
19445bb277a5SThomas Faber     }
1945c2c66affSColin Finck     if (err == U_BUFFER_OVERFLOW_ERROR)
1946c2c66affSColin Finck         return -1;
1947c2c66affSColin Finck     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1948c2c66affSColin Finck         return -2;
1949c2c66affSColin Finck     return -3;
1950c2c66affSColin Finck }
1951c2c66affSColin Finck #endif /* LIBXML_ICU_ENABLED */
1952c2c66affSColin Finck 
1953c2c66affSColin Finck /************************************************************************
1954c2c66affSColin Finck  *									*
1955c2c66affSColin Finck  *		The real API used by libxml for on-the-fly conversion	*
1956c2c66affSColin Finck  *									*
1957c2c66affSColin Finck  ************************************************************************/
1958c2c66affSColin Finck 
195940ee59d6SThomas Faber /**
196040ee59d6SThomas Faber  * xmlEncInputChunk:
196140ee59d6SThomas Faber  * @handler:  encoding handler
196240ee59d6SThomas Faber  * @out:  a pointer to an array of bytes to store the result
196340ee59d6SThomas Faber  * @outlen:  the length of @out
196440ee59d6SThomas Faber  * @in:  a pointer to an array of input bytes
196540ee59d6SThomas Faber  * @inlen:  the length of @in
196640ee59d6SThomas Faber  * @flush:  flush (ICU-related)
196740ee59d6SThomas Faber  *
196840ee59d6SThomas Faber  * Returns 0 if success, or
196940ee59d6SThomas Faber  *     -1 by lack of space, or
197040ee59d6SThomas Faber  *     -2 if the transcoding fails (for *in is not valid utf8 string or
197140ee59d6SThomas Faber  *        the result of transformation can't fit into the encoding we want), or
197240ee59d6SThomas Faber  *     -3 if there the last byte can't form a single output char.
197340ee59d6SThomas Faber  *
197440ee59d6SThomas Faber  * The value of @inlen after return is the number of octets consumed
197540ee59d6SThomas Faber  *     as the return value is 0, else unpredictable.
197640ee59d6SThomas Faber  * The value of @outlen after return is the number of octets produced.
197740ee59d6SThomas Faber  */
1978fc82f8e2SThomas Faber static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1979fc82f8e2SThomas Faber xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
19805bb277a5SThomas Faber                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1981fc82f8e2SThomas Faber     int ret;
19825bb277a5SThomas Faber     (void)flush;
1983fc82f8e2SThomas Faber 
1984fc82f8e2SThomas Faber     if (handler->input != NULL) {
1985fc82f8e2SThomas Faber         ret = handler->input(out, outlen, in, inlen);
198640ee59d6SThomas Faber         if (ret > 0)
198740ee59d6SThomas Faber            ret = 0;
1988fc82f8e2SThomas Faber     }
1989fc82f8e2SThomas Faber #ifdef LIBXML_ICONV_ENABLED
1990fc82f8e2SThomas Faber     else if (handler->iconv_in != NULL) {
1991fc82f8e2SThomas Faber         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1992fc82f8e2SThomas Faber     }
1993fc82f8e2SThomas Faber #endif /* LIBXML_ICONV_ENABLED */
1994fc82f8e2SThomas Faber #ifdef LIBXML_ICU_ENABLED
1995fc82f8e2SThomas Faber     else if (handler->uconv_in != NULL) {
19965bb277a5SThomas Faber         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
19975bb277a5SThomas Faber                               flush);
1998fc82f8e2SThomas Faber     }
1999fc82f8e2SThomas Faber #endif /* LIBXML_ICU_ENABLED */
2000fc82f8e2SThomas Faber     else {
2001fc82f8e2SThomas Faber         *outlen = 0;
2002fc82f8e2SThomas Faber         *inlen = 0;
2003fc82f8e2SThomas Faber         ret = -2;
2004fc82f8e2SThomas Faber     }
2005fc82f8e2SThomas Faber 
2006fc82f8e2SThomas Faber     return(ret);
2007fc82f8e2SThomas Faber }
2008fc82f8e2SThomas Faber 
200940ee59d6SThomas Faber /**
201040ee59d6SThomas Faber  * xmlEncOutputChunk:
201140ee59d6SThomas Faber  * @handler:  encoding handler
201240ee59d6SThomas Faber  * @out:  a pointer to an array of bytes to store the result
201340ee59d6SThomas Faber  * @outlen:  the length of @out
201440ee59d6SThomas Faber  * @in:  a pointer to an array of input bytes
201540ee59d6SThomas Faber  * @inlen:  the length of @in
201640ee59d6SThomas Faber  *
201740ee59d6SThomas Faber  * Returns 0 if success, or
201840ee59d6SThomas Faber  *     -1 by lack of space, or
201940ee59d6SThomas Faber  *     -2 if the transcoding fails (for *in is not valid utf8 string or
202040ee59d6SThomas Faber  *        the result of transformation can't fit into the encoding we want), or
202140ee59d6SThomas Faber  *     -3 if there the last byte can't form a single output char.
202240ee59d6SThomas Faber  *     -4 if no output function was found.
202340ee59d6SThomas Faber  *
202440ee59d6SThomas Faber  * The value of @inlen after return is the number of octets consumed
202540ee59d6SThomas Faber  *     as the return value is 0, else unpredictable.
202640ee59d6SThomas Faber  * The value of @outlen after return is the number of octets produced.
202740ee59d6SThomas Faber  */
2028fc82f8e2SThomas Faber static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2029fc82f8e2SThomas Faber xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030fc82f8e2SThomas Faber                   int *outlen, const unsigned char *in, int *inlen) {
2031fc82f8e2SThomas Faber     int ret;
2032fc82f8e2SThomas Faber 
2033fc82f8e2SThomas Faber     if (handler->output != NULL) {
2034fc82f8e2SThomas Faber         ret = handler->output(out, outlen, in, inlen);
203540ee59d6SThomas Faber         if (ret > 0)
203640ee59d6SThomas Faber            ret = 0;
2037fc82f8e2SThomas Faber     }
2038fc82f8e2SThomas Faber #ifdef LIBXML_ICONV_ENABLED
2039fc82f8e2SThomas Faber     else if (handler->iconv_out != NULL) {
2040fc82f8e2SThomas Faber         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2041fc82f8e2SThomas Faber     }
2042fc82f8e2SThomas Faber #endif /* LIBXML_ICONV_ENABLED */
2043fc82f8e2SThomas Faber #ifdef LIBXML_ICU_ENABLED
2044fc82f8e2SThomas Faber     else if (handler->uconv_out != NULL) {
20455bb277a5SThomas Faber         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
204640ee59d6SThomas Faber                               1);
2047fc82f8e2SThomas Faber     }
2048fc82f8e2SThomas Faber #endif /* LIBXML_ICU_ENABLED */
2049fc82f8e2SThomas Faber     else {
2050fc82f8e2SThomas Faber         *outlen = 0;
2051fc82f8e2SThomas Faber         *inlen = 0;
2052fc82f8e2SThomas Faber         ret = -4;
2053fc82f8e2SThomas Faber     }
2054fc82f8e2SThomas Faber 
2055fc82f8e2SThomas Faber     return(ret);
2056fc82f8e2SThomas Faber }
2057fc82f8e2SThomas Faber 
2058c2c66affSColin Finck /**
2059c2c66affSColin Finck  * xmlCharEncFirstLineInt:
2060f22fa382SThomas Faber  * @handler:	char encoding transformation data structure
2061c2c66affSColin Finck  * @out:  an xmlBuffer for the output.
2062c2c66affSColin Finck  * @in:  an xmlBuffer for the input
2063c2c66affSColin Finck  * @len:  number of bytes to convert for the first line, or -1
2064c2c66affSColin Finck  *
2065c2c66affSColin Finck  * Front-end for the encoding handler input function, but handle only
2066c2c66affSColin Finck  * the very first line, i.e. limit itself to 45 chars.
2067c2c66affSColin Finck  *
2068c2c66affSColin Finck  * Returns the number of byte written if success, or
2069c2c66affSColin Finck  *     -1 general error
2070c2c66affSColin Finck  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2071c2c66affSColin Finck  *        the result of transformation can't fit into the encoding we want), or
2072c2c66affSColin Finck  */
2073c2c66affSColin Finck int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2074c2c66affSColin Finck xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2075c2c66affSColin Finck                        xmlBufferPtr in, int len) {
2076fc82f8e2SThomas Faber     int ret;
2077c2c66affSColin Finck     int written;
2078c2c66affSColin Finck     int toconv;
2079c2c66affSColin Finck 
2080c2c66affSColin Finck     if (handler == NULL) return(-1);
2081c2c66affSColin Finck     if (out == NULL) return(-1);
2082c2c66affSColin Finck     if (in == NULL) return(-1);
2083c2c66affSColin Finck 
2084c2c66affSColin Finck     /* calculate space available */
2085c2c66affSColin Finck     written = out->size - out->use - 1; /* count '\0' */
2086c2c66affSColin Finck     toconv = in->use;
2087c2c66affSColin Finck     /*
2088c2c66affSColin Finck      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089c2c66affSColin Finck      * 45 chars should be sufficient to reach the end of the encoding
2090c2c66affSColin Finck      * declaration without going too far inside the document content.
2091c2c66affSColin Finck      * on UTF-16 this means 90bytes, on UCS4 this means 180
2092c2c66affSColin Finck      * The actual value depending on guessed encoding is passed as @len
2093c2c66affSColin Finck      * if provided
2094c2c66affSColin Finck      */
2095c2c66affSColin Finck     if (len >= 0) {
2096c2c66affSColin Finck         if (toconv > len)
2097c2c66affSColin Finck             toconv = len;
2098c2c66affSColin Finck     } else {
2099c2c66affSColin Finck         if (toconv > 180)
2100c2c66affSColin Finck             toconv = 180;
2101c2c66affSColin Finck     }
2102c2c66affSColin Finck     if (toconv * 2 >= written) {
2103c2c66affSColin Finck         xmlBufferGrow(out, toconv * 2);
2104c2c66affSColin Finck 	written = out->size - out->use - 1;
2105c2c66affSColin Finck     }
2106c2c66affSColin Finck 
2107fc82f8e2SThomas Faber     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
21085bb277a5SThomas Faber                            in->content, &toconv, 0);
2109c2c66affSColin Finck     xmlBufferShrink(in, toconv);
2110c2c66affSColin Finck     out->use += written;
2111c2c66affSColin Finck     out->content[out->use] = 0;
2112c2c66affSColin Finck     if (ret == -1) ret = -3;
2113fc82f8e2SThomas Faber 
2114c2c66affSColin Finck #ifdef DEBUG_ENCODING
2115c2c66affSColin Finck     switch (ret) {
2116c2c66affSColin Finck         case 0:
2117c2c66affSColin Finck 	    xmlGenericError(xmlGenericErrorContext,
2118c2c66affSColin Finck 		    "converted %d bytes to %d bytes of input\n",
2119c2c66affSColin Finck 	            toconv, written);
2120c2c66affSColin Finck 	    break;
2121c2c66affSColin Finck         case -1:
2122c2c66affSColin Finck 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2123c2c66affSColin Finck 	            toconv, written, in->use);
2124c2c66affSColin Finck 	    break;
2125c2c66affSColin Finck         case -2:
2126c2c66affSColin Finck 	    xmlGenericError(xmlGenericErrorContext,
2127c2c66affSColin Finck 		    "input conversion failed due to input error\n");
2128c2c66affSColin Finck 	    break;
2129c2c66affSColin Finck         case -3:
2130c2c66affSColin Finck 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2131c2c66affSColin Finck 	            toconv, written, in->use);
2132c2c66affSColin Finck 	    break;
2133c2c66affSColin Finck 	default:
2134c2c66affSColin Finck 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2135c2c66affSColin Finck     }
2136c2c66affSColin Finck #endif /* DEBUG_ENCODING */
2137c2c66affSColin Finck     /*
2138c2c66affSColin Finck      * Ignore when input buffer is not on a boundary
2139c2c66affSColin Finck      */
2140c2c66affSColin Finck     if (ret == -3) ret = 0;
2141c2c66affSColin Finck     if (ret == -1) ret = 0;
214240ee59d6SThomas Faber     return(written ? written : ret);
2143c2c66affSColin Finck }
2144c2c66affSColin Finck 
2145c2c66affSColin Finck /**
2146c2c66affSColin Finck  * xmlCharEncFirstLine:
2147f22fa382SThomas Faber  * @handler:	char encoding transformation data structure
2148c2c66affSColin Finck  * @out:  an xmlBuffer for the output.
2149c2c66affSColin Finck  * @in:  an xmlBuffer for the input
2150c2c66affSColin Finck  *
2151c2c66affSColin Finck  * Front-end for the encoding handler input function, but handle only
2152c2c66affSColin Finck  * the very first line, i.e. limit itself to 45 chars.
2153c2c66affSColin Finck  *
2154c2c66affSColin Finck  * Returns the number of byte written if success, or
2155c2c66affSColin Finck  *     -1 general error
2156c2c66affSColin Finck  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2157c2c66affSColin Finck  *        the result of transformation can't fit into the encoding we want), or
2158c2c66affSColin Finck  */
2159c2c66affSColin Finck int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2160c2c66affSColin Finck xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2161c2c66affSColin Finck                  xmlBufferPtr in) {
2162c2c66affSColin Finck     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2163c2c66affSColin Finck }
2164c2c66affSColin Finck 
2165c2c66affSColin Finck /**
2166c2c66affSColin Finck  * xmlCharEncFirstLineInput:
2167c2c66affSColin Finck  * @input: a parser input buffer
2168c2c66affSColin Finck  * @len:  number of bytes to convert for the first line, or -1
2169c2c66affSColin Finck  *
2170c2c66affSColin Finck  * Front-end for the encoding handler input function, but handle only
2171c2c66affSColin Finck  * the very first line. Point is that this is based on autodetection
2172c2c66affSColin Finck  * of the encoding and once that first line is converted we may find
2173c2c66affSColin Finck  * out that a different decoder is needed to process the input.
2174c2c66affSColin Finck  *
2175c2c66affSColin Finck  * Returns the number of byte written if success, or
2176c2c66affSColin Finck  *     -1 general error
2177c2c66affSColin Finck  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2178c2c66affSColin Finck  *        the result of transformation can't fit into the encoding we want), or
2179c2c66affSColin Finck  */
2180c2c66affSColin Finck int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2181c2c66affSColin Finck xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2182c2c66affSColin Finck {
2183fc82f8e2SThomas Faber     int ret;
2184c2c66affSColin Finck     size_t written;
2185c2c66affSColin Finck     size_t toconv;
2186c2c66affSColin Finck     int c_in;
2187c2c66affSColin Finck     int c_out;
2188c2c66affSColin Finck     xmlBufPtr in;
2189c2c66affSColin Finck     xmlBufPtr out;
2190c2c66affSColin Finck 
2191c2c66affSColin Finck     if ((input == NULL) || (input->encoder == NULL) ||
2192c2c66affSColin Finck         (input->buffer == NULL) || (input->raw == NULL))
2193c2c66affSColin Finck         return (-1);
2194c2c66affSColin Finck     out = input->buffer;
2195c2c66affSColin Finck     in = input->raw;
2196c2c66affSColin Finck 
2197c2c66affSColin Finck     toconv = xmlBufUse(in);
2198c2c66affSColin Finck     if (toconv == 0)
2199c2c66affSColin Finck         return (0);
2200*911153daSThomas Faber     written = xmlBufAvail(out);
2201c2c66affSColin Finck     /*
2202c2c66affSColin Finck      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2203c2c66affSColin Finck      * 45 chars should be sufficient to reach the end of the encoding
2204c2c66affSColin Finck      * declaration without going too far inside the document content.
2205c2c66affSColin Finck      * on UTF-16 this means 90bytes, on UCS4 this means 180
2206c2c66affSColin Finck      * The actual value depending on guessed encoding is passed as @len
2207c2c66affSColin Finck      * if provided
2208c2c66affSColin Finck      */
2209c2c66affSColin Finck     if (len >= 0) {
2210c2c66affSColin Finck         if (toconv > (unsigned int) len)
2211c2c66affSColin Finck             toconv = len;
2212c2c66affSColin Finck     } else {
2213c2c66affSColin Finck         if (toconv > 180)
2214c2c66affSColin Finck             toconv = 180;
2215c2c66affSColin Finck     }
2216c2c66affSColin Finck     if (toconv * 2 >= written) {
2217c2c66affSColin Finck         xmlBufGrow(out, toconv * 2);
2218*911153daSThomas Faber         written = xmlBufAvail(out);
2219c2c66affSColin Finck     }
2220c2c66affSColin Finck     if (written > 360)
2221c2c66affSColin Finck         written = 360;
2222c2c66affSColin Finck 
2223c2c66affSColin Finck     c_in = toconv;
2224c2c66affSColin Finck     c_out = written;
2225fc82f8e2SThomas Faber     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
22265bb277a5SThomas Faber                            xmlBufContent(in), &c_in, 0);
2227c2c66affSColin Finck     xmlBufShrink(in, c_in);
2228c2c66affSColin Finck     xmlBufAddLen(out, c_out);
2229c2c66affSColin Finck     if (ret == -1)
2230c2c66affSColin Finck         ret = -3;
2231fc82f8e2SThomas Faber 
2232c2c66affSColin Finck     switch (ret) {
2233c2c66affSColin Finck         case 0:
2234c2c66affSColin Finck #ifdef DEBUG_ENCODING
2235c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2236c2c66affSColin Finck                             "converted %d bytes to %d bytes of input\n",
2237c2c66affSColin Finck                             c_in, c_out);
2238c2c66affSColin Finck #endif
2239c2c66affSColin Finck             break;
2240c2c66affSColin Finck         case -1:
2241c2c66affSColin Finck #ifdef DEBUG_ENCODING
2242c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2243c2c66affSColin Finck                          "converted %d bytes to %d bytes of input, %d left\n",
2244c2c66affSColin Finck                             c_in, c_out, (int)xmlBufUse(in));
2245c2c66affSColin Finck #endif
2246c2c66affSColin Finck             break;
2247c2c66affSColin Finck         case -3:
2248c2c66affSColin Finck #ifdef DEBUG_ENCODING
2249c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2250c2c66affSColin Finck                         "converted %d bytes to %d bytes of input, %d left\n",
2251c2c66affSColin Finck                             c_in, c_out, (int)xmlBufUse(in));
2252c2c66affSColin Finck #endif
2253c2c66affSColin Finck             break;
2254c2c66affSColin Finck         case -2: {
2255c2c66affSColin Finck             char buf[50];
2256c2c66affSColin Finck             const xmlChar *content = xmlBufContent(in);
2257c2c66affSColin Finck 
2258c2c66affSColin Finck 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2259c2c66affSColin Finck 		     content[0], content[1],
2260c2c66affSColin Finck 		     content[2], content[3]);
2261c2c66affSColin Finck 	    buf[49] = 0;
2262c2c66affSColin Finck 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2263c2c66affSColin Finck 		    "input conversion failed due to input error, bytes %s\n",
2264c2c66affSColin Finck 		           buf);
2265c2c66affSColin Finck         }
2266c2c66affSColin Finck     }
2267c2c66affSColin Finck     /*
2268c2c66affSColin Finck      * Ignore when input buffer is not on a boundary
2269c2c66affSColin Finck      */
2270c2c66affSColin Finck     if (ret == -3) ret = 0;
2271c2c66affSColin Finck     if (ret == -1) ret = 0;
227240ee59d6SThomas Faber     return(c_out ? c_out : ret);
2273c2c66affSColin Finck }
2274c2c66affSColin Finck 
2275c2c66affSColin Finck /**
2276c2c66affSColin Finck  * xmlCharEncInput:
2277c2c66affSColin Finck  * @input: a parser input buffer
2278c2c66affSColin Finck  * @flush: try to flush all the raw buffer
2279c2c66affSColin Finck  *
2280c2c66affSColin Finck  * Generic front-end for the encoding handler on parser input
2281c2c66affSColin Finck  *
2282c2c66affSColin Finck  * Returns the number of byte written if success, or
2283c2c66affSColin Finck  *     -1 general error
2284c2c66affSColin Finck  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2285c2c66affSColin Finck  *        the result of transformation can't fit into the encoding we want), or
2286c2c66affSColin Finck  */
2287c2c66affSColin Finck int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2288c2c66affSColin Finck xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2289c2c66affSColin Finck {
2290fc82f8e2SThomas Faber     int ret;
2291c2c66affSColin Finck     size_t written;
2292c2c66affSColin Finck     size_t toconv;
2293c2c66affSColin Finck     int c_in;
2294c2c66affSColin Finck     int c_out;
2295c2c66affSColin Finck     xmlBufPtr in;
2296c2c66affSColin Finck     xmlBufPtr out;
2297c2c66affSColin Finck 
2298c2c66affSColin Finck     if ((input == NULL) || (input->encoder == NULL) ||
2299c2c66affSColin Finck         (input->buffer == NULL) || (input->raw == NULL))
2300c2c66affSColin Finck         return (-1);
2301c2c66affSColin Finck     out = input->buffer;
2302c2c66affSColin Finck     in = input->raw;
2303c2c66affSColin Finck 
2304c2c66affSColin Finck     toconv = xmlBufUse(in);
2305c2c66affSColin Finck     if (toconv == 0)
2306c2c66affSColin Finck         return (0);
2307c2c66affSColin Finck     if ((toconv > 64 * 1024) && (flush == 0))
2308c2c66affSColin Finck         toconv = 64 * 1024;
2309c2c66affSColin Finck     written = xmlBufAvail(out);
2310c2c66affSColin Finck     if (toconv * 2 >= written) {
2311c2c66affSColin Finck         xmlBufGrow(out, toconv * 2);
2312c2c66affSColin Finck         written = xmlBufAvail(out);
2313c2c66affSColin Finck     }
2314c2c66affSColin Finck     if ((written > 128 * 1024) && (flush == 0))
2315c2c66affSColin Finck         written = 128 * 1024;
2316c2c66affSColin Finck 
2317c2c66affSColin Finck     c_in = toconv;
2318c2c66affSColin Finck     c_out = written;
2319fc82f8e2SThomas Faber     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
23205bb277a5SThomas Faber                            xmlBufContent(in), &c_in, flush);
2321c2c66affSColin Finck     xmlBufShrink(in, c_in);
2322c2c66affSColin Finck     xmlBufAddLen(out, c_out);
2323c2c66affSColin Finck     if (ret == -1)
2324c2c66affSColin Finck         ret = -3;
2325fc82f8e2SThomas Faber 
2326c2c66affSColin Finck     switch (ret) {
2327c2c66affSColin Finck         case 0:
2328c2c66affSColin Finck #ifdef DEBUG_ENCODING
2329c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2330c2c66affSColin Finck                             "converted %d bytes to %d bytes of input\n",
2331c2c66affSColin Finck                             c_in, c_out);
2332c2c66affSColin Finck #endif
2333c2c66affSColin Finck             break;
2334c2c66affSColin Finck         case -1:
2335c2c66affSColin Finck #ifdef DEBUG_ENCODING
2336c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2337c2c66affSColin Finck                          "converted %d bytes to %d bytes of input, %d left\n",
2338c2c66affSColin Finck                             c_in, c_out, (int)xmlBufUse(in));
2339c2c66affSColin Finck #endif
2340c2c66affSColin Finck             break;
2341c2c66affSColin Finck         case -3:
2342c2c66affSColin Finck #ifdef DEBUG_ENCODING
2343c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2344c2c66affSColin Finck                         "converted %d bytes to %d bytes of input, %d left\n",
2345c2c66affSColin Finck                             c_in, c_out, (int)xmlBufUse(in));
2346c2c66affSColin Finck #endif
2347c2c66affSColin Finck             break;
2348c2c66affSColin Finck         case -2: {
2349c2c66affSColin Finck             char buf[50];
2350c2c66affSColin Finck             const xmlChar *content = xmlBufContent(in);
2351c2c66affSColin Finck 
2352c2c66affSColin Finck 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353c2c66affSColin Finck 		     content[0], content[1],
2354c2c66affSColin Finck 		     content[2], content[3]);
2355c2c66affSColin Finck 	    buf[49] = 0;
2356c2c66affSColin Finck 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2357c2c66affSColin Finck 		    "input conversion failed due to input error, bytes %s\n",
2358c2c66affSColin Finck 		           buf);
2359c2c66affSColin Finck         }
2360c2c66affSColin Finck     }
2361c2c66affSColin Finck     /*
2362c2c66affSColin Finck      * Ignore when input buffer is not on a boundary
2363c2c66affSColin Finck      */
2364c2c66affSColin Finck     if (ret == -3)
2365c2c66affSColin Finck         ret = 0;
2366c2c66affSColin Finck     return (c_out? c_out : ret);
2367c2c66affSColin Finck }
2368c2c66affSColin Finck 
2369c2c66affSColin Finck /**
2370c2c66affSColin Finck  * xmlCharEncInFunc:
2371c2c66affSColin Finck  * @handler:	char encoding transformation data structure
2372c2c66affSColin Finck  * @out:  an xmlBuffer for the output.
2373c2c66affSColin Finck  * @in:  an xmlBuffer for the input
2374c2c66affSColin Finck  *
2375c2c66affSColin Finck  * Generic front-end for the encoding handler input function
2376c2c66affSColin Finck  *
2377c2c66affSColin Finck  * Returns the number of byte written if success, or
2378c2c66affSColin Finck  *     -1 general error
2379c2c66affSColin Finck  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2380c2c66affSColin Finck  *        the result of transformation can't fit into the encoding we want), or
2381c2c66affSColin Finck  */
2382c2c66affSColin Finck int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2383c2c66affSColin Finck xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2384c2c66affSColin Finck                  xmlBufferPtr in)
2385c2c66affSColin Finck {
2386fc82f8e2SThomas Faber     int ret;
2387c2c66affSColin Finck     int written;
2388c2c66affSColin Finck     int toconv;
2389c2c66affSColin Finck 
2390c2c66affSColin Finck     if (handler == NULL)
2391c2c66affSColin Finck         return (-1);
2392c2c66affSColin Finck     if (out == NULL)
2393c2c66affSColin Finck         return (-1);
2394c2c66affSColin Finck     if (in == NULL)
2395c2c66affSColin Finck         return (-1);
2396c2c66affSColin Finck 
2397c2c66affSColin Finck     toconv = in->use;
2398c2c66affSColin Finck     if (toconv == 0)
2399c2c66affSColin Finck         return (0);
2400c2c66affSColin Finck     written = out->size - out->use -1; /* count '\0' */
2401c2c66affSColin Finck     if (toconv * 2 >= written) {
2402c2c66affSColin Finck         xmlBufferGrow(out, out->size + toconv * 2);
2403c2c66affSColin Finck         written = out->size - out->use - 1;
2404c2c66affSColin Finck     }
2405fc82f8e2SThomas Faber     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
24065bb277a5SThomas Faber                            in->content, &toconv, 1);
2407c2c66affSColin Finck     xmlBufferShrink(in, toconv);
2408c2c66affSColin Finck     out->use += written;
2409c2c66affSColin Finck     out->content[out->use] = 0;
2410c2c66affSColin Finck     if (ret == -1)
2411c2c66affSColin Finck         ret = -3;
2412fc82f8e2SThomas Faber 
2413c2c66affSColin Finck     switch (ret) {
2414c2c66affSColin Finck         case 0:
2415c2c66affSColin Finck #ifdef DEBUG_ENCODING
2416c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2417c2c66affSColin Finck                             "converted %d bytes to %d bytes of input\n",
2418c2c66affSColin Finck                             toconv, written);
2419c2c66affSColin Finck #endif
2420c2c66affSColin Finck             break;
2421c2c66affSColin Finck         case -1:
2422c2c66affSColin Finck #ifdef DEBUG_ENCODING
2423c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2424c2c66affSColin Finck                          "converted %d bytes to %d bytes of input, %d left\n",
2425c2c66affSColin Finck                             toconv, written, in->use);
2426c2c66affSColin Finck #endif
2427c2c66affSColin Finck             break;
2428c2c66affSColin Finck         case -3:
2429c2c66affSColin Finck #ifdef DEBUG_ENCODING
2430c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2431c2c66affSColin Finck                         "converted %d bytes to %d bytes of input, %d left\n",
2432c2c66affSColin Finck                             toconv, written, in->use);
2433c2c66affSColin Finck #endif
2434c2c66affSColin Finck             break;
2435c2c66affSColin Finck         case -2: {
2436c2c66affSColin Finck             char buf[50];
2437c2c66affSColin Finck 
2438c2c66affSColin Finck 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2439c2c66affSColin Finck 		     in->content[0], in->content[1],
2440c2c66affSColin Finck 		     in->content[2], in->content[3]);
2441c2c66affSColin Finck 	    buf[49] = 0;
2442c2c66affSColin Finck 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2443c2c66affSColin Finck 		    "input conversion failed due to input error, bytes %s\n",
2444c2c66affSColin Finck 		           buf);
2445c2c66affSColin Finck         }
2446c2c66affSColin Finck     }
2447c2c66affSColin Finck     /*
2448c2c66affSColin Finck      * Ignore when input buffer is not on a boundary
2449c2c66affSColin Finck      */
2450c2c66affSColin Finck     if (ret == -3)
2451c2c66affSColin Finck         ret = 0;
2452c2c66affSColin Finck     return (written? written : ret);
2453c2c66affSColin Finck }
2454c2c66affSColin Finck 
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output
 * a first call with @init == 1 has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 *
 * Data is taken from @output->buffer in chunks of at most 64 KiB and
 * the converted bytes are appended to @output->conv; the function loops
 * until the input is drained or an error stops it.
 *
 * Returns the total number of bytes written if any output was produced
 *     (for an @init call: the number of bytes emitted by the
 *     initialization), otherwise
 *     0 if there was nothing to convert,
 *     -1 if @output or one of the members it needs is NULL, or if the
 *        encoder has no output function,
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we
 *        want).
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    int ret;
    size_t written;
    int writtentot = 0;
    size_t toconv;
    int c_in;
    int c_out;
    xmlBufPtr in;
    xmlBufPtr out;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return (-1);
    out = output->conv;
    in = output->buffer;

retry:

    written = xmlBufAvail(out);

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        c_in = 0;
        c_out = written;
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                          NULL, &c_in);
        xmlBufAddLen(out, c_out);
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "initialized encoder\n");
#endif
        return(c_out);
    }

    /*
     * Conversion itself.
     */
    toconv = xmlBufUse(in);
    if (toconv == 0)
        return (writtentot);
    if (toconv > 64 * 1024)
        toconv = 64 * 1024;
    if (toconv * 4 >= written) {
        xmlBufGrow(out, toconv * 4);
        /* Re-read the room actually available, whatever the grow did. */
        written = xmlBufAvail(out);
    }
    if (written > 256 * 1024)
        written = 256 * 1024;

    c_in = toconv;
    c_out = written;
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                            xmlBufContent(in), &c_in);
    /* Account for what was consumed and produced, even on error. */
    xmlBufShrink(in, c_in);
    xmlBufAddLen(out, c_out);
    writtentot += c_out;
    if (ret == -1) {
        if (c_out > 0) {
            /* Can be a limitation of iconv or uconv */
            goto retry;
        }
        ret = -3;
    }

    /*
     * Attempt to handle error cases
     */
    switch (ret) {
        case 0:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "converted %d bytes to %d bytes of output\n",
                    c_in, c_out);
#endif
            break;
        case -3:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                    c_in, c_out, (int) xmlBufUse(in));
#endif
            break;
        case -4:
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
                           "xmlCharEncOutFunc: no output function !\n", NULL);
            ret = -1;
            break;
        case -2: {
            xmlChar charref[20];
            int len = (int) xmlBufUse(in);
            xmlChar *content = xmlBufContent(in);
            int cur, charrefLen;

            /* Decode the character the encoder could not represent. */
            cur = xmlGetUTF8Char(content, &len);
            if (cur <= 0)
                break;

#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "handling output conversion error\n");
            xmlGenericError(xmlGenericErrorContext,
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    content[0], content[1],
                    content[2], content[3]);
#endif
            /*
             * Removes the UTF8 sequence, and replace it by a charref
             * and continue the transcoding phase, hoping the error
             * did not mangle the encoder state.
             */
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                             "&#%d;", cur);
            xmlBufShrink(in, len);
            xmlBufGrow(out, charrefLen * 4);
            c_out = xmlBufAvail(out);
            c_in = charrefLen;
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                                    charref, &c_in);

            if ((ret < 0) || (c_in != charrefLen)) {
                char buf[50];
                /*
                 * Only dump as many bytes as the input buffer still
                 * holds; positions past that are reported as 0x00
                 * instead of reading beyond the buffered data.
                 * NOTE(review): content was fetched before the
                 * xmlBufShrink() above; confirm it still addresses the
                 * data that should be reported here.
                 */
                size_t left = xmlBufUse(in);

                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
                         (left > 0) ? content[0] : 0,
                         (left > 1) ? content[1] : 0,
                         (left > 2) ? content[2] : 0,
                         (left > 3) ? content[3] : 0);
                buf[49] = 0;
                xmlEncodingErr(XML_I18N_CONV_FAILED,
                    "output conversion failed due to conv error, bytes %s\n",
                               buf);
                if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
                    content[0] = ' ';
                break;
            }

            /* The charref went through: record it and convert the rest. */
            xmlBufAddLen(out, c_out);
            writtentot += c_out;
            goto retry;
        }
        default:
            break;
    }
    return(writtentot ? writtentot : ret);
}
#endif
2625c2c66affSColin Finck 
2626c2c66affSColin Finck /**
2627c2c66affSColin Finck  * xmlCharEncOutFunc:
2628f22fa382SThomas Faber  * @handler:	char encoding transformation data structure
2629c2c66affSColin Finck  * @out:  an xmlBuffer for the output.
2630c2c66affSColin Finck  * @in:  an xmlBuffer for the input
2631c2c66affSColin Finck  *
2632c2c66affSColin Finck  * Generic front-end for the encoding handler output function
2633c2c66affSColin Finck  * a first call with @in == NULL has to be made firs to initiate the
2634c2c66affSColin Finck  * output in case of non-stateless encoding needing to initiate their
2635c2c66affSColin Finck  * state or the output (like the BOM in UTF16).
2636c2c66affSColin Finck  * In case of UTF8 sequence conversion errors for the given encoder,
2637c2c66affSColin Finck  * the content will be automatically remapped to a CharRef sequence.
2638c2c66affSColin Finck  *
2639c2c66affSColin Finck  * Returns the number of byte written if success, or
2640c2c66affSColin Finck  *     -1 general error
2641c2c66affSColin Finck  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2642c2c66affSColin Finck  *        the result of transformation can't fit into the encoding we want), or
2643c2c66affSColin Finck  */
2644c2c66affSColin Finck int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2645c2c66affSColin Finck xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2646c2c66affSColin Finck                   xmlBufferPtr in) {
2647fc82f8e2SThomas Faber     int ret;
2648c2c66affSColin Finck     int written;
2649c2c66affSColin Finck     int writtentot = 0;
2650c2c66affSColin Finck     int toconv;
2651c2c66affSColin Finck 
2652c2c66affSColin Finck     if (handler == NULL) return(-1);
2653c2c66affSColin Finck     if (out == NULL) return(-1);
2654c2c66affSColin Finck 
2655c2c66affSColin Finck retry:
2656c2c66affSColin Finck 
2657c2c66affSColin Finck     written = out->size - out->use;
2658c2c66affSColin Finck 
2659c2c66affSColin Finck     if (written > 0)
2660c2c66affSColin Finck 	written--; /* Gennady: count '/0' */
2661c2c66affSColin Finck 
2662c2c66affSColin Finck     /*
2663c2c66affSColin Finck      * First specific handling of in = NULL, i.e. the initialization call
2664c2c66affSColin Finck      */
2665c2c66affSColin Finck     if (in == NULL) {
2666c2c66affSColin Finck         toconv = 0;
2667fc82f8e2SThomas Faber         /* TODO: Check return value. */
2668fc82f8e2SThomas Faber         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2669c2c66affSColin Finck                           NULL, &toconv);
2670c2c66affSColin Finck         out->use += written;
2671c2c66affSColin Finck         out->content[out->use] = 0;
2672c2c66affSColin Finck #ifdef DEBUG_ENCODING
2673c2c66affSColin Finck 	xmlGenericError(xmlGenericErrorContext,
2674c2c66affSColin Finck 		"initialized encoder\n");
2675c2c66affSColin Finck #endif
2676c2c66affSColin Finck         return(0);
2677c2c66affSColin Finck     }
2678c2c66affSColin Finck 
2679c2c66affSColin Finck     /*
2680c2c66affSColin Finck      * Conversion itself.
2681c2c66affSColin Finck      */
2682c2c66affSColin Finck     toconv = in->use;
2683c2c66affSColin Finck     if (toconv == 0)
2684c2c66affSColin Finck 	return(0);
2685c2c66affSColin Finck     if (toconv * 4 >= written) {
2686c2c66affSColin Finck         xmlBufferGrow(out, toconv * 4);
2687c2c66affSColin Finck 	written = out->size - out->use - 1;
2688c2c66affSColin Finck     }
2689fc82f8e2SThomas Faber     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2690c2c66affSColin Finck                             in->content, &toconv);
2691c2c66affSColin Finck     xmlBufferShrink(in, toconv);
2692c2c66affSColin Finck     out->use += written;
2693c2c66affSColin Finck     writtentot += written;
2694c2c66affSColin Finck     out->content[out->use] = 0;
2695c2c66affSColin Finck     if (ret == -1) {
2696c2c66affSColin Finck         if (written > 0) {
2697fc82f8e2SThomas Faber             /* Can be a limitation of iconv or uconv */
2698c2c66affSColin Finck             goto retry;
2699c2c66affSColin Finck         }
2700c2c66affSColin Finck         ret = -3;
2701c2c66affSColin Finck     }
2702c2c66affSColin Finck 
2703c2c66affSColin Finck     /*
2704c2c66affSColin Finck      * Attempt to handle error cases
2705c2c66affSColin Finck      */
2706c2c66affSColin Finck     switch (ret) {
2707c2c66affSColin Finck         case 0:
2708c2c66affSColin Finck #ifdef DEBUG_ENCODING
2709c2c66affSColin Finck 	    xmlGenericError(xmlGenericErrorContext,
2710c2c66affSColin Finck 		    "converted %d bytes to %d bytes of output\n",
2711c2c66affSColin Finck 	            toconv, written);
2712c2c66affSColin Finck #endif
2713c2c66affSColin Finck 	    break;
2714c2c66affSColin Finck         case -1:
2715c2c66affSColin Finck #ifdef DEBUG_ENCODING
2716c2c66affSColin Finck 	    xmlGenericError(xmlGenericErrorContext,
2717c2c66affSColin Finck 		    "output conversion failed by lack of space\n");
2718c2c66affSColin Finck #endif
2719c2c66affSColin Finck 	    break;
2720c2c66affSColin Finck         case -3:
2721c2c66affSColin Finck #ifdef DEBUG_ENCODING
2722c2c66affSColin Finck 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2723c2c66affSColin Finck 	            toconv, written, in->use);
2724c2c66affSColin Finck #endif
2725c2c66affSColin Finck 	    break;
2726fc82f8e2SThomas Faber         case -4:
2727fc82f8e2SThomas Faber 	    xmlEncodingErr(XML_I18N_NO_OUTPUT,
2728fc82f8e2SThomas Faber 		           "xmlCharEncOutFunc: no output function !\n", NULL);
2729c2c66affSColin Finck 	    ret = -1;
2730c2c66affSColin Finck             break;
2731fc82f8e2SThomas Faber         case -2: {
2732c2c66affSColin Finck 	    xmlChar charref[20];
2733fc82f8e2SThomas Faber 	    int len = in->use;
2734fc82f8e2SThomas Faber 	    const xmlChar *utf = (const xmlChar *) in->content;
2735fc82f8e2SThomas Faber 	    int cur, charrefLen;
2736fc82f8e2SThomas Faber 
2737fc82f8e2SThomas Faber 	    cur = xmlGetUTF8Char(utf, &len);
2738fc82f8e2SThomas Faber 	    if (cur <= 0)
2739fc82f8e2SThomas Faber                 break;
2740c2c66affSColin Finck 
2741c2c66affSColin Finck #ifdef DEBUG_ENCODING
2742c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2743c2c66affSColin Finck                     "handling output conversion error\n");
2744c2c66affSColin Finck             xmlGenericError(xmlGenericErrorContext,
2745c2c66affSColin Finck                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2746c2c66affSColin Finck                     in->content[0], in->content[1],
2747c2c66affSColin Finck                     in->content[2], in->content[3]);
2748c2c66affSColin Finck #endif
2749c2c66affSColin Finck             /*
2750c2c66affSColin Finck              * Removes the UTF8 sequence, and replace it by a charref
2751c2c66affSColin Finck              * and continue the transcoding phase, hoping the error
2752c2c66affSColin Finck              * did not mangle the encoder state.
2753c2c66affSColin Finck              */
2754fc82f8e2SThomas Faber             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2755c2c66affSColin Finck                              "&#%d;", cur);
2756c2c66affSColin Finck             xmlBufferShrink(in, len);
2757fc82f8e2SThomas Faber             xmlBufferGrow(out, charrefLen * 4);
2758fc82f8e2SThomas Faber 	    written = out->size - out->use - 1;
2759fc82f8e2SThomas Faber             toconv = charrefLen;
2760fc82f8e2SThomas Faber             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2761fc82f8e2SThomas Faber                                     charref, &toconv);
2762c2c66affSColin Finck 
2763fc82f8e2SThomas Faber 	    if ((ret < 0) || (toconv != charrefLen)) {
2764c2c66affSColin Finck 		char buf[50];
2765c2c66affSColin Finck 
2766c2c66affSColin Finck 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2767c2c66affSColin Finck 			 in->content[0], in->content[1],
2768c2c66affSColin Finck 			 in->content[2], in->content[3]);
2769c2c66affSColin Finck 		buf[49] = 0;
2770c2c66affSColin Finck 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2771c2c66affSColin Finck 		    "output conversion failed due to conv error, bytes %s\n",
2772c2c66affSColin Finck 			       buf);
2773c2c66affSColin Finck 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2774c2c66affSColin Finck 		    in->content[0] = ' ';
2775c2c66affSColin Finck 	        break;
2776c2c66affSColin Finck 	    }
2777fc82f8e2SThomas Faber 
2778fc82f8e2SThomas Faber             out->use += written;
2779fc82f8e2SThomas Faber             writtentot += written;
2780fc82f8e2SThomas Faber             out->content[out->use] = 0;
2781fc82f8e2SThomas Faber             goto retry;
2782fc82f8e2SThomas Faber 	}
2783c2c66affSColin Finck     }
278440ee59d6SThomas Faber     return(writtentot ? writtentot : ret);
2785c2c66affSColin Finck }
2786c2c66affSColin Finck 
2787c2c66affSColin Finck /**
2788c2c66affSColin Finck  * xmlCharEncCloseFunc:
2789f22fa382SThomas Faber  * @handler:	char encoding transformation data structure
2790c2c66affSColin Finck  *
2791c2c66affSColin Finck  * Generic front-end for encoding handler close function
2792c2c66affSColin Finck  *
2793c2c66affSColin Finck  * Returns 0 if success, or -1 in case of error
2794c2c66affSColin Finck  */
2795c2c66affSColin Finck int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2796c2c66affSColin Finck xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2797c2c66affSColin Finck     int ret = 0;
2798c2c66affSColin Finck     int tofree = 0;
2799c2c66affSColin Finck     int i, handler_in_list = 0;
2800c2c66affSColin Finck 
2801*911153daSThomas Faber     /* Avoid unused variable warning if features are disabled. */
2802*911153daSThomas Faber     (void) handler_in_list;
2803*911153daSThomas Faber 
2804c2c66affSColin Finck     if (handler == NULL) return(-1);
2805c2c66affSColin Finck     if (handler->name == NULL) return(-1);
2806c2c66affSColin Finck     if (handlers != NULL) {
2807c2c66affSColin Finck         for (i = 0;i < nbCharEncodingHandler; i++) {
2808c2c66affSColin Finck             if (handler == handlers[i]) {
2809c2c66affSColin Finck 	        handler_in_list = 1;
2810c2c66affSColin Finck 		break;
2811c2c66affSColin Finck 	    }
2812c2c66affSColin Finck 	}
2813c2c66affSColin Finck     }
2814c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
2815c2c66affSColin Finck     /*
2816c2c66affSColin Finck      * Iconv handlers can be used only once, free the whole block.
2817c2c66affSColin Finck      * and the associated icon resources.
2818c2c66affSColin Finck      */
2819c2c66affSColin Finck     if ((handler_in_list == 0) &&
2820c2c66affSColin Finck         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2821c2c66affSColin Finck         tofree = 1;
2822c2c66affSColin Finck 	if (handler->iconv_out != NULL) {
2823c2c66affSColin Finck 	    if (iconv_close(handler->iconv_out))
2824c2c66affSColin Finck 		ret = -1;
2825c2c66affSColin Finck 	    handler->iconv_out = NULL;
2826c2c66affSColin Finck 	}
2827c2c66affSColin Finck 	if (handler->iconv_in != NULL) {
2828c2c66affSColin Finck 	    if (iconv_close(handler->iconv_in))
2829c2c66affSColin Finck 		ret = -1;
2830c2c66affSColin Finck 	    handler->iconv_in = NULL;
2831c2c66affSColin Finck 	}
2832c2c66affSColin Finck     }
2833c2c66affSColin Finck #endif /* LIBXML_ICONV_ENABLED */
2834c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
2835c2c66affSColin Finck     if ((handler_in_list == 0) &&
2836c2c66affSColin Finck         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2837c2c66affSColin Finck         tofree = 1;
2838c2c66affSColin Finck 	if (handler->uconv_out != NULL) {
2839c2c66affSColin Finck 	    closeIcuConverter(handler->uconv_out);
2840c2c66affSColin Finck 	    handler->uconv_out = NULL;
2841c2c66affSColin Finck 	}
2842c2c66affSColin Finck 	if (handler->uconv_in != NULL) {
2843c2c66affSColin Finck 	    closeIcuConverter(handler->uconv_in);
2844c2c66affSColin Finck 	    handler->uconv_in = NULL;
2845c2c66affSColin Finck 	}
2846c2c66affSColin Finck     }
2847c2c66affSColin Finck #endif
2848c2c66affSColin Finck     if (tofree) {
2849c2c66affSColin Finck         /* free up only dynamic handlers iconv/uconv */
2850c2c66affSColin Finck         if (handler->name != NULL)
2851c2c66affSColin Finck             xmlFree(handler->name);
2852c2c66affSColin Finck         handler->name = NULL;
2853c2c66affSColin Finck         xmlFree(handler);
2854c2c66affSColin Finck     }
2855c2c66affSColin Finck #ifdef DEBUG_ENCODING
2856c2c66affSColin Finck     if (ret)
2857c2c66affSColin Finck         xmlGenericError(xmlGenericErrorContext,
2858c2c66affSColin Finck 		"failed to close the encoding handler\n");
2859c2c66affSColin Finck     else
2860c2c66affSColin Finck         xmlGenericError(xmlGenericErrorContext,
2861c2c66affSColin Finck 		"closed the encoding handler\n");
2862c2c66affSColin Finck #endif
2863c2c66affSColin Finck 
2864c2c66affSColin Finck     return(ret);
2865c2c66affSColin Finck }
2866c2c66affSColin Finck 
2867c2c66affSColin Finck /**
2868c2c66affSColin Finck  * xmlByteConsumed:
2869c2c66affSColin Finck  * @ctxt: an XML parser context
2870c2c66affSColin Finck  *
2871c2c66affSColin Finck  * This function provides the current index of the parser relative
2872c2c66affSColin Finck  * to the start of the current entity. This function is computed in
2873c2c66affSColin Finck  * bytes from the beginning starting at zero and finishing at the
2874c2c66affSColin Finck  * size in byte of the file if parsing a file. The function is
2875c2c66affSColin Finck  * of constant cost if the input is UTF-8 but can be costly if run
2876c2c66affSColin Finck  * on non-UTF-8 input.
2877c2c66affSColin Finck  *
2878c2c66affSColin Finck  * Returns the index in bytes from the beginning of the entity or -1
2879c2c66affSColin Finck  *         in case the index could not be computed.
2880c2c66affSColin Finck  */
2881c2c66affSColin Finck long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2882c2c66affSColin Finck xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2883c2c66affSColin Finck     xmlParserInputPtr in;
2884c2c66affSColin Finck 
2885c2c66affSColin Finck     if (ctxt == NULL) return(-1);
2886c2c66affSColin Finck     in = ctxt->input;
2887c2c66affSColin Finck     if (in == NULL)  return(-1);
2888c2c66affSColin Finck     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2889c2c66affSColin Finck         unsigned int unused = 0;
2890c2c66affSColin Finck 	xmlCharEncodingHandler * handler = in->buf->encoder;
2891c2c66affSColin Finck         /*
2892c2c66affSColin Finck 	 * Encoding conversion, compute the number of unused original
2893f22fa382SThomas Faber 	 * bytes from the input not consumed and subtract that from
2894c2c66affSColin Finck 	 * the raw consumed value, this is not a cheap operation
2895c2c66affSColin Finck 	 */
2896c2c66affSColin Finck         if (in->end - in->cur > 0) {
2897c2c66affSColin Finck 	    unsigned char convbuf[32000];
2898c2c66affSColin Finck 	    const unsigned char *cur = (const unsigned char *)in->cur;
2899c2c66affSColin Finck 	    int toconv = in->end - in->cur, written = 32000;
2900c2c66affSColin Finck 
2901c2c66affSColin Finck 	    int ret;
2902c2c66affSColin Finck 
2903c2c66affSColin Finck             do {
2904c2c66affSColin Finck                 toconv = in->end - cur;
2905c2c66affSColin Finck                 written = 32000;
2906fc82f8e2SThomas Faber                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2907c2c66affSColin Finck                                         cur, &toconv);
2908c2c66affSColin Finck                 if (ret < 0) {
2909c2c66affSColin Finck                     if (written > 0)
2910c2c66affSColin Finck                         ret = -2;
2911c2c66affSColin Finck                     else
2912c2c66affSColin Finck                         return(-1);
2913c2c66affSColin Finck                 }
2914c2c66affSColin Finck                 unused += written;
2915c2c66affSColin Finck                 cur += toconv;
2916c2c66affSColin Finck             } while (ret == -2);
2917c2c66affSColin Finck 	}
2918c2c66affSColin Finck 	if (in->buf->rawconsumed < unused)
2919c2c66affSColin Finck 	    return(-1);
2920c2c66affSColin Finck 	return(in->buf->rawconsumed - unused);
2921c2c66affSColin Finck     }
2922c2c66affSColin Finck     return(in->consumed + (in->cur - in->base));
2923c2c66affSColin Finck }
2924c2c66affSColin Finck 
2925c2c66affSColin Finck #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2926c2c66affSColin Finck #ifdef LIBXML_ISO8859X_ENABLED
2927c2c66affSColin Finck 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  in: the capacity of @out, out: the number of octets written
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  in: the length of @in, out: the number of octets consumed
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every accepted character produces exactly one
 * output octet. Conversion stops early, without error, when @out is full;
 * the character that did not fit is left unconsumed.
 *
 * @xlattable layout, as indexed below: entries [0..31] select a block by
 * the low 5 bits of a 2-byte lead, entries [32..47] by the low 4 bits of
 * a 3-byte lead, and 64-byte blocks start at offset 48. A looked-up value
 * of 0 means the character is not in the target character set.
 *
 * Returns the number of octets written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or character not in the target set), -3 if
 *     the input ends in the middle of a multi-byte sequence, or -1 if a
 *     required argument is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on -2 / -3 it covers only the characters converted before the
 *     failure.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* never write beyond the caller supplied capacity */
        if (out >= outend)
            break;

        d = *in++;
        if (d < 0x80) {
            /* ASCII is copied through unchanged */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        *out++ = d;
        /* only whole characters count as consumed */
        processed = in;
    }

done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3046c2c66affSColin Finck 
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  in: the capacity of @out, out: the number of octets written
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  in: the length of @in, out: the number of octets consumed
 * @unicodetable:  128 code points, indexed by (input byte - 0x80);
 *                 a 0 entry marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * looked up in @unicodetable and written as 2 or 3 UTF-8 octets.
 * Conversion stops, without error, when @out cannot take the next
 * character.
 *
 * Returns the number of octets written if success, or -1 if an argument
 *     is NULL or an input byte has no mapping in @unicodetable
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: runs while at least 3 output octets are free, which is
     * the most a single table entry can expand to. Distances are compared
     * rather than forming "outend - 2", which would point before the
     * buffer when the capacity is smaller than 2.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* limit the ASCII run to what still fits in the output */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds are tested first so *in is never read past the input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * Fewer than 3 octets left: up to two more single-octet characters
     * can still be copied safely.
     */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3112c2c66affSColin Finck 
3113c2c66affSColin Finck 
3114c2c66affSColin Finck /************************************************************************
3115c2c66affSColin Finck  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3116c2c66affSColin Finck  ************************************************************************/
3117c2c66affSColin Finck 
/*
 * ISO-8859-2 (per the identifier) byte -> code point table: 128 entries,
 * one for each byte value 0x80..0xFF, stored at index (byte - 0x80).
 * This is the shape ISO8859xToUTF8() expects for its unicodetable
 * argument, where an entry of 0x0000 is treated as an undefined code
 * point (the call site is outside this excerpt).
 */
3118c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3119c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3120c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3121c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3122c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3123c2c66affSColin Finck     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3124c2c66affSColin Finck     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3125c2c66affSColin Finck     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3126c2c66affSColin Finck     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3127c2c66affSColin Finck     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3128c2c66affSColin Finck     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3129c2c66affSColin Finck     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3130c2c66affSColin Finck     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3131c2c66affSColin Finck     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3132c2c66affSColin Finck     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3133c2c66affSColin Finck     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3134c2c66affSColin Finck     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3135c2c66affSColin Finck };
3136c2c66affSColin Finck 
/*
 * UTF-8 -> ISO-8859-2 (per the identifier) two-level lookup table, in the
 * layout UTF8ToISO8859x() indexes through its xlattable argument (the
 * call site is outside this excerpt):
 *   [0..31]   block number, selected by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, selected by the low 4 bits of a 3-byte lead
 *   [48.. ]   64-byte blocks, indexed by the low 6 bits of a trailing byte
 * A looked-up value of 0x00 is reported there as "not in character set".
 * Declared size: 48 index bytes plus 6 blocks of 64.
 */
3137*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3138c2c66affSColin Finck     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3139c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3146c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3147c2c66affSColin Finck     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3148c2c66affSColin Finck     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3149c2c66affSColin Finck     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3150c2c66affSColin Finck     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3151c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3153c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3154c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3155c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157c2c66affSColin Finck     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3158c2c66affSColin Finck     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3159c2c66affSColin Finck     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3160c2c66affSColin Finck     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3161c2c66affSColin Finck     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3162c2c66affSColin Finck     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3163c2c66affSColin Finck     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3164c2c66affSColin Finck     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3165c2c66affSColin Finck };
3166c2c66affSColin Finck 
/*
 * ISO-8859-3 (per the identifier) byte -> code point table: 128 entries,
 * one for each byte value 0x80..0xFF, stored at index (byte - 0x80).
 * This is the shape ISO8859xToUTF8() expects for its unicodetable
 * argument, where an entry of 0x0000 is treated as an undefined code
 * point (the call site is outside this excerpt).
 */
3167c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3168c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3169c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3170c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3171c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3172c2c66affSColin Finck     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3173c2c66affSColin Finck     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3174c2c66affSColin Finck     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3175c2c66affSColin Finck     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3176c2c66affSColin Finck     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3177c2c66affSColin Finck     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3178c2c66affSColin Finck     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3179c2c66affSColin Finck     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3180c2c66affSColin Finck     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3181c2c66affSColin Finck     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3182c2c66affSColin Finck     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3183c2c66affSColin Finck     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3184c2c66affSColin Finck };
3185c2c66affSColin Finck 
/*
 * UTF-8 -> ISO-8859-3 (per the identifier) two-level lookup table, in the
 * layout UTF8ToISO8859x() indexes through its xlattable argument (the
 * call site is outside this excerpt):
 *   [0..31]   block number, selected by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, selected by the low 4 bits of a 3-byte lead
 *   [48.. ]   64-byte blocks, indexed by the low 6 bits of a trailing byte
 * A looked-up value of 0x00 is reported there as "not in character set".
 * Declared size: 48 index bytes plus 7 blocks of 64.
 */
3186*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3187c2c66affSColin Finck     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3188c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3195c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3196c2c66affSColin Finck     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3197c2c66affSColin Finck     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3198c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3199c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3200c2c66affSColin Finck     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3201c2c66affSColin Finck     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3204c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206c2c66affSColin Finck     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3212c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3213c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3214c2c66affSColin Finck     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3215c2c66affSColin Finck     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3216c2c66affSColin Finck     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3217c2c66affSColin Finck     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3218c2c66affSColin Finck };
3219c2c66affSColin Finck 
/*
 * ISO-8859-4 (per the identifier) byte -> code point table: 128 entries,
 * one for each byte value 0x80..0xFF, stored at index (byte - 0x80).
 * This is the shape ISO8859xToUTF8() expects for its unicodetable
 * argument, where an entry of 0x0000 is treated as an undefined code
 * point (the call site is outside this excerpt).
 */
3220c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3221c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3222c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3223c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3224c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3225c2c66affSColin Finck     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3226c2c66affSColin Finck     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3227c2c66affSColin Finck     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3228c2c66affSColin Finck     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3229c2c66affSColin Finck     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3230c2c66affSColin Finck     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3231c2c66affSColin Finck     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3232c2c66affSColin Finck     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3233c2c66affSColin Finck     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3234c2c66affSColin Finck     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3235c2c66affSColin Finck     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3236c2c66affSColin Finck     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3237c2c66affSColin Finck };
3238c2c66affSColin Finck 
/*
 * UTF-8 -> ISO-8859-4 (per the identifier) two-level lookup table, in the
 * layout UTF8ToISO8859x() indexes through its xlattable argument (the
 * call site is outside this excerpt):
 *   [0..31]   block number, selected by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, selected by the low 4 bits of a 3-byte lead
 *   [48.. ]   64-byte blocks, indexed by the low 6 bits of a trailing byte
 * A looked-up value of 0x00 is reported there as "not in character set".
 * Declared size: 48 index bytes plus 6 blocks of 64.
 */
3239*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3240c2c66affSColin Finck     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3241c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3248c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3249c2c66affSColin Finck     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3250c2c66affSColin Finck     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3251c2c66affSColin Finck     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3252c2c66affSColin Finck     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3253c2c66affSColin Finck     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3254c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3255c2c66affSColin Finck     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3256c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3257c2c66affSColin Finck     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3258c2c66affSColin Finck     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3259c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3260c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3261c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263c2c66affSColin Finck     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3264c2c66affSColin Finck     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3265c2c66affSColin Finck     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3266c2c66affSColin Finck     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3267c2c66affSColin Finck };
3268c2c66affSColin Finck 
/*
 * ISO-8859-5 (per the identifier) byte -> code point table: 128 entries,
 * one for each byte value 0x80..0xFF, stored at index (byte - 0x80).
 * This is the shape ISO8859xToUTF8() expects for its unicodetable
 * argument, where an entry of 0x0000 is treated as an undefined code
 * point (the call site is outside this excerpt).
 */
3269c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3270c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3271c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3272c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3273c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3274c2c66affSColin Finck     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3275c2c66affSColin Finck     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3276c2c66affSColin Finck     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3277c2c66affSColin Finck     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3278c2c66affSColin Finck     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3279c2c66affSColin Finck     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3280c2c66affSColin Finck     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3281c2c66affSColin Finck     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3282c2c66affSColin Finck     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3283c2c66affSColin Finck     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3284c2c66affSColin Finck     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3285c2c66affSColin Finck     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3286c2c66affSColin Finck };
3287c2c66affSColin Finck 
/*
 * UTF-8 -> ISO-8859-5 (per the identifier) two-level lookup table, in the
 * layout UTF8ToISO8859x() indexes through its xlattable argument (the
 * call site is outside this excerpt):
 *   [0..31]   block number, selected by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, selected by the low 4 bits of a 3-byte lead
 *   [48.. ]   64-byte blocks, indexed by the low 6 bits of a trailing byte
 * A looked-up value of 0x00 is reported there as "not in character set".
 * Declared size: 48 index bytes plus 6 blocks of 64.
 */
3288*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3289c2c66affSColin Finck     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290c2c66affSColin Finck     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291c2c66affSColin Finck     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3297c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3298c2c66affSColin Finck     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3299c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300c2c66affSColin Finck     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3301c2c66affSColin Finck     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3302c2c66affSColin Finck     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3303c2c66affSColin Finck     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3304c2c66affSColin Finck     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3305c2c66affSColin Finck     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3306c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308c2c66affSColin Finck     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316c2c66affSColin Finck };
3317c2c66affSColin Finck 
/*
 * ISO-8859-6 (per the identifier) byte -> code point table: 128 entries,
 * one for each byte value 0x80..0xFF, stored at index (byte - 0x80).
 * This is the shape ISO8859xToUTF8() expects for its unicodetable
 * argument, where an entry of 0x0000 is treated as an undefined code
 * point (the call site is outside this excerpt).
 */
3318c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3319c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3320c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3321c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3322c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3323c2c66affSColin Finck     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3324c2c66affSColin Finck     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3325c2c66affSColin Finck     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3326c2c66affSColin Finck     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3327c2c66affSColin Finck     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3328c2c66affSColin Finck     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3329c2c66affSColin Finck     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3330c2c66affSColin Finck     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3331c2c66affSColin Finck     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3332c2c66affSColin Finck     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3333c2c66affSColin Finck     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3334c2c66affSColin Finck     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3335c2c66affSColin Finck };
3336c2c66affSColin Finck 
3337*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3338c2c66affSColin Finck     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3340c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3346c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3347c2c66affSColin Finck     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3348c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349c2c66affSColin Finck     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3354c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3355c2c66affSColin Finck     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3356c2c66affSColin Finck     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3357c2c66affSColin Finck     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3358c2c66affSColin Finck     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361c2c66affSColin Finck };
3362c2c66affSColin Finck 
3363c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3364c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3365c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3366c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3367c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3368c2c66affSColin Finck     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3369c2c66affSColin Finck     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3370c2c66affSColin Finck     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3371c2c66affSColin Finck     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3372c2c66affSColin Finck     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3373c2c66affSColin Finck     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3374c2c66affSColin Finck     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3375c2c66affSColin Finck     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3376c2c66affSColin Finck     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3377c2c66affSColin Finck     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3378c2c66affSColin Finck     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3379c2c66affSColin Finck     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3380c2c66affSColin Finck };
3381c2c66affSColin Finck 
3382*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3383c2c66affSColin Finck     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3384c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385c2c66affSColin Finck     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3391c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3392c2c66affSColin Finck     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3393c2c66affSColin Finck     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3394c2c66affSColin Finck     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3398c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399c2c66affSColin Finck     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3400c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402c2c66affSColin Finck     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406c2c66affSColin Finck     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3407c2c66affSColin Finck     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3408c2c66affSColin Finck     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3409c2c66affSColin Finck     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3410c2c66affSColin Finck     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3411c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414c2c66affSColin Finck };
3415c2c66affSColin Finck 
3416c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3417c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3418c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3419c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3420c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3421c2c66affSColin Finck     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3422c2c66affSColin Finck     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3423c2c66affSColin Finck     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3424c2c66affSColin Finck     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3425c2c66affSColin Finck     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3426c2c66affSColin Finck     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3427c2c66affSColin Finck     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3428c2c66affSColin Finck     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3429c2c66affSColin Finck     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3430c2c66affSColin Finck     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3431c2c66affSColin Finck     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3432c2c66affSColin Finck     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3433c2c66affSColin Finck };
3434c2c66affSColin Finck 
3435*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3436c2c66affSColin Finck     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3438c2c66affSColin Finck     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3444c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3445c2c66affSColin Finck     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3446c2c66affSColin Finck     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3447c2c66affSColin Finck     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3451c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3453c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3455c2c66affSColin Finck     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3460c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3461c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464c2c66affSColin Finck     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3465c2c66affSColin Finck     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3466c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467c2c66affSColin Finck };
3468c2c66affSColin Finck 
3469c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3470c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3471c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3472c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3473c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3474c2c66affSColin Finck     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3475c2c66affSColin Finck     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3476c2c66affSColin Finck     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3477c2c66affSColin Finck     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3478c2c66affSColin Finck     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3479c2c66affSColin Finck     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3480c2c66affSColin Finck     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3481c2c66affSColin Finck     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3482c2c66affSColin Finck     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3483c2c66affSColin Finck     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3484c2c66affSColin Finck     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3485c2c66affSColin Finck     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3486c2c66affSColin Finck };
3487c2c66affSColin Finck 
3488*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3489c2c66affSColin Finck     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3497c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3498c2c66affSColin Finck     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3499c2c66affSColin Finck     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3500c2c66affSColin Finck     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3501c2c66affSColin Finck     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3502c2c66affSColin Finck     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3503c2c66affSColin Finck     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3504c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3506c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507c2c66affSColin Finck     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3510c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512c2c66affSColin Finck };
3513c2c66affSColin Finck 
3514c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3515c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3516c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3517c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3518c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3519c2c66affSColin Finck     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3520c2c66affSColin Finck     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3521c2c66affSColin Finck     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3522c2c66affSColin Finck     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3523c2c66affSColin Finck     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3524c2c66affSColin Finck     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3525c2c66affSColin Finck     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3526c2c66affSColin Finck     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3527c2c66affSColin Finck     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3528c2c66affSColin Finck     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3529c2c66affSColin Finck     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3530c2c66affSColin Finck     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3531c2c66affSColin Finck };
3532c2c66affSColin Finck 
3533*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3534c2c66affSColin Finck     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536c2c66affSColin Finck     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3542c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3543c2c66affSColin Finck     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3544c2c66affSColin Finck     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3545c2c66affSColin Finck     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3546c2c66affSColin Finck     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3547c2c66affSColin Finck     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3548c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3549c2c66affSColin Finck     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3550c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551c2c66affSColin Finck     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3552c2c66affSColin Finck     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3553c2c66affSColin Finck     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3554c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558c2c66affSColin Finck     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561c2c66affSColin Finck     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3562c2c66affSColin Finck     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3563c2c66affSColin Finck     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3564c2c66affSColin Finck     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3565c2c66affSColin Finck };
3566c2c66affSColin Finck 
3567c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3568c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3569c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3570c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3571c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3572c2c66affSColin Finck     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3573c2c66affSColin Finck     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3574c2c66affSColin Finck     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3575c2c66affSColin Finck     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3576c2c66affSColin Finck     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3577c2c66affSColin Finck     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3578c2c66affSColin Finck     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3579c2c66affSColin Finck     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3580c2c66affSColin Finck     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3581c2c66affSColin Finck     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3582c2c66affSColin Finck     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3583c2c66affSColin Finck     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3584c2c66affSColin Finck };
3585c2c66affSColin Finck 
3586*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3587c2c66affSColin Finck     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589c2c66affSColin Finck     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3594c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3595c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3596c2c66affSColin Finck     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3602c2c66affSColin Finck     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3603c2c66affSColin Finck     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3604c2c66affSColin Finck     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3605c2c66affSColin Finck     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3606c2c66affSColin Finck     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3607c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610c2c66affSColin Finck     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3611c2c66affSColin Finck     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3612c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614c2c66affSColin Finck };
3615c2c66affSColin Finck 
3616c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3617c2c66affSColin Finck     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3618c2c66affSColin Finck     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3619c2c66affSColin Finck     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3620c2c66affSColin Finck     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3621c2c66affSColin Finck     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3622c2c66affSColin Finck     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3623c2c66affSColin Finck     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3624c2c66affSColin Finck     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3625c2c66affSColin Finck     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3626c2c66affSColin Finck     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3627c2c66affSColin Finck     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3628c2c66affSColin Finck     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3629c2c66affSColin Finck     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3630c2c66affSColin Finck     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3631c2c66affSColin Finck     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3632c2c66affSColin Finck     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3633c2c66affSColin Finck };
3634c2c66affSColin Finck 
3635*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3636c2c66affSColin Finck     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638c2c66affSColin Finck     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643c2c66affSColin Finck     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3644c2c66affSColin Finck     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3645c2c66affSColin Finck     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3646c2c66affSColin Finck     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3647c2c66affSColin Finck     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3653c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3654c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3655c2c66affSColin Finck     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3656c2c66affSColin Finck     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3657c2c66affSColin Finck     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3658c2c66affSColin Finck     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3659c2c66affSColin Finck     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3660c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3661c2c66affSColin Finck     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3662c2c66affSColin Finck     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3663c2c66affSColin Finck     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3664c2c66affSColin Finck     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3665c2c66affSColin Finck     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3666c2c66affSColin Finck     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3667c2c66affSColin Finck };
3668c2c66affSColin Finck 
/*
 * Unicode values for ISO-8859-14; ISO8859_14ToUTF8() hands this table to
 * ISO8859xToUTF8().
 *
 * 128 entries.  Judging by the identity entries (index 0 holds 0x0080,
 * index 0x40 holds 0x00c0), entry i is the code point for input byte
 * 0x80 + i.  NOTE(review): the lookup itself is in ISO8859xToUTF8(),
 * outside this section -- confirm the indexing there.
 */
static const unsigned short xmlunicodetable_ISO8859_14[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3687c2c66affSColin Finck 
/*
 * Translation table for ISO-8859-14; UTF8ToISO8859_14() passes it to
 * UTF8ToISO8859x().
 *
 * The initializer is a string literal of 43 rows x 16 bytes = 688 bytes,
 * exactly the declared size 48 + 10 * 64, so the array is filled completely
 * and no terminating NUL is stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * ten 64-byte blocks; the lookup code is in UTF8ToISO8859x(), outside this
 * section -- confirm the layout there before editing any byte.
 */
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3733c2c66affSColin Finck 
/*
 * Unicode values for ISO-8859-15; ISO8859_15ToUTF8() hands this table to
 * ISO8859xToUTF8().
 *
 * 128 entries.  Judging by the identity entries (index 0 holds 0x0080,
 * index 0x40 holds 0x00c0), entry i is the code point for input byte
 * 0x80 + i.  NOTE(review): the lookup itself is in ISO8859xToUTF8(),
 * outside this section -- confirm the indexing there.
 */
static const unsigned short xmlunicodetable_ISO8859_15[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3752c2c66affSColin Finck 
/*
 * Translation table for ISO-8859-15; UTF8ToISO8859_15() passes it to
 * UTF8ToISO8859x().
 *
 * The initializer is a string literal of 27 rows x 16 bytes = 432 bytes,
 * exactly the declared size 48 + 6 * 64, so the array is filled completely
 * and no terminating NUL is stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte blocks; the lookup code is in UTF8ToISO8859x(), outside this
 * section -- confirm the layout there before editing any byte.
 */
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3782c2c66affSColin Finck 
/*
 * Unicode values for ISO-8859-16; ISO8859_16ToUTF8() hands this table to
 * ISO8859xToUTF8().
 *
 * 128 entries.  Judging by the identity entries (index 0 holds 0x0080,
 * index 0x40 holds 0x00c0), entry i is the code point for input byte
 * 0x80 + i.  NOTE(review): the lookup itself is in ISO8859xToUTF8(),
 * outside this section -- confirm the indexing there.
 */
static const unsigned short xmlunicodetable_ISO8859_16[128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3801c2c66affSColin Finck 
/*
 * Translation table for ISO-8859-16; UTF8ToISO8859_16() passes it to
 * UTF8ToISO8859x().
 *
 * The initializer is a string literal of 39 rows x 16 bytes = 624 bytes,
 * exactly the declared size 48 + 9 * 64, so the array is filled completely
 * and no terminating NUL is stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * nine 64-byte blocks; the lookup code is in UTF8ToISO8859x(), outside this
 * section -- confirm the layout there before editing any byte.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3843c2c66affSColin Finck 
3844c2c66affSColin Finck 
3845c2c66affSColin Finck /*
3846c2c66affSColin Finck  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3847c2c66affSColin Finck  */
3848c2c66affSColin Finck 
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3849c2c66affSColin Finck static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3850c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3851c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3852c2c66affSColin Finck }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3853c2c66affSColin Finck static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3854c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3855c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3856c2c66affSColin Finck }
3857c2c66affSColin Finck 
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3858c2c66affSColin Finck static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3859c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3860c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3861c2c66affSColin Finck }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3862c2c66affSColin Finck static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3863c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3864c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3865c2c66affSColin Finck }
3866c2c66affSColin Finck 
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3867c2c66affSColin Finck static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3868c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3869c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3870c2c66affSColin Finck }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3871c2c66affSColin Finck static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3872c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3873c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3874c2c66affSColin Finck }
3875c2c66affSColin Finck 
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3876c2c66affSColin Finck static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3877c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3878c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3879c2c66affSColin Finck }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3880c2c66affSColin Finck static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3881c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3882c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3883c2c66affSColin Finck }
3884c2c66affSColin Finck 
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3885c2c66affSColin Finck static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3886c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3887c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3888c2c66affSColin Finck }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3889c2c66affSColin Finck static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3890c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3891c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3892c2c66affSColin Finck }
3893c2c66affSColin Finck 
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3894c2c66affSColin Finck static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3895c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3896c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3897c2c66affSColin Finck }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3898c2c66affSColin Finck static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3899c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3900c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3901c2c66affSColin Finck }
3902c2c66affSColin Finck 
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3903c2c66affSColin Finck static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3904c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3905c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3906c2c66affSColin Finck }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3907c2c66affSColin Finck static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3908c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3909c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3910c2c66affSColin Finck }
3911c2c66affSColin Finck 
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3912c2c66affSColin Finck static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3913c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3914c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3915c2c66affSColin Finck }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3916c2c66affSColin Finck static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3917c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3918c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3919c2c66affSColin Finck }
3920c2c66affSColin Finck 
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3921c2c66affSColin Finck static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3922c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3923c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3924c2c66affSColin Finck }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3925c2c66affSColin Finck static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3926c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3927c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3928c2c66affSColin Finck }
3929c2c66affSColin Finck 
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3930c2c66affSColin Finck static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3931c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3932c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3933c2c66affSColin Finck }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3934c2c66affSColin Finck static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3935c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3936c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3937c2c66affSColin Finck }
3938c2c66affSColin Finck 
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3939c2c66affSColin Finck static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3940c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3941c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3942c2c66affSColin Finck }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3943c2c66affSColin Finck static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3944c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3945c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3946c2c66affSColin Finck }
3947c2c66affSColin Finck 
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3948c2c66affSColin Finck static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3949c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3950c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3951c2c66affSColin Finck }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3952c2c66affSColin Finck static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3953c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3954c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3955c2c66affSColin Finck }
3956c2c66affSColin Finck 
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3957c2c66affSColin Finck static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3958c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3959c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3960c2c66affSColin Finck }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3961c2c66affSColin Finck static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3962c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3963c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3964c2c66affSColin Finck }
3965c2c66affSColin Finck 
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3966c2c66affSColin Finck static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3967c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3968c2c66affSColin Finck     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3969c2c66affSColin Finck }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3970c2c66affSColin Finck static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3971c2c66affSColin Finck     const unsigned char* in, int *inlen) {
3972c2c66affSColin Finck     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3973c2c66affSColin Finck }
3974c2c66affSColin Finck 
3975c2c66affSColin Finck static void
xmlRegisterCharEncodingHandlersISO8859x(void)3976c2c66affSColin Finck xmlRegisterCharEncodingHandlersISO8859x (void) {
3977c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3978c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3979c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3980c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3981c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3982c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3983c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3984c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3985c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3986c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3987c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3988c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3989c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3990c2c66affSColin Finck     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3991c2c66affSColin Finck }
3992c2c66affSColin Finck 
3993c2c66affSColin Finck #endif
3994c2c66affSColin Finck #endif
3995c2c66affSColin Finck 
3996