1c2c66affSColin Finck /*
2c2c66affSColin Finck * encoding.c : implements the encoding conversion functions needed for XML
3c2c66affSColin Finck *
4c2c66affSColin Finck * Related specs:
5c2c66affSColin Finck * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6c2c66affSColin Finck * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7c2c66affSColin Finck * [ISO-10646] UTF-8 and UTF-16 in Annexes
8c2c66affSColin Finck * [ISO-8859-1] ISO Latin-1 characters codes.
9c2c66affSColin Finck * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10c2c66affSColin Finck * Worldwide Character Encoding -- Version 1.0", Addison-
11c2c66affSColin Finck * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12c2c66affSColin Finck * described in Unicode Technical Report #4.
13c2c66affSColin Finck * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14c2c66affSColin Finck * Information Interchange, ANSI X3.4-1986.
15c2c66affSColin Finck *
16c2c66affSColin Finck * See Copyright for the status of this software.
17c2c66affSColin Finck *
18c2c66affSColin Finck * daniel@veillard.com
19c2c66affSColin Finck *
20c2c66affSColin Finck * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21c2c66affSColin Finck */
22c2c66affSColin Finck
23c2c66affSColin Finck #define IN_LIBXML
24c2c66affSColin Finck #include "libxml.h"
25c2c66affSColin Finck
26c2c66affSColin Finck #include <string.h>
27c2c66affSColin Finck #include <limits.h>
28c2c66affSColin Finck #include <ctype.h>
29c2c66affSColin Finck #include <stdlib.h>
30*911153daSThomas Faber
31c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
32c2c66affSColin Finck #include <errno.h>
33c2c66affSColin Finck #endif
34*911153daSThomas Faber
35c2c66affSColin Finck #include <libxml/encoding.h>
36c2c66affSColin Finck #include <libxml/xmlmemory.h>
37c2c66affSColin Finck #ifdef LIBXML_HTML_ENABLED
38c2c66affSColin Finck #include <libxml/HTMLparser.h>
39c2c66affSColin Finck #endif
40c2c66affSColin Finck #include <libxml/globals.h>
41c2c66affSColin Finck #include <libxml/xmlerror.h>
42c2c66affSColin Finck
43c2c66affSColin Finck #include "buf.h"
44c2c66affSColin Finck #include "enc.h"
45c2c66affSColin Finck
46*911153daSThomas Faber #ifdef LIBXML_ICU_ENABLED
47*911153daSThomas Faber #include <unicode/ucnv.h>
48*911153daSThomas Faber /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
49*911153daSThomas Faber #define ICU_PIVOT_BUF_SIZE 1024
/*
 * State for one ICU-backed converter: the converter for the target
 * encoding, a UTF-8 converter, and a UChar buffer between them.
 * openIcuConverter() allocates it and points both pivot cursors at the
 * start of pivot_buf; closeIcuConverter() closes both converters and
 * frees the structure.
 */
50*911153daSThomas Faber typedef struct _uconv_t uconv_t;
51*911153daSThomas Faber struct _uconv_t {
52*911153daSThomas Faber UConverter *uconv; /* for conversion between an encoding and UTF-16 */
53*911153daSThomas Faber UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
/* NOTE(review): presumably the UTF-16 pivot shared by the two converters
 * above - confirm against the conversion call sites. */
54*911153daSThomas Faber UChar pivot_buf[ICU_PIVOT_BUF_SIZE];
/* Cursors into pivot_buf; both start at pivot_buf (see openIcuConverter). */
55*911153daSThomas Faber UChar *pivot_source;
56*911153daSThomas Faber UChar *pivot_target;
57*911153daSThomas Faber };
58*911153daSThomas Faber #endif
59*911153daSThomas Faber
60c2c66affSColin Finck static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
61c2c66affSColin Finck static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
62c2c66affSColin Finck
63c2c66affSColin Finck typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
64c2c66affSColin Finck typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
65c2c66affSColin Finck struct _xmlCharEncodingAlias {
66c2c66affSColin Finck const char *name;
67c2c66affSColin Finck const char *alias;
68c2c66affSColin Finck };
69c2c66affSColin Finck
70c2c66affSColin Finck static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
71c2c66affSColin Finck static int xmlCharEncodingAliasesNb = 0;
72c2c66affSColin Finck static int xmlCharEncodingAliasesMax = 0;
73c2c66affSColin Finck
74c2c66affSColin Finck #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
75c2c66affSColin Finck #if 0
76c2c66affSColin Finck #define DEBUG_ENCODING /* Define this to get encoding traces */
77c2c66affSColin Finck #endif
78c2c66affSColin Finck #else
79c2c66affSColin Finck #ifdef LIBXML_ISO8859X_ENABLED
80c2c66affSColin Finck static void xmlRegisterCharEncodingHandlersISO8859x (void);
81c2c66affSColin Finck #endif
82c2c66affSColin Finck #endif
83c2c66affSColin Finck
84c2c66affSColin Finck static int xmlLittleEndian = 1;
85c2c66affSColin Finck
86c2c66affSColin Finck /**
87c2c66affSColin Finck * xmlEncodingErrMemory:
8840ee59d6SThomas Faber * @extra: extra information
89c2c66affSColin Finck *
90c2c66affSColin Finck * Handle an out of memory condition
91c2c66affSColin Finck */
92c2c66affSColin Finck static void
xmlEncodingErrMemory(const char * extra)93c2c66affSColin Finck xmlEncodingErrMemory(const char *extra)
94c2c66affSColin Finck {
95c2c66affSColin Finck __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
96c2c66affSColin Finck }
97c2c66affSColin Finck
98c2c66affSColin Finck /**
99c2c66affSColin Finck * xmlErrEncoding:
100c2c66affSColin Finck * @error: the error number
101c2c66affSColin Finck * @msg: the error message
102c2c66affSColin Finck *
103c2c66affSColin Finck * n encoding error
104c2c66affSColin Finck */
105c2c66affSColin Finck static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)106c2c66affSColin Finck xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
107c2c66affSColin Finck {
108c2c66affSColin Finck __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
109c2c66affSColin Finck XML_FROM_I18N, error, XML_ERR_FATAL,
110c2c66affSColin Finck NULL, 0, val, NULL, NULL, 0, 0, msg, val);
111c2c66affSColin Finck }
112c2c66affSColin Finck
113c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
114c2c66affSColin Finck static uconv_t*
openIcuConverter(const char * name,int toUnicode)115c2c66affSColin Finck openIcuConverter(const char* name, int toUnicode)
116c2c66affSColin Finck {
117c2c66affSColin Finck UErrorCode status = U_ZERO_ERROR;
118c2c66affSColin Finck uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
119c2c66affSColin Finck if (conv == NULL)
120c2c66affSColin Finck return NULL;
121c2c66affSColin Finck
1225bb277a5SThomas Faber conv->pivot_source = conv->pivot_buf;
1235bb277a5SThomas Faber conv->pivot_target = conv->pivot_buf;
1245bb277a5SThomas Faber
125c2c66affSColin Finck conv->uconv = ucnv_open(name, &status);
126c2c66affSColin Finck if (U_FAILURE(status))
127c2c66affSColin Finck goto error;
128c2c66affSColin Finck
129c2c66affSColin Finck status = U_ZERO_ERROR;
130c2c66affSColin Finck if (toUnicode) {
131c2c66affSColin Finck ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
132c2c66affSColin Finck NULL, NULL, NULL, &status);
133c2c66affSColin Finck }
134c2c66affSColin Finck else {
135c2c66affSColin Finck ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
136c2c66affSColin Finck NULL, NULL, NULL, &status);
137c2c66affSColin Finck }
138c2c66affSColin Finck if (U_FAILURE(status))
139c2c66affSColin Finck goto error;
140c2c66affSColin Finck
141c2c66affSColin Finck status = U_ZERO_ERROR;
142c2c66affSColin Finck conv->utf8 = ucnv_open("UTF-8", &status);
143c2c66affSColin Finck if (U_SUCCESS(status))
144c2c66affSColin Finck return conv;
145c2c66affSColin Finck
146c2c66affSColin Finck error:
147c2c66affSColin Finck if (conv->uconv)
148c2c66affSColin Finck ucnv_close(conv->uconv);
149c2c66affSColin Finck xmlFree(conv);
150c2c66affSColin Finck return NULL;
151c2c66affSColin Finck }
152c2c66affSColin Finck
153c2c66affSColin Finck static void
closeIcuConverter(uconv_t * conv)154c2c66affSColin Finck closeIcuConverter(uconv_t *conv)
155c2c66affSColin Finck {
156c2c66affSColin Finck if (conv != NULL) {
157c2c66affSColin Finck ucnv_close(conv->uconv);
158c2c66affSColin Finck ucnv_close(conv->utf8);
159c2c66affSColin Finck xmlFree(conv);
160c2c66affSColin Finck }
161c2c66affSColin Finck }
162c2c66affSColin Finck #endif /* LIBXML_ICU_ENABLED */
163c2c66affSColin Finck
164c2c66affSColin Finck /************************************************************************
165c2c66affSColin Finck * *
166c2c66affSColin Finck * Conversions To/From UTF8 encoding *
167c2c66affSColin Finck * *
168c2c66affSColin Finck ************************************************************************/
169c2c66affSColin Finck
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every input
 * byte below 0x80 is copied verbatim and needs exactly one output byte.
 *
 * Returns the number of bytes written on success, or -2 if a byte
 *     outside the ASCII range (>= 0x80) is found in the input.
 * The value of @inlen after return is the number of octets consumed;
 *     on a -2 return it counts the valid bytes preceding the bad one.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend = out + *outlen;
    const unsigned char* inend = in + (*inlen);

    /*
     * One output byte per input byte: no safety margin is needed, the
     * loop simply stops when either buffer is exhausted.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* Not ASCII: report a transcoding failure like the other
             * converters in this file do. */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-2);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
214c2c66affSColin Finck
215c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8, or a character that is not ASCII), or -1
 *     if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed;
 *     an incomplete trailing sequence is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }

        /* Sequence split across input chunks: wait for the rest. */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte: the sequence is malformed.
                 * Fail instead of emitting a half-assembled value.
                 */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);
}
299c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
300c2c66affSColin Finck
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is; bytes from 0x80
 * to 0xFF are expanded to a two-byte UTF-8 sequence. Conversion stops at
 * the first character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * Space checks use the remaining distance (outend - out) rather than
     * comparing against "outend - 1", which would form a pointer before
     * the start of the buffer when *outlen is 0.
     */
    while (in < inend) {
        if (*in < 0x80) {
            if (out >= outend)
                break;
            *out++ = *in++;
        } else {
            if (outend - out < 2)
                break;
            *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
349c2c66affSColin Finck
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the number of bytes to copy is negative. A NULL @inb is
 *     an initialization call: both lengths are set to 0 and 0 is returned.
 * The values of *@outlen and *@inlenb after a successful return are both
 *     the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if (out == NULL || outlen == NULL || inlenb == NULL)
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* Copy whichever is shorter: the input or the room left in @out. */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
396c2c66affSColin Finck
397c2c66affSColin Finck
398c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad lead or continuation byte, or a character above 0xFF),
 *     or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed;
 *     an incomplete trailing sequence is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *cursor = in;      /* next input byte to read */
    const unsigned char *committed = in;   /* end of last full character */
    const unsigned char *limit;
    unsigned char *dst = out;
    const unsigned char *dstLimit;
    int status = 0;                         /* becomes -2 on bad input */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization, nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    limit = in + (*inlen);
    dstLimit = out + (*outlen);

    while (cursor < limit) {
        unsigned int lead = *cursor++;
        unsigned int value;
        unsigned int cont;
        int extra;

        /* Classify the lead byte and find how many bytes follow. */
        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if ((lead >= 0xC0) && (lead < 0xE0)) {
            value = lead & 0x1F;
            extra = 1;
        } else if ((lead >= 0xE0) && (lead < 0xF0)) {
            value = lead & 0x0F;
            extra = 2;
        } else if ((lead >= 0xF0) && (lead < 0xF8)) {
            value = lead & 0x07;
            extra = 3;
        } else {
            /* continuation byte in lead position, or lead >= 0xF8 */
            status = -2;
            break;
        }

        /* Sequence split across input chunks: wait for the rest. */
        if (limit - cursor < extra)
            break;

        while (extra > 0) {
            cont = *cursor++;
            if ((cont & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            value = (value << 6) | (cont & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        /* value is now a single UCS-4 code point */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (dst >= dstLimit)
            break;
        *dst++ = value;
        committed = cursor;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    if (status != 0)
        return(status);
    return(*outlen);
}
488c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
489c2c66affSColin Finck
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte (low byte
 * first), so the result does not depend on the endianness of the host
 * nor on the alignment of @inb.
 *
 * A character is only converted while more than 5 bytes remain free in
 * @out, so a conversion never stops in the middle of a UTF-8 sequence.
 * An odd trailing byte and a high surrogate whose low half has not
 * arrived yet are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned char* outend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    /* Ignore a dangling odd byte: only whole 16-bit units are decoded. */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while ((in < inend) && (outend - out > 5)) {
        c = in[0] | (((unsigned int) in[1]) << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = in[0] | (((unsigned int) in[1]) << 8);
            in += 2;
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
        }

        /*
         * c is a single UCS-4 value; the loop condition guarantees room
         * for the longest (4 byte) UTF-8 sequence.
         */
        if      (c <    0x80) {  *out++=  c;                       bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0; bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0; bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }

        for ( ; bits >= 0; bits-= 6) {
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
582c2c66affSColin Finck
583c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No BOM is emitted. The output is written byte by
 * byte (low byte first), so the result does not depend on the endianness
 * of the host nor on the alignment of @outb.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (bad lead or continuation
 *     byte, or a value above U+10FFFF). A NULL @in is an initialization
 *     call: both lengths are set to 0 and 0 is returned.
 * The value of *@inlen after return is the number of octets consumed;
 *     an incomplete trailing sequence, or a character that no longer
 *     fits in @outb, is left unconsumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* Only whole 16-bit units can be written. */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        }

        /* Sequence split across input chunks: wait for the rest. */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte: the sequence is malformed.
                 * Fail instead of emitting a half-assembled value.
                 */
                *outlen = out - outb;
                *inlen = processed - instart;
                return(-2);
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) ((c >> 8) & 0xFF);
        }
        else if (c < 0x110000) {
            /* needs a surrogate pair: two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        }
        else {
            /* beyond U+10FFFF: not representable in UTF-16 */
            *outlen = out - outb;
            *inlen = processed - instart;
            return(-2);
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);
}
692c2c66affSColin Finck
693c2c66affSColin Finck /**
694c2c66affSColin Finck * UTF8ToUTF16:
695c2c66affSColin Finck * @outb: a pointer to an array of bytes to store the result
696c2c66affSColin Finck * @outlen: the length of @outb
697c2c66affSColin Finck * @in: a pointer to an array of UTF-8 chars
698c2c66affSColin Finck * @inlen: the length of @in
699c2c66affSColin Finck *
700c2c66affSColin Finck * Take a block of UTF-8 chars in and try to convert it to an UTF-16
701c2c66affSColin Finck * block of chars out.
702c2c66affSColin Finck *
703c2c66affSColin Finck * Returns the number of bytes written, or -1 if lack of space, or -2
704c2c66affSColin Finck * if the transcoding failed.
705c2c66affSColin Finck */
706c2c66affSColin Finck static int
UTF8ToUTF16(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)707c2c66affSColin Finck UTF8ToUTF16(unsigned char* outb, int *outlen,
708c2c66affSColin Finck const unsigned char* in, int *inlen)
709c2c66affSColin Finck {
710c2c66affSColin Finck if (in == NULL) {
711c2c66affSColin Finck /*
712c2c66affSColin Finck * initialization, add the Byte Order Mark for UTF-16LE
713c2c66affSColin Finck */
714c2c66affSColin Finck if (*outlen >= 2) {
715c2c66affSColin Finck outb[0] = 0xFF;
716c2c66affSColin Finck outb[1] = 0xFE;
717c2c66affSColin Finck *outlen = 2;
718c2c66affSColin Finck *inlen = 0;
719c2c66affSColin Finck #ifdef DEBUG_ENCODING
720c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
721c2c66affSColin Finck "Added FFFE Byte Order Mark\n");
722c2c66affSColin Finck #endif
723c2c66affSColin Finck return(2);
724c2c66affSColin Finck }
725c2c66affSColin Finck *outlen = 0;
726c2c66affSColin Finck *inlen = 0;
727c2c66affSColin Finck return(0);
728c2c66affSColin Finck }
729c2c66affSColin Finck return (UTF8ToUTF16LE(outb, outlen, in, inlen));
730c2c66affSColin Finck }
731c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
732c2c66affSColin Finck
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out in bytes
 * @inb:  a pointer to an array of UTF-16BE data passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of big-endian UTF-16 code units in and try to convert it
 * to an UTF-8 block of chars out. The input is read one byte at a time,
 * so the result depends neither on the byte order of the host nor on
 * the alignment of @inb.
 *
 * A trailing odd byte of input is ignored. A new character is only
 * started while at least 6 bytes are still free in @out; a high
 * surrogate that is the last code unit of the input is left unconsumed
 * so that the caller can supply the second half later.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 *     followed by a low surrogate.
 *     In both cases *outlen is set to the number of bytes written and
 *     *inlenb to the number of input bytes consumed.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    /* a lone trailing byte cannot form a 16-bit code unit */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    /*
     * The "+ 5" margin guarantees that at least 6 bytes are free before a
     * character is started, so the (at most 4 byte) UTF-8 sequence written
     * below always fits and needs no further bounds checks.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        /* big-endian code unit: most significant byte first */
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                /* high surrogate without its low half */
                *outlen = (int) (out - outstart);
                *inlenb = (int) (processed - inb);
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* continuation bytes, 6 payload bits each */
        for ( ; bits >= 0; bits-= 6) {
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(*outlen);
}
825c2c66affSColin Finck
826c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No Byte Order Mark is produced. The output is
 * stored one byte at a time, most significant byte first, so the result
 * depends neither on the byte order of the host nor on the alignment of
 * @outb.
 *
 * Conversion stops early, without reporting an error, when @outb cannot
 * hold the next character, when the last character of @in is truncated,
 * or when a decoded value is above 0x10FFFF.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if a byte that cannot start an UTF-8 sequence is
 *     found in leading position.
 *     Except for the -1 case, *outlen is set to the number of bytes
 *     written and *inlen to the number of input bytes consumed.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit code units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  {
            /* trailing byte in leading position */
            *outlen = (int) (out - outb);
            *inlen = (int) (processed - instart);
            return(-2);
        } else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            *outlen = (int) (out - outb);
            *inlen = (int) (processed - instart);
            return(-2);
        }

        /* the character is cut off at the end of the input */
        if (inend - in < trailing) {
            break;
        }

        /*
         * NOTE: a byte that is not a continuation byte ends the sequence
         * early; the bits gathered so far are still emitted below.
         */
        for ( ; trailing; trailing--) {
            if ((in >= inend) || (((d = *in++) & 0xC0) != 0x80))  break;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)  break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) c;
            out += 2;
        }
        else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two code units */
            if (outend - out < 4)  break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) hi;
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) lo;
            out += 4;
        }
        else
            break;
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);
}
932c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
933c2c66affSColin Finck
934c2c66affSColin Finck /************************************************************************
935c2c66affSColin Finck * *
936c2c66affSColin Finck * Generic encoding handling routines *
937c2c66affSColin Finck * *
938c2c66affSColin Finck ************************************************************************/
939c2c66affSColin Finck
940c2c66affSColin Finck /**
941c2c66affSColin Finck * xmlDetectCharEncoding:
942c2c66affSColin Finck * @in: a pointer to the first bytes of the XML entity, must be at least
943c2c66affSColin Finck * 2 bytes long (at least 4 if encoding is UTF4 variant).
944c2c66affSColin Finck * @len: pointer to the length of the buffer
945c2c66affSColin Finck *
946c2c66affSColin Finck * Guess the encoding of the entity using the first bytes of the entity content
947c2c66affSColin Finck * according to the non-normative appendix F of the XML-1.0 recommendation.
948c2c66affSColin Finck *
949c2c66affSColin Finck * Returns one of the XML_CHAR_ENCODING_... values.
950c2c66affSColin Finck */
951c2c66affSColin Finck xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)952c2c66affSColin Finck xmlDetectCharEncoding(const unsigned char* in, int len)
953c2c66affSColin Finck {
954c2c66affSColin Finck if (in == NULL)
955c2c66affSColin Finck return(XML_CHAR_ENCODING_NONE);
956c2c66affSColin Finck if (len >= 4) {
957c2c66affSColin Finck if ((in[0] == 0x00) && (in[1] == 0x00) &&
958c2c66affSColin Finck (in[2] == 0x00) && (in[3] == 0x3C))
959c2c66affSColin Finck return(XML_CHAR_ENCODING_UCS4BE);
960c2c66affSColin Finck if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961c2c66affSColin Finck (in[2] == 0x00) && (in[3] == 0x00))
962c2c66affSColin Finck return(XML_CHAR_ENCODING_UCS4LE);
963c2c66affSColin Finck if ((in[0] == 0x00) && (in[1] == 0x00) &&
964c2c66affSColin Finck (in[2] == 0x3C) && (in[3] == 0x00))
965c2c66affSColin Finck return(XML_CHAR_ENCODING_UCS4_2143);
966c2c66affSColin Finck if ((in[0] == 0x00) && (in[1] == 0x3C) &&
967c2c66affSColin Finck (in[2] == 0x00) && (in[3] == 0x00))
968c2c66affSColin Finck return(XML_CHAR_ENCODING_UCS4_3412);
969c2c66affSColin Finck if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
970c2c66affSColin Finck (in[2] == 0xA7) && (in[3] == 0x94))
971c2c66affSColin Finck return(XML_CHAR_ENCODING_EBCDIC);
972c2c66affSColin Finck if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
973c2c66affSColin Finck (in[2] == 0x78) && (in[3] == 0x6D))
974c2c66affSColin Finck return(XML_CHAR_ENCODING_UTF8);
975c2c66affSColin Finck /*
976c2c66affSColin Finck * Although not part of the recommendation, we also
977c2c66affSColin Finck * attempt an "auto-recognition" of UTF-16LE and
978c2c66affSColin Finck * UTF-16BE encodings.
979c2c66affSColin Finck */
980c2c66affSColin Finck if ((in[0] == 0x3C) && (in[1] == 0x00) &&
981c2c66affSColin Finck (in[2] == 0x3F) && (in[3] == 0x00))
982c2c66affSColin Finck return(XML_CHAR_ENCODING_UTF16LE);
983c2c66affSColin Finck if ((in[0] == 0x00) && (in[1] == 0x3C) &&
984c2c66affSColin Finck (in[2] == 0x00) && (in[3] == 0x3F))
985c2c66affSColin Finck return(XML_CHAR_ENCODING_UTF16BE);
986c2c66affSColin Finck }
987c2c66affSColin Finck if (len >= 3) {
988c2c66affSColin Finck /*
989c2c66affSColin Finck * Errata on XML-1.0 June 20 2001
990c2c66affSColin Finck * We now allow an UTF8 encoded BOM
991c2c66affSColin Finck */
992c2c66affSColin Finck if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
993c2c66affSColin Finck (in[2] == 0xBF))
994c2c66affSColin Finck return(XML_CHAR_ENCODING_UTF8);
995c2c66affSColin Finck }
996c2c66affSColin Finck /* For UTF-16 we can recognize by the BOM */
997c2c66affSColin Finck if (len >= 2) {
998c2c66affSColin Finck if ((in[0] == 0xFE) && (in[1] == 0xFF))
999c2c66affSColin Finck return(XML_CHAR_ENCODING_UTF16BE);
1000c2c66affSColin Finck if ((in[0] == 0xFF) && (in[1] == 0xFE))
1001c2c66affSColin Finck return(XML_CHAR_ENCODING_UTF16LE);
1002c2c66affSColin Finck }
1003c2c66affSColin Finck return(XML_CHAR_ENCODING_NONE);
1004c2c66affSColin Finck }
1005c2c66affSColin Finck
1006c2c66affSColin Finck /**
1007c2c66affSColin Finck * xmlCleanupEncodingAliases:
1008c2c66affSColin Finck *
1009c2c66affSColin Finck * Unregisters all aliases
1010c2c66affSColin Finck */
1011c2c66affSColin Finck void
xmlCleanupEncodingAliases(void)1012c2c66affSColin Finck xmlCleanupEncodingAliases(void) {
1013c2c66affSColin Finck int i;
1014c2c66affSColin Finck
1015c2c66affSColin Finck if (xmlCharEncodingAliases == NULL)
1016c2c66affSColin Finck return;
1017c2c66affSColin Finck
1018c2c66affSColin Finck for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1019c2c66affSColin Finck if (xmlCharEncodingAliases[i].name != NULL)
1020c2c66affSColin Finck xmlFree((char *) xmlCharEncodingAliases[i].name);
1021c2c66affSColin Finck if (xmlCharEncodingAliases[i].alias != NULL)
1022c2c66affSColin Finck xmlFree((char *) xmlCharEncodingAliases[i].alias);
1023c2c66affSColin Finck }
1024c2c66affSColin Finck xmlCharEncodingAliasesNb = 0;
1025c2c66affSColin Finck xmlCharEncodingAliasesMax = 0;
1026c2c66affSColin Finck xmlFree(xmlCharEncodingAliases);
1027c2c66affSColin Finck xmlCharEncodingAliases = NULL;
1028c2c66affSColin Finck }
1029c2c66affSColin Finck
1030c2c66affSColin Finck /**
1031c2c66affSColin Finck * xmlGetEncodingAlias:
1032c2c66affSColin Finck * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1033c2c66affSColin Finck *
1034c2c66affSColin Finck * Lookup an encoding name for the given alias.
1035c2c66affSColin Finck *
1036c2c66affSColin Finck * Returns NULL if not found, otherwise the original name
1037c2c66affSColin Finck */
1038c2c66affSColin Finck const char *
xmlGetEncodingAlias(const char * alias)1039c2c66affSColin Finck xmlGetEncodingAlias(const char *alias) {
1040c2c66affSColin Finck int i;
1041c2c66affSColin Finck char upper[100];
1042c2c66affSColin Finck
1043c2c66affSColin Finck if (alias == NULL)
1044c2c66affSColin Finck return(NULL);
1045c2c66affSColin Finck
1046c2c66affSColin Finck if (xmlCharEncodingAliases == NULL)
1047c2c66affSColin Finck return(NULL);
1048c2c66affSColin Finck
1049c2c66affSColin Finck for (i = 0;i < 99;i++) {
1050c2c66affSColin Finck upper[i] = toupper(alias[i]);
1051c2c66affSColin Finck if (upper[i] == 0) break;
1052c2c66affSColin Finck }
1053c2c66affSColin Finck upper[i] = 0;
1054c2c66affSColin Finck
1055c2c66affSColin Finck /*
1056c2c66affSColin Finck * Walk down the list looking for a definition of the alias
1057c2c66affSColin Finck */
1058c2c66affSColin Finck for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1059c2c66affSColin Finck if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1060c2c66affSColin Finck return(xmlCharEncodingAliases[i].name);
1061c2c66affSColin Finck }
1062c2c66affSColin Finck }
1063c2c66affSColin Finck return(NULL);
1064c2c66affSColin Finck }
1065c2c66affSColin Finck
1066c2c66affSColin Finck /**
1067c2c66affSColin Finck * xmlAddEncodingAlias:
1068c2c66affSColin Finck * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1069c2c66affSColin Finck * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1070c2c66affSColin Finck *
1071c2c66affSColin Finck * Registers an alias @alias for an encoding named @name. Existing alias
1072c2c66affSColin Finck * will be overwritten.
1073c2c66affSColin Finck *
1074c2c66affSColin Finck * Returns 0 in case of success, -1 in case of error
1075c2c66affSColin Finck */
1076c2c66affSColin Finck int
xmlAddEncodingAlias(const char * name,const char * alias)1077c2c66affSColin Finck xmlAddEncodingAlias(const char *name, const char *alias) {
1078c2c66affSColin Finck int i;
1079c2c66affSColin Finck char upper[100];
1080c2c66affSColin Finck
1081c2c66affSColin Finck if ((name == NULL) || (alias == NULL))
1082c2c66affSColin Finck return(-1);
1083c2c66affSColin Finck
1084c2c66affSColin Finck for (i = 0;i < 99;i++) {
1085c2c66affSColin Finck upper[i] = toupper(alias[i]);
1086c2c66affSColin Finck if (upper[i] == 0) break;
1087c2c66affSColin Finck }
1088c2c66affSColin Finck upper[i] = 0;
1089c2c66affSColin Finck
1090c2c66affSColin Finck if (xmlCharEncodingAliases == NULL) {
1091c2c66affSColin Finck xmlCharEncodingAliasesNb = 0;
1092c2c66affSColin Finck xmlCharEncodingAliasesMax = 20;
1093c2c66affSColin Finck xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1094c2c66affSColin Finck xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1095c2c66affSColin Finck if (xmlCharEncodingAliases == NULL)
1096c2c66affSColin Finck return(-1);
1097c2c66affSColin Finck } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1098c2c66affSColin Finck xmlCharEncodingAliasesMax *= 2;
1099c2c66affSColin Finck xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1100c2c66affSColin Finck xmlRealloc(xmlCharEncodingAliases,
1101c2c66affSColin Finck xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1102c2c66affSColin Finck }
1103c2c66affSColin Finck /*
1104c2c66affSColin Finck * Walk down the list looking for a definition of the alias
1105c2c66affSColin Finck */
1106c2c66affSColin Finck for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1107c2c66affSColin Finck if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1108c2c66affSColin Finck /*
1109c2c66affSColin Finck * Replace the definition.
1110c2c66affSColin Finck */
1111c2c66affSColin Finck xmlFree((char *) xmlCharEncodingAliases[i].name);
1112c2c66affSColin Finck xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1113c2c66affSColin Finck return(0);
1114c2c66affSColin Finck }
1115c2c66affSColin Finck }
1116c2c66affSColin Finck /*
1117c2c66affSColin Finck * Add the definition
1118c2c66affSColin Finck */
1119c2c66affSColin Finck xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1120c2c66affSColin Finck xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1121c2c66affSColin Finck xmlCharEncodingAliasesNb++;
1122c2c66affSColin Finck return(0);
1123c2c66affSColin Finck }
1124c2c66affSColin Finck
1125c2c66affSColin Finck /**
1126c2c66affSColin Finck * xmlDelEncodingAlias:
1127c2c66affSColin Finck * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1128c2c66affSColin Finck *
1129c2c66affSColin Finck * Unregisters an encoding alias @alias
1130c2c66affSColin Finck *
1131c2c66affSColin Finck * Returns 0 in case of success, -1 in case of error
1132c2c66affSColin Finck */
1133c2c66affSColin Finck int
xmlDelEncodingAlias(const char * alias)1134c2c66affSColin Finck xmlDelEncodingAlias(const char *alias) {
1135c2c66affSColin Finck int i;
1136c2c66affSColin Finck
1137c2c66affSColin Finck if (alias == NULL)
1138c2c66affSColin Finck return(-1);
1139c2c66affSColin Finck
1140c2c66affSColin Finck if (xmlCharEncodingAliases == NULL)
1141c2c66affSColin Finck return(-1);
1142c2c66affSColin Finck /*
1143c2c66affSColin Finck * Walk down the list looking for a definition of the alias
1144c2c66affSColin Finck */
1145c2c66affSColin Finck for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1146c2c66affSColin Finck if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1147c2c66affSColin Finck xmlFree((char *) xmlCharEncodingAliases[i].name);
1148c2c66affSColin Finck xmlFree((char *) xmlCharEncodingAliases[i].alias);
1149c2c66affSColin Finck xmlCharEncodingAliasesNb--;
1150c2c66affSColin Finck memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1151c2c66affSColin Finck sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1152c2c66affSColin Finck return(0);
1153c2c66affSColin Finck }
1154c2c66affSColin Finck }
1155c2c66affSColin Finck return(-1);
1156c2c66affSColin Finck }
1157c2c66affSColin Finck
1158c2c66affSColin Finck /**
1159c2c66affSColin Finck * xmlParseCharEncoding:
1160c2c66affSColin Finck * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1161c2c66affSColin Finck *
1162c2c66affSColin Finck * Compare the string to the encoding schemes already known. Note
1163c2c66affSColin Finck * that the comparison is case insensitive accordingly to the section
1164c2c66affSColin Finck * [XML] 4.3.3 Character Encoding in Entities.
1165c2c66affSColin Finck *
1166c2c66affSColin Finck * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1167c2c66affSColin Finck * if not recognized.
1168c2c66affSColin Finck */
1169c2c66affSColin Finck xmlCharEncoding
xmlParseCharEncoding(const char * name)1170c2c66affSColin Finck xmlParseCharEncoding(const char* name)
1171c2c66affSColin Finck {
1172c2c66affSColin Finck const char *alias;
1173c2c66affSColin Finck char upper[500];
1174c2c66affSColin Finck int i;
1175c2c66affSColin Finck
1176c2c66affSColin Finck if (name == NULL)
1177c2c66affSColin Finck return(XML_CHAR_ENCODING_NONE);
1178c2c66affSColin Finck
1179c2c66affSColin Finck /*
1180c2c66affSColin Finck * Do the alias resolution
1181c2c66affSColin Finck */
1182c2c66affSColin Finck alias = xmlGetEncodingAlias(name);
1183c2c66affSColin Finck if (alias != NULL)
1184c2c66affSColin Finck name = alias;
1185c2c66affSColin Finck
1186c2c66affSColin Finck for (i = 0;i < 499;i++) {
1187c2c66affSColin Finck upper[i] = toupper(name[i]);
1188c2c66affSColin Finck if (upper[i] == 0) break;
1189c2c66affSColin Finck }
1190c2c66affSColin Finck upper[i] = 0;
1191c2c66affSColin Finck
1192c2c66affSColin Finck if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1193c2c66affSColin Finck if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1194c2c66affSColin Finck if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1195c2c66affSColin Finck
1196c2c66affSColin Finck /*
1197c2c66affSColin Finck * NOTE: if we were able to parse this, the endianness of UTF16 is
1198c2c66affSColin Finck * already found and in use
1199c2c66affSColin Finck */
1200c2c66affSColin Finck if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1201c2c66affSColin Finck if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1202c2c66affSColin Finck
1203c2c66affSColin Finck if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1204c2c66affSColin Finck if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205c2c66affSColin Finck if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1206c2c66affSColin Finck
1207c2c66affSColin Finck /*
1208c2c66affSColin Finck * NOTE: if we were able to parse this, the endianness of UCS4 is
1209c2c66affSColin Finck * already found and in use
1210c2c66affSColin Finck */
1211c2c66affSColin Finck if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1212c2c66affSColin Finck if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213c2c66affSColin Finck if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1214c2c66affSColin Finck
1215c2c66affSColin Finck
1216c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1217c2c66affSColin Finck if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1218c2c66affSColin Finck if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1219c2c66affSColin Finck
1220c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1221c2c66affSColin Finck if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1222c2c66affSColin Finck if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1223c2c66affSColin Finck
1224c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1225c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1226c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1227c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1228c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1229c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1230c2c66affSColin Finck if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1231c2c66affSColin Finck
1232c2c66affSColin Finck if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1233c2c66affSColin Finck if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1234c2c66affSColin Finck if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1235c2c66affSColin Finck
1236c2c66affSColin Finck #ifdef DEBUG_ENCODING
1237c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1238c2c66affSColin Finck #endif
1239c2c66affSColin Finck return(XML_CHAR_ENCODING_ERROR);
1240c2c66affSColin Finck }
1241c2c66affSColin Finck
1242c2c66affSColin Finck /**
1243c2c66affSColin Finck * xmlGetCharEncodingName:
1244c2c66affSColin Finck * @enc: the encoding
1245c2c66affSColin Finck *
1246c2c66affSColin Finck * The "canonical" name for XML encoding.
1247c2c66affSColin Finck * C.f. http://www.w3.org/TR/REC-xml#charencoding
1248c2c66affSColin Finck * Section 4.3.3 Character Encoding in Entities
1249c2c66affSColin Finck *
1250c2c66affSColin Finck * Returns the canonical name for the given encoding
1251c2c66affSColin Finck */
1252c2c66affSColin Finck
1253c2c66affSColin Finck const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1254c2c66affSColin Finck xmlGetCharEncodingName(xmlCharEncoding enc) {
1255c2c66affSColin Finck switch (enc) {
1256c2c66affSColin Finck case XML_CHAR_ENCODING_ERROR:
1257c2c66affSColin Finck return(NULL);
1258c2c66affSColin Finck case XML_CHAR_ENCODING_NONE:
1259c2c66affSColin Finck return(NULL);
1260c2c66affSColin Finck case XML_CHAR_ENCODING_UTF8:
1261c2c66affSColin Finck return("UTF-8");
1262c2c66affSColin Finck case XML_CHAR_ENCODING_UTF16LE:
1263c2c66affSColin Finck return("UTF-16");
1264c2c66affSColin Finck case XML_CHAR_ENCODING_UTF16BE:
1265c2c66affSColin Finck return("UTF-16");
1266c2c66affSColin Finck case XML_CHAR_ENCODING_EBCDIC:
1267c2c66affSColin Finck return("EBCDIC");
1268c2c66affSColin Finck case XML_CHAR_ENCODING_UCS4LE:
1269c2c66affSColin Finck return("ISO-10646-UCS-4");
1270c2c66affSColin Finck case XML_CHAR_ENCODING_UCS4BE:
1271c2c66affSColin Finck return("ISO-10646-UCS-4");
1272c2c66affSColin Finck case XML_CHAR_ENCODING_UCS4_2143:
1273c2c66affSColin Finck return("ISO-10646-UCS-4");
1274c2c66affSColin Finck case XML_CHAR_ENCODING_UCS4_3412:
1275c2c66affSColin Finck return("ISO-10646-UCS-4");
1276c2c66affSColin Finck case XML_CHAR_ENCODING_UCS2:
1277c2c66affSColin Finck return("ISO-10646-UCS-2");
1278c2c66affSColin Finck case XML_CHAR_ENCODING_8859_1:
1279c2c66affSColin Finck return("ISO-8859-1");
1280c2c66affSColin Finck case XML_CHAR_ENCODING_8859_2:
1281c2c66affSColin Finck return("ISO-8859-2");
1282c2c66affSColin Finck case XML_CHAR_ENCODING_8859_3:
1283c2c66affSColin Finck return("ISO-8859-3");
1284c2c66affSColin Finck case XML_CHAR_ENCODING_8859_4:
1285c2c66affSColin Finck return("ISO-8859-4");
1286c2c66affSColin Finck case XML_CHAR_ENCODING_8859_5:
1287c2c66affSColin Finck return("ISO-8859-5");
1288c2c66affSColin Finck case XML_CHAR_ENCODING_8859_6:
1289c2c66affSColin Finck return("ISO-8859-6");
1290c2c66affSColin Finck case XML_CHAR_ENCODING_8859_7:
1291c2c66affSColin Finck return("ISO-8859-7");
1292c2c66affSColin Finck case XML_CHAR_ENCODING_8859_8:
1293c2c66affSColin Finck return("ISO-8859-8");
1294c2c66affSColin Finck case XML_CHAR_ENCODING_8859_9:
1295c2c66affSColin Finck return("ISO-8859-9");
1296c2c66affSColin Finck case XML_CHAR_ENCODING_2022_JP:
1297c2c66affSColin Finck return("ISO-2022-JP");
1298c2c66affSColin Finck case XML_CHAR_ENCODING_SHIFT_JIS:
1299c2c66affSColin Finck return("Shift-JIS");
1300c2c66affSColin Finck case XML_CHAR_ENCODING_EUC_JP:
1301c2c66affSColin Finck return("EUC-JP");
1302c2c66affSColin Finck case XML_CHAR_ENCODING_ASCII:
1303c2c66affSColin Finck return(NULL);
1304c2c66affSColin Finck }
1305c2c66affSColin Finck return(NULL);
1306c2c66affSColin Finck }
1307c2c66affSColin Finck
1308c2c66affSColin Finck /************************************************************************
1309c2c66affSColin Finck * *
1310c2c66affSColin Finck * Char encoding handlers *
1311c2c66affSColin Finck * *
1312c2c66affSColin Finck ************************************************************************/
1313c2c66affSColin Finck
1314c2c66affSColin Finck
1315c2c66affSColin Finck /* the size should be growable, but it's not a big deal ... */
1316c2c66affSColin Finck #define MAX_ENCODING_HANDLERS 50
1317c2c66affSColin Finck static xmlCharEncodingHandlerPtr *handlers = NULL;
1318c2c66affSColin Finck static int nbCharEncodingHandler = 0;
1319c2c66affSColin Finck
1320c2c66affSColin Finck /*
1321c2c66affSColin Finck * The default is UTF-8 for XML, that's also the default used for the
1322c2c66affSColin Finck * parser internals, so the default encoding handler is NULL
1323c2c66affSColin Finck */
1324c2c66affSColin Finck
1325c2c66affSColin Finck static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1326c2c66affSColin Finck
1327c2c66affSColin Finck /**
1328c2c66affSColin Finck * xmlNewCharEncodingHandler:
1329c2c66affSColin Finck * @name: the encoding name, in UTF-8 format (ASCII actually)
1330c2c66affSColin Finck * @input: the xmlCharEncodingInputFunc to read that encoding
1331c2c66affSColin Finck * @output: the xmlCharEncodingOutputFunc to write that encoding
1332c2c66affSColin Finck *
1333c2c66affSColin Finck * Create and registers an xmlCharEncodingHandler.
1334c2c66affSColin Finck *
1335c2c66affSColin Finck * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1336c2c66affSColin Finck */
1337c2c66affSColin Finck xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1338c2c66affSColin Finck xmlNewCharEncodingHandler(const char *name,
1339c2c66affSColin Finck xmlCharEncodingInputFunc input,
1340c2c66affSColin Finck xmlCharEncodingOutputFunc output) {
1341c2c66affSColin Finck xmlCharEncodingHandlerPtr handler;
1342c2c66affSColin Finck const char *alias;
1343c2c66affSColin Finck char upper[500];
1344c2c66affSColin Finck int i;
1345c2c66affSColin Finck char *up = NULL;
1346c2c66affSColin Finck
1347c2c66affSColin Finck /*
1348c2c66affSColin Finck * Do the alias resolution
1349c2c66affSColin Finck */
1350c2c66affSColin Finck alias = xmlGetEncodingAlias(name);
1351c2c66affSColin Finck if (alias != NULL)
1352c2c66affSColin Finck name = alias;
1353c2c66affSColin Finck
1354c2c66affSColin Finck /*
1355c2c66affSColin Finck * Keep only the uppercase version of the encoding.
1356c2c66affSColin Finck */
1357c2c66affSColin Finck if (name == NULL) {
1358c2c66affSColin Finck xmlEncodingErr(XML_I18N_NO_NAME,
1359c2c66affSColin Finck "xmlNewCharEncodingHandler : no name !\n", NULL);
1360c2c66affSColin Finck return(NULL);
1361c2c66affSColin Finck }
1362c2c66affSColin Finck for (i = 0;i < 499;i++) {
1363c2c66affSColin Finck upper[i] = toupper(name[i]);
1364c2c66affSColin Finck if (upper[i] == 0) break;
1365c2c66affSColin Finck }
1366c2c66affSColin Finck upper[i] = 0;
1367c2c66affSColin Finck up = xmlMemStrdup(upper);
1368c2c66affSColin Finck if (up == NULL) {
1369c2c66affSColin Finck xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1370c2c66affSColin Finck return(NULL);
1371c2c66affSColin Finck }
1372c2c66affSColin Finck
1373c2c66affSColin Finck /*
1374c2c66affSColin Finck * allocate and fill-up an handler block.
1375c2c66affSColin Finck */
1376c2c66affSColin Finck handler = (xmlCharEncodingHandlerPtr)
1377c2c66affSColin Finck xmlMalloc(sizeof(xmlCharEncodingHandler));
1378c2c66affSColin Finck if (handler == NULL) {
1379c2c66affSColin Finck xmlFree(up);
1380c2c66affSColin Finck xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1381c2c66affSColin Finck return(NULL);
1382c2c66affSColin Finck }
1383c2c66affSColin Finck memset(handler, 0, sizeof(xmlCharEncodingHandler));
1384c2c66affSColin Finck handler->input = input;
1385c2c66affSColin Finck handler->output = output;
1386c2c66affSColin Finck handler->name = up;
1387c2c66affSColin Finck
1388c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
1389c2c66affSColin Finck handler->iconv_in = NULL;
1390c2c66affSColin Finck handler->iconv_out = NULL;
1391c2c66affSColin Finck #endif
1392c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
1393c2c66affSColin Finck handler->uconv_in = NULL;
1394c2c66affSColin Finck handler->uconv_out = NULL;
1395c2c66affSColin Finck #endif
1396c2c66affSColin Finck
1397c2c66affSColin Finck /*
1398c2c66affSColin Finck * registers and returns the handler.
1399c2c66affSColin Finck */
1400c2c66affSColin Finck xmlRegisterCharEncodingHandler(handler);
1401c2c66affSColin Finck #ifdef DEBUG_ENCODING
1402c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
1403c2c66affSColin Finck "Registered encoding handler for %s\n", name);
1404c2c66affSColin Finck #endif
1405c2c66affSColin Finck return(handler);
1406c2c66affSColin Finck }
1407c2c66affSColin Finck
1408c2c66affSColin Finck /**
1409c2c66affSColin Finck * xmlInitCharEncodingHandlers:
1410c2c66affSColin Finck *
1411*911153daSThomas Faber * DEPRECATED: This function will be made private. Call xmlInitParser to
1412*911153daSThomas Faber * initialize the library.
1413*911153daSThomas Faber *
1414c2c66affSColin Finck * Initialize the char encoding support, it registers the default
1415c2c66affSColin Finck * encoding supported.
1416c2c66affSColin Finck * NOTE: while public, this function usually doesn't need to be called
1417c2c66affSColin Finck * in normal processing.
1418c2c66affSColin Finck */
1419c2c66affSColin Finck void
xmlInitCharEncodingHandlers(void)1420c2c66affSColin Finck xmlInitCharEncodingHandlers(void) {
1421c2c66affSColin Finck unsigned short int tst = 0x1234;
1422c2c66affSColin Finck unsigned char *ptr = (unsigned char *) &tst;
1423c2c66affSColin Finck
1424c2c66affSColin Finck if (handlers != NULL) return;
1425c2c66affSColin Finck
1426c2c66affSColin Finck handlers = (xmlCharEncodingHandlerPtr *)
1427c2c66affSColin Finck xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1428c2c66affSColin Finck
1429c2c66affSColin Finck if (*ptr == 0x12) xmlLittleEndian = 0;
1430c2c66affSColin Finck else if (*ptr == 0x34) xmlLittleEndian = 1;
1431c2c66affSColin Finck else {
1432c2c66affSColin Finck xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1433c2c66affSColin Finck "Odd problem at endianness detection\n", NULL);
1434c2c66affSColin Finck }
1435c2c66affSColin Finck
1436c2c66affSColin Finck if (handlers == NULL) {
1437c2c66affSColin Finck xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1438c2c66affSColin Finck return;
1439c2c66affSColin Finck }
1440c2c66affSColin Finck xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1441c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
1442c2c66affSColin Finck xmlUTF16LEHandler =
1443c2c66affSColin Finck xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1444c2c66affSColin Finck xmlUTF16BEHandler =
1445c2c66affSColin Finck xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1446c2c66affSColin Finck xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1447c2c66affSColin Finck xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1448c2c66affSColin Finck xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1449c2c66affSColin Finck xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1450c2c66affSColin Finck #ifdef LIBXML_HTML_ENABLED
1451c2c66affSColin Finck xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1452c2c66affSColin Finck #endif
1453c2c66affSColin Finck #else
1454c2c66affSColin Finck xmlUTF16LEHandler =
1455c2c66affSColin Finck xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1456c2c66affSColin Finck xmlUTF16BEHandler =
1457c2c66affSColin Finck xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1458c2c66affSColin Finck xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1459c2c66affSColin Finck xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1460c2c66affSColin Finck xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1461c2c66affSColin Finck xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1462c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
1463c2c66affSColin Finck #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464c2c66affSColin Finck #ifdef LIBXML_ISO8859X_ENABLED
1465c2c66affSColin Finck xmlRegisterCharEncodingHandlersISO8859x ();
1466c2c66affSColin Finck #endif
1467c2c66affSColin Finck #endif
1468c2c66affSColin Finck
1469c2c66affSColin Finck }
1470c2c66affSColin Finck
1471c2c66affSColin Finck /**
1472c2c66affSColin Finck * xmlCleanupCharEncodingHandlers:
1473c2c66affSColin Finck *
1474*911153daSThomas Faber * DEPRECATED: This function will be made private. Call xmlCleanupParser
1475*911153daSThomas Faber * to free global state but see the warnings there. xmlCleanupParser
1476*911153daSThomas Faber * should be only called once at program exit. In most cases, you don't
1477*911153daSThomas Faber * have call cleanup functions at all.
1478*911153daSThomas Faber *
1479c2c66affSColin Finck * Cleanup the memory allocated for the char encoding support, it
1480c2c66affSColin Finck * unregisters all the encoding handlers and the aliases.
1481c2c66affSColin Finck */
1482c2c66affSColin Finck void
xmlCleanupCharEncodingHandlers(void)1483c2c66affSColin Finck xmlCleanupCharEncodingHandlers(void) {
1484c2c66affSColin Finck xmlCleanupEncodingAliases();
1485c2c66affSColin Finck
1486c2c66affSColin Finck if (handlers == NULL) return;
1487c2c66affSColin Finck
1488c2c66affSColin Finck for (;nbCharEncodingHandler > 0;) {
1489c2c66affSColin Finck nbCharEncodingHandler--;
1490c2c66affSColin Finck if (handlers[nbCharEncodingHandler] != NULL) {
1491c2c66affSColin Finck if (handlers[nbCharEncodingHandler]->name != NULL)
1492c2c66affSColin Finck xmlFree(handlers[nbCharEncodingHandler]->name);
1493c2c66affSColin Finck xmlFree(handlers[nbCharEncodingHandler]);
1494c2c66affSColin Finck }
1495c2c66affSColin Finck }
1496c2c66affSColin Finck xmlFree(handlers);
1497c2c66affSColin Finck handlers = NULL;
1498c2c66affSColin Finck nbCharEncodingHandler = 0;
1499c2c66affSColin Finck xmlDefaultCharEncodingHandler = NULL;
1500c2c66affSColin Finck }
1501c2c66affSColin Finck
1502c2c66affSColin Finck /**
1503c2c66affSColin Finck * xmlRegisterCharEncodingHandler:
1504c2c66affSColin Finck * @handler: the xmlCharEncodingHandlerPtr handler block
1505c2c66affSColin Finck *
1506c2c66affSColin Finck * Register the char encoding handler, surprising, isn't it ?
1507c2c66affSColin Finck */
1508c2c66affSColin Finck void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1509c2c66affSColin Finck xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1510c2c66affSColin Finck if (handlers == NULL) xmlInitCharEncodingHandlers();
1511c2c66affSColin Finck if ((handler == NULL) || (handlers == NULL)) {
1512c2c66affSColin Finck xmlEncodingErr(XML_I18N_NO_HANDLER,
1513c2c66affSColin Finck "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
151440ee59d6SThomas Faber goto free_handler;
1515c2c66affSColin Finck }
1516c2c66affSColin Finck
1517c2c66affSColin Finck if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1518c2c66affSColin Finck xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1519c2c66affSColin Finck "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520c2c66affSColin Finck "MAX_ENCODING_HANDLERS");
152140ee59d6SThomas Faber goto free_handler;
1522c2c66affSColin Finck }
1523c2c66affSColin Finck handlers[nbCharEncodingHandler++] = handler;
152440ee59d6SThomas Faber return;
152540ee59d6SThomas Faber
152640ee59d6SThomas Faber free_handler:
152740ee59d6SThomas Faber if (handler != NULL) {
152840ee59d6SThomas Faber if (handler->name != NULL) {
152940ee59d6SThomas Faber xmlFree(handler->name);
153040ee59d6SThomas Faber }
153140ee59d6SThomas Faber xmlFree(handler);
153240ee59d6SThomas Faber }
1533c2c66affSColin Finck }
1534c2c66affSColin Finck
1535c2c66affSColin Finck /**
1536c2c66affSColin Finck * xmlGetCharEncodingHandler:
1537c2c66affSColin Finck * @enc: an xmlCharEncoding value.
1538c2c66affSColin Finck *
1539c2c66affSColin Finck * Search in the registered set the handler able to read/write that encoding.
1540c2c66affSColin Finck *
1541c2c66affSColin Finck * Returns the handler or NULL if not found
1542c2c66affSColin Finck */
1543c2c66affSColin Finck xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1544c2c66affSColin Finck xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1545c2c66affSColin Finck xmlCharEncodingHandlerPtr handler;
1546c2c66affSColin Finck
1547c2c66affSColin Finck if (handlers == NULL) xmlInitCharEncodingHandlers();
1548c2c66affSColin Finck switch (enc) {
1549c2c66affSColin Finck case XML_CHAR_ENCODING_ERROR:
1550c2c66affSColin Finck return(NULL);
1551c2c66affSColin Finck case XML_CHAR_ENCODING_NONE:
1552c2c66affSColin Finck return(NULL);
1553c2c66affSColin Finck case XML_CHAR_ENCODING_UTF8:
1554c2c66affSColin Finck return(NULL);
1555c2c66affSColin Finck case XML_CHAR_ENCODING_UTF16LE:
1556c2c66affSColin Finck return(xmlUTF16LEHandler);
1557c2c66affSColin Finck case XML_CHAR_ENCODING_UTF16BE:
1558c2c66affSColin Finck return(xmlUTF16BEHandler);
1559c2c66affSColin Finck case XML_CHAR_ENCODING_EBCDIC:
1560c2c66affSColin Finck handler = xmlFindCharEncodingHandler("EBCDIC");
1561c2c66affSColin Finck if (handler != NULL) return(handler);
1562c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ebcdic");
1563c2c66affSColin Finck if (handler != NULL) return(handler);
1564c2c66affSColin Finck handler = xmlFindCharEncodingHandler("EBCDIC-US");
1565c2c66affSColin Finck if (handler != NULL) return(handler);
1566c2c66affSColin Finck handler = xmlFindCharEncodingHandler("IBM-037");
1567c2c66affSColin Finck if (handler != NULL) return(handler);
1568c2c66affSColin Finck break;
1569c2c66affSColin Finck case XML_CHAR_ENCODING_UCS4BE:
1570c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571c2c66affSColin Finck if (handler != NULL) return(handler);
1572c2c66affSColin Finck handler = xmlFindCharEncodingHandler("UCS-4");
1573c2c66affSColin Finck if (handler != NULL) return(handler);
1574c2c66affSColin Finck handler = xmlFindCharEncodingHandler("UCS4");
1575c2c66affSColin Finck if (handler != NULL) return(handler);
1576c2c66affSColin Finck break;
1577c2c66affSColin Finck case XML_CHAR_ENCODING_UCS4LE:
1578c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579c2c66affSColin Finck if (handler != NULL) return(handler);
1580c2c66affSColin Finck handler = xmlFindCharEncodingHandler("UCS-4");
1581c2c66affSColin Finck if (handler != NULL) return(handler);
1582c2c66affSColin Finck handler = xmlFindCharEncodingHandler("UCS4");
1583c2c66affSColin Finck if (handler != NULL) return(handler);
1584c2c66affSColin Finck break;
1585c2c66affSColin Finck case XML_CHAR_ENCODING_UCS4_2143:
1586c2c66affSColin Finck break;
1587c2c66affSColin Finck case XML_CHAR_ENCODING_UCS4_3412:
1588c2c66affSColin Finck break;
1589c2c66affSColin Finck case XML_CHAR_ENCODING_UCS2:
1590c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591c2c66affSColin Finck if (handler != NULL) return(handler);
1592c2c66affSColin Finck handler = xmlFindCharEncodingHandler("UCS-2");
1593c2c66affSColin Finck if (handler != NULL) return(handler);
1594c2c66affSColin Finck handler = xmlFindCharEncodingHandler("UCS2");
1595c2c66affSColin Finck if (handler != NULL) return(handler);
1596c2c66affSColin Finck break;
1597c2c66affSColin Finck
1598c2c66affSColin Finck /*
1599c2c66affSColin Finck * We used to keep ISO Latin encodings native in the
1600c2c66affSColin Finck * generated data. This led to so many problems that
1601c2c66affSColin Finck * this has been removed. One can still change this
1602c2c66affSColin Finck * back by registering no-ops encoders for those
1603c2c66affSColin Finck */
1604c2c66affSColin Finck case XML_CHAR_ENCODING_8859_1:
1605c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-1");
1606c2c66affSColin Finck if (handler != NULL) return(handler);
1607c2c66affSColin Finck break;
1608c2c66affSColin Finck case XML_CHAR_ENCODING_8859_2:
1609c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-2");
1610c2c66affSColin Finck if (handler != NULL) return(handler);
1611c2c66affSColin Finck break;
1612c2c66affSColin Finck case XML_CHAR_ENCODING_8859_3:
1613c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-3");
1614c2c66affSColin Finck if (handler != NULL) return(handler);
1615c2c66affSColin Finck break;
1616c2c66affSColin Finck case XML_CHAR_ENCODING_8859_4:
1617c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-4");
1618c2c66affSColin Finck if (handler != NULL) return(handler);
1619c2c66affSColin Finck break;
1620c2c66affSColin Finck case XML_CHAR_ENCODING_8859_5:
1621c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-5");
1622c2c66affSColin Finck if (handler != NULL) return(handler);
1623c2c66affSColin Finck break;
1624c2c66affSColin Finck case XML_CHAR_ENCODING_8859_6:
1625c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-6");
1626c2c66affSColin Finck if (handler != NULL) return(handler);
1627c2c66affSColin Finck break;
1628c2c66affSColin Finck case XML_CHAR_ENCODING_8859_7:
1629c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-7");
1630c2c66affSColin Finck if (handler != NULL) return(handler);
1631c2c66affSColin Finck break;
1632c2c66affSColin Finck case XML_CHAR_ENCODING_8859_8:
1633c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-8");
1634c2c66affSColin Finck if (handler != NULL) return(handler);
1635c2c66affSColin Finck break;
1636c2c66affSColin Finck case XML_CHAR_ENCODING_8859_9:
1637c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-8859-9");
1638c2c66affSColin Finck if (handler != NULL) return(handler);
1639c2c66affSColin Finck break;
1640c2c66affSColin Finck
1641c2c66affSColin Finck
1642c2c66affSColin Finck case XML_CHAR_ENCODING_2022_JP:
1643c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1644c2c66affSColin Finck if (handler != NULL) return(handler);
1645c2c66affSColin Finck break;
1646c2c66affSColin Finck case XML_CHAR_ENCODING_SHIFT_JIS:
1647c2c66affSColin Finck handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1648c2c66affSColin Finck if (handler != NULL) return(handler);
1649c2c66affSColin Finck handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1650c2c66affSColin Finck if (handler != NULL) return(handler);
1651c2c66affSColin Finck handler = xmlFindCharEncodingHandler("Shift_JIS");
1652c2c66affSColin Finck if (handler != NULL) return(handler);
1653c2c66affSColin Finck break;
1654c2c66affSColin Finck case XML_CHAR_ENCODING_EUC_JP:
1655c2c66affSColin Finck handler = xmlFindCharEncodingHandler("EUC-JP");
1656c2c66affSColin Finck if (handler != NULL) return(handler);
1657c2c66affSColin Finck break;
1658c2c66affSColin Finck default:
1659c2c66affSColin Finck break;
1660c2c66affSColin Finck }
1661c2c66affSColin Finck
1662c2c66affSColin Finck #ifdef DEBUG_ENCODING
1663c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
1664c2c66affSColin Finck "No handler found for encoding %d\n", enc);
1665c2c66affSColin Finck #endif
1666c2c66affSColin Finck return(NULL);
1667c2c66affSColin Finck }
1668c2c66affSColin Finck
1669c2c66affSColin Finck /**
1670c2c66affSColin Finck * xmlFindCharEncodingHandler:
1671c2c66affSColin Finck * @name: a string describing the char encoding.
1672c2c66affSColin Finck *
1673c2c66affSColin Finck * Search in the registered set the handler able to read/write that encoding.
1674c2c66affSColin Finck *
1675c2c66affSColin Finck * Returns the handler or NULL if not found
1676c2c66affSColin Finck */
1677c2c66affSColin Finck xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1678c2c66affSColin Finck xmlFindCharEncodingHandler(const char *name) {
1679c2c66affSColin Finck const char *nalias;
1680c2c66affSColin Finck const char *norig;
1681c2c66affSColin Finck xmlCharEncoding alias;
1682c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
1683c2c66affSColin Finck xmlCharEncodingHandlerPtr enc;
1684c2c66affSColin Finck iconv_t icv_in, icv_out;
1685c2c66affSColin Finck #endif /* LIBXML_ICONV_ENABLED */
1686c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
1687c2c66affSColin Finck xmlCharEncodingHandlerPtr encu;
1688c2c66affSColin Finck uconv_t *ucv_in, *ucv_out;
1689c2c66affSColin Finck #endif /* LIBXML_ICU_ENABLED */
1690c2c66affSColin Finck char upper[100];
1691c2c66affSColin Finck int i;
1692c2c66affSColin Finck
1693c2c66affSColin Finck if (handlers == NULL) xmlInitCharEncodingHandlers();
1694c2c66affSColin Finck if (name == NULL) return(xmlDefaultCharEncodingHandler);
1695c2c66affSColin Finck if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1696c2c66affSColin Finck
1697c2c66affSColin Finck /*
1698c2c66affSColin Finck * Do the alias resolution
1699c2c66affSColin Finck */
1700c2c66affSColin Finck norig = name;
1701c2c66affSColin Finck nalias = xmlGetEncodingAlias(name);
1702c2c66affSColin Finck if (nalias != NULL)
1703c2c66affSColin Finck name = nalias;
1704c2c66affSColin Finck
1705c2c66affSColin Finck /*
1706c2c66affSColin Finck * Check first for directly registered encoding names
1707c2c66affSColin Finck */
1708c2c66affSColin Finck for (i = 0;i < 99;i++) {
1709c2c66affSColin Finck upper[i] = toupper(name[i]);
1710c2c66affSColin Finck if (upper[i] == 0) break;
1711c2c66affSColin Finck }
1712c2c66affSColin Finck upper[i] = 0;
1713c2c66affSColin Finck
1714c2c66affSColin Finck if (handlers != NULL) {
1715c2c66affSColin Finck for (i = 0;i < nbCharEncodingHandler; i++) {
1716c2c66affSColin Finck if (!strcmp(upper, handlers[i]->name)) {
1717c2c66affSColin Finck #ifdef DEBUG_ENCODING
1718c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
1719c2c66affSColin Finck "Found registered handler for encoding %s\n", name);
1720c2c66affSColin Finck #endif
1721c2c66affSColin Finck return(handlers[i]);
1722c2c66affSColin Finck }
1723c2c66affSColin Finck }
1724c2c66affSColin Finck }
1725c2c66affSColin Finck
1726c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
1727c2c66affSColin Finck /* check whether iconv can handle this */
1728c2c66affSColin Finck icv_in = iconv_open("UTF-8", name);
1729c2c66affSColin Finck icv_out = iconv_open(name, "UTF-8");
1730c2c66affSColin Finck if (icv_in == (iconv_t) -1) {
1731c2c66affSColin Finck icv_in = iconv_open("UTF-8", upper);
1732c2c66affSColin Finck }
1733c2c66affSColin Finck if (icv_out == (iconv_t) -1) {
1734c2c66affSColin Finck icv_out = iconv_open(upper, "UTF-8");
1735c2c66affSColin Finck }
1736c2c66affSColin Finck if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1737c2c66affSColin Finck enc = (xmlCharEncodingHandlerPtr)
1738c2c66affSColin Finck xmlMalloc(sizeof(xmlCharEncodingHandler));
1739c2c66affSColin Finck if (enc == NULL) {
1740c2c66affSColin Finck iconv_close(icv_in);
1741c2c66affSColin Finck iconv_close(icv_out);
1742c2c66affSColin Finck return(NULL);
1743c2c66affSColin Finck }
1744c2c66affSColin Finck memset(enc, 0, sizeof(xmlCharEncodingHandler));
1745c2c66affSColin Finck enc->name = xmlMemStrdup(name);
1746c2c66affSColin Finck enc->input = NULL;
1747c2c66affSColin Finck enc->output = NULL;
1748c2c66affSColin Finck enc->iconv_in = icv_in;
1749c2c66affSColin Finck enc->iconv_out = icv_out;
1750c2c66affSColin Finck #ifdef DEBUG_ENCODING
1751c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
1752c2c66affSColin Finck "Found iconv handler for encoding %s\n", name);
1753c2c66affSColin Finck #endif
1754c2c66affSColin Finck return enc;
1755c2c66affSColin Finck } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1756c2c66affSColin Finck xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1757c2c66affSColin Finck "iconv : problems with filters for '%s'\n", name);
17588940614aSThomas Faber if (icv_in != (iconv_t) -1)
17598940614aSThomas Faber iconv_close(icv_in);
17608940614aSThomas Faber else
17618940614aSThomas Faber iconv_close(icv_out);
1762c2c66affSColin Finck }
1763c2c66affSColin Finck #endif /* LIBXML_ICONV_ENABLED */
1764c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
1765c2c66affSColin Finck /* check whether icu can handle this */
1766c2c66affSColin Finck ucv_in = openIcuConverter(name, 1);
1767c2c66affSColin Finck ucv_out = openIcuConverter(name, 0);
1768c2c66affSColin Finck if (ucv_in != NULL && ucv_out != NULL) {
1769c2c66affSColin Finck encu = (xmlCharEncodingHandlerPtr)
1770c2c66affSColin Finck xmlMalloc(sizeof(xmlCharEncodingHandler));
1771c2c66affSColin Finck if (encu == NULL) {
1772c2c66affSColin Finck closeIcuConverter(ucv_in);
1773c2c66affSColin Finck closeIcuConverter(ucv_out);
1774c2c66affSColin Finck return(NULL);
1775c2c66affSColin Finck }
1776c2c66affSColin Finck memset(encu, 0, sizeof(xmlCharEncodingHandler));
1777c2c66affSColin Finck encu->name = xmlMemStrdup(name);
1778c2c66affSColin Finck encu->input = NULL;
1779c2c66affSColin Finck encu->output = NULL;
1780c2c66affSColin Finck encu->uconv_in = ucv_in;
1781c2c66affSColin Finck encu->uconv_out = ucv_out;
1782c2c66affSColin Finck #ifdef DEBUG_ENCODING
1783c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
1784c2c66affSColin Finck "Found ICU converter handler for encoding %s\n", name);
1785c2c66affSColin Finck #endif
1786c2c66affSColin Finck return encu;
1787c2c66affSColin Finck } else if (ucv_in != NULL || ucv_out != NULL) {
1788c2c66affSColin Finck closeIcuConverter(ucv_in);
1789c2c66affSColin Finck closeIcuConverter(ucv_out);
1790c2c66affSColin Finck xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1791c2c66affSColin Finck "ICU converter : problems with filters for '%s'\n", name);
1792c2c66affSColin Finck }
1793c2c66affSColin Finck #endif /* LIBXML_ICU_ENABLED */
1794c2c66affSColin Finck
1795c2c66affSColin Finck #ifdef DEBUG_ENCODING
1796c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
1797c2c66affSColin Finck "No handler found for encoding %s\n", name);
1798c2c66affSColin Finck #endif
1799c2c66affSColin Finck
1800c2c66affSColin Finck /*
1801c2c66affSColin Finck * Fallback using the canonical names
1802c2c66affSColin Finck */
1803c2c66affSColin Finck alias = xmlParseCharEncoding(norig);
1804c2c66affSColin Finck if (alias != XML_CHAR_ENCODING_ERROR) {
1805c2c66affSColin Finck const char* canon;
1806c2c66affSColin Finck canon = xmlGetCharEncodingName(alias);
1807c2c66affSColin Finck if ((canon != NULL) && (strcmp(name, canon))) {
1808c2c66affSColin Finck return(xmlFindCharEncodingHandler(canon));
1809c2c66affSColin Finck }
1810c2c66affSColin Finck }
1811c2c66affSColin Finck
1812c2c66affSColin Finck /* If "none of the above", give up */
1813c2c66affSColin Finck return(NULL);
1814c2c66affSColin Finck }
1815c2c66affSColin Finck
1816c2c66affSColin Finck /************************************************************************
1817c2c66affSColin Finck * *
1818c2c66affSColin Finck * ICONV based generic conversion functions *
1819c2c66affSColin Finck * *
1820c2c66affSColin Finck ************************************************************************/
1821c2c66affSColin Finck
1822c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs a single iconv() call over the whole input and maps its outcome
 * onto the return codes used by the encoding layer.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if any pointer argument is NULL), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * After the conversion has run, @inlen holds the number of octets
 * consumed and @outlen the number of octets produced. When a pointer
 * argument is NULL, @outlen (if not NULL itself) is set to 0.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);

    /* Turn the remaining byte counts into consumed/produced counts. */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != (size_t) -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL and any other error are reported alike. */
    return -3;
}
1883c2c66affSColin Finck #endif /* LIBXML_ICONV_ENABLED */
1884c2c66affSColin Finck
1885c2c66affSColin Finck /************************************************************************
1886c2c66affSColin Finck * *
1887c2c66affSColin Finck * ICU based generic conversion functions *
1888c2c66affSColin Finck * *
1889c2c66affSColin Finck ************************************************************************/
1890c2c66affSColin Finck
1891c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
1892c2c66affSColin Finck /**
1893c2c66affSColin Finck * xmlUconvWrapper:
1894c2c66affSColin Finck * @cd: ICU uconverter data structure
1895c2c66affSColin Finck * @toUnicode : non-zero if toUnicode. 0 otherwise.
1896c2c66affSColin Finck * @out: a pointer to an array of bytes to store the result
1897c2c66affSColin Finck * @outlen: the length of @out
189840ee59d6SThomas Faber * @in: a pointer to an array of input bytes
1899c2c66affSColin Finck * @inlen: the length of @in
19005bb277a5SThomas Faber * @flush: if true, indicates end of input
1901c2c66affSColin Finck *
1902c2c66affSColin Finck * Returns 0 if success, or
1903c2c66affSColin Finck * -1 by lack of space, or
1904c2c66affSColin Finck * -2 if the transcoding fails (for *in is not valid utf8 string or
1905c2c66affSColin Finck * the result of transformation can't fit into the encoding we want), or
1906c2c66affSColin Finck * -3 if there the last byte can't form a single output char.
1907c2c66affSColin Finck *
1908c2c66affSColin Finck * The value of @inlen after return is the number of octets consumed
1909c2c66affSColin Finck * as the return value is positive, else unpredictable.
191040ee59d6SThomas Faber * The value of @outlen after return is the number of octets produced.
1911c2c66affSColin Finck */
1912c2c66affSColin Finck static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1913c2c66affSColin Finck xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
19145bb277a5SThomas Faber const unsigned char *in, int *inlen, int flush) {
1915c2c66affSColin Finck const char *ucv_in = (const char *) in;
1916c2c66affSColin Finck char *ucv_out = (char *) out;
1917c2c66affSColin Finck UErrorCode err = U_ZERO_ERROR;
1918c2c66affSColin Finck
1919c2c66affSColin Finck if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1920c2c66affSColin Finck if (outlen != NULL) *outlen = 0;
1921c2c66affSColin Finck return(-1);
1922c2c66affSColin Finck }
1923c2c66affSColin Finck
1924c2c66affSColin Finck if (toUnicode) {
1925c2c66affSColin Finck /* encoding => UTF-16 => UTF-8 */
1926c2c66affSColin Finck ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
19275bb277a5SThomas Faber &ucv_in, ucv_in + *inlen, cd->pivot_buf,
19285bb277a5SThomas Faber &cd->pivot_source, &cd->pivot_target,
19295bb277a5SThomas Faber cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1930c2c66affSColin Finck } else {
1931c2c66affSColin Finck /* UTF-8 => UTF-16 => encoding */
1932c2c66affSColin Finck ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
19335bb277a5SThomas Faber &ucv_in, ucv_in + *inlen, cd->pivot_buf,
19345bb277a5SThomas Faber &cd->pivot_source, &cd->pivot_target,
19355bb277a5SThomas Faber cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1936c2c66affSColin Finck }
1937c2c66affSColin Finck *inlen = ucv_in - (const char*) in;
1938c2c66affSColin Finck *outlen = ucv_out - (char *) out;
19395bb277a5SThomas Faber if (U_SUCCESS(err)) {
19405bb277a5SThomas Faber /* reset pivot buf if this is the last call for input (flush==TRUE) */
19415bb277a5SThomas Faber if (flush)
19425bb277a5SThomas Faber cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1943c2c66affSColin Finck return 0;
19445bb277a5SThomas Faber }
1945c2c66affSColin Finck if (err == U_BUFFER_OVERFLOW_ERROR)
1946c2c66affSColin Finck return -1;
1947c2c66affSColin Finck if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1948c2c66affSColin Finck return -2;
1949c2c66affSColin Finck return -3;
1950c2c66affSColin Finck }
1951c2c66affSColin Finck #endif /* LIBXML_ICU_ENABLED */
1952c2c66affSColin Finck
1953c2c66affSColin Finck /************************************************************************
1954c2c66affSColin Finck * *
1955c2c66affSColin Finck * The real API used by libxml for on-the-fly conversion *
1956c2c66affSColin Finck * *
1957c2c66affSColin Finck ************************************************************************/
1958c2c66affSColin Finck
195940ee59d6SThomas Faber /**
196040ee59d6SThomas Faber * xmlEncInputChunk:
196140ee59d6SThomas Faber * @handler: encoding handler
196240ee59d6SThomas Faber * @out: a pointer to an array of bytes to store the result
196340ee59d6SThomas Faber * @outlen: the length of @out
196440ee59d6SThomas Faber * @in: a pointer to an array of input bytes
196540ee59d6SThomas Faber * @inlen: the length of @in
196640ee59d6SThomas Faber * @flush: flush (ICU-related)
196740ee59d6SThomas Faber *
196840ee59d6SThomas Faber * Returns 0 if success, or
196940ee59d6SThomas Faber * -1 by lack of space, or
197040ee59d6SThomas Faber * -2 if the transcoding fails (for *in is not valid utf8 string or
197140ee59d6SThomas Faber * the result of transformation can't fit into the encoding we want), or
197240ee59d6SThomas Faber * -3 if there the last byte can't form a single output char.
197340ee59d6SThomas Faber *
197440ee59d6SThomas Faber * The value of @inlen after return is the number of octets consumed
197540ee59d6SThomas Faber * as the return value is 0, else unpredictable.
197640ee59d6SThomas Faber * The value of @outlen after return is the number of octets produced.
197740ee59d6SThomas Faber */
1978fc82f8e2SThomas Faber static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1979fc82f8e2SThomas Faber xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
19805bb277a5SThomas Faber int *outlen, const unsigned char *in, int *inlen, int flush) {
1981fc82f8e2SThomas Faber int ret;
19825bb277a5SThomas Faber (void)flush;
1983fc82f8e2SThomas Faber
1984fc82f8e2SThomas Faber if (handler->input != NULL) {
1985fc82f8e2SThomas Faber ret = handler->input(out, outlen, in, inlen);
198640ee59d6SThomas Faber if (ret > 0)
198740ee59d6SThomas Faber ret = 0;
1988fc82f8e2SThomas Faber }
1989fc82f8e2SThomas Faber #ifdef LIBXML_ICONV_ENABLED
1990fc82f8e2SThomas Faber else if (handler->iconv_in != NULL) {
1991fc82f8e2SThomas Faber ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1992fc82f8e2SThomas Faber }
1993fc82f8e2SThomas Faber #endif /* LIBXML_ICONV_ENABLED */
1994fc82f8e2SThomas Faber #ifdef LIBXML_ICU_ENABLED
1995fc82f8e2SThomas Faber else if (handler->uconv_in != NULL) {
19965bb277a5SThomas Faber ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
19975bb277a5SThomas Faber flush);
1998fc82f8e2SThomas Faber }
1999fc82f8e2SThomas Faber #endif /* LIBXML_ICU_ENABLED */
2000fc82f8e2SThomas Faber else {
2001fc82f8e2SThomas Faber *outlen = 0;
2002fc82f8e2SThomas Faber *inlen = 0;
2003fc82f8e2SThomas Faber ret = -2;
2004fc82f8e2SThomas Faber }
2005fc82f8e2SThomas Faber
2006fc82f8e2SThomas Faber return(ret);
2007fc82f8e2SThomas Faber }
2008fc82f8e2SThomas Faber
200940ee59d6SThomas Faber /**
201040ee59d6SThomas Faber * xmlEncOutputChunk:
201140ee59d6SThomas Faber * @handler: encoding handler
201240ee59d6SThomas Faber * @out: a pointer to an array of bytes to store the result
201340ee59d6SThomas Faber * @outlen: the length of @out
201440ee59d6SThomas Faber * @in: a pointer to an array of input bytes
201540ee59d6SThomas Faber * @inlen: the length of @in
201640ee59d6SThomas Faber *
201740ee59d6SThomas Faber * Returns 0 if success, or
201840ee59d6SThomas Faber * -1 by lack of space, or
201940ee59d6SThomas Faber * -2 if the transcoding fails (for *in is not valid utf8 string or
202040ee59d6SThomas Faber * the result of transformation can't fit into the encoding we want), or
202140ee59d6SThomas Faber * -3 if there the last byte can't form a single output char.
202240ee59d6SThomas Faber * -4 if no output function was found.
202340ee59d6SThomas Faber *
202440ee59d6SThomas Faber * The value of @inlen after return is the number of octets consumed
202540ee59d6SThomas Faber * as the return value is 0, else unpredictable.
202640ee59d6SThomas Faber * The value of @outlen after return is the number of octets produced.
202740ee59d6SThomas Faber */
2028fc82f8e2SThomas Faber static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2029fc82f8e2SThomas Faber xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2030fc82f8e2SThomas Faber int *outlen, const unsigned char *in, int *inlen) {
2031fc82f8e2SThomas Faber int ret;
2032fc82f8e2SThomas Faber
2033fc82f8e2SThomas Faber if (handler->output != NULL) {
2034fc82f8e2SThomas Faber ret = handler->output(out, outlen, in, inlen);
203540ee59d6SThomas Faber if (ret > 0)
203640ee59d6SThomas Faber ret = 0;
2037fc82f8e2SThomas Faber }
2038fc82f8e2SThomas Faber #ifdef LIBXML_ICONV_ENABLED
2039fc82f8e2SThomas Faber else if (handler->iconv_out != NULL) {
2040fc82f8e2SThomas Faber ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2041fc82f8e2SThomas Faber }
2042fc82f8e2SThomas Faber #endif /* LIBXML_ICONV_ENABLED */
2043fc82f8e2SThomas Faber #ifdef LIBXML_ICU_ENABLED
2044fc82f8e2SThomas Faber else if (handler->uconv_out != NULL) {
20455bb277a5SThomas Faber ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
204640ee59d6SThomas Faber 1);
2047fc82f8e2SThomas Faber }
2048fc82f8e2SThomas Faber #endif /* LIBXML_ICU_ENABLED */
2049fc82f8e2SThomas Faber else {
2050fc82f8e2SThomas Faber *outlen = 0;
2051fc82f8e2SThomas Faber *inlen = 0;
2052fc82f8e2SThomas Faber ret = -4;
2053fc82f8e2SThomas Faber }
2054fc82f8e2SThomas Faber
2055fc82f8e2SThomas Faber return(ret);
2056fc82f8e2SThomas Faber }
2057fc82f8e2SThomas Faber
2058c2c66affSColin Finck /**
2059c2c66affSColin Finck * xmlCharEncFirstLineInt:
2060f22fa382SThomas Faber * @handler: char encoding transformation data structure
2061c2c66affSColin Finck * @out: an xmlBuffer for the output.
2062c2c66affSColin Finck * @in: an xmlBuffer for the input
2063c2c66affSColin Finck * @len: number of bytes to convert for the first line, or -1
2064c2c66affSColin Finck *
2065c2c66affSColin Finck * Front-end for the encoding handler input function, but handle only
2066c2c66affSColin Finck * the very first line, i.e. limit itself to 45 chars.
2067c2c66affSColin Finck *
2068c2c66affSColin Finck * Returns the number of byte written if success, or
2069c2c66affSColin Finck * -1 general error
2070c2c66affSColin Finck * -2 if the transcoding fails (for *in is not valid utf8 string or
2071c2c66affSColin Finck * the result of transformation can't fit into the encoding we want), or
2072c2c66affSColin Finck */
2073c2c66affSColin Finck int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2074c2c66affSColin Finck xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2075c2c66affSColin Finck xmlBufferPtr in, int len) {
2076fc82f8e2SThomas Faber int ret;
2077c2c66affSColin Finck int written;
2078c2c66affSColin Finck int toconv;
2079c2c66affSColin Finck
2080c2c66affSColin Finck if (handler == NULL) return(-1);
2081c2c66affSColin Finck if (out == NULL) return(-1);
2082c2c66affSColin Finck if (in == NULL) return(-1);
2083c2c66affSColin Finck
2084c2c66affSColin Finck /* calculate space available */
2085c2c66affSColin Finck written = out->size - out->use - 1; /* count '\0' */
2086c2c66affSColin Finck toconv = in->use;
2087c2c66affSColin Finck /*
2088c2c66affSColin Finck * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089c2c66affSColin Finck * 45 chars should be sufficient to reach the end of the encoding
2090c2c66affSColin Finck * declaration without going too far inside the document content.
2091c2c66affSColin Finck * on UTF-16 this means 90bytes, on UCS4 this means 180
2092c2c66affSColin Finck * The actual value depending on guessed encoding is passed as @len
2093c2c66affSColin Finck * if provided
2094c2c66affSColin Finck */
2095c2c66affSColin Finck if (len >= 0) {
2096c2c66affSColin Finck if (toconv > len)
2097c2c66affSColin Finck toconv = len;
2098c2c66affSColin Finck } else {
2099c2c66affSColin Finck if (toconv > 180)
2100c2c66affSColin Finck toconv = 180;
2101c2c66affSColin Finck }
2102c2c66affSColin Finck if (toconv * 2 >= written) {
2103c2c66affSColin Finck xmlBufferGrow(out, toconv * 2);
2104c2c66affSColin Finck written = out->size - out->use - 1;
2105c2c66affSColin Finck }
2106c2c66affSColin Finck
2107fc82f8e2SThomas Faber ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
21085bb277a5SThomas Faber in->content, &toconv, 0);
2109c2c66affSColin Finck xmlBufferShrink(in, toconv);
2110c2c66affSColin Finck out->use += written;
2111c2c66affSColin Finck out->content[out->use] = 0;
2112c2c66affSColin Finck if (ret == -1) ret = -3;
2113fc82f8e2SThomas Faber
2114c2c66affSColin Finck #ifdef DEBUG_ENCODING
2115c2c66affSColin Finck switch (ret) {
2116c2c66affSColin Finck case 0:
2117c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2118c2c66affSColin Finck "converted %d bytes to %d bytes of input\n",
2119c2c66affSColin Finck toconv, written);
2120c2c66affSColin Finck break;
2121c2c66affSColin Finck case -1:
2122c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2123c2c66affSColin Finck toconv, written, in->use);
2124c2c66affSColin Finck break;
2125c2c66affSColin Finck case -2:
2126c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2127c2c66affSColin Finck "input conversion failed due to input error\n");
2128c2c66affSColin Finck break;
2129c2c66affSColin Finck case -3:
2130c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2131c2c66affSColin Finck toconv, written, in->use);
2132c2c66affSColin Finck break;
2133c2c66affSColin Finck default:
2134c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2135c2c66affSColin Finck }
2136c2c66affSColin Finck #endif /* DEBUG_ENCODING */
2137c2c66affSColin Finck /*
2138c2c66affSColin Finck * Ignore when input buffer is not on a boundary
2139c2c66affSColin Finck */
2140c2c66affSColin Finck if (ret == -3) ret = 0;
2141c2c66affSColin Finck if (ret == -1) ret = 0;
214240ee59d6SThomas Faber return(written ? written : ret);
2143c2c66affSColin Finck }
2144c2c66affSColin Finck
2145c2c66affSColin Finck /**
2146c2c66affSColin Finck * xmlCharEncFirstLine:
2147f22fa382SThomas Faber * @handler: char encoding transformation data structure
2148c2c66affSColin Finck * @out: an xmlBuffer for the output.
2149c2c66affSColin Finck * @in: an xmlBuffer for the input
2150c2c66affSColin Finck *
2151c2c66affSColin Finck * Front-end for the encoding handler input function, but handle only
2152c2c66affSColin Finck * the very first line, i.e. limit itself to 45 chars.
2153c2c66affSColin Finck *
2154c2c66affSColin Finck * Returns the number of byte written if success, or
2155c2c66affSColin Finck * -1 general error
2156c2c66affSColin Finck * -2 if the transcoding fails (for *in is not valid utf8 string or
2157c2c66affSColin Finck * the result of transformation can't fit into the encoding we want), or
2158c2c66affSColin Finck */
2159c2c66affSColin Finck int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2160c2c66affSColin Finck xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2161c2c66affSColin Finck xmlBufferPtr in) {
2162c2c66affSColin Finck return(xmlCharEncFirstLineInt(handler, out, in, -1));
2163c2c66affSColin Finck }
2164c2c66affSColin Finck
2165c2c66affSColin Finck /**
2166c2c66affSColin Finck * xmlCharEncFirstLineInput:
2167c2c66affSColin Finck * @input: a parser input buffer
2168c2c66affSColin Finck * @len: number of bytes to convert for the first line, or -1
2169c2c66affSColin Finck *
2170c2c66affSColin Finck * Front-end for the encoding handler input function, but handle only
2171c2c66affSColin Finck * the very first line. Point is that this is based on autodetection
2172c2c66affSColin Finck * of the encoding and once that first line is converted we may find
2173c2c66affSColin Finck * out that a different decoder is needed to process the input.
2174c2c66affSColin Finck *
2175c2c66affSColin Finck * Returns the number of byte written if success, or
2176c2c66affSColin Finck * -1 general error
2177c2c66affSColin Finck * -2 if the transcoding fails (for *in is not valid utf8 string or
2178c2c66affSColin Finck * the result of transformation can't fit into the encoding we want), or
2179c2c66affSColin Finck */
2180c2c66affSColin Finck int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2181c2c66affSColin Finck xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2182c2c66affSColin Finck {
2183fc82f8e2SThomas Faber int ret;
2184c2c66affSColin Finck size_t written;
2185c2c66affSColin Finck size_t toconv;
2186c2c66affSColin Finck int c_in;
2187c2c66affSColin Finck int c_out;
2188c2c66affSColin Finck xmlBufPtr in;
2189c2c66affSColin Finck xmlBufPtr out;
2190c2c66affSColin Finck
2191c2c66affSColin Finck if ((input == NULL) || (input->encoder == NULL) ||
2192c2c66affSColin Finck (input->buffer == NULL) || (input->raw == NULL))
2193c2c66affSColin Finck return (-1);
2194c2c66affSColin Finck out = input->buffer;
2195c2c66affSColin Finck in = input->raw;
2196c2c66affSColin Finck
2197c2c66affSColin Finck toconv = xmlBufUse(in);
2198c2c66affSColin Finck if (toconv == 0)
2199c2c66affSColin Finck return (0);
2200*911153daSThomas Faber written = xmlBufAvail(out);
2201c2c66affSColin Finck /*
2202c2c66affSColin Finck * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2203c2c66affSColin Finck * 45 chars should be sufficient to reach the end of the encoding
2204c2c66affSColin Finck * declaration without going too far inside the document content.
2205c2c66affSColin Finck * on UTF-16 this means 90bytes, on UCS4 this means 180
2206c2c66affSColin Finck * The actual value depending on guessed encoding is passed as @len
2207c2c66affSColin Finck * if provided
2208c2c66affSColin Finck */
2209c2c66affSColin Finck if (len >= 0) {
2210c2c66affSColin Finck if (toconv > (unsigned int) len)
2211c2c66affSColin Finck toconv = len;
2212c2c66affSColin Finck } else {
2213c2c66affSColin Finck if (toconv > 180)
2214c2c66affSColin Finck toconv = 180;
2215c2c66affSColin Finck }
2216c2c66affSColin Finck if (toconv * 2 >= written) {
2217c2c66affSColin Finck xmlBufGrow(out, toconv * 2);
2218*911153daSThomas Faber written = xmlBufAvail(out);
2219c2c66affSColin Finck }
2220c2c66affSColin Finck if (written > 360)
2221c2c66affSColin Finck written = 360;
2222c2c66affSColin Finck
2223c2c66affSColin Finck c_in = toconv;
2224c2c66affSColin Finck c_out = written;
2225fc82f8e2SThomas Faber ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
22265bb277a5SThomas Faber xmlBufContent(in), &c_in, 0);
2227c2c66affSColin Finck xmlBufShrink(in, c_in);
2228c2c66affSColin Finck xmlBufAddLen(out, c_out);
2229c2c66affSColin Finck if (ret == -1)
2230c2c66affSColin Finck ret = -3;
2231fc82f8e2SThomas Faber
2232c2c66affSColin Finck switch (ret) {
2233c2c66affSColin Finck case 0:
2234c2c66affSColin Finck #ifdef DEBUG_ENCODING
2235c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2236c2c66affSColin Finck "converted %d bytes to %d bytes of input\n",
2237c2c66affSColin Finck c_in, c_out);
2238c2c66affSColin Finck #endif
2239c2c66affSColin Finck break;
2240c2c66affSColin Finck case -1:
2241c2c66affSColin Finck #ifdef DEBUG_ENCODING
2242c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2243c2c66affSColin Finck "converted %d bytes to %d bytes of input, %d left\n",
2244c2c66affSColin Finck c_in, c_out, (int)xmlBufUse(in));
2245c2c66affSColin Finck #endif
2246c2c66affSColin Finck break;
2247c2c66affSColin Finck case -3:
2248c2c66affSColin Finck #ifdef DEBUG_ENCODING
2249c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2250c2c66affSColin Finck "converted %d bytes to %d bytes of input, %d left\n",
2251c2c66affSColin Finck c_in, c_out, (int)xmlBufUse(in));
2252c2c66affSColin Finck #endif
2253c2c66affSColin Finck break;
2254c2c66affSColin Finck case -2: {
2255c2c66affSColin Finck char buf[50];
2256c2c66affSColin Finck const xmlChar *content = xmlBufContent(in);
2257c2c66affSColin Finck
2258c2c66affSColin Finck snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2259c2c66affSColin Finck content[0], content[1],
2260c2c66affSColin Finck content[2], content[3]);
2261c2c66affSColin Finck buf[49] = 0;
2262c2c66affSColin Finck xmlEncodingErr(XML_I18N_CONV_FAILED,
2263c2c66affSColin Finck "input conversion failed due to input error, bytes %s\n",
2264c2c66affSColin Finck buf);
2265c2c66affSColin Finck }
2266c2c66affSColin Finck }
2267c2c66affSColin Finck /*
2268c2c66affSColin Finck * Ignore when input buffer is not on a boundary
2269c2c66affSColin Finck */
2270c2c66affSColin Finck if (ret == -3) ret = 0;
2271c2c66affSColin Finck if (ret == -1) ret = 0;
227240ee59d6SThomas Faber return(c_out ? c_out : ret);
2273c2c66affSColin Finck }
2274c2c66affSColin Finck
2275c2c66affSColin Finck /**
2276c2c66affSColin Finck * xmlCharEncInput:
2277c2c66affSColin Finck * @input: a parser input buffer
2278c2c66affSColin Finck * @flush: try to flush all the raw buffer
2279c2c66affSColin Finck *
2280c2c66affSColin Finck * Generic front-end for the encoding handler on parser input
2281c2c66affSColin Finck *
2282c2c66affSColin Finck * Returns the number of byte written if success, or
2283c2c66affSColin Finck * -1 general error
2284c2c66affSColin Finck * -2 if the transcoding fails (for *in is not valid utf8 string or
2285c2c66affSColin Finck * the result of transformation can't fit into the encoding we want), or
2286c2c66affSColin Finck */
2287c2c66affSColin Finck int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2288c2c66affSColin Finck xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2289c2c66affSColin Finck {
2290fc82f8e2SThomas Faber int ret;
2291c2c66affSColin Finck size_t written;
2292c2c66affSColin Finck size_t toconv;
2293c2c66affSColin Finck int c_in;
2294c2c66affSColin Finck int c_out;
2295c2c66affSColin Finck xmlBufPtr in;
2296c2c66affSColin Finck xmlBufPtr out;
2297c2c66affSColin Finck
2298c2c66affSColin Finck if ((input == NULL) || (input->encoder == NULL) ||
2299c2c66affSColin Finck (input->buffer == NULL) || (input->raw == NULL))
2300c2c66affSColin Finck return (-1);
2301c2c66affSColin Finck out = input->buffer;
2302c2c66affSColin Finck in = input->raw;
2303c2c66affSColin Finck
2304c2c66affSColin Finck toconv = xmlBufUse(in);
2305c2c66affSColin Finck if (toconv == 0)
2306c2c66affSColin Finck return (0);
2307c2c66affSColin Finck if ((toconv > 64 * 1024) && (flush == 0))
2308c2c66affSColin Finck toconv = 64 * 1024;
2309c2c66affSColin Finck written = xmlBufAvail(out);
2310c2c66affSColin Finck if (toconv * 2 >= written) {
2311c2c66affSColin Finck xmlBufGrow(out, toconv * 2);
2312c2c66affSColin Finck written = xmlBufAvail(out);
2313c2c66affSColin Finck }
2314c2c66affSColin Finck if ((written > 128 * 1024) && (flush == 0))
2315c2c66affSColin Finck written = 128 * 1024;
2316c2c66affSColin Finck
2317c2c66affSColin Finck c_in = toconv;
2318c2c66affSColin Finck c_out = written;
2319fc82f8e2SThomas Faber ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
23205bb277a5SThomas Faber xmlBufContent(in), &c_in, flush);
2321c2c66affSColin Finck xmlBufShrink(in, c_in);
2322c2c66affSColin Finck xmlBufAddLen(out, c_out);
2323c2c66affSColin Finck if (ret == -1)
2324c2c66affSColin Finck ret = -3;
2325fc82f8e2SThomas Faber
2326c2c66affSColin Finck switch (ret) {
2327c2c66affSColin Finck case 0:
2328c2c66affSColin Finck #ifdef DEBUG_ENCODING
2329c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2330c2c66affSColin Finck "converted %d bytes to %d bytes of input\n",
2331c2c66affSColin Finck c_in, c_out);
2332c2c66affSColin Finck #endif
2333c2c66affSColin Finck break;
2334c2c66affSColin Finck case -1:
2335c2c66affSColin Finck #ifdef DEBUG_ENCODING
2336c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2337c2c66affSColin Finck "converted %d bytes to %d bytes of input, %d left\n",
2338c2c66affSColin Finck c_in, c_out, (int)xmlBufUse(in));
2339c2c66affSColin Finck #endif
2340c2c66affSColin Finck break;
2341c2c66affSColin Finck case -3:
2342c2c66affSColin Finck #ifdef DEBUG_ENCODING
2343c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2344c2c66affSColin Finck "converted %d bytes to %d bytes of input, %d left\n",
2345c2c66affSColin Finck c_in, c_out, (int)xmlBufUse(in));
2346c2c66affSColin Finck #endif
2347c2c66affSColin Finck break;
2348c2c66affSColin Finck case -2: {
2349c2c66affSColin Finck char buf[50];
2350c2c66affSColin Finck const xmlChar *content = xmlBufContent(in);
2351c2c66affSColin Finck
2352c2c66affSColin Finck snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353c2c66affSColin Finck content[0], content[1],
2354c2c66affSColin Finck content[2], content[3]);
2355c2c66affSColin Finck buf[49] = 0;
2356c2c66affSColin Finck xmlEncodingErr(XML_I18N_CONV_FAILED,
2357c2c66affSColin Finck "input conversion failed due to input error, bytes %s\n",
2358c2c66affSColin Finck buf);
2359c2c66affSColin Finck }
2360c2c66affSColin Finck }
2361c2c66affSColin Finck /*
2362c2c66affSColin Finck * Ignore when input buffer is not on a boundary
2363c2c66affSColin Finck */
2364c2c66affSColin Finck if (ret == -3)
2365c2c66affSColin Finck ret = 0;
2366c2c66affSColin Finck return (c_out? c_out : ret);
2367c2c66affSColin Finck }
2368c2c66affSColin Finck
2369c2c66affSColin Finck /**
2370c2c66affSColin Finck * xmlCharEncInFunc:
2371c2c66affSColin Finck * @handler: char encoding transformation data structure
2372c2c66affSColin Finck * @out: an xmlBuffer for the output.
2373c2c66affSColin Finck * @in: an xmlBuffer for the input
2374c2c66affSColin Finck *
2375c2c66affSColin Finck * Generic front-end for the encoding handler input function
2376c2c66affSColin Finck *
2377c2c66affSColin Finck * Returns the number of byte written if success, or
2378c2c66affSColin Finck * -1 general error
2379c2c66affSColin Finck * -2 if the transcoding fails (for *in is not valid utf8 string or
2380c2c66affSColin Finck * the result of transformation can't fit into the encoding we want), or
2381c2c66affSColin Finck */
2382c2c66affSColin Finck int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2383c2c66affSColin Finck xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2384c2c66affSColin Finck xmlBufferPtr in)
2385c2c66affSColin Finck {
2386fc82f8e2SThomas Faber int ret;
2387c2c66affSColin Finck int written;
2388c2c66affSColin Finck int toconv;
2389c2c66affSColin Finck
2390c2c66affSColin Finck if (handler == NULL)
2391c2c66affSColin Finck return (-1);
2392c2c66affSColin Finck if (out == NULL)
2393c2c66affSColin Finck return (-1);
2394c2c66affSColin Finck if (in == NULL)
2395c2c66affSColin Finck return (-1);
2396c2c66affSColin Finck
2397c2c66affSColin Finck toconv = in->use;
2398c2c66affSColin Finck if (toconv == 0)
2399c2c66affSColin Finck return (0);
2400c2c66affSColin Finck written = out->size - out->use -1; /* count '\0' */
2401c2c66affSColin Finck if (toconv * 2 >= written) {
2402c2c66affSColin Finck xmlBufferGrow(out, out->size + toconv * 2);
2403c2c66affSColin Finck written = out->size - out->use - 1;
2404c2c66affSColin Finck }
2405fc82f8e2SThomas Faber ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
24065bb277a5SThomas Faber in->content, &toconv, 1);
2407c2c66affSColin Finck xmlBufferShrink(in, toconv);
2408c2c66affSColin Finck out->use += written;
2409c2c66affSColin Finck out->content[out->use] = 0;
2410c2c66affSColin Finck if (ret == -1)
2411c2c66affSColin Finck ret = -3;
2412fc82f8e2SThomas Faber
2413c2c66affSColin Finck switch (ret) {
2414c2c66affSColin Finck case 0:
2415c2c66affSColin Finck #ifdef DEBUG_ENCODING
2416c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2417c2c66affSColin Finck "converted %d bytes to %d bytes of input\n",
2418c2c66affSColin Finck toconv, written);
2419c2c66affSColin Finck #endif
2420c2c66affSColin Finck break;
2421c2c66affSColin Finck case -1:
2422c2c66affSColin Finck #ifdef DEBUG_ENCODING
2423c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2424c2c66affSColin Finck "converted %d bytes to %d bytes of input, %d left\n",
2425c2c66affSColin Finck toconv, written, in->use);
2426c2c66affSColin Finck #endif
2427c2c66affSColin Finck break;
2428c2c66affSColin Finck case -3:
2429c2c66affSColin Finck #ifdef DEBUG_ENCODING
2430c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2431c2c66affSColin Finck "converted %d bytes to %d bytes of input, %d left\n",
2432c2c66affSColin Finck toconv, written, in->use);
2433c2c66affSColin Finck #endif
2434c2c66affSColin Finck break;
2435c2c66affSColin Finck case -2: {
2436c2c66affSColin Finck char buf[50];
2437c2c66affSColin Finck
2438c2c66affSColin Finck snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2439c2c66affSColin Finck in->content[0], in->content[1],
2440c2c66affSColin Finck in->content[2], in->content[3]);
2441c2c66affSColin Finck buf[49] = 0;
2442c2c66affSColin Finck xmlEncodingErr(XML_I18N_CONV_FAILED,
2443c2c66affSColin Finck "input conversion failed due to input error, bytes %s\n",
2444c2c66affSColin Finck buf);
2445c2c66affSColin Finck }
2446c2c66affSColin Finck }
2447c2c66affSColin Finck /*
2448c2c66affSColin Finck * Ignore when input buffer is not on a boundary
2449c2c66affSColin Finck */
2450c2c66affSColin Finck if (ret == -3)
2451c2c66affSColin Finck ret = 0;
2452c2c66affSColin Finck return (written? written : ret);
2453c2c66affSColin Finck }
2454c2c66affSColin Finck
2455c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
2456c2c66affSColin Finck /**
2457c2c66affSColin Finck * xmlCharEncOutput:
2458c2c66affSColin Finck * @output: a parser output buffer
2459c2c66affSColin Finck * @init: is this an initialization call without data
2460c2c66affSColin Finck *
2461c2c66affSColin Finck * Generic front-end for the encoding handler on parser output
2462c2c66affSColin Finck * a first call with @init == 1 has to be made first to initiate the
2463c2c66affSColin Finck * output in case of non-stateless encoding needing to initiate their
2464c2c66affSColin Finck * state or the output (like the BOM in UTF16).
2465c2c66affSColin Finck * In case of UTF8 sequence conversion errors for the given encoder,
2466c2c66affSColin Finck * the content will be automatically remapped to a CharRef sequence.
2467c2c66affSColin Finck *
2468c2c66affSColin Finck * Returns the number of byte written if success, or
2469c2c66affSColin Finck * -1 general error
2470c2c66affSColin Finck * -2 if the transcoding fails (for *in is not valid utf8 string or
2471c2c66affSColin Finck * the result of transformation can't fit into the encoding we want), or
2472c2c66affSColin Finck */
2473c2c66affSColin Finck int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2474c2c66affSColin Finck xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2475c2c66affSColin Finck {
2476fc82f8e2SThomas Faber int ret;
2477c2c66affSColin Finck size_t written;
247840ee59d6SThomas Faber int writtentot = 0;
2479c2c66affSColin Finck size_t toconv;
2480c2c66affSColin Finck int c_in;
2481c2c66affSColin Finck int c_out;
2482c2c66affSColin Finck xmlBufPtr in;
2483c2c66affSColin Finck xmlBufPtr out;
2484c2c66affSColin Finck
2485c2c66affSColin Finck if ((output == NULL) || (output->encoder == NULL) ||
2486c2c66affSColin Finck (output->buffer == NULL) || (output->conv == NULL))
2487c2c66affSColin Finck return (-1);
2488c2c66affSColin Finck out = output->conv;
2489c2c66affSColin Finck in = output->buffer;
2490c2c66affSColin Finck
2491c2c66affSColin Finck retry:
2492c2c66affSColin Finck
2493c2c66affSColin Finck written = xmlBufAvail(out);
2494c2c66affSColin Finck
2495c2c66affSColin Finck /*
2496c2c66affSColin Finck * First specific handling of the initialization call
2497c2c66affSColin Finck */
2498c2c66affSColin Finck if (init) {
2499c2c66affSColin Finck c_in = 0;
2500c2c66affSColin Finck c_out = written;
2501fc82f8e2SThomas Faber /* TODO: Check return value. */
2502fc82f8e2SThomas Faber xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2503c2c66affSColin Finck NULL, &c_in);
2504c2c66affSColin Finck xmlBufAddLen(out, c_out);
2505c2c66affSColin Finck #ifdef DEBUG_ENCODING
2506c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2507c2c66affSColin Finck "initialized encoder\n");
2508c2c66affSColin Finck #endif
250940ee59d6SThomas Faber return(c_out);
2510c2c66affSColin Finck }
2511c2c66affSColin Finck
2512c2c66affSColin Finck /*
2513c2c66affSColin Finck * Conversion itself.
2514c2c66affSColin Finck */
2515c2c66affSColin Finck toconv = xmlBufUse(in);
2516c2c66affSColin Finck if (toconv == 0)
25177244e0c5SThomas Faber return (writtentot);
2518c2c66affSColin Finck if (toconv > 64 * 1024)
2519c2c66affSColin Finck toconv = 64 * 1024;
2520c2c66affSColin Finck if (toconv * 4 >= written) {
2521c2c66affSColin Finck xmlBufGrow(out, toconv * 4);
2522*911153daSThomas Faber written = xmlBufAvail(out);
2523c2c66affSColin Finck }
2524c2c66affSColin Finck if (written > 256 * 1024)
2525c2c66affSColin Finck written = 256 * 1024;
2526c2c66affSColin Finck
2527c2c66affSColin Finck c_in = toconv;
2528c2c66affSColin Finck c_out = written;
2529fc82f8e2SThomas Faber ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2530c2c66affSColin Finck xmlBufContent(in), &c_in);
2531c2c66affSColin Finck xmlBufShrink(in, c_in);
2532c2c66affSColin Finck xmlBufAddLen(out, c_out);
2533c2c66affSColin Finck writtentot += c_out;
2534c2c66affSColin Finck if (ret == -1) {
2535c2c66affSColin Finck if (c_out > 0) {
2536fc82f8e2SThomas Faber /* Can be a limitation of iconv or uconv */
2537c2c66affSColin Finck goto retry;
2538c2c66affSColin Finck }
2539c2c66affSColin Finck ret = -3;
2540c2c66affSColin Finck }
2541c2c66affSColin Finck
2542c2c66affSColin Finck /*
2543c2c66affSColin Finck * Attempt to handle error cases
2544c2c66affSColin Finck */
2545c2c66affSColin Finck switch (ret) {
2546c2c66affSColin Finck case 0:
2547c2c66affSColin Finck #ifdef DEBUG_ENCODING
2548c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2549c2c66affSColin Finck "converted %d bytes to %d bytes of output\n",
2550c2c66affSColin Finck c_in, c_out);
2551c2c66affSColin Finck #endif
2552c2c66affSColin Finck break;
2553c2c66affSColin Finck case -1:
2554c2c66affSColin Finck #ifdef DEBUG_ENCODING
2555c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2556c2c66affSColin Finck "output conversion failed by lack of space\n");
2557c2c66affSColin Finck #endif
2558c2c66affSColin Finck break;
2559c2c66affSColin Finck case -3:
2560c2c66affSColin Finck #ifdef DEBUG_ENCODING
2561c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2562c2c66affSColin Finck c_in, c_out, (int) xmlBufUse(in));
2563c2c66affSColin Finck #endif
2564c2c66affSColin Finck break;
2565fc82f8e2SThomas Faber case -4:
2566fc82f8e2SThomas Faber xmlEncodingErr(XML_I18N_NO_OUTPUT,
2567fc82f8e2SThomas Faber "xmlCharEncOutFunc: no output function !\n", NULL);
2568c2c66affSColin Finck ret = -1;
2569c2c66affSColin Finck break;
2570fc82f8e2SThomas Faber case -2: {
2571c2c66affSColin Finck xmlChar charref[20];
2572fc82f8e2SThomas Faber int len = (int) xmlBufUse(in);
2573fc82f8e2SThomas Faber xmlChar *content = xmlBufContent(in);
2574fc82f8e2SThomas Faber int cur, charrefLen;
2575fc82f8e2SThomas Faber
2576fc82f8e2SThomas Faber cur = xmlGetUTF8Char(content, &len);
2577fc82f8e2SThomas Faber if (cur <= 0)
2578fc82f8e2SThomas Faber break;
2579c2c66affSColin Finck
2580c2c66affSColin Finck #ifdef DEBUG_ENCODING
2581c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2582c2c66affSColin Finck "handling output conversion error\n");
2583c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2584c2c66affSColin Finck "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2585c2c66affSColin Finck content[0], content[1],
2586c2c66affSColin Finck content[2], content[3]);
2587c2c66affSColin Finck #endif
2588c2c66affSColin Finck /*
2589c2c66affSColin Finck * Removes the UTF8 sequence, and replace it by a charref
2590c2c66affSColin Finck * and continue the transcoding phase, hoping the error
2591c2c66affSColin Finck * did not mangle the encoder state.
2592c2c66affSColin Finck */
2593fc82f8e2SThomas Faber charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2594c2c66affSColin Finck "&#%d;", cur);
2595c2c66affSColin Finck xmlBufShrink(in, len);
2596fc82f8e2SThomas Faber xmlBufGrow(out, charrefLen * 4);
2597*911153daSThomas Faber c_out = xmlBufAvail(out);
2598fc82f8e2SThomas Faber c_in = charrefLen;
2599fc82f8e2SThomas Faber ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2600fc82f8e2SThomas Faber charref, &c_in);
2601c2c66affSColin Finck
2602fc82f8e2SThomas Faber if ((ret < 0) || (c_in != charrefLen)) {
2603c2c66affSColin Finck char buf[50];
2604c2c66affSColin Finck
2605c2c66affSColin Finck snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2606c2c66affSColin Finck content[0], content[1],
2607c2c66affSColin Finck content[2], content[3]);
2608c2c66affSColin Finck buf[49] = 0;
2609c2c66affSColin Finck xmlEncodingErr(XML_I18N_CONV_FAILED,
2610c2c66affSColin Finck "output conversion failed due to conv error, bytes %s\n",
2611c2c66affSColin Finck buf);
2612c2c66affSColin Finck if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2613c2c66affSColin Finck content[0] = ' ';
2614c2c66affSColin Finck break;
2615c2c66affSColin Finck }
2616fc82f8e2SThomas Faber
2617fc82f8e2SThomas Faber xmlBufAddLen(out, c_out);
2618fc82f8e2SThomas Faber writtentot += c_out;
2619fc82f8e2SThomas Faber goto retry;
2620fc82f8e2SThomas Faber }
2621c2c66affSColin Finck }
262240ee59d6SThomas Faber return(writtentot ? writtentot : ret);
2623c2c66affSColin Finck }
2624c2c66affSColin Finck #endif
2625c2c66affSColin Finck
2626c2c66affSColin Finck /**
2627c2c66affSColin Finck * xmlCharEncOutFunc:
2628f22fa382SThomas Faber * @handler: char encoding transformation data structure
2629c2c66affSColin Finck * @out: an xmlBuffer for the output.
2630c2c66affSColin Finck * @in: an xmlBuffer for the input
2631c2c66affSColin Finck *
2632c2c66affSColin Finck * Generic front-end for the encoding handler output function
2633c2c66affSColin Finck * a first call with @in == NULL has to be made firs to initiate the
2634c2c66affSColin Finck * output in case of non-stateless encoding needing to initiate their
2635c2c66affSColin Finck * state or the output (like the BOM in UTF16).
2636c2c66affSColin Finck * In case of UTF8 sequence conversion errors for the given encoder,
2637c2c66affSColin Finck * the content will be automatically remapped to a CharRef sequence.
2638c2c66affSColin Finck *
2639c2c66affSColin Finck * Returns the number of byte written if success, or
2640c2c66affSColin Finck * -1 general error
2641c2c66affSColin Finck * -2 if the transcoding fails (for *in is not valid utf8 string or
2642c2c66affSColin Finck * the result of transformation can't fit into the encoding we want), or
2643c2c66affSColin Finck */
2644c2c66affSColin Finck int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2645c2c66affSColin Finck xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2646c2c66affSColin Finck xmlBufferPtr in) {
2647fc82f8e2SThomas Faber int ret;
2648c2c66affSColin Finck int written;
2649c2c66affSColin Finck int writtentot = 0;
2650c2c66affSColin Finck int toconv;
2651c2c66affSColin Finck
2652c2c66affSColin Finck if (handler == NULL) return(-1);
2653c2c66affSColin Finck if (out == NULL) return(-1);
2654c2c66affSColin Finck
2655c2c66affSColin Finck retry:
2656c2c66affSColin Finck
2657c2c66affSColin Finck written = out->size - out->use;
2658c2c66affSColin Finck
2659c2c66affSColin Finck if (written > 0)
2660c2c66affSColin Finck written--; /* Gennady: count '/0' */
2661c2c66affSColin Finck
2662c2c66affSColin Finck /*
2663c2c66affSColin Finck * First specific handling of in = NULL, i.e. the initialization call
2664c2c66affSColin Finck */
2665c2c66affSColin Finck if (in == NULL) {
2666c2c66affSColin Finck toconv = 0;
2667fc82f8e2SThomas Faber /* TODO: Check return value. */
2668fc82f8e2SThomas Faber xmlEncOutputChunk(handler, &out->content[out->use], &written,
2669c2c66affSColin Finck NULL, &toconv);
2670c2c66affSColin Finck out->use += written;
2671c2c66affSColin Finck out->content[out->use] = 0;
2672c2c66affSColin Finck #ifdef DEBUG_ENCODING
2673c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2674c2c66affSColin Finck "initialized encoder\n");
2675c2c66affSColin Finck #endif
2676c2c66affSColin Finck return(0);
2677c2c66affSColin Finck }
2678c2c66affSColin Finck
2679c2c66affSColin Finck /*
2680c2c66affSColin Finck * Conversion itself.
2681c2c66affSColin Finck */
2682c2c66affSColin Finck toconv = in->use;
2683c2c66affSColin Finck if (toconv == 0)
2684c2c66affSColin Finck return(0);
2685c2c66affSColin Finck if (toconv * 4 >= written) {
2686c2c66affSColin Finck xmlBufferGrow(out, toconv * 4);
2687c2c66affSColin Finck written = out->size - out->use - 1;
2688c2c66affSColin Finck }
2689fc82f8e2SThomas Faber ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2690c2c66affSColin Finck in->content, &toconv);
2691c2c66affSColin Finck xmlBufferShrink(in, toconv);
2692c2c66affSColin Finck out->use += written;
2693c2c66affSColin Finck writtentot += written;
2694c2c66affSColin Finck out->content[out->use] = 0;
2695c2c66affSColin Finck if (ret == -1) {
2696c2c66affSColin Finck if (written > 0) {
2697fc82f8e2SThomas Faber /* Can be a limitation of iconv or uconv */
2698c2c66affSColin Finck goto retry;
2699c2c66affSColin Finck }
2700c2c66affSColin Finck ret = -3;
2701c2c66affSColin Finck }
2702c2c66affSColin Finck
2703c2c66affSColin Finck /*
2704c2c66affSColin Finck * Attempt to handle error cases
2705c2c66affSColin Finck */
2706c2c66affSColin Finck switch (ret) {
2707c2c66affSColin Finck case 0:
2708c2c66affSColin Finck #ifdef DEBUG_ENCODING
2709c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2710c2c66affSColin Finck "converted %d bytes to %d bytes of output\n",
2711c2c66affSColin Finck toconv, written);
2712c2c66affSColin Finck #endif
2713c2c66affSColin Finck break;
2714c2c66affSColin Finck case -1:
2715c2c66affSColin Finck #ifdef DEBUG_ENCODING
2716c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2717c2c66affSColin Finck "output conversion failed by lack of space\n");
2718c2c66affSColin Finck #endif
2719c2c66affSColin Finck break;
2720c2c66affSColin Finck case -3:
2721c2c66affSColin Finck #ifdef DEBUG_ENCODING
2722c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2723c2c66affSColin Finck toconv, written, in->use);
2724c2c66affSColin Finck #endif
2725c2c66affSColin Finck break;
2726fc82f8e2SThomas Faber case -4:
2727fc82f8e2SThomas Faber xmlEncodingErr(XML_I18N_NO_OUTPUT,
2728fc82f8e2SThomas Faber "xmlCharEncOutFunc: no output function !\n", NULL);
2729c2c66affSColin Finck ret = -1;
2730c2c66affSColin Finck break;
2731fc82f8e2SThomas Faber case -2: {
2732c2c66affSColin Finck xmlChar charref[20];
2733fc82f8e2SThomas Faber int len = in->use;
2734fc82f8e2SThomas Faber const xmlChar *utf = (const xmlChar *) in->content;
2735fc82f8e2SThomas Faber int cur, charrefLen;
2736fc82f8e2SThomas Faber
2737fc82f8e2SThomas Faber cur = xmlGetUTF8Char(utf, &len);
2738fc82f8e2SThomas Faber if (cur <= 0)
2739fc82f8e2SThomas Faber break;
2740c2c66affSColin Finck
2741c2c66affSColin Finck #ifdef DEBUG_ENCODING
2742c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2743c2c66affSColin Finck "handling output conversion error\n");
2744c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2745c2c66affSColin Finck "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2746c2c66affSColin Finck in->content[0], in->content[1],
2747c2c66affSColin Finck in->content[2], in->content[3]);
2748c2c66affSColin Finck #endif
2749c2c66affSColin Finck /*
2750c2c66affSColin Finck * Removes the UTF8 sequence, and replace it by a charref
2751c2c66affSColin Finck * and continue the transcoding phase, hoping the error
2752c2c66affSColin Finck * did not mangle the encoder state.
2753c2c66affSColin Finck */
2754fc82f8e2SThomas Faber charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2755c2c66affSColin Finck "&#%d;", cur);
2756c2c66affSColin Finck xmlBufferShrink(in, len);
2757fc82f8e2SThomas Faber xmlBufferGrow(out, charrefLen * 4);
2758fc82f8e2SThomas Faber written = out->size - out->use - 1;
2759fc82f8e2SThomas Faber toconv = charrefLen;
2760fc82f8e2SThomas Faber ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2761fc82f8e2SThomas Faber charref, &toconv);
2762c2c66affSColin Finck
2763fc82f8e2SThomas Faber if ((ret < 0) || (toconv != charrefLen)) {
2764c2c66affSColin Finck char buf[50];
2765c2c66affSColin Finck
2766c2c66affSColin Finck snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2767c2c66affSColin Finck in->content[0], in->content[1],
2768c2c66affSColin Finck in->content[2], in->content[3]);
2769c2c66affSColin Finck buf[49] = 0;
2770c2c66affSColin Finck xmlEncodingErr(XML_I18N_CONV_FAILED,
2771c2c66affSColin Finck "output conversion failed due to conv error, bytes %s\n",
2772c2c66affSColin Finck buf);
2773c2c66affSColin Finck if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2774c2c66affSColin Finck in->content[0] = ' ';
2775c2c66affSColin Finck break;
2776c2c66affSColin Finck }
2777fc82f8e2SThomas Faber
2778fc82f8e2SThomas Faber out->use += written;
2779fc82f8e2SThomas Faber writtentot += written;
2780fc82f8e2SThomas Faber out->content[out->use] = 0;
2781fc82f8e2SThomas Faber goto retry;
2782fc82f8e2SThomas Faber }
2783c2c66affSColin Finck }
278440ee59d6SThomas Faber return(writtentot ? writtentot : ret);
2785c2c66affSColin Finck }
2786c2c66affSColin Finck
2787c2c66affSColin Finck /**
2788c2c66affSColin Finck * xmlCharEncCloseFunc:
2789f22fa382SThomas Faber * @handler: char encoding transformation data structure
2790c2c66affSColin Finck *
2791c2c66affSColin Finck * Generic front-end for encoding handler close function
2792c2c66affSColin Finck *
2793c2c66affSColin Finck * Returns 0 if success, or -1 in case of error
2794c2c66affSColin Finck */
2795c2c66affSColin Finck int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2796c2c66affSColin Finck xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2797c2c66affSColin Finck int ret = 0;
2798c2c66affSColin Finck int tofree = 0;
2799c2c66affSColin Finck int i, handler_in_list = 0;
2800c2c66affSColin Finck
2801*911153daSThomas Faber /* Avoid unused variable warning if features are disabled. */
2802*911153daSThomas Faber (void) handler_in_list;
2803*911153daSThomas Faber
2804c2c66affSColin Finck if (handler == NULL) return(-1);
2805c2c66affSColin Finck if (handler->name == NULL) return(-1);
2806c2c66affSColin Finck if (handlers != NULL) {
2807c2c66affSColin Finck for (i = 0;i < nbCharEncodingHandler; i++) {
2808c2c66affSColin Finck if (handler == handlers[i]) {
2809c2c66affSColin Finck handler_in_list = 1;
2810c2c66affSColin Finck break;
2811c2c66affSColin Finck }
2812c2c66affSColin Finck }
2813c2c66affSColin Finck }
2814c2c66affSColin Finck #ifdef LIBXML_ICONV_ENABLED
2815c2c66affSColin Finck /*
2816c2c66affSColin Finck * Iconv handlers can be used only once, free the whole block.
2817c2c66affSColin Finck * and the associated icon resources.
2818c2c66affSColin Finck */
2819c2c66affSColin Finck if ((handler_in_list == 0) &&
2820c2c66affSColin Finck ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2821c2c66affSColin Finck tofree = 1;
2822c2c66affSColin Finck if (handler->iconv_out != NULL) {
2823c2c66affSColin Finck if (iconv_close(handler->iconv_out))
2824c2c66affSColin Finck ret = -1;
2825c2c66affSColin Finck handler->iconv_out = NULL;
2826c2c66affSColin Finck }
2827c2c66affSColin Finck if (handler->iconv_in != NULL) {
2828c2c66affSColin Finck if (iconv_close(handler->iconv_in))
2829c2c66affSColin Finck ret = -1;
2830c2c66affSColin Finck handler->iconv_in = NULL;
2831c2c66affSColin Finck }
2832c2c66affSColin Finck }
2833c2c66affSColin Finck #endif /* LIBXML_ICONV_ENABLED */
2834c2c66affSColin Finck #ifdef LIBXML_ICU_ENABLED
2835c2c66affSColin Finck if ((handler_in_list == 0) &&
2836c2c66affSColin Finck ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2837c2c66affSColin Finck tofree = 1;
2838c2c66affSColin Finck if (handler->uconv_out != NULL) {
2839c2c66affSColin Finck closeIcuConverter(handler->uconv_out);
2840c2c66affSColin Finck handler->uconv_out = NULL;
2841c2c66affSColin Finck }
2842c2c66affSColin Finck if (handler->uconv_in != NULL) {
2843c2c66affSColin Finck closeIcuConverter(handler->uconv_in);
2844c2c66affSColin Finck handler->uconv_in = NULL;
2845c2c66affSColin Finck }
2846c2c66affSColin Finck }
2847c2c66affSColin Finck #endif
2848c2c66affSColin Finck if (tofree) {
2849c2c66affSColin Finck /* free up only dynamic handlers iconv/uconv */
2850c2c66affSColin Finck if (handler->name != NULL)
2851c2c66affSColin Finck xmlFree(handler->name);
2852c2c66affSColin Finck handler->name = NULL;
2853c2c66affSColin Finck xmlFree(handler);
2854c2c66affSColin Finck }
2855c2c66affSColin Finck #ifdef DEBUG_ENCODING
2856c2c66affSColin Finck if (ret)
2857c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2858c2c66affSColin Finck "failed to close the encoding handler\n");
2859c2c66affSColin Finck else
2860c2c66affSColin Finck xmlGenericError(xmlGenericErrorContext,
2861c2c66affSColin Finck "closed the encoding handler\n");
2862c2c66affSColin Finck #endif
2863c2c66affSColin Finck
2864c2c66affSColin Finck return(ret);
2865c2c66affSColin Finck }
2866c2c66affSColin Finck
2867c2c66affSColin Finck /**
2868c2c66affSColin Finck * xmlByteConsumed:
2869c2c66affSColin Finck * @ctxt: an XML parser context
2870c2c66affSColin Finck *
2871c2c66affSColin Finck * This function provides the current index of the parser relative
2872c2c66affSColin Finck * to the start of the current entity. This function is computed in
2873c2c66affSColin Finck * bytes from the beginning starting at zero and finishing at the
2874c2c66affSColin Finck * size in byte of the file if parsing a file. The function is
2875c2c66affSColin Finck * of constant cost if the input is UTF-8 but can be costly if run
2876c2c66affSColin Finck * on non-UTF-8 input.
2877c2c66affSColin Finck *
2878c2c66affSColin Finck * Returns the index in bytes from the beginning of the entity or -1
2879c2c66affSColin Finck * in case the index could not be computed.
2880c2c66affSColin Finck */
2881c2c66affSColin Finck long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2882c2c66affSColin Finck xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2883c2c66affSColin Finck xmlParserInputPtr in;
2884c2c66affSColin Finck
2885c2c66affSColin Finck if (ctxt == NULL) return(-1);
2886c2c66affSColin Finck in = ctxt->input;
2887c2c66affSColin Finck if (in == NULL) return(-1);
2888c2c66affSColin Finck if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2889c2c66affSColin Finck unsigned int unused = 0;
2890c2c66affSColin Finck xmlCharEncodingHandler * handler = in->buf->encoder;
2891c2c66affSColin Finck /*
2892c2c66affSColin Finck * Encoding conversion, compute the number of unused original
2893f22fa382SThomas Faber * bytes from the input not consumed and subtract that from
2894c2c66affSColin Finck * the raw consumed value, this is not a cheap operation
2895c2c66affSColin Finck */
2896c2c66affSColin Finck if (in->end - in->cur > 0) {
2897c2c66affSColin Finck unsigned char convbuf[32000];
2898c2c66affSColin Finck const unsigned char *cur = (const unsigned char *)in->cur;
2899c2c66affSColin Finck int toconv = in->end - in->cur, written = 32000;
2900c2c66affSColin Finck
2901c2c66affSColin Finck int ret;
2902c2c66affSColin Finck
2903c2c66affSColin Finck do {
2904c2c66affSColin Finck toconv = in->end - cur;
2905c2c66affSColin Finck written = 32000;
2906fc82f8e2SThomas Faber ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2907c2c66affSColin Finck cur, &toconv);
2908c2c66affSColin Finck if (ret < 0) {
2909c2c66affSColin Finck if (written > 0)
2910c2c66affSColin Finck ret = -2;
2911c2c66affSColin Finck else
2912c2c66affSColin Finck return(-1);
2913c2c66affSColin Finck }
2914c2c66affSColin Finck unused += written;
2915c2c66affSColin Finck cur += toconv;
2916c2c66affSColin Finck } while (ret == -2);
2917c2c66affSColin Finck }
2918c2c66affSColin Finck if (in->buf->rawconsumed < unused)
2919c2c66affSColin Finck return(-1);
2920c2c66affSColin Finck return(in->buf->rawconsumed - unused);
2921c2c66affSColin Finck }
2922c2c66affSColin Finck return(in->consumed + (in->cur - in->base));
2923c2c66affSColin Finck }
2924c2c66affSColin Finck
2925c2c66affSColin Finck #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2926c2c66affSColin Finck #ifdef LIBXML_ISO8859X_ENABLED
2927c2c66affSColin Finck
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every converted character produces exactly one
 * output byte, and no more than the initial *@outlen bytes are written.
 *
 * Returns the number of bytes written if success,
 *     -1 on invalid arguments or when @out is full while input remains,
 *     -2 if the transcoding fails (malformed UTF-8 or a character that is
 *        not part of the target set),
 *     -3 if the input ends in the middle of a multi-byte sequence.
 * The value of @inlen after return is the number of octets consumed,
 * counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 * Neither is updated when -1 is returned for invalid arguments.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned char d;

        if (out >= outend) {
            /*
             * Output buffer exhausted with input left: stop before
             * writing past the end of @out and let the caller come
             * back with more room.
             */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-1);
        }

        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-3);
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level selects a 64-entry block, second the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-3);
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);
}
3046c2c66affSColin Finck
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the code points for the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * cannot take more data.
 *
 * Returns the number of bytes written if success, or -1 on invalid
 *     arguments or when an undefined byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 * Neither is updated when -1 is returned for invalid arguments.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: runs while at least 3 output bytes are free, the most a
     * single table entry can expand to. The room is computed as a
     * difference so that no pointer before @out is ever formed.
     */
    while ((in < inend) && ((outend - out) > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Copy the following run of ASCII, limited by the output room. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Bounds test first: in may already be at the end of the input. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two free bytes can remain; they can still take ASCII. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3112c2c66affSColin Finck
3113c2c66affSColin Finck
3114c2c66affSColin Finck /************************************************************************
3115c2c66affSColin Finck * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3116c2c66affSColin Finck ************************************************************************/
3117c2c66affSColin Finck
/*
 * Code points for the upper half of ISO-8859-2, one entry per byte value
 * 0x80..0xFF; the comment on each row gives the byte range it covers.
 * NOTE(review): presumably handed to ISO8859xToUTF8, which looks entries
 * up as unicodetable[byte - 0x80] -- confirm against the handler setup.
 */
static const unsigned short xmlunicodetable_ISO8859_2[128] = {
    /* 0x80 - 0x87 */
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 - 0x8F */
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 - 0x97 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 - 0x9F */
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 - 0xA7 */
    0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
    /* 0xA8 - 0xAF */
    0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
    /* 0xB0 - 0xB7 */
    0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
    /* 0xB8 - 0xBF */
    0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
    /* 0xC0 - 0xC7 */
    0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
    /* 0xC8 - 0xCF */
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
    /* 0xD0 - 0xD7 */
    0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
    /* 0xD8 - 0xDF */
    0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
    /* 0xE0 - 0xE7 */
    0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
    /* 0xE8 - 0xEF */
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
    /* 0xF0 - 0xF7 */
    0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
    /* 0xF8 - 0xFF */
    0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9,
};
3136c2c66affSColin Finck
/*
 * Transcoding lookup data for ISO-8859-2: 48 header bytes followed by
 * 6 blocks of 64 bytes, as given by the array bound. The string pieces
 * total exactly that many bytes, so no terminating NUL is stored.
 * NOTE(review): presumably the header maps a UTF-8 lead byte to a block
 * number, each block is indexed by the low 6 bits of a continuation byte,
 * and 0x00 means "not representable" - confirm against the encoder that
 * consumes this table.
 */
3137*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3138c2c66affSColin Finck "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3139c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3146c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3147c2c66affSColin Finck "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3148c2c66affSColin Finck "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3149c2c66affSColin Finck "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3150c2c66affSColin Finck "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3151c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3153c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3154c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3155c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157c2c66affSColin Finck "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3158c2c66affSColin Finck "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3159c2c66affSColin Finck "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3160c2c66affSColin Finck "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3161c2c66affSColin Finck "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3162c2c66affSColin Finck "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3163c2c66affSColin Finck "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3164c2c66affSColin Finck "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3165c2c66affSColin Finck };
3166c2c66affSColin Finck
/*
 * Unicode code points for the 128 high-half byte values of ISO-8859-3.
 * The first 32 entries are the identity range 0x0080..0x009f.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking
 * a byte that has no assigned character - confirm against the decoder
 * that reads this table.
 */
3167c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3168c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3169c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3170c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3171c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3172c2c66affSColin Finck 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3173c2c66affSColin Finck 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3174c2c66affSColin Finck 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3175c2c66affSColin Finck 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3176c2c66affSColin Finck 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3177c2c66affSColin Finck 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3178c2c66affSColin Finck 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3179c2c66affSColin Finck 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3180c2c66affSColin Finck 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3181c2c66affSColin Finck 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3182c2c66affSColin Finck 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3183c2c66affSColin Finck 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3184c2c66affSColin Finck };
3185c2c66affSColin Finck
/*
 * Transcoding lookup data for ISO-8859-3: 48 header bytes followed by
 * 7 blocks of 64 bytes, as given by the array bound. The string pieces
 * total exactly that many bytes, so no terminating NUL is stored.
 * NOTE(review): presumably the header maps a UTF-8 lead byte to a block
 * number, each block is indexed by the low 6 bits of a continuation byte,
 * and 0x00 means "not representable" - confirm against the encoder that
 * consumes this table.
 */
3186*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3187c2c66affSColin Finck "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3188c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3195c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3196c2c66affSColin Finck "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3197c2c66affSColin Finck "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3198c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3199c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3200c2c66affSColin Finck "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3201c2c66affSColin Finck "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3204c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206c2c66affSColin Finck "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3212c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3213c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3214c2c66affSColin Finck "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3215c2c66affSColin Finck "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3216c2c66affSColin Finck "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3217c2c66affSColin Finck "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3218c2c66affSColin Finck };
3219c2c66affSColin Finck
/*
 * Unicode code points for the 128 high-half byte values of ISO-8859-4.
 * The first 32 entries are the identity range 0x0080..0x009f.
 * NOTE(review): presumably indexed by (byte - 0x80) - confirm against the
 * decoder that reads this table.
 */
3220c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3221c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3222c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3223c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3224c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3225c2c66affSColin Finck 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3226c2c66affSColin Finck 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3227c2c66affSColin Finck 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3228c2c66affSColin Finck 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3229c2c66affSColin Finck 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3230c2c66affSColin Finck 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3231c2c66affSColin Finck 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3232c2c66affSColin Finck 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3233c2c66affSColin Finck 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3234c2c66affSColin Finck 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3235c2c66affSColin Finck 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3236c2c66affSColin Finck 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3237c2c66affSColin Finck };
3238c2c66affSColin Finck
/*
 * Transcoding lookup data for ISO-8859-4: 48 header bytes followed by
 * 6 blocks of 64 bytes, as given by the array bound. The string pieces
 * total exactly that many bytes, so no terminating NUL is stored.
 * NOTE(review): presumably the header maps a UTF-8 lead byte to a block
 * number, each block is indexed by the low 6 bits of a continuation byte,
 * and 0x00 means "not representable" - confirm against the encoder that
 * consumes this table.
 */
3239*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3240c2c66affSColin Finck "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3241c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3248c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3249c2c66affSColin Finck "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3250c2c66affSColin Finck "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3251c2c66affSColin Finck "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3252c2c66affSColin Finck "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3253c2c66affSColin Finck "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3254c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3255c2c66affSColin Finck "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3256c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3257c2c66affSColin Finck "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3258c2c66affSColin Finck "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3259c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3260c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3261c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263c2c66affSColin Finck "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3264c2c66affSColin Finck "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3265c2c66affSColin Finck "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3266c2c66affSColin Finck "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3267c2c66affSColin Finck };
3268c2c66affSColin Finck
/*
 * Unicode code points for the 128 high-half byte values of ISO-8859-5.
 * The first 32 entries are the identity range 0x0080..0x009f; most of the
 * remaining entries fall in the 0x04xx range.
 * NOTE(review): presumably indexed by (byte - 0x80) - confirm against the
 * decoder that reads this table.
 */
3269c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3270c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3271c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3272c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3273c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3274c2c66affSColin Finck 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3275c2c66affSColin Finck 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3276c2c66affSColin Finck 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3277c2c66affSColin Finck 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3278c2c66affSColin Finck 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3279c2c66affSColin Finck 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3280c2c66affSColin Finck 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3281c2c66affSColin Finck 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3282c2c66affSColin Finck 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3283c2c66affSColin Finck 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3284c2c66affSColin Finck 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3285c2c66affSColin Finck 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3286c2c66affSColin Finck };
3287c2c66affSColin Finck
/*
 * Transcoding lookup data for ISO-8859-5: 48 header bytes followed by
 * 6 blocks of 64 bytes, as given by the array bound. The string pieces
 * total exactly that many bytes, so no terminating NUL is stored.
 * NOTE(review): presumably the header maps a UTF-8 lead byte to a block
 * number, each block is indexed by the low 6 bits of a continuation byte,
 * and 0x00 means "not representable" - confirm against the encoder that
 * consumes this table.
 */
3288*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3289c2c66affSColin Finck "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290c2c66affSColin Finck "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291c2c66affSColin Finck "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3297c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3298c2c66affSColin Finck "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3299c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300c2c66affSColin Finck "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3301c2c66affSColin Finck "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3302c2c66affSColin Finck "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3303c2c66affSColin Finck "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3304c2c66affSColin Finck "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3305c2c66affSColin Finck "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3306c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308c2c66affSColin Finck "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316c2c66affSColin Finck };
3317c2c66affSColin Finck
/*
 * Unicode code points for the 128 high-half byte values of ISO-8859-6.
 * The first 32 entries are the identity range 0x0080..0x009f; most of the
 * assigned entries after that fall in the 0x06xx range.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking
 * a byte that has no assigned character - confirm against the decoder
 * that reads this table.
 */
3318c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3319c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3320c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3321c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3322c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3323c2c66affSColin Finck 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3324c2c66affSColin Finck 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3325c2c66affSColin Finck 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3326c2c66affSColin Finck 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3327c2c66affSColin Finck 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3328c2c66affSColin Finck 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3329c2c66affSColin Finck 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3330c2c66affSColin Finck 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3331c2c66affSColin Finck 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3332c2c66affSColin Finck 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3333c2c66affSColin Finck 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3334c2c66affSColin Finck 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3335c2c66affSColin Finck };
3336c2c66affSColin Finck
/*
 * Transcoding lookup data for ISO-8859-6: 48 header bytes followed by
 * 5 blocks of 64 bytes, as given by the array bound. The string pieces
 * total exactly that many bytes, so no terminating NUL is stored.
 * NOTE(review): presumably the header maps a UTF-8 lead byte to a block
 * number, each block is indexed by the low 6 bits of a continuation byte,
 * and 0x00 means "not representable" - confirm against the encoder that
 * consumes this table.
 */
3337*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3338c2c66affSColin Finck "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3340c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3346c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3347c2c66affSColin Finck "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3348c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349c2c66affSColin Finck "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3354c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3355c2c66affSColin Finck "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3356c2c66affSColin Finck "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3357c2c66affSColin Finck "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3358c2c66affSColin Finck "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361c2c66affSColin Finck };
3362c2c66affSColin Finck
/*
 * Unicode code points for the 128 high-half byte values of ISO-8859-7.
 * The first 32 entries are the identity range 0x0080..0x009f; most of the
 * assigned entries after that fall in the 0x03xx range.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking
 * a byte that has no assigned character - confirm against the decoder
 * that reads this table.
 */
3363c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3364c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3365c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3366c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3367c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3368c2c66affSColin Finck 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3369c2c66affSColin Finck 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3370c2c66affSColin Finck 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3371c2c66affSColin Finck 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3372c2c66affSColin Finck 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3373c2c66affSColin Finck 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3374c2c66affSColin Finck 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3375c2c66affSColin Finck 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3376c2c66affSColin Finck 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3377c2c66affSColin Finck 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3378c2c66affSColin Finck 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3379c2c66affSColin Finck 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3380c2c66affSColin Finck };
3381c2c66affSColin Finck
/*
 * Transcoding lookup data for ISO-8859-7: 48 header bytes followed by
 * 7 blocks of 64 bytes, as given by the array bound. The string pieces
 * total exactly that many bytes, so no terminating NUL is stored.
 * NOTE(review): presumably the header maps a UTF-8 lead byte to a block
 * number, each block is indexed by the low 6 bits of a continuation byte,
 * and 0x00 means "not representable" - confirm against the encoder that
 * consumes this table.
 */
3382*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3383c2c66affSColin Finck "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3384c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385c2c66affSColin Finck "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3391c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3392c2c66affSColin Finck "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3393c2c66affSColin Finck "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3394c2c66affSColin Finck "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3398c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399c2c66affSColin Finck "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3400c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402c2c66affSColin Finck "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406c2c66affSColin Finck "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3407c2c66affSColin Finck "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3408c2c66affSColin Finck "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3409c2c66affSColin Finck "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3410c2c66affSColin Finck "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3411c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414c2c66affSColin Finck };
3415c2c66affSColin Finck
/*
 * Unicode code points for the 128 high-half byte values of ISO-8859-8.
 * The first 32 entries are the identity range 0x0080..0x009f; the
 * letter block near the end falls in the 0x05xx range.
 * NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking
 * a byte that has no assigned character - confirm against the decoder
 * that reads this table.
 */
3416c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3417c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3418c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3419c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3420c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3421c2c66affSColin Finck 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3422c2c66affSColin Finck 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3423c2c66affSColin Finck 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3424c2c66affSColin Finck 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3425c2c66affSColin Finck 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3426c2c66affSColin Finck 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3427c2c66affSColin Finck 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3428c2c66affSColin Finck 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3429c2c66affSColin Finck 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3430c2c66affSColin Finck 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3431c2c66affSColin Finck 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3432c2c66affSColin Finck 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3433c2c66affSColin Finck };
3434c2c66affSColin Finck
/*
 * Transcoding lookup data for ISO-8859-8: 48 header bytes followed by
 * 7 blocks of 64 bytes, as given by the array bound. The string pieces
 * total exactly that many bytes, so no terminating NUL is stored.
 * NOTE(review): presumably the header maps a UTF-8 lead byte to a block
 * number, each block is indexed by the low 6 bits of a continuation byte,
 * and 0x00 means "not representable" - confirm against the encoder that
 * consumes this table.
 */
3435*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3436c2c66affSColin Finck "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3438c2c66affSColin Finck "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3444c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3445c2c66affSColin Finck "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3446c2c66affSColin Finck "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3447c2c66affSColin Finck "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3451c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3453c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3455c2c66affSColin Finck "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3460c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3461c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464c2c66affSColin Finck "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3465c2c66affSColin Finck "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3466c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3467c2c66affSColin Finck };
3468c2c66affSColin Finck
/*
 * Unicode code points for the 128 high-half byte values of ISO-8859-9.
 * Every entry equals its position plus 0x80 except six values in the
 * 0x01xx range (0x011e, 0x0130, 0x015e, 0x011f, 0x0131, 0x015f).
 * NOTE(review): presumably indexed by (byte - 0x80) - confirm against the
 * decoder that reads this table.
 */
3469c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3470c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3471c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3472c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3473c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3474c2c66affSColin Finck 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3475c2c66affSColin Finck 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3476c2c66affSColin Finck 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3477c2c66affSColin Finck 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3478c2c66affSColin Finck 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3479c2c66affSColin Finck 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3480c2c66affSColin Finck 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3481c2c66affSColin Finck 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3482c2c66affSColin Finck 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3483c2c66affSColin Finck 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3484c2c66affSColin Finck 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3485c2c66affSColin Finck 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3486c2c66affSColin Finck };
3487c2c66affSColin Finck
/*
 * Transcoding lookup data for ISO-8859-9: 48 header bytes followed by
 * 5 blocks of 64 bytes, as given by the array bound. The string pieces
 * total exactly that many bytes, so no terminating NUL is stored.
 * NOTE(review): presumably the header maps a UTF-8 lead byte to a block
 * number, each block is indexed by the low 6 bits of a continuation byte,
 * and 0x00 means "not representable" - confirm against the encoder that
 * consumes this table.
 */
3488*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3489c2c66affSColin Finck "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3497c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3498c2c66affSColin Finck "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3499c2c66affSColin Finck "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3500c2c66affSColin Finck "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3501c2c66affSColin Finck "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3502c2c66affSColin Finck "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3503c2c66affSColin Finck "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3504c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3506c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507c2c66affSColin Finck "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3510c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512c2c66affSColin Finck };
3513c2c66affSColin Finck
/*
 * xmlunicodetable_ISO8859_10: 128 unsigned short entries. The first 32 are
 * 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-10
 * byte 0x80 + i, consumed by ISO8859xToUTF8 -- confirm against that helper.
 */
3514c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3515c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3516c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3517c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3518c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3519c2c66affSColin Finck 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3520c2c66affSColin Finck 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3521c2c66affSColin Finck 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3522c2c66affSColin Finck 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3523c2c66affSColin Finck 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3524c2c66affSColin Finck 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3525c2c66affSColin Finck 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3526c2c66affSColin Finck 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3527c2c66affSColin Finck 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3528c2c66affSColin Finck 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3529c2c66affSColin Finck 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3530c2c66affSColin Finck 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3531c2c66affSColin Finck };
3532c2c66affSColin Finck
/*
 * xmltranscodetable_ISO8859_10: byte table declared with 48 + 7 * 64 (= 496)
 * elements and filled from adjacent string literals. The literals supply
 * exactly 496 bytes, so the implicit terminating NUL is not stored.
 * NOTE(review): presumably a 48-byte index followed by seven 64-byte pages,
 * consumed by UTF8ToISO8859x to map UTF-8 input back to ISO-8859-10 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3533*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3534c2c66affSColin Finck "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536c2c66affSColin Finck "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3542c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3543c2c66affSColin Finck "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3544c2c66affSColin Finck "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3545c2c66affSColin Finck "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3546c2c66affSColin Finck "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3547c2c66affSColin Finck "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3548c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3549c2c66affSColin Finck "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3550c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551c2c66affSColin Finck "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3552c2c66affSColin Finck "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3553c2c66affSColin Finck "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3554c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558c2c66affSColin Finck "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561c2c66affSColin Finck "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3562c2c66affSColin Finck "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3563c2c66affSColin Finck "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3564c2c66affSColin Finck "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3565c2c66affSColin Finck };
3566c2c66affSColin Finck
/*
 * xmlunicodetable_ISO8859_11: 128 unsigned short entries. The first 32 are
 * 0x0080..0x009f in ascending order; eight entries are 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-11
 * byte 0x80 + i, consumed by ISO8859xToUTF8, and the 0x0000 entries mark
 * bytes with no assigned character -- confirm against that helper.
 */
3567c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3568c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3569c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3570c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3571c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3572c2c66affSColin Finck 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3573c2c66affSColin Finck 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3574c2c66affSColin Finck 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3575c2c66affSColin Finck 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3576c2c66affSColin Finck 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3577c2c66affSColin Finck 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3578c2c66affSColin Finck 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3579c2c66affSColin Finck 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3580c2c66affSColin Finck 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3581c2c66affSColin Finck 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3582c2c66affSColin Finck 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3583c2c66affSColin Finck 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3584c2c66affSColin Finck };
3585c2c66affSColin Finck
/*
 * xmltranscodetable_ISO8859_11: byte table declared with 48 + 6 * 64 (= 432)
 * elements and filled from adjacent string literals. The literals supply
 * exactly 432 bytes, so the implicit terminating NUL is not stored.
 * NOTE(review): presumably a 48-byte index followed by six 64-byte pages,
 * consumed by UTF8ToISO8859x to map UTF-8 input back to ISO-8859-11 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3586*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3587c2c66affSColin Finck "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589c2c66affSColin Finck "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3594c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3595c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3596c2c66affSColin Finck "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3602c2c66affSColin Finck "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3603c2c66affSColin Finck "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3604c2c66affSColin Finck "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3605c2c66affSColin Finck "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3606c2c66affSColin Finck "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3607c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610c2c66affSColin Finck "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3611c2c66affSColin Finck "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3612c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614c2c66affSColin Finck };
3615c2c66affSColin Finck
/*
 * xmlunicodetable_ISO8859_13: 128 unsigned short entries. The first 32 are
 * 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-13
 * byte 0x80 + i, consumed by ISO8859xToUTF8 -- confirm against that helper.
 */
3616c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3617c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3618c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3619c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3620c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3621c2c66affSColin Finck 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3622c2c66affSColin Finck 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3623c2c66affSColin Finck 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3624c2c66affSColin Finck 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3625c2c66affSColin Finck 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3626c2c66affSColin Finck 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3627c2c66affSColin Finck 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3628c2c66affSColin Finck 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3629c2c66affSColin Finck 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3630c2c66affSColin Finck 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3631c2c66affSColin Finck 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3632c2c66affSColin Finck 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3633c2c66affSColin Finck };
3634c2c66affSColin Finck
/*
 * xmltranscodetable_ISO8859_13: byte table declared with 48 + 7 * 64 (= 496)
 * elements and filled from adjacent string literals. The literals supply
 * exactly 496 bytes, so the implicit terminating NUL is not stored.
 * NOTE(review): presumably a 48-byte index followed by seven 64-byte pages,
 * consumed by UTF8ToISO8859x to map UTF-8 input back to ISO-8859-13 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3635*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3636c2c66affSColin Finck "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638c2c66affSColin Finck "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3644c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3645c2c66affSColin Finck "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3646c2c66affSColin Finck "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3647c2c66affSColin Finck "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3653c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3654c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3655c2c66affSColin Finck "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3656c2c66affSColin Finck "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3657c2c66affSColin Finck "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3658c2c66affSColin Finck "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3659c2c66affSColin Finck "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3660c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3661c2c66affSColin Finck "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3662c2c66affSColin Finck "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3663c2c66affSColin Finck "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3664c2c66affSColin Finck "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3665c2c66affSColin Finck "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3666c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3667c2c66affSColin Finck };
3668c2c66affSColin Finck
/*
 * xmlunicodetable_ISO8859_14: 128 unsigned short entries. The first 32 are
 * 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-14
 * byte 0x80 + i, consumed by ISO8859xToUTF8 -- confirm against that helper.
 */
3669c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3670c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3671c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3672c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3673c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3674c2c66affSColin Finck 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3675c2c66affSColin Finck 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3676c2c66affSColin Finck 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3677c2c66affSColin Finck 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3678c2c66affSColin Finck 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3679c2c66affSColin Finck 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3680c2c66affSColin Finck 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3681c2c66affSColin Finck 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3682c2c66affSColin Finck 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3683c2c66affSColin Finck 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3684c2c66affSColin Finck 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3685c2c66affSColin Finck 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3686c2c66affSColin Finck };
3687c2c66affSColin Finck
/*
 * xmltranscodetable_ISO8859_14: byte table declared with 48 + 10 * 64
 * (= 688) elements and filled from adjacent string literals. The literals
 * supply exactly 688 bytes, so the implicit terminating NUL is not stored.
 * NOTE(review): presumably a 48-byte index followed by ten 64-byte pages,
 * consumed by UTF8ToISO8859x to map UTF-8 input back to ISO-8859-14 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3688*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3689c2c66affSColin Finck "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691c2c66affSColin Finck "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3697c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3698c2c66affSColin Finck "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3699c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3704c2c66affSColin Finck "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3705c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3706c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3709c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3710c2c66affSColin Finck "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712c2c66affSColin Finck "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3719c2c66affSColin Finck "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3720c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3721c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723c2c66affSColin Finck "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3724c2c66affSColin Finck "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3726c2c66affSColin Finck "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3727c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728c2c66affSColin Finck "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3729c2c66affSColin Finck "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3730c2c66affSColin Finck "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3731c2c66affSColin Finck "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3732c2c66affSColin Finck };
3733c2c66affSColin Finck
/*
 * xmlunicodetable_ISO8859_15: 128 unsigned short entries. The first 32 are
 * 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-15
 * byte 0x80 + i, consumed by ISO8859xToUTF8 -- confirm against that helper.
 */
3734c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3735c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3736c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3737c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3738c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3739c2c66affSColin Finck 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3740c2c66affSColin Finck 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3741c2c66affSColin Finck 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3742c2c66affSColin Finck 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3743c2c66affSColin Finck 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3744c2c66affSColin Finck 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3745c2c66affSColin Finck 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3746c2c66affSColin Finck 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3747c2c66affSColin Finck 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3748c2c66affSColin Finck 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3749c2c66affSColin Finck 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3750c2c66affSColin Finck 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3751c2c66affSColin Finck };
3752c2c66affSColin Finck
/*
 * xmltranscodetable_ISO8859_15: byte table declared with 48 + 6 * 64 (= 432)
 * elements and filled from adjacent string literals. The literals supply
 * exactly 432 bytes, so the implicit terminating NUL is not stored.
 * NOTE(review): presumably a 48-byte index followed by six 64-byte pages,
 * consumed by UTF8ToISO8859x to map UTF-8 input back to ISO-8859-15 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3753*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3754c2c66affSColin Finck "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756c2c66affSColin Finck "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3757c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3760c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3761c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3762c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3763c2c66affSColin Finck "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3764c2c66affSColin Finck "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3765c2c66affSColin Finck "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3766c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3767c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3768c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3769c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3770c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3771c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3772c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3773c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3774c2c66affSColin Finck "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3775c2c66affSColin Finck "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3776c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3777c2c66affSColin Finck "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3778c2c66affSColin Finck "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3779c2c66affSColin Finck "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3780c2c66affSColin Finck "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3781c2c66affSColin Finck };
3782c2c66affSColin Finck
/*
 * xmlunicodetable_ISO8859_16: 128 unsigned short entries. The first 32 are
 * 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for ISO-8859-16
 * byte 0x80 + i, consumed by ISO8859xToUTF8 -- confirm against that helper.
 */
3783c2c66affSColin Finck static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3784c2c66affSColin Finck 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3785c2c66affSColin Finck 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3786c2c66affSColin Finck 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3787c2c66affSColin Finck 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3788c2c66affSColin Finck 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3789c2c66affSColin Finck 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3790c2c66affSColin Finck 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3791c2c66affSColin Finck 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3792c2c66affSColin Finck 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3793c2c66affSColin Finck 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3794c2c66affSColin Finck 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3795c2c66affSColin Finck 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3796c2c66affSColin Finck 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3797c2c66affSColin Finck 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3798c2c66affSColin Finck 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3799c2c66affSColin Finck 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3800c2c66affSColin Finck };
3801c2c66affSColin Finck
/*
 * xmltranscodetable_ISO8859_16: byte table declared with 48 + 9 * 64 (= 624)
 * elements and filled from adjacent string literals. The literals supply
 * exactly 624 bytes, so the implicit terminating NUL is not stored.
 * NOTE(review): presumably a 48-byte index followed by nine 64-byte pages,
 * consumed by UTF8ToISO8859x to map UTF-8 input back to ISO-8859-16 bytes,
 * with \x00 meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3802*911153daSThomas Faber static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3803c2c66affSColin Finck "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3804c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805c2c66affSColin Finck "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810c2c66affSColin Finck "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3811c2c66affSColin Finck "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3812c2c66affSColin Finck "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3813c2c66affSColin Finck "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3814c2c66affSColin Finck "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3815c2c66affSColin Finck "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3816c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3817c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3818c2c66affSColin Finck "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3819c2c66affSColin Finck "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3820c2c66affSColin Finck "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3821c2c66affSColin Finck "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3822c2c66affSColin Finck "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3823c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3824c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3825c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3826c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3829c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3832c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3833c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3834c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3835c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3836c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3837c2c66affSColin Finck "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3838c2c66affSColin Finck "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3839c2c66affSColin Finck "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3840c2c66affSColin Finck "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3841c2c66affSColin Finck "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3842c2c66affSColin Finck };
3843c2c66affSColin Finck
3844c2c66affSColin Finck
3845c2c66affSColin Finck /*
3846c2c66affSColin Finck * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3847c2c66affSColin Finck */
3848c2c66affSColin Finck
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3849c2c66affSColin Finck static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3850c2c66affSColin Finck const unsigned char* in, int *inlen) {
3851c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3852c2c66affSColin Finck }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3853c2c66affSColin Finck static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3854c2c66affSColin Finck const unsigned char* in, int *inlen) {
3855c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3856c2c66affSColin Finck }
3857c2c66affSColin Finck
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3858c2c66affSColin Finck static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3859c2c66affSColin Finck const unsigned char* in, int *inlen) {
3860c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3861c2c66affSColin Finck }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3862c2c66affSColin Finck static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3863c2c66affSColin Finck const unsigned char* in, int *inlen) {
3864c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3865c2c66affSColin Finck }
3866c2c66affSColin Finck
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3867c2c66affSColin Finck static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3868c2c66affSColin Finck const unsigned char* in, int *inlen) {
3869c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3870c2c66affSColin Finck }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3871c2c66affSColin Finck static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3872c2c66affSColin Finck const unsigned char* in, int *inlen) {
3873c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3874c2c66affSColin Finck }
3875c2c66affSColin Finck
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3876c2c66affSColin Finck static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3877c2c66affSColin Finck const unsigned char* in, int *inlen) {
3878c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3879c2c66affSColin Finck }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3880c2c66affSColin Finck static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3881c2c66affSColin Finck const unsigned char* in, int *inlen) {
3882c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3883c2c66affSColin Finck }
3884c2c66affSColin Finck
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3885c2c66affSColin Finck static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3886c2c66affSColin Finck const unsigned char* in, int *inlen) {
3887c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3888c2c66affSColin Finck }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3889c2c66affSColin Finck static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3890c2c66affSColin Finck const unsigned char* in, int *inlen) {
3891c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3892c2c66affSColin Finck }
3893c2c66affSColin Finck
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3894c2c66affSColin Finck static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3895c2c66affSColin Finck const unsigned char* in, int *inlen) {
3896c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3897c2c66affSColin Finck }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3898c2c66affSColin Finck static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3899c2c66affSColin Finck const unsigned char* in, int *inlen) {
3900c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3901c2c66affSColin Finck }
3902c2c66affSColin Finck
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3903c2c66affSColin Finck static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3904c2c66affSColin Finck const unsigned char* in, int *inlen) {
3905c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3906c2c66affSColin Finck }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3907c2c66affSColin Finck static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3908c2c66affSColin Finck const unsigned char* in, int *inlen) {
3909c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3910c2c66affSColin Finck }
3911c2c66affSColin Finck
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3912c2c66affSColin Finck static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3913c2c66affSColin Finck const unsigned char* in, int *inlen) {
3914c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3915c2c66affSColin Finck }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3916c2c66affSColin Finck static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3917c2c66affSColin Finck const unsigned char* in, int *inlen) {
3918c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3919c2c66affSColin Finck }
3920c2c66affSColin Finck
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3921c2c66affSColin Finck static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3922c2c66affSColin Finck const unsigned char* in, int *inlen) {
3923c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3924c2c66affSColin Finck }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3925c2c66affSColin Finck static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3926c2c66affSColin Finck const unsigned char* in, int *inlen) {
3927c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3928c2c66affSColin Finck }
3929c2c66affSColin Finck
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3930c2c66affSColin Finck static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3931c2c66affSColin Finck const unsigned char* in, int *inlen) {
3932c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3933c2c66affSColin Finck }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3934c2c66affSColin Finck static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3935c2c66affSColin Finck const unsigned char* in, int *inlen) {
3936c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3937c2c66affSColin Finck }
3938c2c66affSColin Finck
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3939c2c66affSColin Finck static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3940c2c66affSColin Finck const unsigned char* in, int *inlen) {
3941c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3942c2c66affSColin Finck }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3943c2c66affSColin Finck static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3944c2c66affSColin Finck const unsigned char* in, int *inlen) {
3945c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3946c2c66affSColin Finck }
3947c2c66affSColin Finck
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3948c2c66affSColin Finck static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3949c2c66affSColin Finck const unsigned char* in, int *inlen) {
3950c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3951c2c66affSColin Finck }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3952c2c66affSColin Finck static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3953c2c66affSColin Finck const unsigned char* in, int *inlen) {
3954c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3955c2c66affSColin Finck }
3956c2c66affSColin Finck
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3957c2c66affSColin Finck static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3958c2c66affSColin Finck const unsigned char* in, int *inlen) {
3959c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3960c2c66affSColin Finck }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3961c2c66affSColin Finck static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3962c2c66affSColin Finck const unsigned char* in, int *inlen) {
3963c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3964c2c66affSColin Finck }
3965c2c66affSColin Finck
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3966c2c66affSColin Finck static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3967c2c66affSColin Finck const unsigned char* in, int *inlen) {
3968c2c66affSColin Finck return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3969c2c66affSColin Finck }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3970c2c66affSColin Finck static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3971c2c66affSColin Finck const unsigned char* in, int *inlen) {
3972c2c66affSColin Finck return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3973c2c66affSColin Finck }
3974c2c66affSColin Finck
3975c2c66affSColin Finck static void
xmlRegisterCharEncodingHandlersISO8859x(void)3976c2c66affSColin Finck xmlRegisterCharEncodingHandlersISO8859x (void) {
3977c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3978c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3979c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3980c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3981c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3982c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3983c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3984c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3985c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3986c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3987c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3988c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3989c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3990c2c66affSColin Finck xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3991c2c66affSColin Finck }
3992c2c66affSColin Finck
3993c2c66affSColin Finck #endif
3994c2c66affSColin Finck #endif
3995c2c66affSColin Finck
3996