1 /* Copyright (C) 1999-2000 Free Software Foundation, Inc.
2 This file is part of the GNU ICONV Library.
3
4 The GNU ICONV Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
8
9 The GNU ICONV Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU ICONV Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
18
19 #include "common.h"
20 #include "iconv.h"
21 #include "libcharset.h"
22
23 #if 0
24
25 /*
26 * Consider those system dependent encodings that are needed for the
27 * current system.
28 */
29 #ifdef _AIX
30 #define USE_AIX
31 #endif
32
33 #endif
34
35 /*
36 * Data type for general conversion loop.
37 */
38 struct loop_funcs {
39 size_t (*loop_convert) (iconv_t icd,
40 const char* * inbuf, size_t *inbytesleft,
41 char* * outbuf, size_t *outbytesleft);
42 size_t (*loop_reset) (iconv_t icd,
43 char* * outbuf, size_t *outbytesleft);
44 };
45
46 /*
47 * Converters.
48 */
49 #include "converters.h"
50
51 /*
52 * Transliteration tables.
53 */
54 #include "cjk_variants.h"
55 #include "translit.h"
56
57 /*
58 * Table of all supported encodings.
59 */
60 struct encoding {
61 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
62 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
63 int oflags; /* flags for unicode -> multibyte conversion */
64 };
65 enum {
66 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
67 ei_##xxx ,
68 #include "encodings.def"
69 #ifdef USE_AIX
70 #include "encodings_aix.def"
71 #endif
72 #include "encodings_local.def"
73 #undef DEFENCODING
74 ei_for_broken_compilers_that_dont_like_trailing_commas
75 };
76 #include "flags.h"
77 static struct encoding const all_encodings[] = {
78 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
79 { xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
80 #include "encodings.def"
81 #ifdef USE_AIX
82 #include "encodings_aix.def"
83 #endif
84 #undef DEFENCODING
85 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
86 { xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, 0 },
87 #include "encodings_local.def"
88 #undef DEFENCODING
89 };
90
91 /*
92 * Conversion loops.
93 */
94 #include "loops.h"
95
96 /*
97 * Alias lookup function.
98 * Defines
99 * struct alias { const char* name; unsigned int encoding_index; };
100 * const struct alias * aliases_lookup (const char *str, unsigned int len);
101 * #define MAX_WORD_LENGTH ...
102 */
103 #include "aliases.h"
104
105 /*
106 * System dependent alias lookup function.
107 * Defines
108 * const struct alias * aliases2_lookup (const char *str);
109 */
#if defined(USE_AIX) /* || ... */
/* Aliases that are only compiled in for particular systems.  The table is
   read-only, hence const. */
static const struct alias sysdep_aliases[] = {
#ifdef USE_AIX
#include "aliases_aix.h"
#endif
};
/*
 * Linear search of sysdep_aliases for an entry whose name is exactly STR.
 * The comparison is case sensitive; iconv_open() passes a name that it has
 * already converted to upper case.
 * Returns the matching entry, or NULL if there is none.
 *
 * The function is static: it is only needed in this file, and a non-static
 * __inline definition provides no external definition under C99 inline
 * semantics, which can leave calls unresolved at link time.
 */
static
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  const struct alias * ptr;
  unsigned int count;
  for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
    if (!strcmp(str,ptr->name))
      return ptr;
  return NULL;
}
#else
/* No system dependent aliases on this platform: every lookup fails. */
#define aliases2_lookup(str) NULL
#endif
132
133 #if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Only str2 is case-folded (a-z to A-Z) before each comparison, so a
   lowercase letter in str1 never matches.
   Returns 1 if the strings are equal under that rule, 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  /* Compare as unsigned bytes without casting away const. */
  const unsigned char *p1 = (const unsigned char *) str1;
  const unsigned char *p2 = (const unsigned char *) str2;
  for (;;) {
    unsigned char c1 = *p1++;
    unsigned char c2 = *p2++;
    if (c1 == 0)
      return (c2 == 0);   /* equal only if str2 ends here as well */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      return 0;
  }
}
152 #endif
153
iconv_open(const char * tocode,const char * fromcode)154 iconv_t iconv_open (const char* tocode, const char* fromcode)
155 {
156 struct conv_struct * cd;
157 char buf[MAX_WORD_LENGTH+10+1];
158 const char* cp;
159 char* bp;
160 const struct alias * ap;
161 unsigned int count;
162 unsigned int from_index;
163 int from_wchar;
164 unsigned int to_index;
165 int to_wchar;
166 int transliterate = 0;
167
168 /* Before calling aliases_lookup, convert the input string to upper case,
169 * and check whether it's entirely ASCII (we call gperf with option "-7"
170 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
171 * or if it's too long, it is not a valid encoding name.
172 */
173 for (to_wchar = 0;;) {
174 /* Search tocode in the table. */
175 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
176 unsigned char c = * (unsigned char *) cp;
177 if (c >= 0x80)
178 goto invalid;
179 if (c >= 'a' && c <= 'z')
180 c -= 'a'-'A';
181 *bp = c;
182 if (c == '\0')
183 break;
184 if (--count == 0)
185 goto invalid;
186 }
187 if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
188 bp -= 10;
189 *bp = '\0';
190 transliterate = 1;
191 }
192 ap = aliases_lookup(buf,bp-buf);
193 if (ap == NULL) {
194 ap = aliases2_lookup(buf);
195 if (ap == NULL)
196 goto invalid;
197 }
198 if (ap->encoding_index == ei_local_char) {
199 tocode = locale_charset();
200 if (tocode != NULL)
201 continue;
202 goto invalid;
203 }
204 if (ap->encoding_index == ei_local_wchar_t) {
205 #if __STDC_ISO_10646__
206 if (sizeof(wchar_t) == 4) {
207 to_index = ei_ucs4internal;
208 break;
209 }
210 if (sizeof(wchar_t) == 2) {
211 to_index = ei_ucs2internal;
212 break;
213 }
214 if (sizeof(wchar_t) == 1) {
215 to_index = ei_iso8859_1;
216 break;
217 }
218 #endif
219 #if HAVE_MBRTOWC
220 to_wchar = 1;
221 tocode = locale_charset();
222 if (tocode != NULL)
223 continue;
224 #endif
225 goto invalid;
226 }
227 to_index = ap->encoding_index;
228 break;
229 }
230 for (from_wchar = 0;;) {
231 /* Search fromcode in the table. */
232 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
233 unsigned char c = * (unsigned char *) cp;
234 if (c >= 0x80)
235 goto invalid;
236 if (c >= 'a' && c <= 'z')
237 c -= 'a'-'A';
238 *bp = c;
239 if (c == '\0')
240 break;
241 if (--count == 0)
242 goto invalid;
243 }
244 if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
245 bp -= 10;
246 *bp = '\0';
247 }
248 ap = aliases_lookup(buf,bp-buf);
249 if (ap == NULL) {
250 ap = aliases2_lookup(buf);
251 if (ap == NULL)
252 goto invalid;
253 }
254 if (ap->encoding_index == ei_local_char) {
255 fromcode = locale_charset();
256 if (fromcode != NULL)
257 continue;
258 goto invalid;
259 }
260 if (ap->encoding_index == ei_local_wchar_t) {
261 #if __STDC_ISO_10646__
262 if (sizeof(wchar_t) == 4) {
263 from_index = ei_ucs4internal;
264 break;
265 }
266 if (sizeof(wchar_t) == 2) {
267 from_index = ei_ucs2internal;
268 break;
269 }
270 if (sizeof(wchar_t) == 1) {
271 from_index = ei_iso8859_1;
272 break;
273 }
274 #endif
275 #if HAVE_WCRTOMB
276 from_wchar = 1;
277 fromcode = locale_charset();
278 if (fromcode != NULL)
279 continue;
280 #endif
281 goto invalid;
282 }
283 from_index = ap->encoding_index;
284 break;
285 }
286 cd = (struct conv_struct *) malloc(from_wchar != to_wchar
287 ? sizeof(struct wchar_conv_struct)
288 : sizeof(struct conv_struct));
289 if (cd == NULL) {
290 errno = ENOMEM;
291 return (iconv_t)(-1);
292 }
293 cd->iindex = from_index;
294 cd->ifuncs = all_encodings[from_index].ifuncs;
295 cd->oindex = to_index;
296 cd->ofuncs = all_encodings[to_index].ofuncs;
297 cd->oflags = all_encodings[to_index].oflags;
298 /* Initialize the loop functions. */
299 #if HAVE_MBRTOWC
300 if (to_wchar) {
301 #if HAVE_WCRTOMB
302 if (from_wchar) {
303 cd->lfuncs.loop_convert = wchar_id_loop_convert;
304 cd->lfuncs.loop_reset = wchar_id_loop_reset;
305 } else
306 #endif
307 {
308 cd->lfuncs.loop_convert = wchar_to_loop_convert;
309 cd->lfuncs.loop_reset = wchar_to_loop_reset;
310 }
311 } else
312 #endif
313 {
314 #if HAVE_WCRTOMB
315 if (from_wchar) {
316 cd->lfuncs.loop_convert = wchar_from_loop_convert;
317 cd->lfuncs.loop_reset = wchar_from_loop_reset;
318 } else
319 #endif
320 {
321 cd->lfuncs.loop_convert = unicode_loop_convert;
322 cd->lfuncs.loop_reset = unicode_loop_reset;
323 }
324 }
325 /* Initialize the states. */
326 memset(&cd->istate,'\0',sizeof(state_t));
327 memset(&cd->ostate,'\0',sizeof(state_t));
328 /* Initialize the operation flags. */
329 cd->transliterate = transliterate;
330 /* Initialize additional fields. */
331 if (from_wchar != to_wchar) {
332 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd;
333 memset(&wcd->state,'\0',sizeof(mbstate_t));
334 }
335 /* Done. */
336 return (iconv_t)cd;
337 invalid:
338 errno = EINVAL;
339 return (iconv_t)(-1);
340 }
341
/*
 * Runs the conversion described by ICD.
 *
 * When INBUF or *INBUF is NULL the descriptor's loop_reset function is
 * called with the output arguments; otherwise its loop_convert function is
 * called with all arguments.  The return value is whatever that function
 * returns.
 *
 * If ICD is (iconv_t)(-1), the value iconv_open() returns on failure, the
 * call fails with (size_t)(-1) and errno set to EBADF instead of
 * dereferencing the invalid descriptor.
 */
size_t iconv (iconv_t icd,
              const char* * inbuf, size_t *inbytesleft,
              char* * outbuf, size_t *outbytesleft)
{
  conv_t cd = (conv_t) icd;
  if (icd == (iconv_t)(-1)) {
    errno = EBADF;
    return (size_t)(-1);
  }
  if (inbuf == NULL || *inbuf == NULL)
    return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
  return cd->lfuncs.loop_convert(icd,
                                 inbuf,inbytesleft,
                                 outbuf,outbytesleft);
}
354
/*
 * Releases the conversion descriptor ICD, which was allocated with malloc()
 * by iconv_open().  Returns 0 on success.
 *
 * If ICD is (iconv_t)(-1), the value iconv_open() returns on failure, the
 * call fails with -1 and errno set to EBADF instead of passing the invalid
 * pointer to free().
 */
int iconv_close (iconv_t icd)
{
  conv_t cd = (conv_t) icd;
  if (icd == (iconv_t)(-1)) {
    errno = EBADF;
    return -1;
  }
  free(cd);
  return 0;
}
361
362 #ifndef LIBICONV_PLUG
363
iconvctl(iconv_t icd,int request,void * argument)364 int iconvctl (iconv_t icd, int request, void* argument)
365 {
366 conv_t cd = (conv_t) icd;
367 switch (request) {
368 case ICONV_TRIVIALP:
369 *(int *)argument =
370 ((cd->lfuncs.loop_convert == unicode_loop_convert
371 && cd->iindex == cd->oindex)
372 || cd->lfuncs.loop_convert == wchar_id_loop_convert
373 ? 1 : 0);
374 return 0;
375 case ICONV_GET_TRANSLITERATE:
376 *(int *)argument = cd->transliterate;
377 return 0;
378 case ICONV_SET_TRANSLITERATE:
379 cd->transliterate = (*(const int *)argument ? 1 : 0);
380 return 0;
381 default:
382 errno = EINVAL;
383 return -1;
384 }
385 }
386
387 int _libiconv_version = _LIBICONV_VERSION;
388
389 #endif
390