1 /* Copyright (C) 1999-2000 Free Software Foundation, Inc.
2    This file is part of the GNU ICONV Library.
3 
4    The GNU ICONV Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public License as
6    published by the Free Software Foundation; either version 2 of the
7    License, or (at your option) any later version.
8 
9    The GNU ICONV Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13 
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU ICONV Library; see the file COPYING.LIB.  If not,
16    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17    Boston, MA 02111-1307, USA.  */
18 
19 #include "common.h"
20 #include "iconv.h"
21 #include "libcharset.h"
22 
#if 0

/*
 * Select the system dependent encodings that the host platform needs.
 * This whole section is currently compiled out.
 */
#if defined(_AIX)
#define USE_AIX
#endif

#endif
34 
35 /*
36  * Data type for general conversion loop.
37  */
38 struct loop_funcs {
39   size_t (*loop_convert) (iconv_t icd,
40                           const char* * inbuf, size_t *inbytesleft,
41                           char* * outbuf, size_t *outbytesleft);
42   size_t (*loop_reset) (iconv_t icd,
43                         char* * outbuf, size_t *outbytesleft);
44 };
45 
46 /*
47  * Converters.
48  */
49 #include "converters.h"
50 
51 /*
52  * Transliteration tables.
53  */
54 #include "cjk_variants.h"
55 #include "translit.h"
56 
57 /*
58  * Table of all supported encodings.
59  */
60 struct encoding {
61   struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
62   struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
63   int oflags;                 /* flags for unicode -> multibyte conversion */
64 };
65 enum {
66 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
67   ei_##xxx ,
68 #include "encodings.def"
69 #ifdef USE_AIX
70 #include "encodings_aix.def"
71 #endif
72 #include "encodings_local.def"
73 #undef DEFENCODING
74 ei_for_broken_compilers_that_dont_like_trailing_commas
75 };
76 #include "flags.h"
77 static struct encoding const all_encodings[] = {
78 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
79   { xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
80 #include "encodings.def"
81 #ifdef USE_AIX
82 #include "encodings_aix.def"
83 #endif
84 #undef DEFENCODING
85 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
86   { xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, 0 },
87 #include "encodings_local.def"
88 #undef DEFENCODING
89 };
90 
91 /*
92  * Conversion loops.
93  */
94 #include "loops.h"
95 
96 /*
97  * Alias lookup function.
98  * Defines
99  *   struct alias { const char* name; unsigned int encoding_index; };
100  *   const struct alias * aliases_lookup (const char *str, unsigned int len);
101  *   #define MAX_WORD_LENGTH ...
102  */
103 #include "aliases.h"
104 
/*
 * System dependent alias lookup function.
 * Provides
 *   const struct alias * aliases2_lookup (const char *str);
 * On systems that need no extra aliases it is a macro yielding NULL.
 */
#if defined(USE_AIX) /* || ... */
/* Read-only table of the system dependent aliases. */
static const struct alias sysdep_aliases[] = {
#ifdef USE_AIX
#include "aliases_aix.h"
#endif
};
/*
 * Linear search of sysdep_aliases[] for an entry whose name compares
 * equal to str under strcmp().  Returns that entry, or NULL if there
 * is none.
 *
 * The function is static: it is only a file-local helper, and internal
 * linkage keeps the GCC __inline hint from leaving a call without an
 * out-of-line definition under C99 inline semantics.
 */
static
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  const struct alias * ptr = sysdep_aliases;
  unsigned int count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]);
  for (; count > 0; ptr++, count--)
    if (strcmp(str,ptr->name) == 0)
      return ptr;
  return NULL;
}
#else
#define aliases2_lookup(str)  NULL
#endif
132 
133 #if 0
/* Case-insensitive equality test for ASCII strings: returns 1 if str1 and
   str2 match, 0 otherwise. str1 is expected to be in uppercase already;
   only the characters of str2 are folded to uppercase. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char* p1 = (const unsigned char *) str1;
  const unsigned char* p2 = (const unsigned char *) str2;
  for (;; p1++, p2++) {
    unsigned char a = *p1;
    unsigned char b = *p2;
    /* End of str1: equal only if str2 ends at the same position. */
    if (a == 0)
      return (b == 0);
    if (b >= 'a' && b <= 'z')
      b -= 'a'-'A';
    if (a != b)
      return 0;
  }
}
152 #endif
153 
iconv_open(const char * tocode,const char * fromcode)154 iconv_t iconv_open (const char* tocode, const char* fromcode)
155 {
156   struct conv_struct * cd;
157   char buf[MAX_WORD_LENGTH+10+1];
158   const char* cp;
159   char* bp;
160   const struct alias * ap;
161   unsigned int count;
162   unsigned int from_index;
163   int from_wchar;
164   unsigned int to_index;
165   int to_wchar;
166   int transliterate = 0;
167 
168   /* Before calling aliases_lookup, convert the input string to upper case,
169    * and check whether it's entirely ASCII (we call gperf with option "-7"
170    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
171    * or if it's too long, it is not a valid encoding name.
172    */
173   for (to_wchar = 0;;) {
174     /* Search tocode in the table. */
175     for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
176       unsigned char c = * (unsigned char *) cp;
177       if (c >= 0x80)
178         goto invalid;
179       if (c >= 'a' && c <= 'z')
180         c -= 'a'-'A';
181       *bp = c;
182       if (c == '\0')
183         break;
184       if (--count == 0)
185         goto invalid;
186     }
187     if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
188       bp -= 10;
189       *bp = '\0';
190       transliterate = 1;
191     }
192     ap = aliases_lookup(buf,bp-buf);
193     if (ap == NULL) {
194       ap = aliases2_lookup(buf);
195       if (ap == NULL)
196         goto invalid;
197     }
198     if (ap->encoding_index == ei_local_char) {
199       tocode = locale_charset();
200       if (tocode != NULL)
201         continue;
202       goto invalid;
203     }
204     if (ap->encoding_index == ei_local_wchar_t) {
205 #if __STDC_ISO_10646__
206       if (sizeof(wchar_t) == 4) {
207         to_index = ei_ucs4internal;
208         break;
209       }
210       if (sizeof(wchar_t) == 2) {
211         to_index = ei_ucs2internal;
212         break;
213       }
214       if (sizeof(wchar_t) == 1) {
215         to_index = ei_iso8859_1;
216         break;
217       }
218 #endif
219 #if HAVE_MBRTOWC
220       to_wchar = 1;
221       tocode = locale_charset();
222       if (tocode != NULL)
223         continue;
224 #endif
225       goto invalid;
226     }
227     to_index = ap->encoding_index;
228     break;
229   }
230   for (from_wchar = 0;;) {
231     /* Search fromcode in the table. */
232     for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
233       unsigned char c = * (unsigned char *) cp;
234       if (c >= 0x80)
235         goto invalid;
236       if (c >= 'a' && c <= 'z')
237         c -= 'a'-'A';
238       *bp = c;
239       if (c == '\0')
240         break;
241       if (--count == 0)
242         goto invalid;
243     }
244     if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
245       bp -= 10;
246       *bp = '\0';
247     }
248     ap = aliases_lookup(buf,bp-buf);
249     if (ap == NULL) {
250       ap = aliases2_lookup(buf);
251       if (ap == NULL)
252         goto invalid;
253     }
254     if (ap->encoding_index == ei_local_char) {
255       fromcode = locale_charset();
256       if (fromcode != NULL)
257         continue;
258       goto invalid;
259     }
260     if (ap->encoding_index == ei_local_wchar_t) {
261 #if __STDC_ISO_10646__
262       if (sizeof(wchar_t) == 4) {
263         from_index = ei_ucs4internal;
264         break;
265       }
266       if (sizeof(wchar_t) == 2) {
267         from_index = ei_ucs2internal;
268         break;
269       }
270       if (sizeof(wchar_t) == 1) {
271         from_index = ei_iso8859_1;
272         break;
273       }
274 #endif
275 #if HAVE_WCRTOMB
276       from_wchar = 1;
277       fromcode = locale_charset();
278       if (fromcode != NULL)
279         continue;
280 #endif
281       goto invalid;
282     }
283     from_index = ap->encoding_index;
284     break;
285   }
286   cd = (struct conv_struct *) malloc(from_wchar != to_wchar
287                                      ? sizeof(struct wchar_conv_struct)
288                                      : sizeof(struct conv_struct));
289   if (cd == NULL) {
290     errno = ENOMEM;
291     return (iconv_t)(-1);
292   }
293   cd->iindex = from_index;
294   cd->ifuncs = all_encodings[from_index].ifuncs;
295   cd->oindex = to_index;
296   cd->ofuncs = all_encodings[to_index].ofuncs;
297   cd->oflags = all_encodings[to_index].oflags;
298   /* Initialize the loop functions. */
299 #if HAVE_MBRTOWC
300   if (to_wchar) {
301 #if HAVE_WCRTOMB
302     if (from_wchar) {
303       cd->lfuncs.loop_convert = wchar_id_loop_convert;
304       cd->lfuncs.loop_reset = wchar_id_loop_reset;
305     } else
306 #endif
307     {
308       cd->lfuncs.loop_convert = wchar_to_loop_convert;
309       cd->lfuncs.loop_reset = wchar_to_loop_reset;
310     }
311   } else
312 #endif
313   {
314 #if HAVE_WCRTOMB
315     if (from_wchar) {
316       cd->lfuncs.loop_convert = wchar_from_loop_convert;
317       cd->lfuncs.loop_reset = wchar_from_loop_reset;
318     } else
319 #endif
320     {
321       cd->lfuncs.loop_convert = unicode_loop_convert;
322       cd->lfuncs.loop_reset = unicode_loop_reset;
323     }
324   }
325   /* Initialize the states. */
326   memset(&cd->istate,'\0',sizeof(state_t));
327   memset(&cd->ostate,'\0',sizeof(state_t));
328   /* Initialize the operation flags. */
329   cd->transliterate = transliterate;
330   /* Initialize additional fields. */
331   if (from_wchar != to_wchar) {
332     struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd;
333     memset(&wcd->state,'\0',sizeof(mbstate_t));
334   }
335   /* Done. */
336   return (iconv_t)cd;
337 invalid:
338   errno = EINVAL;
339   return (iconv_t)(-1);
340 }
341 
/*
 * Runs the conversion loop of the descriptor icd.
 *
 * When inbuf or *inbuf is NULL the descriptor's loop_reset function is
 * called with the output arguments; otherwise its loop_convert function
 * is called with all arguments. The result of that call is returned
 * unchanged.
 *
 * If icd is (iconv_t)(-1), the value iconv_open() returns on failure,
 * (size_t)(-1) is returned with errno set to EBADF.
 */
size_t iconv (iconv_t icd,
              const char* * inbuf, size_t *inbytesleft,
              char* * outbuf, size_t *outbytesleft)
{
  conv_t cd = (conv_t) icd;
  if (icd == (iconv_t)(-1)) {
    errno = EBADF;
    return (size_t)(-1);
  }
  if (inbuf == NULL || *inbuf == NULL)
    return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
  return cd->lfuncs.loop_convert(icd,
                                 inbuf,inbytesleft,
                                 outbuf,outbytesleft);
}
354 
/*
 * Releases the conversion descriptor icd by freeing its memory.
 *
 * Returns 0 on success. If icd is (iconv_t)(-1), the value iconv_open()
 * returns on failure, nothing is freed and -1 is returned with errno set
 * to EBADF.
 */
int iconv_close (iconv_t icd)
{
  conv_t cd = (conv_t) icd;
  if (icd == (iconv_t)(-1)) {
    errno = EBADF;
    return -1;
  }
  free(cd);
  return 0;
}
361 
362 #ifndef LIBICONV_PLUG
363 
iconvctl(iconv_t icd,int request,void * argument)364 int iconvctl (iconv_t icd, int request, void* argument)
365 {
366   conv_t cd = (conv_t) icd;
367   switch (request) {
368     case ICONV_TRIVIALP:
369       *(int *)argument =
370         ((cd->lfuncs.loop_convert == unicode_loop_convert
371           && cd->iindex == cd->oindex)
372          || cd->lfuncs.loop_convert == wchar_id_loop_convert
373          ? 1 : 0);
374       return 0;
375     case ICONV_GET_TRANSLITERATE:
376       *(int *)argument = cd->transliterate;
377       return 0;
378     case ICONV_SET_TRANSLITERATE:
379       cd->transliterate = (*(const int *)argument ? 1 : 0);
380       return 0;
381     default:
382       errno = EINVAL;
383       return -1;
384   }
385 }
386 
387 int _libiconv_version = _LIBICONV_VERSION;
388 
389 #endif
390