1 /*
2  * Copyright (C) 1999-2008, 2011, 2016, 2018 Free Software Foundation, Inc.
3  * This file is part of the GNU LIBICONV Library.
4  *
5  * The GNU LIBICONV Library is free software; you can redistribute it
6  * and/or modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * The GNU LIBICONV Library is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17  * If not, see <https://www.gnu.org/licenses/>.
18  */
19 
20 #include <iconv.h>
21 
22 #include <limits.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include "config.h"
26 #include "localcharset.h"
27 
28 #ifdef __CYGWIN__
29 #include <cygwin/version.h>
30 #endif
31 
/*
 * Select which groups of system dependent encodings get compiled in.
 * Each USE_xxx macro defined here gates the matching encodings_xxx.def
 * and canonical_xxx.h includes further down in this file.
 */
#if ENABLE_EXTRA
/*
 * Extra encodings requested: take every system dependent group,
 * regardless of the host system, plus the extra encodings.
 */
# define USE_AIX
# define USE_OSF1
# define USE_DOS
# define USE_EXTRA
#else
/*
 * Otherwise take only the groups that the current system needs.
 */
# if defined(_AIX)
#  define USE_AIX
# endif
# if defined(__osf__) || defined(VMS)
#  define USE_OSF1
# endif
# if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#  define USE_DOS
# endif
#endif
56 
/*
 * Entry points of a general conversion loop.
 *   loop_convert  takes an input buffer/length pair and an output
 *                 buffer/length pair.
 *   loop_reset    takes only the output buffer/length pair.
 * iconv() dispatches to one of the two, depending on whether it was
 * given an input buffer.
 */
struct loop_funcs {
  size_t (*loop_convert) (iconv_t icd,
                          const char **inbuf,
                          size_t *inbytesleft,
                          char **outbuf,
                          size_t *outbytesleft);
  size_t (*loop_reset) (iconv_t icd,
                        char **outbuf,
                        size_t *outbytesleft);
};
67 
68 /*
69  * Converters.
70  */
71 #include "converters.h"
72 
73 /*
74  * Transliteration tables.
75  */
76 #include "cjk_variants.h"
77 #include "translit.h"
78 
79 /*
80  * Table of all supported encodings.
81  */
82 struct encoding {
83   struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
84   struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
85   int oflags;                 /* flags for unicode -> multibyte conversion */
86 };
87 #define DEFALIAS(xxx_alias,xxx) /* nothing */
88 enum {
89 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
90   ei_##xxx ,
91 #include "encodings.def"
92 #ifdef USE_AIX
93 # include "encodings_aix.def"
94 #endif
95 #ifdef USE_OSF1
96 # include "encodings_osf1.def"
97 #endif
98 #ifdef USE_DOS
99 # include "encodings_dos.def"
100 #endif
101 #ifdef USE_EXTRA
102 # include "encodings_extra.def"
103 #endif
104 #include "encodings_local.def"
105 #undef DEFENCODING
106 ei_for_broken_compilers_that_dont_like_trailing_commas
107 };
108 #include "flags.h"
109 static struct encoding const all_encodings[] = {
110 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
111   { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
112 #include "encodings.def"
113 #ifdef USE_AIX
114 # include "encodings_aix.def"
115 #endif
116 #ifdef USE_OSF1
117 # include "encodings_osf1.def"
118 #endif
119 #ifdef USE_DOS
120 # include "encodings_dos.def"
121 #endif
122 #ifdef USE_EXTRA
123 # include "encodings_extra.def"
124 #endif
125 #undef DEFENCODING
126 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
127   { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
128 #include "encodings_local.def"
129 #undef DEFENCODING
130 };
131 #undef DEFALIAS
132 
133 /*
134  * Conversion loops.
135  */
136 #include "loops.h"
137 
138 /*
139  * Alias lookup function.
140  * Defines
141  *   struct alias { int name; unsigned int encoding_index; };
142  *   const struct alias * aliases_lookup (const char *str, unsigned int len);
143  *   #define MAX_WORD_LENGTH ...
144  */
145 #if defined _AIX
146 # include "aliases_sysaix.h"
147 #elif defined hpux || defined __hpux
148 # include "aliases_syshpux.h"
149 #elif defined __osf__
150 # include "aliases_sysosf1.h"
151 #elif defined __sun
152 # include "aliases_syssolaris.h"
153 #else
154 # include "aliases.h"
155 #endif
156 
/*
 * System dependent alias lookup function.
 * Defines
 *   const struct alias * aliases2_lookup (const char *str);
 * and stringpool2, the string pool that the returned alias's 'name'
 * offset refers to.  When no system dependent encodings are compiled in,
 * both are macros expanding to NULL.
 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
/* Layout of the pool: one char array per alias, sized to hold its name. */
struct stringpool2_t {
#define S(tag,text,index) char stringpool_##tag[sizeof(text)];
#include "aliases2.h"
#undef S
};
/* The pool contents: the alias names, in the order of aliases2.h. */
static const struct stringpool2_t stringpool2_contents = {
#define S(tag,text,index) text,
#include "aliases2.h"
#undef S
};
#define stringpool2 ((const char *) &stringpool2_contents)
/* Alias table: offset of each name inside the pool, plus its encoding. */
static const struct alias sysdep_aliases[] = {
#define S(tag,text,index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, index },
#include "aliases2.h"
#undef S
};
#if defined(__GNUC__)
__inline
#elif defined(__cplusplus)
inline
#endif
static const struct alias *
aliases2_lookup (register const char *str)
{
  /* Linear scan with an exact, case-sensitive comparison. */
  const unsigned int total = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]);
  unsigned int i;
  for (i = 0; i < total; i++) {
    const struct alias * candidate = &sysdep_aliases[i];
    if (strcmp(str, stringpool2 + candidate->name) == 0)
      return candidate;
  }
  return NULL;
}
#else
#define aliases2_lookup(str)  NULL
#define stringpool2  NULL
#endif
200 
201 #if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Only the second string is case-folded before comparing.
   Returns 1 if the strings match, 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char *upper = (const unsigned char *) str1;
  const unsigned char *other = (const unsigned char *) str2;
  for (;; upper++, other++) {
    unsigned char want = *upper;
    unsigned char got = *other;
    /* End of the first string: equal only if the second ends here too. */
    if (want == '\0')
      return got == '\0';
    if (got >= 'a' && got <= 'z')
      got -= 'a'-'A';
    if (want != got)
      return 0;
  }
}
220 #endif
221 
iconv_open(const char * tocode,const char * fromcode)222 iconv_t iconv_open (const char* tocode, const char* fromcode)
223 {
224   struct conv_struct * cd;
225   unsigned int from_index;
226   int from_wchar;
227   unsigned int to_index;
228   int to_wchar;
229   int transliterate;
230   int discard_ilseq;
231 
232 #include "iconv_open1.h"
233 
234   cd = (struct conv_struct *) malloc(from_wchar != to_wchar
235                                      ? sizeof(struct wchar_conv_struct)
236                                      : sizeof(struct conv_struct));
237   if (cd == NULL) {
238     errno = ENOMEM;
239     return (iconv_t)(-1);
240   }
241 
242 #include "iconv_open2.h"
243 
244   return (iconv_t)cd;
245 invalid:
246   errno = EINVAL;
247   return (iconv_t)(-1);
248 }
249 
/*
 * Runs one conversion step on the descriptor 'icd'.
 * When 'inbuf' is NULL or points to a NULL pointer, the descriptor's
 * loop_reset function is called with the output arguments only.
 * Otherwise its loop_convert function is called with all arguments.
 * The return value is whatever the called function returns.
 */
size_t iconv (iconv_t icd,
              ICONV_CONST char* * inbuf, size_t *inbytesleft,
              char* * outbuf, size_t *outbytesleft)
{
  conv_t conv = (conv_t) icd;
  int have_input = (inbuf != NULL && *inbuf != NULL);

  if (!have_input)
    return conv->lfuncs.loop_reset(icd, outbuf, outbytesleft);

  return conv->lfuncs.loop_convert(icd,
                                   (const char* *) inbuf, inbytesleft,
                                   outbuf, outbytesleft);
}
262 
iconv_close(iconv_t icd)263 int iconv_close (iconv_t icd)
264 {
265   conv_t cd = (conv_t) icd;
266   free(cd);
267   return 0;
268 }
269 
270 #ifndef LIBICONV_PLUG
271 
272 /*
273  * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
274  * fit in an iconv_allocation_t.
275  * If this verification fails, iconv_allocation_t must be made larger and
276  * the major version in LIBICONV_VERSION_INFO must be bumped.
277  * Currently 'struct conv_struct' has 21 integer/pointer fields, and
278  * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
279  */
280 typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
281 typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
282 
iconv_open_into(const char * tocode,const char * fromcode,iconv_allocation_t * resultp)283 int iconv_open_into (const char* tocode, const char* fromcode,
284                      iconv_allocation_t* resultp)
285 {
286   struct conv_struct * cd;
287   unsigned int from_index;
288   int from_wchar;
289   unsigned int to_index;
290   int to_wchar;
291   int transliterate;
292   int discard_ilseq;
293 
294 #include "iconv_open1.h"
295 
296   cd = (struct conv_struct *) resultp;
297 
298 #include "iconv_open2.h"
299 
300   return 0;
301 invalid:
302   errno = EINVAL;
303   return -1;
304 }
305 
iconvctl(iconv_t icd,int request,void * argument)306 int iconvctl (iconv_t icd, int request, void* argument)
307 {
308   conv_t cd = (conv_t) icd;
309   switch (request) {
310     case ICONV_TRIVIALP:
311       *(int *)argument =
312         ((cd->lfuncs.loop_convert == unicode_loop_convert
313           && cd->iindex == cd->oindex)
314          || cd->lfuncs.loop_convert == wchar_id_loop_convert
315          ? 1 : 0);
316       return 0;
317     case ICONV_GET_TRANSLITERATE:
318       *(int *)argument = cd->transliterate;
319       return 0;
320     case ICONV_SET_TRANSLITERATE:
321       cd->transliterate = (*(const int *)argument ? 1 : 0);
322       return 0;
323     case ICONV_GET_DISCARD_ILSEQ:
324       *(int *)argument = cd->discard_ilseq;
325       return 0;
326     case ICONV_SET_DISCARD_ILSEQ:
327       cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
328       return 0;
329     case ICONV_SET_HOOKS:
330       if (argument != NULL) {
331         cd->hooks = *(const struct iconv_hooks *)argument;
332       } else {
333         cd->hooks.uc_hook = NULL;
334         cd->hooks.wc_hook = NULL;
335         cd->hooks.data = NULL;
336       }
337       return 0;
338     case ICONV_SET_FALLBACKS:
339       if (argument != NULL) {
340         cd->fallbacks = *(const struct iconv_fallbacks *)argument;
341       } else {
342         cd->fallbacks.mb_to_uc_fallback = NULL;
343         cd->fallbacks.uc_to_mb_fallback = NULL;
344         cd->fallbacks.mb_to_wc_fallback = NULL;
345         cd->fallbacks.wc_to_mb_fallback = NULL;
346         cd->fallbacks.data = NULL;
347       }
348       return 0;
349     default:
350       errno = EINVAL;
351       return -1;
352   }
353 }
354 
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort comparison function: orders 'struct nalias' elements by ascending
 * encoding_index.
 * Returns -1, 0 or 1.  The indices are compared as the unsigned values
 * they are, rather than subtracted after a cast to int: the subtraction
 * would report the wrong sign, or overflow, for indices that do not fit
 * in a non-negative int.
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  unsigned int index1 = ((const struct nalias *) arg1)->encoding_index;
  unsigned int index2 = ((const struct nalias *) arg2)->encoding_index;
  return (index1 > index2) - (index1 < index2);
}
364 
/*
 * qsort comparison function for an array of 'const char *' names.
 * Names sort alphabetically (by strcmp), except that names starting with
 * "CS" are placed after all other names.
 * Returns 0 for equal names.  Otherwise the result is +-1 from the
 * alphabetical order, plus +4 or -4 when exactly one of the two names
 * starts with "CS", so that the "CS" rule always decides the sign.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * lhs = *(const char * const *) arg1;
  const char * rhs = *(const char * const *) arg2;
  int order = strcmp(lhs, rhs);
  int lhs_is_cs;
  int rhs_is_cs;

  if (order == 0)
    return 0;

  lhs_is_cs = (lhs[0] == 'C' && lhs[1] == 'S');
  rhs_is_cs = (rhs[0] == 'C' && rhs[1] == 'S');
  return (lhs_is_cs - rhs_is_cs) * 4 + (order > 0 ? 1 : -1);
}
377 
iconvlist(int (* do_one)(unsigned int namescount,const char * const * names,void * data),void * data)378 void iconvlist (int (*do_one) (unsigned int namescount,
379                                const char * const * names,
380                                void* data),
381                 void* data)
382 {
383 #define aliascount1  sizeof(aliases)/sizeof(aliases[0])
384 #ifndef aliases2_lookup
385 #define aliascount2  sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
386 #else
387 #define aliascount2  0
388 #endif
389 #define aliascount  (aliascount1+aliascount2)
390   struct nalias aliasbuf[aliascount];
391   const char * namesbuf[aliascount];
392   size_t num_aliases;
393   {
394     /* Put all existing aliases into a buffer. */
395     size_t i;
396     size_t j;
397     j = 0;
398     for (i = 0; i < aliascount1; i++) {
399       const struct alias * p = &aliases[i];
400       if (p->name >= 0
401           && p->encoding_index != ei_local_char
402           && p->encoding_index != ei_local_wchar_t) {
403         aliasbuf[j].name = stringpool + p->name;
404         aliasbuf[j].encoding_index = p->encoding_index;
405         j++;
406       }
407     }
408 #ifndef aliases2_lookup
409     for (i = 0; i < aliascount2; i++) {
410       aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
411       aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
412       j++;
413     }
414 #endif
415     num_aliases = j;
416   }
417   /* Sort by encoding_index. */
418   if (num_aliases > 1)
419     qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
420   {
421     /* Process all aliases with the same encoding_index together. */
422     size_t j;
423     j = 0;
424     while (j < num_aliases) {
425       unsigned int ei = aliasbuf[j].encoding_index;
426       size_t i = 0;
427       do
428         namesbuf[i++] = aliasbuf[j++].name;
429       while (j < num_aliases && aliasbuf[j].encoding_index == ei);
430       if (i > 1)
431         qsort(namesbuf, i, sizeof(const char *), compare_by_name);
432       /* Call the callback. */
433       if (do_one(i,namesbuf,data))
434         break;
435     }
436   }
437 #undef aliascount
438 #undef aliascount2
439 #undef aliascount1
440 }
441 
442 /*
443  * Table of canonical names of encodings.
444  * Instead of strings, it contains offsets into stringpool and stringpool2.
445  */
446 static const unsigned short all_canonical[] = {
447 #if defined _AIX
448 # include "canonical_sysaix.h"
449 #elif defined hpux || defined __hpux
450 # include "canonical_syshpux.h"
451 #elif defined __osf__
452 # include "canonical_sysosf1.h"
453 #elif defined __sun
454 # include "canonical_syssolaris.h"
455 #else
456 # include "canonical.h"
457 #endif
458 #ifdef USE_AIX
459 # if defined _AIX
460 #  include "canonical_aix_sysaix.h"
461 # else
462 #  include "canonical_aix.h"
463 # endif
464 #endif
465 #ifdef USE_OSF1
466 # if defined __osf__
467 #  include "canonical_osf1_sysosf1.h"
468 # else
469 #  include "canonical_osf1.h"
470 # endif
471 #endif
472 #ifdef USE_DOS
473 # include "canonical_dos.h"
474 #endif
475 #ifdef USE_EXTRA
476 # include "canonical_extra.h"
477 #endif
478 #if defined _AIX
479 # include "canonical_local_sysaix.h"
480 #elif defined hpux || defined __hpux
481 # include "canonical_local_syshpux.h"
482 #elif defined __osf__
483 # include "canonical_local_sysosf1.h"
484 #elif defined __sun
485 # include "canonical_local_syssolaris.h"
486 #else
487 # include "canonical_local.h"
488 #endif
489 };
490 
iconv_canonicalize(const char * name)491 const char * iconv_canonicalize (const char * name)
492 {
493   const char* code;
494   char buf[MAX_WORD_LENGTH+10+1];
495   const char* cp;
496   char* bp;
497   const struct alias * ap;
498   unsigned int count;
499   unsigned int index;
500   const char* pool;
501 
502   /* Before calling aliases_lookup, convert the input string to upper case,
503    * and check whether it's entirely ASCII (we call gperf with option "-7"
504    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
505    * or if it's too long, it is not a valid encoding name.
506    */
507   for (code = name;;) {
508     /* Search code in the table. */
509     for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
510       unsigned char c = * (unsigned char *) cp;
511       if (c >= 0x80)
512         goto invalid;
513       if (c >= 'a' && c <= 'z')
514         c -= 'a'-'A';
515       *bp = c;
516       if (c == '\0')
517         break;
518       if (--count == 0)
519         goto invalid;
520     }
521     for (;;) {
522       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
523         bp -= 10;
524         *bp = '\0';
525         continue;
526       }
527       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
528         bp -= 8;
529         *bp = '\0';
530         continue;
531       }
532       break;
533     }
534     if (buf[0] == '\0') {
535       code = locale_charset();
536       /* Avoid an endless loop that could occur when using an older version
537          of localcharset.c. */
538       if (code[0] == '\0')
539         goto invalid;
540       continue;
541     }
542     pool = stringpool;
543     ap = aliases_lookup(buf,bp-buf);
544     if (ap == NULL) {
545       pool = stringpool2;
546       ap = aliases2_lookup(buf);
547       if (ap == NULL)
548         goto invalid;
549     }
550     if (ap->encoding_index == ei_local_char) {
551       code = locale_charset();
552       /* Avoid an endless loop that could occur when using an older version
553          of localcharset.c. */
554       if (code[0] == '\0')
555         goto invalid;
556       continue;
557     }
558     if (ap->encoding_index == ei_local_wchar_t) {
559       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
560          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
561          we know that it is UTF-16.  */
562 #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
563       if (sizeof(wchar_t) == 4) {
564         index = ei_ucs4internal;
565         break;
566       }
567       if (sizeof(wchar_t) == 2) {
568 # if WORDS_LITTLEENDIAN
569         index = ei_utf16le;
570 # else
571         index = ei_utf16be;
572 # endif
573         break;
574       }
575 #elif __STDC_ISO_10646__
576       if (sizeof(wchar_t) == 4) {
577         index = ei_ucs4internal;
578         break;
579       }
580       if (sizeof(wchar_t) == 2) {
581         index = ei_ucs2internal;
582         break;
583       }
584       if (sizeof(wchar_t) == 1) {
585         index = ei_iso8859_1;
586         break;
587       }
588 #endif
589     }
590     index = ap->encoding_index;
591     break;
592   }
593   return all_canonical[index] + pool;
594  invalid:
595   return name;
596 }
597 
598 int _libiconv_version = _LIBICONV_VERSION;
599 
#if defined __FreeBSD__ && !defined __gnu_freebsd__
/* GNU libiconv is the native FreeBSD iconv implementation since 2002.
   It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'.
   Each of them is declared as an alias of the corresponding libiconv*
   function.  strong_alias goes through _strong_alias so that its
   arguments are macro-expanded before 'name' is turned into a string.  */
# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
# define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
# undef iconv_open
# undef iconv
# undef iconv_close
strong_alias (libiconv_open, iconv_open)
strong_alias (libiconv, iconv)
strong_alias (libiconv_close, iconv_close)
#endif
613 
614 #endif
615