1 /* Determine a canonical name for the current locale's character encoding.
2 
3    Copyright (C) 2000-2006, 2008-2017 Free Software Foundation, Inc.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU Lesser General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public License along
16    with this program; if not, see <http://www.gnu.org/licenses/>.  */
17 
18 /* Written by Bruno Haible <bruno@clisp.org>.  */
19 
20 #include <config.h>
21 
22 /* Specification.  */
23 #include "localcharset.h"
24 
25 #include <fcntl.h>
26 #include <stddef.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <stdlib.h>
30 
31 #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
32 # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
33 #endif
34 
35 #if defined _WIN32 || defined __WIN32__
36 # define WINDOWS_NATIVE
37 # include <locale.h>
38 #endif
39 
40 #if defined __EMX__
41 /* Assume EMX program runs on OS/2, even if compiled under DOS.  */
42 # ifndef OS2
43 #  define OS2
44 # endif
45 #endif
46 
47 #if !defined WINDOWS_NATIVE
48 # include <unistd.h>
49 # if HAVE_LANGINFO_CODESET
50 #  include <langinfo.h>
51 # else
52 #  if 0 /* see comment below */
53 #   include <locale.h>
54 #  endif
55 # endif
56 # ifdef __CYGWIN__
57 #  define WIN32_LEAN_AND_MEAN
58 #  include <windows.h>
59 # endif
60 #elif defined WINDOWS_NATIVE
61 # define WIN32_LEAN_AND_MEAN
62 # include <windows.h>
63 #endif
64 #if defined OS2
65 # define INCL_DOS
66 # include <os2.h>
67 #endif
68 
69 /* For MB_CUR_MAX_L */
70 #if defined DARWIN7
71 # include <xlocale.h>
72 #endif
73 
74 #if ENABLE_RELOCATABLE
75 # include "relocatable.h"
76 #else
77 # define relocate(pathname) (pathname)
78 #endif
79 
80 /* Get LIBDIR.  */
81 #ifndef LIBDIR
82 # include "configmake.h"
83 #endif
84 
85 /* Define O_NOFOLLOW to 0 on platforms where it does not exist.  */
86 #ifndef O_NOFOLLOW
87 # define O_NOFOLLOW 0
88 #endif
89 
90 #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
91   /* Native Windows, Cygwin, OS/2, DOS */
92 # define ISSLASH(C) ((C) == '/' || (C) == '\\')
93 #endif
94 
95 #ifndef DIRECTORY_SEPARATOR
96 # define DIRECTORY_SEPARATOR '/'
97 #endif
98 
99 #ifndef ISSLASH
100 # define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
101 #endif
102 
103 #if HAVE_DECL_GETC_UNLOCKED
104 # undef getc
105 # define getc getc_unlocked
106 #endif
107 
/* The following static variable is the cache used by the function
   get_charset_aliases.  It is declared 'volatile' to avoid a possible
   multithread problem in that function.  If we are running in a threaded
   environment, and if two threads initialize 'charset_aliases'
   simultaneously, both will compute a table with the same contents, and
   everything will be ok if the two assignments to 'charset_aliases' are
   atomic.  What happens if the two assignments interleave is unknown.  */
/* When __STDC__ is not 1, the 'volatile' keyword is defined away
   (presumably because such compilers may not accept it -- TODO confirm).  */
#if __STDC__ != 1
# define volatile /* empty */
#endif
/* Pointer to the contents of the charset.alias file, if it has already been
   read, else NULL.  Its format is a sequence of NUL-terminated
   (alias, canonical name) pairs, closed by an empty string:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
static const char * volatile charset_aliases;
121 
122 /* Return a pointer to the contents of the charset.alias file.  */
123 static const char *
get_charset_aliases(void)124 get_charset_aliases (void)
125 {
126   const char *cp;
127 
128   cp = charset_aliases;
129   if (cp == NULL)
130     {
131 #if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2)
132       const char *dir;
133       const char *base = "charset.alias";
134       char *file_name;
135 
136       /* Make it possible to override the charset.alias location.  This is
137          necessary for running the testsuite before "make install".  */
138       dir = getenv ("CHARSETALIASDIR");
139       if (dir == NULL || dir[0] == '\0')
140         dir = relocate (LIBDIR);
141 
142       /* Concatenate dir and base into freshly allocated file_name.  */
143       {
144         size_t dir_len = strlen (dir);
145         size_t base_len = strlen (base);
146         int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
147         file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
148         if (file_name != NULL)
149           {
150             memcpy (file_name, dir, dir_len);
151             if (add_slash)
152               file_name[dir_len] = DIRECTORY_SEPARATOR;
153             memcpy (file_name + dir_len + add_slash, base, base_len + 1);
154           }
155       }
156 
157       if (file_name == NULL)
158         /* Out of memory.  Treat the file as empty.  */
159         cp = "";
160       else
161         {
162           int fd;
163 
164           /* Open the file.  Reject symbolic links on platforms that support
165              O_NOFOLLOW.  This is a security feature.  Without it, an attacker
166              could retrieve parts of the contents (namely, the tail of the
167              first line that starts with "* ") of an arbitrary file by placing
168              a symbolic link to that file under the name "charset.alias" in
169              some writable directory and defining the environment variable
170              CHARSETALIASDIR to point to that directory.  */
171           fd = open (file_name,
172                      O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0));
173           if (fd < 0)
174             /* File not found.  Treat it as empty.  */
175             cp = "";
176           else
177             {
178               FILE *fp;
179 
180               fp = fdopen (fd, "r");
181               if (fp == NULL)
182                 {
183                   /* Out of memory.  Treat the file as empty.  */
184                   close (fd);
185                   cp = "";
186                 }
187               else
188                 {
189                   /* Parse the file's contents.  */
190                   char *res_ptr = NULL;
191                   size_t res_size = 0;
192 
193                   for (;;)
194                     {
195                       int c;
196                       char buf1[50+1];
197                       char buf2[50+1];
198                       size_t l1, l2;
199                       char *old_res_ptr;
200 
201                       c = getc (fp);
202                       if (c == EOF)
203                         break;
204                       if (c == '\n' || c == ' ' || c == '\t')
205                         continue;
206                       if (c == '#')
207                         {
208                           /* Skip comment, to end of line.  */
209                           do
210                             c = getc (fp);
211                           while (!(c == EOF || c == '\n'));
212                           if (c == EOF)
213                             break;
214                           continue;
215                         }
216                       ungetc (c, fp);
217                       if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
218                         break;
219                       l1 = strlen (buf1);
220                       l2 = strlen (buf2);
221                       old_res_ptr = res_ptr;
222                       if (res_size == 0)
223                         {
224                           res_size = l1 + 1 + l2 + 1;
225                           res_ptr = (char *) malloc (res_size + 1);
226                         }
227                       else
228                         {
229                           res_size += l1 + 1 + l2 + 1;
230                           res_ptr = (char *) realloc (res_ptr, res_size + 1);
231                         }
232                       if (res_ptr == NULL)
233                         {
234                           /* Out of memory. */
235                           res_size = 0;
236                           free (old_res_ptr);
237                           break;
238                         }
239                       strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
240                       strcpy (res_ptr + res_size - (l2 + 1), buf2);
241                     }
242                   fclose (fp);
243                   if (res_size == 0)
244                     cp = "";
245                   else
246                     {
247                       *(res_ptr + res_size) = '\0';
248                       cp = res_ptr;
249                     }
250                 }
251             }
252 
253           free (file_name);
254         }
255 
256 #else
257 
258 # if defined DARWIN7
259       /* To avoid the trouble of installing a file that is shared by many
260          GNU packages -- many packaging systems have problems with this --,
261          simply inline the aliases here.  */
262       cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
263            "ISO8859-2" "\0" "ISO-8859-2" "\0"
264            "ISO8859-4" "\0" "ISO-8859-4" "\0"
265            "ISO8859-5" "\0" "ISO-8859-5" "\0"
266            "ISO8859-7" "\0" "ISO-8859-7" "\0"
267            "ISO8859-9" "\0" "ISO-8859-9" "\0"
268            "ISO8859-13" "\0" "ISO-8859-13" "\0"
269            "ISO8859-15" "\0" "ISO-8859-15" "\0"
270            "KOI8-R" "\0" "KOI8-R" "\0"
271            "KOI8-U" "\0" "KOI8-U" "\0"
272            "CP866" "\0" "CP866" "\0"
273            "CP949" "\0" "CP949" "\0"
274            "CP1131" "\0" "CP1131" "\0"
275            "CP1251" "\0" "CP1251" "\0"
276            "eucCN" "\0" "GB2312" "\0"
277            "GB2312" "\0" "GB2312" "\0"
278            "eucJP" "\0" "EUC-JP" "\0"
279            "eucKR" "\0" "EUC-KR" "\0"
280            "Big5" "\0" "BIG5" "\0"
281            "Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
282            "GBK" "\0" "GBK" "\0"
283            "GB18030" "\0" "GB18030" "\0"
284            "SJIS" "\0" "SHIFT_JIS" "\0"
285            "ARMSCII-8" "\0" "ARMSCII-8" "\0"
286            "PT154" "\0" "PT154" "\0"
287          /*"ISCII-DEV" "\0" "?" "\0"*/
288            "*" "\0" "UTF-8" "\0";
289 # endif
290 
291 # if defined VMS
292       /* To avoid the troubles of an extra file charset.alias_vms in the
293          sources of many GNU packages, simply inline the aliases here.  */
294       /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
295          "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
296          section 10.7 "Handling Different Character Sets".  */
297       cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
298            "ISO8859-2" "\0" "ISO-8859-2" "\0"
299            "ISO8859-5" "\0" "ISO-8859-5" "\0"
300            "ISO8859-7" "\0" "ISO-8859-7" "\0"
301            "ISO8859-8" "\0" "ISO-8859-8" "\0"
302            "ISO8859-9" "\0" "ISO-8859-9" "\0"
303            /* Japanese */
304            "eucJP" "\0" "EUC-JP" "\0"
305            "SJIS" "\0" "SHIFT_JIS" "\0"
306            "DECKANJI" "\0" "DEC-KANJI" "\0"
307            "SDECKANJI" "\0" "EUC-JP" "\0"
308            /* Chinese */
309            "eucTW" "\0" "EUC-TW" "\0"
310            "DECHANYU" "\0" "DEC-HANYU" "\0"
311            "DECHANZI" "\0" "GB2312" "\0"
312            /* Korean */
313            "DECKOREAN" "\0" "EUC-KR" "\0";
314 # endif
315 
316 # if defined WINDOWS_NATIVE || defined __CYGWIN__
317       /* To avoid the troubles of installing a separate file in the same
318          directory as the DLL and of retrieving the DLL's directory at
319          runtime, simply inline the aliases here.  */
320 
321       cp = "CP936" "\0" "GBK" "\0"
322            "CP1361" "\0" "JOHAB" "\0"
323            "CP20127" "\0" "ASCII" "\0"
324            "CP20866" "\0" "KOI8-R" "\0"
325            "CP20936" "\0" "GB2312" "\0"
326            "CP21866" "\0" "KOI8-RU" "\0"
327            "CP28591" "\0" "ISO-8859-1" "\0"
328            "CP28592" "\0" "ISO-8859-2" "\0"
329            "CP28593" "\0" "ISO-8859-3" "\0"
330            "CP28594" "\0" "ISO-8859-4" "\0"
331            "CP28595" "\0" "ISO-8859-5" "\0"
332            "CP28596" "\0" "ISO-8859-6" "\0"
333            "CP28597" "\0" "ISO-8859-7" "\0"
334            "CP28598" "\0" "ISO-8859-8" "\0"
335            "CP28599" "\0" "ISO-8859-9" "\0"
336            "CP28605" "\0" "ISO-8859-15" "\0"
337            "CP38598" "\0" "ISO-8859-8" "\0"
338            "CP51932" "\0" "EUC-JP" "\0"
339            "CP51936" "\0" "GB2312" "\0"
340            "CP51949" "\0" "EUC-KR" "\0"
341            "CP51950" "\0" "EUC-TW" "\0"
342            "CP54936" "\0" "GB18030" "\0"
343            "CP65001" "\0" "UTF-8" "\0";
344 # endif
345 # if defined OS2
346       /* To avoid the troubles of installing a separate file in the same
347          directory as the DLL and of retrieving the DLL's directory at
348          runtime, simply inline the aliases here.  */
349 
350       /* The list of encodings is taken from "List of OS/2 Codepages"
351          by Alex Taylor:
352          <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
353          See also "IBM Globalization - Code page identifiers":
354          <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.  */
355       cp = "CP813" "\0" "ISO-8859-7" "\0"
356            "CP878" "\0" "KOI8-R" "\0"
357            "CP819" "\0" "ISO-8859-1" "\0"
358            "CP912" "\0" "ISO-8859-2" "\0"
359            "CP913" "\0" "ISO-8859-3" "\0"
360            "CP914" "\0" "ISO-8859-4" "\0"
361            "CP915" "\0" "ISO-8859-5" "\0"
362            "CP916" "\0" "ISO-8859-8" "\0"
363            "CP920" "\0" "ISO-8859-9" "\0"
364            "CP921" "\0" "ISO-8859-13" "\0"
365            "CP923" "\0" "ISO-8859-15" "\0"
366            "CP954" "\0" "EUC-JP" "\0"
367            "CP964" "\0" "EUC-TW" "\0"
368            "CP970" "\0" "EUC-KR" "\0"
369            "CP1089" "\0" "ISO-8859-6" "\0"
370            "CP1208" "\0" "UTF-8" "\0"
371            "CP1381" "\0" "GB2312" "\0"
372            "CP1386" "\0" "GBK" "\0"
373            "CP3372" "\0" "EUC-JP" "\0";
374 # endif
375 #endif
376 
377       charset_aliases = cp;
378     }
379 
380   return cp;
381 }
382 
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.
   The result must not be freed.
   If the canonical name cannot be determined, the result is a non-canonical
   name.  The result is never NULL and never the empty string.

   The raw encoding name is obtained in a platform dependent way:
     - with nl_langinfo (CODESET) where HAVE_LANGINFO_CODESET is set (on
       Cygwin, a "US-ASCII" answer is replaced by the encoding named in
       the locale environment variables or by "CP<GetACP()>"),
     - from the LC_ALL / LC_CTYPE / LANG environment variables on systems
       without nl_langinfo (CODESET),
     - from setlocale and GetACP on native Windows,
     - from the environment variables and DosQueryCp on OS/2.
   That name is then looked up in the table returned by
   get_charset_aliases.  Note that on Cygwin and OS/2 an encoding found
   in the environment variables is returned directly, without the alias
   lookup.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WINDOWS_NATIVE || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* MODIFIER points at or after DOT, so the difference is
                 non-negative and can be compared as a size.  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WINDOWS_NATIVE

  static char buf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_ALL, NULL);
  char *pdot;

  /* If they set different locales for different categories,
     'setlocale' will return a semi-colon separated list of locale
     values.  To make sure we use the correct one, we choose LC_CTYPE.  */
  if (strchr (current_locale, ';'))
    current_locale = setlocale (LC_CTYPE, NULL);

  pdot = strrchr (current_locale, '.');
  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
        number: GetACP().
        When the output goes to a console window, it needs to be provided in
        GetOEMCP() encoding if the console is using a raster font, or in
        GetConsoleOutputCP() encoding if it is using a TrueType font.
        But in GUI programs and for output sent to files and pipes, GetACP()
        encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }

  /* The part after the dot is not always a number: for a UTF-8 locale,
     recent Windows versions report a name such as "French_France.utf8"
     instead of "French_France.65001".  Prefixing that with "CP" gives a
     name that no alias matches, so recognize the UTF-8 spellings here.  */
  if (strcmp (buf + 2, "65001") == 0
      || strcmp (buf + 2, "utf8") == 0
      || strcmp (buf + 2, "UTF8") == 0
      || strcmp (buf + 2, "utf-8") == 0
      || strcmp (buf + 2, "UTF-8") == 0)
    codeset = "UTF-8";
  else
    codeset = buf;

#elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* MODIFIER points at or after DOT, so the difference is
             non-negative and can be compared as a size.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* "%u" requires an 'unsigned int' argument, whereas cp[0] is a
             ULONG; convert explicitly so that argument and conversion
             specification agree.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias.  The table is a sequence of (alias, canonical name)
     pairs of NUL-terminated strings, hence the double step.  An alias
     of "*" matches every codeset.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time.  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}
614 
/* A variant of the above, without calls to `setlocale', `nl_langinfo',
   etc.  The encoding is derived solely from the environment variables
   LC_ALL, LC_CTYPE and LANG, consulted in this order; the first one that
   is set to a non-empty value is used.

   - If that value has the form "name.ENCODING" or "name.ENCODING@modifier"
     with a non-empty ENCODING, ENCODING is returned as is, without alias
     resolution.  In the first form the result points into the environment
     string; in the second form it points to a static buffer that is
     overwritten by the next such call.
   - If that value is "C", the result is "ASCII".
   - Otherwise (no variable set, no usable encoding in the value, or an
     encoding too long for the static buffer) the empty name is resolved
     through the table returned by get_charset_aliases, and "ISO-8859-1"
     is used if that yields nothing.

   The result must not be freed.  It is never NULL and never the empty
   string.  */
const char *
environ_locale_charset (void)
{
  static char buf[2 + 10 + 1];
  /* Start from "unknown", so that every path which does not return early
     reaches the alias lookup with a valid string.  */
  const char *codeset = "";
  const char *aliases;
  const char *locale;

  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }

  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            {
              /* An empty encoding ("xx_YY.") carries no information and
                 must not be returned; use the default instead.  */
              if (dot[0] != '\0')
                return dot;
            }
          else
            {
              size_t len = modifier - dot;

              /* Likewise for "xx_YY.@modifier".  An encoding that does not
                 fit into BUF is treated as unknown as well.  */
              if (len > 0 && len < sizeof (buf))
                {
                  memcpy (buf, dot, len);
                  buf[len] = '\0';
                  return buf;
                }
            }
        }
      else if (strcmp (locale, "C") == 0)
        return "ASCII";
    }

  /* Resolve alias.  The table is a sequence of (alias, canonical name)
     pairs of NUL-terminated strings, hence the double step.  An alias
     of "*" matches every codeset.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time.  */
  if (codeset[0] == '\0')
    /* Default to Latin-1, for backward compatibility with Guile 1.8.  */
    codeset = "ISO-8859-1";

  return codeset;
}
684