/* Determine a canonical name for the current locale's character encoding.

   Copyright (C) 2000-2006, 2008-2018 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, see <https://www.gnu.org/licenses/>.  */
1744b87433SJohn Marino 
/* Written by Bruno Haible <bruno@clisp.org>.  */
1944b87433SJohn Marino 
2044b87433SJohn Marino #include <config.h>
2144b87433SJohn Marino 
2244b87433SJohn Marino /* Specification.  */
2344b87433SJohn Marino #include "localcharset.h"
2444b87433SJohn Marino 
2544b87433SJohn Marino #include <stddef.h>
2644b87433SJohn Marino #include <stdio.h>
2744b87433SJohn Marino #include <string.h>
2844b87433SJohn Marino #include <stdlib.h>
2944b87433SJohn Marino 
3044b87433SJohn Marino #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
3144b87433SJohn Marino # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
3244b87433SJohn Marino #endif
3344b87433SJohn Marino 
34*6ea1f93eSDaniel Fojt #if defined _WIN32 && !defined __CYGWIN__
354536c563SJohn Marino # define WINDOWS_NATIVE
36*6ea1f93eSDaniel Fojt # include <locale.h>
3744b87433SJohn Marino #endif
3844b87433SJohn Marino 
3944b87433SJohn Marino #if defined __EMX__
4044b87433SJohn Marino /* Assume EMX program runs on OS/2, even if compiled under DOS.  */
4144b87433SJohn Marino # ifndef OS2
4244b87433SJohn Marino #  define OS2
4344b87433SJohn Marino # endif
4444b87433SJohn Marino #endif
4544b87433SJohn Marino 
464536c563SJohn Marino #if !defined WINDOWS_NATIVE
4744b87433SJohn Marino # if HAVE_LANGINFO_CODESET
4844b87433SJohn Marino #  include <langinfo.h>
4944b87433SJohn Marino # else
50*6ea1f93eSDaniel Fojt #  if 0 /* see comment regarding use of setlocale(), below */
5144b87433SJohn Marino #   include <locale.h>
5244b87433SJohn Marino #  endif
5344b87433SJohn Marino # endif
5444b87433SJohn Marino # ifdef __CYGWIN__
5544b87433SJohn Marino #  define WIN32_LEAN_AND_MEAN
5644b87433SJohn Marino #  include <windows.h>
5744b87433SJohn Marino # endif
584536c563SJohn Marino #elif defined WINDOWS_NATIVE
5944b87433SJohn Marino # define WIN32_LEAN_AND_MEAN
6044b87433SJohn Marino # include <windows.h>
6144b87433SJohn Marino #endif
6244b87433SJohn Marino #if defined OS2
6344b87433SJohn Marino # define INCL_DOS
6444b87433SJohn Marino # include <os2.h>
6544b87433SJohn Marino #endif
6644b87433SJohn Marino 
67*6ea1f93eSDaniel Fojt /* For MB_CUR_MAX_L */
6844b87433SJohn Marino #if defined DARWIN7
69*6ea1f93eSDaniel Fojt # include <xlocale.h>
7044b87433SJohn Marino #endif
7144b87433SJohn Marino 
72*6ea1f93eSDaniel Fojt 
73*6ea1f93eSDaniel Fojt #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
74*6ea1f93eSDaniel Fojt 
/* On these platforms, we use a mapping from non-canonical encoding name
   to GNU canonical encoding name.  */
77*6ea1f93eSDaniel Fojt 
/* With glibc-2.1 or newer, we don't need any canonicalization,
   because glibc has iconv and both glibc and libiconv support all
   GNU canonical names directly.  */
81*6ea1f93eSDaniel Fojt # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
82*6ea1f93eSDaniel Fojt 
/* One row of the encoding-alias table: pairs a platform-specific
   (non-canonical) encoding name with the GNU canonical name of the same
   encoding.  Both members are fixed-size character arrays, not pointers;
   each holds a string of at most 11 characters plus the terminating NUL.  */
struct table_entry
{
  const char alias[11+1];      /* non-canonical name used by the platform */
  const char canonical[11+1];  /* GNU canonical encoding name */
};
88*6ea1f93eSDaniel Fojt 
89*6ea1f93eSDaniel Fojt /* Table of platform-dependent mappings, sorted in ascending order.  */
90*6ea1f93eSDaniel Fojt static const struct table_entry alias_table[] =
91*6ea1f93eSDaniel Fojt   {
92*6ea1f93eSDaniel Fojt #  if defined __FreeBSD__                                   /* FreeBSD */
93*6ea1f93eSDaniel Fojt   /*{ "ARMSCII-8",  "ARMSCII-8" },*/
94*6ea1f93eSDaniel Fojt     { "Big5",       "BIG5" },
95*6ea1f93eSDaniel Fojt     { "C",          "ASCII" },
96*6ea1f93eSDaniel Fojt   /*{ "CP1131",     "CP1131" },*/
97*6ea1f93eSDaniel Fojt   /*{ "CP1251",     "CP1251" },*/
98*6ea1f93eSDaniel Fojt   /*{ "CP866",      "CP866" },*/
99*6ea1f93eSDaniel Fojt   /*{ "GB18030",    "GB18030" },*/
100*6ea1f93eSDaniel Fojt   /*{ "GB2312",     "GB2312" },*/
101*6ea1f93eSDaniel Fojt   /*{ "GBK",        "GBK" },*/
102*6ea1f93eSDaniel Fojt   /*{ "ISCII-DEV",  "?" },*/
103*6ea1f93eSDaniel Fojt     { "ISO8859-1",  "ISO-8859-1" },
104*6ea1f93eSDaniel Fojt     { "ISO8859-13", "ISO-8859-13" },
105*6ea1f93eSDaniel Fojt     { "ISO8859-15", "ISO-8859-15" },
106*6ea1f93eSDaniel Fojt     { "ISO8859-2",  "ISO-8859-2" },
107*6ea1f93eSDaniel Fojt     { "ISO8859-5",  "ISO-8859-5" },
108*6ea1f93eSDaniel Fojt     { "ISO8859-7",  "ISO-8859-7" },
109*6ea1f93eSDaniel Fojt     { "ISO8859-9",  "ISO-8859-9" },
110*6ea1f93eSDaniel Fojt   /*{ "KOI8-R",     "KOI8-R" },*/
111*6ea1f93eSDaniel Fojt   /*{ "KOI8-U",     "KOI8-U" },*/
112*6ea1f93eSDaniel Fojt     { "SJIS",       "SHIFT_JIS" },
113*6ea1f93eSDaniel Fojt     { "US-ASCII",   "ASCII" },
114*6ea1f93eSDaniel Fojt     { "eucCN",      "GB2312" },
115*6ea1f93eSDaniel Fojt     { "eucJP",      "EUC-JP" },
116*6ea1f93eSDaniel Fojt     { "eucKR",      "EUC-KR" }
117*6ea1f93eSDaniel Fojt #   define alias_table_defined
118*6ea1f93eSDaniel Fojt #  endif
119*6ea1f93eSDaniel Fojt #  if defined __NetBSD__                                    /* NetBSD */
120*6ea1f93eSDaniel Fojt     { "646",        "ASCII" },
121*6ea1f93eSDaniel Fojt   /*{ "ARMSCII-8",  "ARMSCII-8" },*/
122*6ea1f93eSDaniel Fojt   /*{ "BIG5",       "BIG5" },*/
123*6ea1f93eSDaniel Fojt     { "Big5-HKSCS", "BIG5-HKSCS" },
124*6ea1f93eSDaniel Fojt   /*{ "CP1251",     "CP1251" },*/
125*6ea1f93eSDaniel Fojt   /*{ "CP866",      "CP866" },*/
126*6ea1f93eSDaniel Fojt   /*{ "GB18030",    "GB18030" },*/
127*6ea1f93eSDaniel Fojt   /*{ "GB2312",     "GB2312" },*/
128*6ea1f93eSDaniel Fojt     { "ISO8859-1",  "ISO-8859-1" },
129*6ea1f93eSDaniel Fojt     { "ISO8859-13", "ISO-8859-13" },
130*6ea1f93eSDaniel Fojt     { "ISO8859-15", "ISO-8859-15" },
131*6ea1f93eSDaniel Fojt     { "ISO8859-2",  "ISO-8859-2" },
132*6ea1f93eSDaniel Fojt     { "ISO8859-4",  "ISO-8859-4" },
133*6ea1f93eSDaniel Fojt     { "ISO8859-5",  "ISO-8859-5" },
134*6ea1f93eSDaniel Fojt     { "ISO8859-7",  "ISO-8859-7" },
135*6ea1f93eSDaniel Fojt   /*{ "KOI8-R",     "KOI8-R" },*/
136*6ea1f93eSDaniel Fojt   /*{ "KOI8-U",     "KOI8-U" },*/
137*6ea1f93eSDaniel Fojt   /*{ "PT154",      "PT154" },*/
138*6ea1f93eSDaniel Fojt     { "SJIS",       "SHIFT_JIS" },
139*6ea1f93eSDaniel Fojt     { "eucCN",      "GB2312" },
140*6ea1f93eSDaniel Fojt     { "eucJP",      "EUC-JP" },
141*6ea1f93eSDaniel Fojt     { "eucKR",      "EUC-KR" },
142*6ea1f93eSDaniel Fojt     { "eucTW",      "EUC-TW" }
143*6ea1f93eSDaniel Fojt #   define alias_table_defined
144*6ea1f93eSDaniel Fojt #  endif
145*6ea1f93eSDaniel Fojt #  if defined __OpenBSD__                                   /* OpenBSD */
146*6ea1f93eSDaniel Fojt     { "646",        "ASCII" },
147*6ea1f93eSDaniel Fojt     { "ISO8859-1",  "ISO-8859-1" },
148*6ea1f93eSDaniel Fojt     { "ISO8859-13", "ISO-8859-13" },
149*6ea1f93eSDaniel Fojt     { "ISO8859-15", "ISO-8859-15" },
150*6ea1f93eSDaniel Fojt     { "ISO8859-2",  "ISO-8859-2" },
151*6ea1f93eSDaniel Fojt     { "ISO8859-4",  "ISO-8859-4" },
152*6ea1f93eSDaniel Fojt     { "ISO8859-5",  "ISO-8859-5" },
153*6ea1f93eSDaniel Fojt     { "ISO8859-7",  "ISO-8859-7" }
154*6ea1f93eSDaniel Fojt #   define alias_table_defined
155*6ea1f93eSDaniel Fojt #  endif
156*6ea1f93eSDaniel Fojt #  if defined __APPLE__ && defined __MACH__                 /* Mac OS X */
157*6ea1f93eSDaniel Fojt     /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
158*6ea1f93eSDaniel Fojt        useless:
159*6ea1f93eSDaniel Fojt        - It returns the empty string when LANG is set to a locale of the
160*6ea1f93eSDaniel Fojt          form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
161*6ea1f93eSDaniel Fojt          LC_CTYPE file.
162*6ea1f93eSDaniel Fojt        - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
163*6ea1f93eSDaniel Fojt          the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
164*6ea1f93eSDaniel Fojt        - The documentation says:
165*6ea1f93eSDaniel Fojt            "... all code that calls BSD system routines should ensure
166*6ea1f93eSDaniel Fojt             that the const *char parameters of these routines are in UTF-8
167*6ea1f93eSDaniel Fojt             encoding. All BSD system functions expect their string
168*6ea1f93eSDaniel Fojt             parameters to be in UTF-8 encoding and nothing else."
169*6ea1f93eSDaniel Fojt          It also says
170*6ea1f93eSDaniel Fojt            "An additional caveat is that string parameters for files,
171*6ea1f93eSDaniel Fojt             paths, and other file-system entities must be in canonical
172*6ea1f93eSDaniel Fojt             UTF-8. In a canonical UTF-8 Unicode string, all decomposable
173*6ea1f93eSDaniel Fojt             characters are decomposed ..."
174*6ea1f93eSDaniel Fojt          but this is not true: You can pass non-decomposed UTF-8 strings
175*6ea1f93eSDaniel Fojt          to file system functions, and it is the OS which will convert
176*6ea1f93eSDaniel Fojt          them to decomposed UTF-8 before accessing the file system.
177*6ea1f93eSDaniel Fojt        - The Apple Terminal application displays UTF-8 by default.
178*6ea1f93eSDaniel Fojt        - However, other applications are free to use different encodings:
179*6ea1f93eSDaniel Fojt          - xterm uses ISO-8859-1 by default.
180*6ea1f93eSDaniel Fojt          - TextEdit uses MacRoman by default.
181*6ea1f93eSDaniel Fojt        We prefer UTF-8 over decomposed UTF-8-MAC because one should
182*6ea1f93eSDaniel Fojt        minimize the use of decomposed Unicode. Unfortunately, through the
183*6ea1f93eSDaniel Fojt        Darwin file system, decomposed UTF-8 strings are leaked into user
184*6ea1f93eSDaniel Fojt        space nevertheless.
185*6ea1f93eSDaniel Fojt        Then there are also the locales with encodings other than US-ASCII
186*6ea1f93eSDaniel Fojt        and UTF-8. These locales can be occasionally useful to users (e.g.
187*6ea1f93eSDaniel Fojt        when grepping through ISO-8859-1 encoded text files), when all their
188*6ea1f93eSDaniel Fojt        file names are in US-ASCII.
189*6ea1f93eSDaniel Fojt      */
190*6ea1f93eSDaniel Fojt     { "ARMSCII-8",  "ARMSCII-8" },
191*6ea1f93eSDaniel Fojt     { "Big5",       "BIG5" },
192*6ea1f93eSDaniel Fojt     { "Big5HKSCS",  "BIG5-HKSCS" },
193*6ea1f93eSDaniel Fojt     { "CP1131",     "CP1131" },
194*6ea1f93eSDaniel Fojt     { "CP1251",     "CP1251" },
195*6ea1f93eSDaniel Fojt     { "CP866",      "CP866" },
196*6ea1f93eSDaniel Fojt     { "CP949",      "CP949" },
197*6ea1f93eSDaniel Fojt     { "GB18030",    "GB18030" },
198*6ea1f93eSDaniel Fojt     { "GB2312",     "GB2312" },
199*6ea1f93eSDaniel Fojt     { "GBK",        "GBK" },
200*6ea1f93eSDaniel Fojt   /*{ "ISCII-DEV",  "?" },*/
201*6ea1f93eSDaniel Fojt     { "ISO8859-1",  "ISO-8859-1" },
202*6ea1f93eSDaniel Fojt     { "ISO8859-13", "ISO-8859-13" },
203*6ea1f93eSDaniel Fojt     { "ISO8859-15", "ISO-8859-15" },
204*6ea1f93eSDaniel Fojt     { "ISO8859-2",  "ISO-8859-2" },
205*6ea1f93eSDaniel Fojt     { "ISO8859-4",  "ISO-8859-4" },
206*6ea1f93eSDaniel Fojt     { "ISO8859-5",  "ISO-8859-5" },
207*6ea1f93eSDaniel Fojt     { "ISO8859-7",  "ISO-8859-7" },
208*6ea1f93eSDaniel Fojt     { "ISO8859-9",  "ISO-8859-9" },
209*6ea1f93eSDaniel Fojt     { "KOI8-R",     "KOI8-R" },
210*6ea1f93eSDaniel Fojt     { "KOI8-U",     "KOI8-U" },
211*6ea1f93eSDaniel Fojt     { "PT154",      "PT154" },
212*6ea1f93eSDaniel Fojt     { "SJIS",       "SHIFT_JIS" },
213*6ea1f93eSDaniel Fojt     { "eucCN",      "GB2312" },
214*6ea1f93eSDaniel Fojt     { "eucJP",      "EUC-JP" },
215*6ea1f93eSDaniel Fojt     { "eucKR",      "EUC-KR" }
216*6ea1f93eSDaniel Fojt #   define alias_table_defined
217*6ea1f93eSDaniel Fojt #  endif
218*6ea1f93eSDaniel Fojt #  if defined _AIX                                          /* AIX */
219*6ea1f93eSDaniel Fojt   /*{ "GBK",        "GBK" },*/
220*6ea1f93eSDaniel Fojt     { "IBM-1046",   "CP1046" },
221*6ea1f93eSDaniel Fojt     { "IBM-1124",   "CP1124" },
222*6ea1f93eSDaniel Fojt     { "IBM-1129",   "CP1129" },
223*6ea1f93eSDaniel Fojt     { "IBM-1252",   "CP1252" },
224*6ea1f93eSDaniel Fojt     { "IBM-850",    "CP850" },
225*6ea1f93eSDaniel Fojt     { "IBM-856",    "CP856" },
226*6ea1f93eSDaniel Fojt     { "IBM-921",    "ISO-8859-13" },
227*6ea1f93eSDaniel Fojt     { "IBM-922",    "CP922" },
228*6ea1f93eSDaniel Fojt     { "IBM-932",    "CP932" },
229*6ea1f93eSDaniel Fojt     { "IBM-943",    "CP943" },
230*6ea1f93eSDaniel Fojt     { "IBM-eucCN",  "GB2312" },
231*6ea1f93eSDaniel Fojt     { "IBM-eucJP",  "EUC-JP" },
232*6ea1f93eSDaniel Fojt     { "IBM-eucKR",  "EUC-KR" },
233*6ea1f93eSDaniel Fojt     { "IBM-eucTW",  "EUC-TW" },
234*6ea1f93eSDaniel Fojt     { "ISO8859-1",  "ISO-8859-1" },
235*6ea1f93eSDaniel Fojt     { "ISO8859-15", "ISO-8859-15" },
236*6ea1f93eSDaniel Fojt     { "ISO8859-2",  "ISO-8859-2" },
237*6ea1f93eSDaniel Fojt     { "ISO8859-5",  "ISO-8859-5" },
238*6ea1f93eSDaniel Fojt     { "ISO8859-6",  "ISO-8859-6" },
239*6ea1f93eSDaniel Fojt     { "ISO8859-7",  "ISO-8859-7" },
240*6ea1f93eSDaniel Fojt     { "ISO8859-8",  "ISO-8859-8" },
241*6ea1f93eSDaniel Fojt     { "ISO8859-9",  "ISO-8859-9" },
242*6ea1f93eSDaniel Fojt     { "TIS-620",    "TIS-620" },
243*6ea1f93eSDaniel Fojt   /*{ "UTF-8",      "UTF-8" },*/
244*6ea1f93eSDaniel Fojt     { "big5",       "BIG5" }
245*6ea1f93eSDaniel Fojt #   define alias_table_defined
246*6ea1f93eSDaniel Fojt #  endif
247*6ea1f93eSDaniel Fojt #  if defined __hpux                                        /* HP-UX */
248*6ea1f93eSDaniel Fojt     { "SJIS",      "SHIFT_JIS" },
249*6ea1f93eSDaniel Fojt     { "arabic8",   "HP-ARABIC8" },
250*6ea1f93eSDaniel Fojt     { "big5",      "BIG5" },
251*6ea1f93eSDaniel Fojt     { "cp1251",    "CP1251" },
252*6ea1f93eSDaniel Fojt     { "eucJP",     "EUC-JP" },
253*6ea1f93eSDaniel Fojt     { "eucKR",     "EUC-KR" },
254*6ea1f93eSDaniel Fojt     { "eucTW",     "EUC-TW" },
255*6ea1f93eSDaniel Fojt     { "gb18030",   "GB18030" },
256*6ea1f93eSDaniel Fojt     { "greek8",    "HP-GREEK8" },
257*6ea1f93eSDaniel Fojt     { "hebrew8",   "HP-HEBREW8" },
258*6ea1f93eSDaniel Fojt     { "hkbig5",    "BIG5-HKSCS" },
259*6ea1f93eSDaniel Fojt     { "hp15CN",    "GB2312" },
260*6ea1f93eSDaniel Fojt     { "iso88591",  "ISO-8859-1" },
261*6ea1f93eSDaniel Fojt     { "iso885913", "ISO-8859-13" },
262*6ea1f93eSDaniel Fojt     { "iso885915", "ISO-8859-15" },
263*6ea1f93eSDaniel Fojt     { "iso88592",  "ISO-8859-2" },
264*6ea1f93eSDaniel Fojt     { "iso88594",  "ISO-8859-4" },
265*6ea1f93eSDaniel Fojt     { "iso88595",  "ISO-8859-5" },
266*6ea1f93eSDaniel Fojt     { "iso88596",  "ISO-8859-6" },
267*6ea1f93eSDaniel Fojt     { "iso88597",  "ISO-8859-7" },
268*6ea1f93eSDaniel Fojt     { "iso88598",  "ISO-8859-8" },
269*6ea1f93eSDaniel Fojt     { "iso88599",  "ISO-8859-9" },
270*6ea1f93eSDaniel Fojt     { "kana8",     "HP-KANA8" },
271*6ea1f93eSDaniel Fojt     { "koi8r",     "KOI8-R" },
272*6ea1f93eSDaniel Fojt     { "roman8",    "HP-ROMAN8" },
273*6ea1f93eSDaniel Fojt     { "tis620",    "TIS-620" },
274*6ea1f93eSDaniel Fojt     { "turkish8",  "HP-TURKISH8" },
275*6ea1f93eSDaniel Fojt     { "utf8",      "UTF-8" }
276*6ea1f93eSDaniel Fojt #   define alias_table_defined
277*6ea1f93eSDaniel Fojt #  endif
278*6ea1f93eSDaniel Fojt #  if defined __sgi                                         /* IRIX */
279*6ea1f93eSDaniel Fojt     { "ISO8859-1",  "ISO-8859-1" },
280*6ea1f93eSDaniel Fojt     { "ISO8859-15", "ISO-8859-15" },
281*6ea1f93eSDaniel Fojt     { "ISO8859-2",  "ISO-8859-2" },
282*6ea1f93eSDaniel Fojt     { "ISO8859-5",  "ISO-8859-5" },
283*6ea1f93eSDaniel Fojt     { "ISO8859-7",  "ISO-8859-7" },
284*6ea1f93eSDaniel Fojt     { "ISO8859-9",  "ISO-8859-9" },
285*6ea1f93eSDaniel Fojt     { "eucCN",      "GB2312" },
286*6ea1f93eSDaniel Fojt     { "eucJP",      "EUC-JP" },
287*6ea1f93eSDaniel Fojt     { "eucKR",      "EUC-KR" },
288*6ea1f93eSDaniel Fojt     { "eucTW",      "EUC-TW" }
289*6ea1f93eSDaniel Fojt #   define alias_table_defined
290*6ea1f93eSDaniel Fojt #  endif
291*6ea1f93eSDaniel Fojt #  if defined __osf__                                       /* OSF/1 */
292*6ea1f93eSDaniel Fojt   /*{ "GBK",        "GBK" },*/
293*6ea1f93eSDaniel Fojt     { "ISO8859-1",  "ISO-8859-1" },
294*6ea1f93eSDaniel Fojt     { "ISO8859-15", "ISO-8859-15" },
295*6ea1f93eSDaniel Fojt     { "ISO8859-2",  "ISO-8859-2" },
296*6ea1f93eSDaniel Fojt     { "ISO8859-4",  "ISO-8859-4" },
297*6ea1f93eSDaniel Fojt     { "ISO8859-5",  "ISO-8859-5" },
298*6ea1f93eSDaniel Fojt     { "ISO8859-7",  "ISO-8859-7" },
299*6ea1f93eSDaniel Fojt     { "ISO8859-8",  "ISO-8859-8" },
300*6ea1f93eSDaniel Fojt     { "ISO8859-9",  "ISO-8859-9" },
301*6ea1f93eSDaniel Fojt     { "KSC5601",    "CP949" },
302*6ea1f93eSDaniel Fojt     { "SJIS",       "SHIFT_JIS" },
303*6ea1f93eSDaniel Fojt     { "TACTIS",     "TIS-620" },
304*6ea1f93eSDaniel Fojt   /*{ "UTF-8",      "UTF-8" },*/
305*6ea1f93eSDaniel Fojt     { "big5",       "BIG5" },
306*6ea1f93eSDaniel Fojt     { "cp850",      "CP850" },
307*6ea1f93eSDaniel Fojt     { "dechanyu",   "DEC-HANYU" },
308*6ea1f93eSDaniel Fojt     { "dechanzi",   "GB2312" },
309*6ea1f93eSDaniel Fojt     { "deckanji",   "DEC-KANJI" },
310*6ea1f93eSDaniel Fojt     { "deckorean",  "EUC-KR" },
311*6ea1f93eSDaniel Fojt     { "eucJP",      "EUC-JP" },
312*6ea1f93eSDaniel Fojt     { "eucKR",      "EUC-KR" },
313*6ea1f93eSDaniel Fojt     { "eucTW",      "EUC-TW" },
314*6ea1f93eSDaniel Fojt     { "sdeckanji",  "EUC-JP" }
315*6ea1f93eSDaniel Fojt #   define alias_table_defined
316*6ea1f93eSDaniel Fojt #  endif
317*6ea1f93eSDaniel Fojt #  if defined __sun                                         /* Solaris */
318*6ea1f93eSDaniel Fojt     { "5601",        "EUC-KR" },
319*6ea1f93eSDaniel Fojt     { "646",         "ASCII" },
320*6ea1f93eSDaniel Fojt   /*{ "BIG5",        "BIG5" },*/
321*6ea1f93eSDaniel Fojt     { "Big5-HKSCS",  "BIG5-HKSCS" },
322*6ea1f93eSDaniel Fojt     { "GB18030",     "GB18030" },
323*6ea1f93eSDaniel Fojt   /*{ "GBK",         "GBK" },*/
324*6ea1f93eSDaniel Fojt     { "ISO8859-1",   "ISO-8859-1" },
325*6ea1f93eSDaniel Fojt     { "ISO8859-11",  "TIS-620" },
326*6ea1f93eSDaniel Fojt     { "ISO8859-13",  "ISO-8859-13" },
327*6ea1f93eSDaniel Fojt     { "ISO8859-15",  "ISO-8859-15" },
328*6ea1f93eSDaniel Fojt     { "ISO8859-2",   "ISO-8859-2" },
329*6ea1f93eSDaniel Fojt     { "ISO8859-3",   "ISO-8859-3" },
330*6ea1f93eSDaniel Fojt     { "ISO8859-4",   "ISO-8859-4" },
331*6ea1f93eSDaniel Fojt     { "ISO8859-5",   "ISO-8859-5" },
332*6ea1f93eSDaniel Fojt     { "ISO8859-6",   "ISO-8859-6" },
333*6ea1f93eSDaniel Fojt     { "ISO8859-7",   "ISO-8859-7" },
334*6ea1f93eSDaniel Fojt     { "ISO8859-8",   "ISO-8859-8" },
335*6ea1f93eSDaniel Fojt     { "ISO8859-9",   "ISO-8859-9" },
336*6ea1f93eSDaniel Fojt     { "PCK",         "SHIFT_JIS" },
337*6ea1f93eSDaniel Fojt     { "TIS620.2533", "TIS-620" },
338*6ea1f93eSDaniel Fojt   /*{ "UTF-8",       "UTF-8" },*/
339*6ea1f93eSDaniel Fojt     { "ansi-1251",   "CP1251" },
340*6ea1f93eSDaniel Fojt     { "cns11643",    "EUC-TW" },
341*6ea1f93eSDaniel Fojt     { "eucJP",       "EUC-JP" },
342*6ea1f93eSDaniel Fojt     { "gb2312",      "GB2312" },
343*6ea1f93eSDaniel Fojt     { "koi8-r",      "KOI8-R" }
344*6ea1f93eSDaniel Fojt #   define alias_table_defined
345*6ea1f93eSDaniel Fojt #  endif
346*6ea1f93eSDaniel Fojt #  if defined __minix                                       /* Minix */
347*6ea1f93eSDaniel Fojt     { "646", "ASCII" }
348*6ea1f93eSDaniel Fojt #   define alias_table_defined
349*6ea1f93eSDaniel Fojt #  endif
350*6ea1f93eSDaniel Fojt #  if defined WINDOWS_NATIVE || defined __CYGWIN__          /* Windows */
351*6ea1f93eSDaniel Fojt     { "CP1361",  "JOHAB" },
352*6ea1f93eSDaniel Fojt     { "CP20127", "ASCII" },
353*6ea1f93eSDaniel Fojt     { "CP20866", "KOI8-R" },
354*6ea1f93eSDaniel Fojt     { "CP20936", "GB2312" },
355*6ea1f93eSDaniel Fojt     { "CP21866", "KOI8-RU" },
356*6ea1f93eSDaniel Fojt     { "CP28591", "ISO-8859-1" },
357*6ea1f93eSDaniel Fojt     { "CP28592", "ISO-8859-2" },
358*6ea1f93eSDaniel Fojt     { "CP28593", "ISO-8859-3" },
359*6ea1f93eSDaniel Fojt     { "CP28594", "ISO-8859-4" },
360*6ea1f93eSDaniel Fojt     { "CP28595", "ISO-8859-5" },
361*6ea1f93eSDaniel Fojt     { "CP28596", "ISO-8859-6" },
362*6ea1f93eSDaniel Fojt     { "CP28597", "ISO-8859-7" },
363*6ea1f93eSDaniel Fojt     { "CP28598", "ISO-8859-8" },
364*6ea1f93eSDaniel Fojt     { "CP28599", "ISO-8859-9" },
365*6ea1f93eSDaniel Fojt     { "CP28605", "ISO-8859-15" },
366*6ea1f93eSDaniel Fojt     { "CP38598", "ISO-8859-8" },
367*6ea1f93eSDaniel Fojt     { "CP51932", "EUC-JP" },
368*6ea1f93eSDaniel Fojt     { "CP51936", "GB2312" },
369*6ea1f93eSDaniel Fojt     { "CP51949", "EUC-KR" },
370*6ea1f93eSDaniel Fojt     { "CP51950", "EUC-TW" },
371*6ea1f93eSDaniel Fojt     { "CP54936", "GB18030" },
372*6ea1f93eSDaniel Fojt     { "CP65001", "UTF-8" },
373*6ea1f93eSDaniel Fojt     { "CP936",   "GBK" }
374*6ea1f93eSDaniel Fojt #   define alias_table_defined
375*6ea1f93eSDaniel Fojt #  endif
376*6ea1f93eSDaniel Fojt #  if defined OS2                                           /* OS/2 */
377*6ea1f93eSDaniel Fojt     /* The list of encodings is taken from "List of OS/2 Codepages"
378*6ea1f93eSDaniel Fojt        by Alex Taylor:
379*6ea1f93eSDaniel Fojt        <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
380*6ea1f93eSDaniel Fojt        See also "IBM Globalization - Code page identifiers":
381*6ea1f93eSDaniel Fojt        <https://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.  */
382*6ea1f93eSDaniel Fojt     { "CP1089", "ISO-8859-6" },
383*6ea1f93eSDaniel Fojt     { "CP1208", "UTF-8" },
384*6ea1f93eSDaniel Fojt     { "CP1381", "GB2312" },
385*6ea1f93eSDaniel Fojt     { "CP1386", "GBK" },
386*6ea1f93eSDaniel Fojt     { "CP3372", "EUC-JP" },
387*6ea1f93eSDaniel Fojt     { "CP813",  "ISO-8859-7" },
388*6ea1f93eSDaniel Fojt     { "CP819",  "ISO-8859-1" },
389*6ea1f93eSDaniel Fojt     { "CP878",  "KOI8-R" },
390*6ea1f93eSDaniel Fojt     { "CP912",  "ISO-8859-2" },
391*6ea1f93eSDaniel Fojt     { "CP913",  "ISO-8859-3" },
392*6ea1f93eSDaniel Fojt     { "CP914",  "ISO-8859-4" },
393*6ea1f93eSDaniel Fojt     { "CP915",  "ISO-8859-5" },
394*6ea1f93eSDaniel Fojt     { "CP916",  "ISO-8859-8" },
395*6ea1f93eSDaniel Fojt     { "CP920",  "ISO-8859-9" },
396*6ea1f93eSDaniel Fojt     { "CP921",  "ISO-8859-13" },
397*6ea1f93eSDaniel Fojt     { "CP923",  "ISO-8859-15" },
398*6ea1f93eSDaniel Fojt     { "CP954",  "EUC-JP" },
399*6ea1f93eSDaniel Fojt     { "CP964",  "EUC-TW" },
400*6ea1f93eSDaniel Fojt     { "CP970",  "EUC-KR" }
401*6ea1f93eSDaniel Fojt #   define alias_table_defined
402*6ea1f93eSDaniel Fojt #  endif
403*6ea1f93eSDaniel Fojt #  if defined VMS                                           /* OpenVMS */
40444b87433SJohn Marino     /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
40544b87433SJohn Marino        "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
40644b87433SJohn Marino        section 10.7 "Handling Different Character Sets".  */
407*6ea1f93eSDaniel Fojt     { "DECHANYU",  "DEC-HANYU" },
408*6ea1f93eSDaniel Fojt     { "DECHANZI",  "GB2312" },
409*6ea1f93eSDaniel Fojt     { "DECKANJI",  "DEC-KANJI" },
410*6ea1f93eSDaniel Fojt     { "DECKOREAN", "EUC-KR" },
411*6ea1f93eSDaniel Fojt     { "ISO8859-1", "ISO-8859-1" },
412*6ea1f93eSDaniel Fojt     { "ISO8859-2", "ISO-8859-2" },
413*6ea1f93eSDaniel Fojt     { "ISO8859-5", "ISO-8859-5" },
414*6ea1f93eSDaniel Fojt     { "ISO8859-7", "ISO-8859-7" },
415*6ea1f93eSDaniel Fojt     { "ISO8859-8", "ISO-8859-8" },
416*6ea1f93eSDaniel Fojt     { "ISO8859-9", "ISO-8859-9" },
417*6ea1f93eSDaniel Fojt     { "SDECKANJI", "EUC-JP" },
418*6ea1f93eSDaniel Fojt     { "SJIS",      "SHIFT_JIS" },
419*6ea1f93eSDaniel Fojt     { "eucJP",     "EUC-JP" },
420*6ea1f93eSDaniel Fojt     { "eucTW",     "EUC-TW" }
421*6ea1f93eSDaniel Fojt #   define alias_table_defined
422*6ea1f93eSDaniel Fojt #  endif
423*6ea1f93eSDaniel Fojt #  ifndef alias_table_defined
424*6ea1f93eSDaniel Fojt     /* Just a dummy entry, to avoid a C syntax error.  */
425*6ea1f93eSDaniel Fojt     { "", "" }
426*6ea1f93eSDaniel Fojt #  endif
427*6ea1f93eSDaniel Fojt   };
428*6ea1f93eSDaniel Fojt 
42944b87433SJohn Marino # endif
43044b87433SJohn Marino 
431*6ea1f93eSDaniel Fojt #else
43244b87433SJohn Marino 
/* On these platforms, we use a mapping from locale name to GNU canonical
   encoding name.  */
435*6ea1f93eSDaniel Fojt 
/* One row of the locale table: pairs a locale name with the GNU canonical
   name of the encoding that locale uses.  Both members are fixed-size
   character arrays, not pointers; the locale name may be up to 17
   characters and the canonical name up to 11 characters, each followed by
   the terminating NUL.  */
struct table_entry
{
  const char locale[17+1];     /* locale name */
  const char canonical[11+1];  /* GNU canonical encoding name */
};
441*6ea1f93eSDaniel Fojt 
442*6ea1f93eSDaniel Fojt /* Table of platform-dependent mappings, sorted in ascending order.  */
443*6ea1f93eSDaniel Fojt static const struct table_entry locale_table[] =
444*6ea1f93eSDaniel Fojt   {
445*6ea1f93eSDaniel Fojt # if defined __FreeBSD__                                    /* FreeBSD 4.2 */
446*6ea1f93eSDaniel Fojt     { "cs_CZ.ISO_8859-2",  "ISO-8859-2" },
447*6ea1f93eSDaniel Fojt     { "da_DK.DIS_8859-15", "ISO-8859-15" },
448*6ea1f93eSDaniel Fojt     { "da_DK.ISO_8859-1",  "ISO-8859-1" },
449*6ea1f93eSDaniel Fojt     { "de_AT.DIS_8859-15", "ISO-8859-15" },
450*6ea1f93eSDaniel Fojt     { "de_AT.ISO_8859-1",  "ISO-8859-1" },
451*6ea1f93eSDaniel Fojt     { "de_CH.DIS_8859-15", "ISO-8859-15" },
452*6ea1f93eSDaniel Fojt     { "de_CH.ISO_8859-1",  "ISO-8859-1" },
453*6ea1f93eSDaniel Fojt     { "de_DE.DIS_8859-15", "ISO-8859-15" },
454*6ea1f93eSDaniel Fojt     { "de_DE.ISO_8859-1",  "ISO-8859-1" },
455*6ea1f93eSDaniel Fojt     { "en_AU.DIS_8859-15", "ISO-8859-15" },
456*6ea1f93eSDaniel Fojt     { "en_AU.ISO_8859-1",  "ISO-8859-1" },
457*6ea1f93eSDaniel Fojt     { "en_CA.DIS_8859-15", "ISO-8859-15" },
458*6ea1f93eSDaniel Fojt     { "en_CA.ISO_8859-1",  "ISO-8859-1" },
459*6ea1f93eSDaniel Fojt     { "en_GB.DIS_8859-15", "ISO-8859-15" },
460*6ea1f93eSDaniel Fojt     { "en_GB.ISO_8859-1",  "ISO-8859-1" },
461*6ea1f93eSDaniel Fojt     { "en_US.DIS_8859-15", "ISO-8859-15" },
462*6ea1f93eSDaniel Fojt     { "en_US.ISO_8859-1",  "ISO-8859-1" },
463*6ea1f93eSDaniel Fojt     { "es_ES.DIS_8859-15", "ISO-8859-15" },
464*6ea1f93eSDaniel Fojt     { "es_ES.ISO_8859-1",  "ISO-8859-1" },
465*6ea1f93eSDaniel Fojt     { "fi_FI.DIS_8859-15", "ISO-8859-15" },
466*6ea1f93eSDaniel Fojt     { "fi_FI.ISO_8859-1",  "ISO-8859-1" },
467*6ea1f93eSDaniel Fojt     { "fr_BE.DIS_8859-15", "ISO-8859-15" },
468*6ea1f93eSDaniel Fojt     { "fr_BE.ISO_8859-1",  "ISO-8859-1" },
469*6ea1f93eSDaniel Fojt     { "fr_CA.DIS_8859-15", "ISO-8859-15" },
470*6ea1f93eSDaniel Fojt     { "fr_CA.ISO_8859-1",  "ISO-8859-1" },
471*6ea1f93eSDaniel Fojt     { "fr_CH.DIS_8859-15", "ISO-8859-15" },
472*6ea1f93eSDaniel Fojt     { "fr_CH.ISO_8859-1",  "ISO-8859-1" },
473*6ea1f93eSDaniel Fojt     { "fr_FR.DIS_8859-15", "ISO-8859-15" },
474*6ea1f93eSDaniel Fojt     { "fr_FR.ISO_8859-1",  "ISO-8859-1" },
475*6ea1f93eSDaniel Fojt     { "hr_HR.ISO_8859-2",  "ISO-8859-2" },
476*6ea1f93eSDaniel Fojt     { "hu_HU.ISO_8859-2",  "ISO-8859-2" },
477*6ea1f93eSDaniel Fojt     { "is_IS.DIS_8859-15", "ISO-8859-15" },
478*6ea1f93eSDaniel Fojt     { "is_IS.ISO_8859-1",  "ISO-8859-1" },
479*6ea1f93eSDaniel Fojt     { "it_CH.DIS_8859-15", "ISO-8859-15" },
480*6ea1f93eSDaniel Fojt     { "it_CH.ISO_8859-1",  "ISO-8859-1" },
481*6ea1f93eSDaniel Fojt     { "it_IT.DIS_8859-15", "ISO-8859-15" },
482*6ea1f93eSDaniel Fojt     { "it_IT.ISO_8859-1",  "ISO-8859-1" },
483*6ea1f93eSDaniel Fojt     { "ja_JP.EUC",         "EUC-JP" },
484*6ea1f93eSDaniel Fojt     { "ja_JP.SJIS",        "SHIFT_JIS" },
485*6ea1f93eSDaniel Fojt     { "ja_JP.Shift_JIS",   "SHIFT_JIS" },
486*6ea1f93eSDaniel Fojt     { "ko_KR.EUC",         "EUC-KR" },
487*6ea1f93eSDaniel Fojt     { "la_LN.ASCII",       "ASCII" },
488*6ea1f93eSDaniel Fojt     { "la_LN.DIS_8859-15", "ISO-8859-15" },
489*6ea1f93eSDaniel Fojt     { "la_LN.ISO_8859-1",  "ISO-8859-1" },
490*6ea1f93eSDaniel Fojt     { "la_LN.ISO_8859-2",  "ISO-8859-2" },
491*6ea1f93eSDaniel Fojt     { "la_LN.ISO_8859-4",  "ISO-8859-4" },
492*6ea1f93eSDaniel Fojt     { "lt_LN.ASCII",       "ASCII" },
493*6ea1f93eSDaniel Fojt     { "lt_LN.DIS_8859-15", "ISO-8859-15" },
494*6ea1f93eSDaniel Fojt     { "lt_LN.ISO_8859-1",  "ISO-8859-1" },
495*6ea1f93eSDaniel Fojt     { "lt_LN.ISO_8859-2",  "ISO-8859-2" },
496*6ea1f93eSDaniel Fojt     { "lt_LT.ISO_8859-4",  "ISO-8859-4" },
497*6ea1f93eSDaniel Fojt     { "nl_BE.DIS_8859-15", "ISO-8859-15" },
498*6ea1f93eSDaniel Fojt     { "nl_BE.ISO_8859-1",  "ISO-8859-1" },
499*6ea1f93eSDaniel Fojt     { "nl_NL.DIS_8859-15", "ISO-8859-15" },
500*6ea1f93eSDaniel Fojt     { "nl_NL.ISO_8859-1",  "ISO-8859-1" },
501*6ea1f93eSDaniel Fojt     { "no_NO.DIS_8859-15", "ISO-8859-15" },
502*6ea1f93eSDaniel Fojt     { "no_NO.ISO_8859-1",  "ISO-8859-1" },
503*6ea1f93eSDaniel Fojt     { "pl_PL.ISO_8859-2",  "ISO-8859-2" },
504*6ea1f93eSDaniel Fojt     { "pt_PT.DIS_8859-15", "ISO-8859-15" },
505*6ea1f93eSDaniel Fojt     { "pt_PT.ISO_8859-1",  "ISO-8859-1" },
506*6ea1f93eSDaniel Fojt     { "ru_RU.CP866",       "CP866" },
507*6ea1f93eSDaniel Fojt     { "ru_RU.ISO_8859-5",  "ISO-8859-5" },
508*6ea1f93eSDaniel Fojt     { "ru_RU.KOI8-R",      "KOI8-R" },
509*6ea1f93eSDaniel Fojt     { "ru_SU.CP866",       "CP866" },
510*6ea1f93eSDaniel Fojt     { "ru_SU.ISO_8859-5",  "ISO-8859-5" },
511*6ea1f93eSDaniel Fojt     { "ru_SU.KOI8-R",      "KOI8-R" },
512*6ea1f93eSDaniel Fojt     { "sl_SI.ISO_8859-2",  "ISO-8859-2" },
513*6ea1f93eSDaniel Fojt     { "sv_SE.DIS_8859-15", "ISO-8859-15" },
514*6ea1f93eSDaniel Fojt     { "sv_SE.ISO_8859-1",  "ISO-8859-1" },
515*6ea1f93eSDaniel Fojt     { "uk_UA.KOI8-U",      "KOI8-U" },
516*6ea1f93eSDaniel Fojt     { "zh_CN.EUC",         "GB2312" },
517*6ea1f93eSDaniel Fojt     { "zh_TW.BIG5",        "BIG5" },
518*6ea1f93eSDaniel Fojt     { "zh_TW.Big5",        "BIG5" }
519*6ea1f93eSDaniel Fojt #  define locale_table_defined
52044b87433SJohn Marino # endif
521*6ea1f93eSDaniel Fojt # if defined __DJGPP__                                      /* DOS / DJGPP 2.03 */
522*6ea1f93eSDaniel Fojt     /* The encodings given here may not all be correct.
523*6ea1f93eSDaniel Fojt        If you find that the encoding given for your language and
524*6ea1f93eSDaniel Fojt        country is not the one your DOS machine actually uses, just
525*6ea1f93eSDaniel Fojt        correct it in this file, and send a mail to
526*6ea1f93eSDaniel Fojt        Juan Manuel Guerrero <juan.guerrero@gmx.de>
527*6ea1f93eSDaniel Fojt        and <bug-gnulib@gnu.org>.  */
528*6ea1f93eSDaniel Fojt     { "C",     "ASCII" },
529*6ea1f93eSDaniel Fojt     { "ar",    "CP864" },
530*6ea1f93eSDaniel Fojt     { "ar_AE", "CP864" },
531*6ea1f93eSDaniel Fojt     { "ar_DZ", "CP864" },
532*6ea1f93eSDaniel Fojt     { "ar_EG", "CP864" },
533*6ea1f93eSDaniel Fojt     { "ar_IQ", "CP864" },
534*6ea1f93eSDaniel Fojt     { "ar_IR", "CP864" },
535*6ea1f93eSDaniel Fojt     { "ar_JO", "CP864" },
536*6ea1f93eSDaniel Fojt     { "ar_KW", "CP864" },
537*6ea1f93eSDaniel Fojt     { "ar_MA", "CP864" },
538*6ea1f93eSDaniel Fojt     { "ar_OM", "CP864" },
539*6ea1f93eSDaniel Fojt     { "ar_QA", "CP864" },
540*6ea1f93eSDaniel Fojt     { "ar_SA", "CP864" },
541*6ea1f93eSDaniel Fojt     { "ar_SY", "CP864" },
542*6ea1f93eSDaniel Fojt     { "be",    "CP866" },
543*6ea1f93eSDaniel Fojt     { "be_BE", "CP866" },
544*6ea1f93eSDaniel Fojt     { "bg",    "CP866" }, /* not CP855 ?? */
545*6ea1f93eSDaniel Fojt     { "bg_BG", "CP866" }, /* not CP855 ?? */
546*6ea1f93eSDaniel Fojt     { "ca",    "CP850" },
547*6ea1f93eSDaniel Fojt     { "ca_ES", "CP850" },
548*6ea1f93eSDaniel Fojt     { "cs",    "CP852" },
549*6ea1f93eSDaniel Fojt     { "cs_CZ", "CP852" },
550*6ea1f93eSDaniel Fojt     { "da",    "CP865" }, /* not CP850 ?? */
551*6ea1f93eSDaniel Fojt     { "da_DK", "CP865" }, /* not CP850 ?? */
552*6ea1f93eSDaniel Fojt     { "de",    "CP850" },
553*6ea1f93eSDaniel Fojt     { "de_AT", "CP850" },
554*6ea1f93eSDaniel Fojt     { "de_CH", "CP850" },
555*6ea1f93eSDaniel Fojt     { "de_DE", "CP850" },
556*6ea1f93eSDaniel Fojt     { "el",    "CP869" },
557*6ea1f93eSDaniel Fojt     { "el_GR", "CP869" },
558*6ea1f93eSDaniel Fojt     { "en",    "CP850" },
559*6ea1f93eSDaniel Fojt     { "en_AU", "CP850" }, /* not CP437 ?? */
560*6ea1f93eSDaniel Fojt     { "en_CA", "CP850" },
561*6ea1f93eSDaniel Fojt     { "en_GB", "CP850" },
562*6ea1f93eSDaniel Fojt     { "en_NZ", "CP437" },
563*6ea1f93eSDaniel Fojt     { "en_US", "CP437" },
564*6ea1f93eSDaniel Fojt     { "en_ZA", "CP850" }, /* not CP437 ?? */
565*6ea1f93eSDaniel Fojt     { "eo",    "CP850" },
566*6ea1f93eSDaniel Fojt     { "eo_EO", "CP850" },
567*6ea1f93eSDaniel Fojt     { "es",    "CP850" },
568*6ea1f93eSDaniel Fojt     { "es_AR", "CP850" },
569*6ea1f93eSDaniel Fojt     { "es_BO", "CP850" },
570*6ea1f93eSDaniel Fojt     { "es_CL", "CP850" },
571*6ea1f93eSDaniel Fojt     { "es_CO", "CP850" },
572*6ea1f93eSDaniel Fojt     { "es_CR", "CP850" },
573*6ea1f93eSDaniel Fojt     { "es_CU", "CP850" },
574*6ea1f93eSDaniel Fojt     { "es_DO", "CP850" },
575*6ea1f93eSDaniel Fojt     { "es_EC", "CP850" },
576*6ea1f93eSDaniel Fojt     { "es_ES", "CP850" },
577*6ea1f93eSDaniel Fojt     { "es_GT", "CP850" },
578*6ea1f93eSDaniel Fojt     { "es_HN", "CP850" },
579*6ea1f93eSDaniel Fojt     { "es_MX", "CP850" },
580*6ea1f93eSDaniel Fojt     { "es_NI", "CP850" },
581*6ea1f93eSDaniel Fojt     { "es_PA", "CP850" },
582*6ea1f93eSDaniel Fojt     { "es_PE", "CP850" },
583*6ea1f93eSDaniel Fojt     { "es_PY", "CP850" },
584*6ea1f93eSDaniel Fojt     { "es_SV", "CP850" },
585*6ea1f93eSDaniel Fojt     { "es_UY", "CP850" },
586*6ea1f93eSDaniel Fojt     { "es_VE", "CP850" },
587*6ea1f93eSDaniel Fojt     { "et",    "CP850" },
588*6ea1f93eSDaniel Fojt     { "et_EE", "CP850" },
589*6ea1f93eSDaniel Fojt     { "eu",    "CP850" },
590*6ea1f93eSDaniel Fojt     { "eu_ES", "CP850" },
591*6ea1f93eSDaniel Fojt     { "fi",    "CP850" },
592*6ea1f93eSDaniel Fojt     { "fi_FI", "CP850" },
593*6ea1f93eSDaniel Fojt     { "fr",    "CP850" },
594*6ea1f93eSDaniel Fojt     { "fr_BE", "CP850" },
595*6ea1f93eSDaniel Fojt     { "fr_CA", "CP850" },
596*6ea1f93eSDaniel Fojt     { "fr_CH", "CP850" },
597*6ea1f93eSDaniel Fojt     { "fr_FR", "CP850" },
598*6ea1f93eSDaniel Fojt     { "ga",    "CP850" },
599*6ea1f93eSDaniel Fojt     { "ga_IE", "CP850" },
600*6ea1f93eSDaniel Fojt     { "gd",    "CP850" },
601*6ea1f93eSDaniel Fojt     { "gd_GB", "CP850" },
602*6ea1f93eSDaniel Fojt     { "gl",    "CP850" },
603*6ea1f93eSDaniel Fojt     { "gl_ES", "CP850" },
604*6ea1f93eSDaniel Fojt     { "he",    "CP862" },
605*6ea1f93eSDaniel Fojt     { "he_IL", "CP862" },
606*6ea1f93eSDaniel Fojt     { "hr",    "CP852" },
607*6ea1f93eSDaniel Fojt     { "hr_HR", "CP852" },
608*6ea1f93eSDaniel Fojt     { "hu",    "CP852" },
609*6ea1f93eSDaniel Fojt     { "hu_HU", "CP852" },
610*6ea1f93eSDaniel Fojt     { "id",    "CP850" }, /* not CP437 ?? */
611*6ea1f93eSDaniel Fojt     { "id_ID", "CP850" }, /* not CP437 ?? */
612*6ea1f93eSDaniel Fojt     { "is",    "CP861" }, /* not CP850 ?? */
613*6ea1f93eSDaniel Fojt     { "is_IS", "CP861" }, /* not CP850 ?? */
614*6ea1f93eSDaniel Fojt     { "it",    "CP850" },
615*6ea1f93eSDaniel Fojt     { "it_CH", "CP850" },
616*6ea1f93eSDaniel Fojt     { "it_IT", "CP850" },
617*6ea1f93eSDaniel Fojt     { "ja",    "CP932" },
618*6ea1f93eSDaniel Fojt     { "ja_JP", "CP932" },
619*6ea1f93eSDaniel Fojt     { "kr",    "CP949" }, /* not CP934 ?? */
620*6ea1f93eSDaniel Fojt     { "kr_KR", "CP949" }, /* not CP934 ?? */
621*6ea1f93eSDaniel Fojt     { "lt",    "CP775" },
622*6ea1f93eSDaniel Fojt     { "lt_LT", "CP775" },
623*6ea1f93eSDaniel Fojt     { "lv",    "CP775" },
624*6ea1f93eSDaniel Fojt     { "lv_LV", "CP775" },
625*6ea1f93eSDaniel Fojt     { "mk",    "CP866" }, /* not CP855 ?? */
626*6ea1f93eSDaniel Fojt     { "mk_MK", "CP866" }, /* not CP855 ?? */
627*6ea1f93eSDaniel Fojt     { "mt",    "CP850" },
628*6ea1f93eSDaniel Fojt     { "mt_MT", "CP850" },
629*6ea1f93eSDaniel Fojt     { "nb",    "CP865" }, /* not CP850 ?? */
630*6ea1f93eSDaniel Fojt     { "nb_NO", "CP865" }, /* not CP850 ?? */
631*6ea1f93eSDaniel Fojt     { "nl",    "CP850" },
632*6ea1f93eSDaniel Fojt     { "nl_BE", "CP850" },
633*6ea1f93eSDaniel Fojt     { "nl_NL", "CP850" },
634*6ea1f93eSDaniel Fojt     { "nn",    "CP865" }, /* not CP850 ?? */
635*6ea1f93eSDaniel Fojt     { "nn_NO", "CP865" }, /* not CP850 ?? */
636*6ea1f93eSDaniel Fojt     { "no",    "CP865" }, /* not CP850 ?? */
637*6ea1f93eSDaniel Fojt     { "no_NO", "CP865" }, /* not CP850 ?? */
638*6ea1f93eSDaniel Fojt     { "pl",    "CP852" },
639*6ea1f93eSDaniel Fojt     { "pl_PL", "CP852" },
640*6ea1f93eSDaniel Fojt     { "pt",    "CP850" },
641*6ea1f93eSDaniel Fojt     { "pt_BR", "CP850" },
642*6ea1f93eSDaniel Fojt     { "pt_PT", "CP850" },
643*6ea1f93eSDaniel Fojt     { "ro",    "CP852" },
644*6ea1f93eSDaniel Fojt     { "ro_RO", "CP852" },
645*6ea1f93eSDaniel Fojt     { "ru",    "CP866" },
646*6ea1f93eSDaniel Fojt     { "ru_RU", "CP866" },
647*6ea1f93eSDaniel Fojt     { "sk",    "CP852" },
648*6ea1f93eSDaniel Fojt     { "sk_SK", "CP852" },
649*6ea1f93eSDaniel Fojt     { "sl",    "CP852" },
650*6ea1f93eSDaniel Fojt     { "sl_SI", "CP852" },
651*6ea1f93eSDaniel Fojt     { "sq",    "CP852" },
652*6ea1f93eSDaniel Fojt     { "sq_AL", "CP852" },
653*6ea1f93eSDaniel Fojt     { "sr",    "CP852" }, /* CP852 or CP866 or CP855 ?? */
654*6ea1f93eSDaniel Fojt     { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
655*6ea1f93eSDaniel Fojt     { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
656*6ea1f93eSDaniel Fojt     { "sv",    "CP850" },
657*6ea1f93eSDaniel Fojt     { "sv_SE", "CP850" },
658*6ea1f93eSDaniel Fojt     { "th",    "CP874" },
659*6ea1f93eSDaniel Fojt     { "th_TH", "CP874" },
660*6ea1f93eSDaniel Fojt     { "tr",    "CP857" },
661*6ea1f93eSDaniel Fojt     { "tr_TR", "CP857" },
662*6ea1f93eSDaniel Fojt     { "uk",    "CP1125" },
663*6ea1f93eSDaniel Fojt     { "uk_UA", "CP1125" },
664*6ea1f93eSDaniel Fojt     { "zh_CN", "GBK" },
665*6ea1f93eSDaniel Fojt     { "zh_TW", "CP950" } /* not CP938 ?? */
666*6ea1f93eSDaniel Fojt #  define locale_table_defined
667*6ea1f93eSDaniel Fojt # endif
668*6ea1f93eSDaniel Fojt # ifndef locale_table_defined
669*6ea1f93eSDaniel Fojt     /* Just a dummy entry, to avoid a C syntax error.  */
670*6ea1f93eSDaniel Fojt     { "", "" }
671*6ea1f93eSDaniel Fojt # endif
672*6ea1f93eSDaniel Fojt   };
673*6ea1f93eSDaniel Fojt 
67444b87433SJohn Marino #endif
67544b87433SJohn Marino 
67644b87433SJohn Marino 
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in localcharset.h.
   The result must not be freed; it is statically allocated.
   If the canonical name cannot be determined, the result is a non-canonical
   name.

   Notes on the result:
     - The function never returns NULL and never returns an empty string
       from the alias/locale table fallback: an empty codeset is replaced
       by "ASCII" (or "UTF-8" on Mac OS X, BeOS and Haiku).
     - On the Cygwin and OS/2 paths, an encoding suffix found in LC_ALL,
       LC_CTYPE or LANG (the part after '.', without any '@modifier') is
       returned directly, without alias resolution.  When there is no
       '@modifier', the returned pointer points into the string returned
       by getenv.
     - Several paths format the name into a function-local static buffer;
       that buffer is overwritten by a later call that takes the same
       path.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;

#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier >= dot here, so the difference is non-negative;
                 the cast only makes the unsigned comparison explicit.
                 An over-long encoding name falls through to GetACP().  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

# elif defined WINDOWS_NATIVE

  static char buf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_ALL, NULL);
  char *pdot = NULL;

  /* If they set different locales for different categories,
     'setlocale' will return a semi-colon separated list of locale
     values.  To make sure we use the correct one, we choose LC_CTYPE.  */
  if (current_locale != NULL && strchr (current_locale, ';'))
    current_locale = setlocale (LC_CTYPE, NULL);

  /* Guard against a null result from 'setlocale' instead of passing it
     to strrchr; in that case the GetACP() fallback below is used.  */
  if (current_locale != NULL)
    pdot = strrchr (current_locale, '.');

  /* "CP" + suffix + NUL must fit in buf, otherwise fall back to GetACP().  */
  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
        number: GetACP().
        When the output goes to a console window, it needs to be provided in
        GetOEMCP() encoding if the console is using a raster font, or in
        GetConsoleOutputCP() encoding if it is using a TrueType font.
        But in GUI programs and for output sent to files and pipes, GetACP()
        encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  codeset = buf;

# elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier >= dot here, so the difference is non-negative; the
             cast only makes the unsigned comparison explicit.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* Convert explicitly so that the argument matches the %u
             conversion; buf is sized for "CP" plus at most 10 digits.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

# else

#  error "Add code for other platforms here."

# endif

  /* Resolve alias.  */
  {
# ifdef alias_table_defined
    /* On some platforms, UTF-8 locales are the most frequently used ones.
       Speed up the common case and slow down the less common cases by
       testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    if (strcmp (codeset, "UTF-8") == 0)
      goto done_table_lookup;
    else
#  endif
      {
        const struct table_entry * const table = alias_table;
        size_t const table_size =
          sizeof (alias_table) / sizeof (struct table_entry);
        /* The table is sorted.  Perform a binary search.  */
        size_t hi = table_size;
        size_t lo = 0;
        while (lo < hi)
          {
            /* Invariant:
               for i < lo, strcmp (table[i].alias, codeset) < 0,
               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
            int cmp = strcmp (table[mid].alias, codeset);
            if (cmp < 0)
              lo = mid + 1;
            else if (cmp > 0)
              hi = mid;
            else
              {
                /* Found an i with
                     strcmp (table[i].alias, codeset) == 0.  */
                codeset = table[mid].canonical;
                goto done_table_lookup;
              }
          }
      }
    /* The label lives in the never-taken arm of 'if (0)': jumping to it
       skips the 'else' arm below, whereas falling out of the search loop
       executes the 'else' arm (the not-found fallback).  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        if (codeset[0] == '\0')
          codeset = "ASCII";
# endif
      }
  }

#else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like DJGPP) have only the C locale.  Therefore we don't use setlocale
     here; it would return "C" when it doesn't support the locale name the
     user has set.  */
# if 0
  locale = setlocale (LC_CTYPE, NULL);
# endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
          /* This check is not guarded by the 'if' above.  It turns a null
             pointer into an empty string, so that the lookup below never
             receives NULL.  */
          if (locale == NULL)
            locale = "";
        }
    }

  /* Map locale name to canonical encoding name.  */
  {
# ifdef locale_table_defined
    const struct table_entry * const table = locale_table;
    size_t const table_size =
      sizeof (locale_table) / sizeof (struct table_entry);
    /* The table is sorted.  Perform a binary search.  */
    size_t hi = table_size;
    size_t lo = 0;
    while (lo < hi)
      {
        /* Invariant:
           for i < lo, strcmp (table[i].locale, locale) < 0,
           for i >= hi, strcmp (table[i].locale, locale) > 0.  */
        size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
        int cmp = strcmp (table[mid].locale, locale);
        if (cmp < 0)
          lo = mid + 1;
        else if (cmp > 0)
          hi = mid;
        else
          {
            /* Found an i with
                 strcmp (table[i].locale, locale) == 0.  */
            codeset = table[mid].canonical;
            goto done_table_lookup;
          }
      }
    /* Same idiom as in the alias lookup: a jump to the label skips the
       'else' arm, falling out of the loop executes it.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* The canonical name cannot be determined.  */
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        codeset = "ASCII";
# endif
      }
  }

#endif

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}
997