/* Determine a canonical name for the current locale's character encoding.

   Copyright (C) 2000-2006, 2008-2018 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, see <https://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>.  */
1944b87433SJohn Marino
2044b87433SJohn Marino #include <config.h>
2144b87433SJohn Marino
2244b87433SJohn Marino /* Specification. */
2344b87433SJohn Marino #include "localcharset.h"
2444b87433SJohn Marino
2544b87433SJohn Marino #include <stddef.h>
2644b87433SJohn Marino #include <stdio.h>
2744b87433SJohn Marino #include <string.h>
2844b87433SJohn Marino #include <stdlib.h>
2944b87433SJohn Marino
/* Platform detection and the system headers each platform needs.
   Depending on the compiler's predefined macros this section defines:
     DARWIN7         - Mac OS X with nl_langinfo(CODESET) available,
     WINDOWS_NATIVE  - _WIN32 without Cygwin,
     OS2             - forced on when building with EMX.
   The remainder of the file keys its conditionals on these names.  */

#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
#endif

#if defined _WIN32 && !defined __CYGWIN__
# define WINDOWS_NATIVE
# include <locale.h>
#endif

#if defined __EMX__
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
# ifndef OS2
#  define OS2
# endif
#endif

#if !defined WINDOWS_NATIVE
# if HAVE_LANGINFO_CODESET
#  include <langinfo.h>
# else
#  if 0 /* see comment regarding use of setlocale(), below */
#   include <locale.h>
#  endif
# endif
# ifdef __CYGWIN__
/* Cygwin is not WINDOWS_NATIVE but still pulls in the Windows API.  */
#  define WIN32_LEAN_AND_MEAN
#  include <windows.h>
# endif
#elif defined WINDOWS_NATIVE
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
#if defined OS2
# define INCL_DOS
# include <os2.h>
#endif

/* For MB_CUR_MAX_L */
#if defined DARWIN7
# include <xlocale.h>
#endif
7144b87433SJohn Marino
72*6ea1f93eSDaniel Fojt
73*6ea1f93eSDaniel Fojt #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
74*6ea1f93eSDaniel Fojt
/* On these platforms, we use a mapping from non-canonical encoding name
   to GNU canonical encoding name.  */

/* With glibc-2.1 or newer, we don't need any canonicalization,
   because glibc has iconv and both glibc and libiconv support all
   GNU canonical names directly.  */
81*6ea1f93eSDaniel Fojt # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
82*6ea1f93eSDaniel Fojt
/* One row of the encoding-alias table: a platform-specific
   (non-canonical) encoding name and the GNU canonical name it maps to.

   Both fields are fixed-size arrays holding up to 11 characters plus
   the terminating NUL, so a row contains no pointers.  Note that C
   accepts a string literal of exactly 12 characters as an initializer
   for a 12-byte array and silently drops the NUL; any new, longer name
   therefore requires enlarging the corresponding array.  */
struct table_entry
{
  const char alias[11+1];      /* encoding name as the platform spells it */
  const char canonical[11+1];  /* GNU canonical encoding name */
};
88*6ea1f93eSDaniel Fojt
89*6ea1f93eSDaniel Fojt /* Table of platform-dependent mappings, sorted in ascending order. */
90*6ea1f93eSDaniel Fojt static const struct table_entry alias_table[] =
91*6ea1f93eSDaniel Fojt {
92*6ea1f93eSDaniel Fojt # if defined __FreeBSD__ /* FreeBSD */
93*6ea1f93eSDaniel Fojt /*{ "ARMSCII-8", "ARMSCII-8" },*/
94*6ea1f93eSDaniel Fojt { "Big5", "BIG5" },
95*6ea1f93eSDaniel Fojt { "C", "ASCII" },
96*6ea1f93eSDaniel Fojt /*{ "CP1131", "CP1131" },*/
97*6ea1f93eSDaniel Fojt /*{ "CP1251", "CP1251" },*/
98*6ea1f93eSDaniel Fojt /*{ "CP866", "CP866" },*/
99*6ea1f93eSDaniel Fojt /*{ "GB18030", "GB18030" },*/
100*6ea1f93eSDaniel Fojt /*{ "GB2312", "GB2312" },*/
101*6ea1f93eSDaniel Fojt /*{ "GBK", "GBK" },*/
102*6ea1f93eSDaniel Fojt /*{ "ISCII-DEV", "?" },*/
103*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
104*6ea1f93eSDaniel Fojt { "ISO8859-13", "ISO-8859-13" },
105*6ea1f93eSDaniel Fojt { "ISO8859-15", "ISO-8859-15" },
106*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
107*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
108*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" },
109*6ea1f93eSDaniel Fojt { "ISO8859-9", "ISO-8859-9" },
110*6ea1f93eSDaniel Fojt /*{ "KOI8-R", "KOI8-R" },*/
111*6ea1f93eSDaniel Fojt /*{ "KOI8-U", "KOI8-U" },*/
112*6ea1f93eSDaniel Fojt { "SJIS", "SHIFT_JIS" },
113*6ea1f93eSDaniel Fojt { "US-ASCII", "ASCII" },
114*6ea1f93eSDaniel Fojt { "eucCN", "GB2312" },
115*6ea1f93eSDaniel Fojt { "eucJP", "EUC-JP" },
116*6ea1f93eSDaniel Fojt { "eucKR", "EUC-KR" }
117*6ea1f93eSDaniel Fojt # define alias_table_defined
118*6ea1f93eSDaniel Fojt # endif
119*6ea1f93eSDaniel Fojt # if defined __NetBSD__ /* NetBSD */
120*6ea1f93eSDaniel Fojt { "646", "ASCII" },
121*6ea1f93eSDaniel Fojt /*{ "ARMSCII-8", "ARMSCII-8" },*/
122*6ea1f93eSDaniel Fojt /*{ "BIG5", "BIG5" },*/
123*6ea1f93eSDaniel Fojt { "Big5-HKSCS", "BIG5-HKSCS" },
124*6ea1f93eSDaniel Fojt /*{ "CP1251", "CP1251" },*/
125*6ea1f93eSDaniel Fojt /*{ "CP866", "CP866" },*/
126*6ea1f93eSDaniel Fojt /*{ "GB18030", "GB18030" },*/
127*6ea1f93eSDaniel Fojt /*{ "GB2312", "GB2312" },*/
128*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
129*6ea1f93eSDaniel Fojt { "ISO8859-13", "ISO-8859-13" },
130*6ea1f93eSDaniel Fojt { "ISO8859-15", "ISO-8859-15" },
131*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
132*6ea1f93eSDaniel Fojt { "ISO8859-4", "ISO-8859-4" },
133*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
134*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" },
135*6ea1f93eSDaniel Fojt /*{ "KOI8-R", "KOI8-R" },*/
136*6ea1f93eSDaniel Fojt /*{ "KOI8-U", "KOI8-U" },*/
137*6ea1f93eSDaniel Fojt /*{ "PT154", "PT154" },*/
138*6ea1f93eSDaniel Fojt { "SJIS", "SHIFT_JIS" },
139*6ea1f93eSDaniel Fojt { "eucCN", "GB2312" },
140*6ea1f93eSDaniel Fojt { "eucJP", "EUC-JP" },
141*6ea1f93eSDaniel Fojt { "eucKR", "EUC-KR" },
142*6ea1f93eSDaniel Fojt { "eucTW", "EUC-TW" }
143*6ea1f93eSDaniel Fojt # define alias_table_defined
144*6ea1f93eSDaniel Fojt # endif
145*6ea1f93eSDaniel Fojt # if defined __OpenBSD__ /* OpenBSD */
146*6ea1f93eSDaniel Fojt { "646", "ASCII" },
147*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
148*6ea1f93eSDaniel Fojt { "ISO8859-13", "ISO-8859-13" },
149*6ea1f93eSDaniel Fojt { "ISO8859-15", "ISO-8859-15" },
150*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
151*6ea1f93eSDaniel Fojt { "ISO8859-4", "ISO-8859-4" },
152*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
153*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" }
154*6ea1f93eSDaniel Fojt # define alias_table_defined
155*6ea1f93eSDaniel Fojt # endif
156*6ea1f93eSDaniel Fojt # if defined __APPLE__ && defined __MACH__ /* Mac OS X */
157*6ea1f93eSDaniel Fojt /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
158*6ea1f93eSDaniel Fojt useless:
159*6ea1f93eSDaniel Fojt - It returns the empty string when LANG is set to a locale of the
160*6ea1f93eSDaniel Fojt form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
161*6ea1f93eSDaniel Fojt LC_CTYPE file.
162*6ea1f93eSDaniel Fojt - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
163*6ea1f93eSDaniel Fojt the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
164*6ea1f93eSDaniel Fojt - The documentation says:
165*6ea1f93eSDaniel Fojt "... all code that calls BSD system routines should ensure
166*6ea1f93eSDaniel Fojt that the const *char parameters of these routines are in UTF-8
167*6ea1f93eSDaniel Fojt encoding. All BSD system functions expect their string
168*6ea1f93eSDaniel Fojt parameters to be in UTF-8 encoding and nothing else."
169*6ea1f93eSDaniel Fojt It also says
170*6ea1f93eSDaniel Fojt "An additional caveat is that string parameters for files,
171*6ea1f93eSDaniel Fojt paths, and other file-system entities must be in canonical
172*6ea1f93eSDaniel Fojt UTF-8. In a canonical UTF-8 Unicode string, all decomposable
173*6ea1f93eSDaniel Fojt characters are decomposed ..."
174*6ea1f93eSDaniel Fojt but this is not true: You can pass non-decomposed UTF-8 strings
175*6ea1f93eSDaniel Fojt to file system functions, and it is the OS which will convert
176*6ea1f93eSDaniel Fojt them to decomposed UTF-8 before accessing the file system.
177*6ea1f93eSDaniel Fojt - The Apple Terminal application displays UTF-8 by default.
178*6ea1f93eSDaniel Fojt - However, other applications are free to use different encodings:
179*6ea1f93eSDaniel Fojt - xterm uses ISO-8859-1 by default.
180*6ea1f93eSDaniel Fojt - TextEdit uses MacRoman by default.
181*6ea1f93eSDaniel Fojt We prefer UTF-8 over decomposed UTF-8-MAC because one should
182*6ea1f93eSDaniel Fojt minimize the use of decomposed Unicode. Unfortunately, through the
183*6ea1f93eSDaniel Fojt Darwin file system, decomposed UTF-8 strings are leaked into user
184*6ea1f93eSDaniel Fojt space nevertheless.
185*6ea1f93eSDaniel Fojt Then there are also the locales with encodings other than US-ASCII
186*6ea1f93eSDaniel Fojt and UTF-8. These locales can be occasionally useful to users (e.g.
187*6ea1f93eSDaniel Fojt when grepping through ISO-8859-1 encoded text files), when all their
188*6ea1f93eSDaniel Fojt file names are in US-ASCII.
189*6ea1f93eSDaniel Fojt */
190*6ea1f93eSDaniel Fojt { "ARMSCII-8", "ARMSCII-8" },
191*6ea1f93eSDaniel Fojt { "Big5", "BIG5" },
192*6ea1f93eSDaniel Fojt { "Big5HKSCS", "BIG5-HKSCS" },
193*6ea1f93eSDaniel Fojt { "CP1131", "CP1131" },
194*6ea1f93eSDaniel Fojt { "CP1251", "CP1251" },
195*6ea1f93eSDaniel Fojt { "CP866", "CP866" },
196*6ea1f93eSDaniel Fojt { "CP949", "CP949" },
197*6ea1f93eSDaniel Fojt { "GB18030", "GB18030" },
198*6ea1f93eSDaniel Fojt { "GB2312", "GB2312" },
199*6ea1f93eSDaniel Fojt { "GBK", "GBK" },
200*6ea1f93eSDaniel Fojt /*{ "ISCII-DEV", "?" },*/
201*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
202*6ea1f93eSDaniel Fojt { "ISO8859-13", "ISO-8859-13" },
203*6ea1f93eSDaniel Fojt { "ISO8859-15", "ISO-8859-15" },
204*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
205*6ea1f93eSDaniel Fojt { "ISO8859-4", "ISO-8859-4" },
206*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
207*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" },
208*6ea1f93eSDaniel Fojt { "ISO8859-9", "ISO-8859-9" },
209*6ea1f93eSDaniel Fojt { "KOI8-R", "KOI8-R" },
210*6ea1f93eSDaniel Fojt { "KOI8-U", "KOI8-U" },
211*6ea1f93eSDaniel Fojt { "PT154", "PT154" },
212*6ea1f93eSDaniel Fojt { "SJIS", "SHIFT_JIS" },
213*6ea1f93eSDaniel Fojt { "eucCN", "GB2312" },
214*6ea1f93eSDaniel Fojt { "eucJP", "EUC-JP" },
215*6ea1f93eSDaniel Fojt { "eucKR", "EUC-KR" }
216*6ea1f93eSDaniel Fojt # define alias_table_defined
217*6ea1f93eSDaniel Fojt # endif
218*6ea1f93eSDaniel Fojt # if defined _AIX /* AIX */
219*6ea1f93eSDaniel Fojt /*{ "GBK", "GBK" },*/
220*6ea1f93eSDaniel Fojt { "IBM-1046", "CP1046" },
221*6ea1f93eSDaniel Fojt { "IBM-1124", "CP1124" },
222*6ea1f93eSDaniel Fojt { "IBM-1129", "CP1129" },
223*6ea1f93eSDaniel Fojt { "IBM-1252", "CP1252" },
224*6ea1f93eSDaniel Fojt { "IBM-850", "CP850" },
225*6ea1f93eSDaniel Fojt { "IBM-856", "CP856" },
226*6ea1f93eSDaniel Fojt { "IBM-921", "ISO-8859-13" },
227*6ea1f93eSDaniel Fojt { "IBM-922", "CP922" },
228*6ea1f93eSDaniel Fojt { "IBM-932", "CP932" },
229*6ea1f93eSDaniel Fojt { "IBM-943", "CP943" },
230*6ea1f93eSDaniel Fojt { "IBM-eucCN", "GB2312" },
231*6ea1f93eSDaniel Fojt { "IBM-eucJP", "EUC-JP" },
232*6ea1f93eSDaniel Fojt { "IBM-eucKR", "EUC-KR" },
233*6ea1f93eSDaniel Fojt { "IBM-eucTW", "EUC-TW" },
234*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
235*6ea1f93eSDaniel Fojt { "ISO8859-15", "ISO-8859-15" },
236*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
237*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
238*6ea1f93eSDaniel Fojt { "ISO8859-6", "ISO-8859-6" },
239*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" },
240*6ea1f93eSDaniel Fojt { "ISO8859-8", "ISO-8859-8" },
241*6ea1f93eSDaniel Fojt { "ISO8859-9", "ISO-8859-9" },
242*6ea1f93eSDaniel Fojt { "TIS-620", "TIS-620" },
243*6ea1f93eSDaniel Fojt /*{ "UTF-8", "UTF-8" },*/
244*6ea1f93eSDaniel Fojt { "big5", "BIG5" }
245*6ea1f93eSDaniel Fojt # define alias_table_defined
246*6ea1f93eSDaniel Fojt # endif
247*6ea1f93eSDaniel Fojt # if defined __hpux /* HP-UX */
248*6ea1f93eSDaniel Fojt { "SJIS", "SHIFT_JIS" },
249*6ea1f93eSDaniel Fojt { "arabic8", "HP-ARABIC8" },
250*6ea1f93eSDaniel Fojt { "big5", "BIG5" },
251*6ea1f93eSDaniel Fojt { "cp1251", "CP1251" },
252*6ea1f93eSDaniel Fojt { "eucJP", "EUC-JP" },
253*6ea1f93eSDaniel Fojt { "eucKR", "EUC-KR" },
254*6ea1f93eSDaniel Fojt { "eucTW", "EUC-TW" },
255*6ea1f93eSDaniel Fojt { "gb18030", "GB18030" },
256*6ea1f93eSDaniel Fojt { "greek8", "HP-GREEK8" },
257*6ea1f93eSDaniel Fojt { "hebrew8", "HP-HEBREW8" },
258*6ea1f93eSDaniel Fojt { "hkbig5", "BIG5-HKSCS" },
259*6ea1f93eSDaniel Fojt { "hp15CN", "GB2312" },
260*6ea1f93eSDaniel Fojt { "iso88591", "ISO-8859-1" },
261*6ea1f93eSDaniel Fojt { "iso885913", "ISO-8859-13" },
262*6ea1f93eSDaniel Fojt { "iso885915", "ISO-8859-15" },
263*6ea1f93eSDaniel Fojt { "iso88592", "ISO-8859-2" },
264*6ea1f93eSDaniel Fojt { "iso88594", "ISO-8859-4" },
265*6ea1f93eSDaniel Fojt { "iso88595", "ISO-8859-5" },
266*6ea1f93eSDaniel Fojt { "iso88596", "ISO-8859-6" },
267*6ea1f93eSDaniel Fojt { "iso88597", "ISO-8859-7" },
268*6ea1f93eSDaniel Fojt { "iso88598", "ISO-8859-8" },
269*6ea1f93eSDaniel Fojt { "iso88599", "ISO-8859-9" },
270*6ea1f93eSDaniel Fojt { "kana8", "HP-KANA8" },
271*6ea1f93eSDaniel Fojt { "koi8r", "KOI8-R" },
272*6ea1f93eSDaniel Fojt { "roman8", "HP-ROMAN8" },
273*6ea1f93eSDaniel Fojt { "tis620", "TIS-620" },
274*6ea1f93eSDaniel Fojt { "turkish8", "HP-TURKISH8" },
275*6ea1f93eSDaniel Fojt { "utf8", "UTF-8" }
276*6ea1f93eSDaniel Fojt # define alias_table_defined
277*6ea1f93eSDaniel Fojt # endif
278*6ea1f93eSDaniel Fojt # if defined __sgi /* IRIX */
279*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
280*6ea1f93eSDaniel Fojt { "ISO8859-15", "ISO-8859-15" },
281*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
282*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
283*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" },
284*6ea1f93eSDaniel Fojt { "ISO8859-9", "ISO-8859-9" },
285*6ea1f93eSDaniel Fojt { "eucCN", "GB2312" },
286*6ea1f93eSDaniel Fojt { "eucJP", "EUC-JP" },
287*6ea1f93eSDaniel Fojt { "eucKR", "EUC-KR" },
288*6ea1f93eSDaniel Fojt { "eucTW", "EUC-TW" }
289*6ea1f93eSDaniel Fojt # define alias_table_defined
290*6ea1f93eSDaniel Fojt # endif
291*6ea1f93eSDaniel Fojt # if defined __osf__ /* OSF/1 */
292*6ea1f93eSDaniel Fojt /*{ "GBK", "GBK" },*/
293*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
294*6ea1f93eSDaniel Fojt { "ISO8859-15", "ISO-8859-15" },
295*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
296*6ea1f93eSDaniel Fojt { "ISO8859-4", "ISO-8859-4" },
297*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
298*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" },
299*6ea1f93eSDaniel Fojt { "ISO8859-8", "ISO-8859-8" },
300*6ea1f93eSDaniel Fojt { "ISO8859-9", "ISO-8859-9" },
301*6ea1f93eSDaniel Fojt { "KSC5601", "CP949" },
302*6ea1f93eSDaniel Fojt { "SJIS", "SHIFT_JIS" },
303*6ea1f93eSDaniel Fojt { "TACTIS", "TIS-620" },
304*6ea1f93eSDaniel Fojt /*{ "UTF-8", "UTF-8" },*/
305*6ea1f93eSDaniel Fojt { "big5", "BIG5" },
306*6ea1f93eSDaniel Fojt { "cp850", "CP850" },
307*6ea1f93eSDaniel Fojt { "dechanyu", "DEC-HANYU" },
308*6ea1f93eSDaniel Fojt { "dechanzi", "GB2312" },
309*6ea1f93eSDaniel Fojt { "deckanji", "DEC-KANJI" },
310*6ea1f93eSDaniel Fojt { "deckorean", "EUC-KR" },
311*6ea1f93eSDaniel Fojt { "eucJP", "EUC-JP" },
312*6ea1f93eSDaniel Fojt { "eucKR", "EUC-KR" },
313*6ea1f93eSDaniel Fojt { "eucTW", "EUC-TW" },
314*6ea1f93eSDaniel Fojt { "sdeckanji", "EUC-JP" }
315*6ea1f93eSDaniel Fojt # define alias_table_defined
316*6ea1f93eSDaniel Fojt # endif
317*6ea1f93eSDaniel Fojt # if defined __sun /* Solaris */
318*6ea1f93eSDaniel Fojt { "5601", "EUC-KR" },
319*6ea1f93eSDaniel Fojt { "646", "ASCII" },
320*6ea1f93eSDaniel Fojt /*{ "BIG5", "BIG5" },*/
321*6ea1f93eSDaniel Fojt { "Big5-HKSCS", "BIG5-HKSCS" },
322*6ea1f93eSDaniel Fojt { "GB18030", "GB18030" },
323*6ea1f93eSDaniel Fojt /*{ "GBK", "GBK" },*/
324*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
325*6ea1f93eSDaniel Fojt { "ISO8859-11", "TIS-620" },
326*6ea1f93eSDaniel Fojt { "ISO8859-13", "ISO-8859-13" },
327*6ea1f93eSDaniel Fojt { "ISO8859-15", "ISO-8859-15" },
328*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
329*6ea1f93eSDaniel Fojt { "ISO8859-3", "ISO-8859-3" },
330*6ea1f93eSDaniel Fojt { "ISO8859-4", "ISO-8859-4" },
331*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
332*6ea1f93eSDaniel Fojt { "ISO8859-6", "ISO-8859-6" },
333*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" },
334*6ea1f93eSDaniel Fojt { "ISO8859-8", "ISO-8859-8" },
335*6ea1f93eSDaniel Fojt { "ISO8859-9", "ISO-8859-9" },
336*6ea1f93eSDaniel Fojt { "PCK", "SHIFT_JIS" },
337*6ea1f93eSDaniel Fojt { "TIS620.2533", "TIS-620" },
338*6ea1f93eSDaniel Fojt /*{ "UTF-8", "UTF-8" },*/
339*6ea1f93eSDaniel Fojt { "ansi-1251", "CP1251" },
340*6ea1f93eSDaniel Fojt { "cns11643", "EUC-TW" },
341*6ea1f93eSDaniel Fojt { "eucJP", "EUC-JP" },
342*6ea1f93eSDaniel Fojt { "gb2312", "GB2312" },
343*6ea1f93eSDaniel Fojt { "koi8-r", "KOI8-R" }
344*6ea1f93eSDaniel Fojt # define alias_table_defined
345*6ea1f93eSDaniel Fojt # endif
346*6ea1f93eSDaniel Fojt # if defined __minix /* Minix */
347*6ea1f93eSDaniel Fojt { "646", "ASCII" }
348*6ea1f93eSDaniel Fojt # define alias_table_defined
349*6ea1f93eSDaniel Fojt # endif
350*6ea1f93eSDaniel Fojt # if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */
351*6ea1f93eSDaniel Fojt { "CP1361", "JOHAB" },
352*6ea1f93eSDaniel Fojt { "CP20127", "ASCII" },
353*6ea1f93eSDaniel Fojt { "CP20866", "KOI8-R" },
354*6ea1f93eSDaniel Fojt { "CP20936", "GB2312" },
355*6ea1f93eSDaniel Fojt { "CP21866", "KOI8-RU" },
356*6ea1f93eSDaniel Fojt { "CP28591", "ISO-8859-1" },
357*6ea1f93eSDaniel Fojt { "CP28592", "ISO-8859-2" },
358*6ea1f93eSDaniel Fojt { "CP28593", "ISO-8859-3" },
359*6ea1f93eSDaniel Fojt { "CP28594", "ISO-8859-4" },
360*6ea1f93eSDaniel Fojt { "CP28595", "ISO-8859-5" },
361*6ea1f93eSDaniel Fojt { "CP28596", "ISO-8859-6" },
362*6ea1f93eSDaniel Fojt { "CP28597", "ISO-8859-7" },
363*6ea1f93eSDaniel Fojt { "CP28598", "ISO-8859-8" },
364*6ea1f93eSDaniel Fojt { "CP28599", "ISO-8859-9" },
365*6ea1f93eSDaniel Fojt { "CP28605", "ISO-8859-15" },
366*6ea1f93eSDaniel Fojt { "CP38598", "ISO-8859-8" },
367*6ea1f93eSDaniel Fojt { "CP51932", "EUC-JP" },
368*6ea1f93eSDaniel Fojt { "CP51936", "GB2312" },
369*6ea1f93eSDaniel Fojt { "CP51949", "EUC-KR" },
370*6ea1f93eSDaniel Fojt { "CP51950", "EUC-TW" },
371*6ea1f93eSDaniel Fojt { "CP54936", "GB18030" },
372*6ea1f93eSDaniel Fojt { "CP65001", "UTF-8" },
373*6ea1f93eSDaniel Fojt { "CP936", "GBK" }
374*6ea1f93eSDaniel Fojt # define alias_table_defined
375*6ea1f93eSDaniel Fojt # endif
376*6ea1f93eSDaniel Fojt # if defined OS2 /* OS/2 */
377*6ea1f93eSDaniel Fojt /* The list of encodings is taken from "List of OS/2 Codepages"
378*6ea1f93eSDaniel Fojt by Alex Taylor:
379*6ea1f93eSDaniel Fojt <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
380*6ea1f93eSDaniel Fojt See also "IBM Globalization - Code page identifiers":
381*6ea1f93eSDaniel Fojt <https://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. */
382*6ea1f93eSDaniel Fojt { "CP1089", "ISO-8859-6" },
383*6ea1f93eSDaniel Fojt { "CP1208", "UTF-8" },
384*6ea1f93eSDaniel Fojt { "CP1381", "GB2312" },
385*6ea1f93eSDaniel Fojt { "CP1386", "GBK" },
386*6ea1f93eSDaniel Fojt { "CP3372", "EUC-JP" },
387*6ea1f93eSDaniel Fojt { "CP813", "ISO-8859-7" },
388*6ea1f93eSDaniel Fojt { "CP819", "ISO-8859-1" },
389*6ea1f93eSDaniel Fojt { "CP878", "KOI8-R" },
390*6ea1f93eSDaniel Fojt { "CP912", "ISO-8859-2" },
391*6ea1f93eSDaniel Fojt { "CP913", "ISO-8859-3" },
392*6ea1f93eSDaniel Fojt { "CP914", "ISO-8859-4" },
393*6ea1f93eSDaniel Fojt { "CP915", "ISO-8859-5" },
394*6ea1f93eSDaniel Fojt { "CP916", "ISO-8859-8" },
395*6ea1f93eSDaniel Fojt { "CP920", "ISO-8859-9" },
396*6ea1f93eSDaniel Fojt { "CP921", "ISO-8859-13" },
397*6ea1f93eSDaniel Fojt { "CP923", "ISO-8859-15" },
398*6ea1f93eSDaniel Fojt { "CP954", "EUC-JP" },
399*6ea1f93eSDaniel Fojt { "CP964", "EUC-TW" },
400*6ea1f93eSDaniel Fojt { "CP970", "EUC-KR" }
401*6ea1f93eSDaniel Fojt # define alias_table_defined
402*6ea1f93eSDaniel Fojt # endif
403*6ea1f93eSDaniel Fojt # if defined VMS /* OpenVMS */
40444b87433SJohn Marino /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
40544b87433SJohn Marino "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
40644b87433SJohn Marino section 10.7 "Handling Different Character Sets". */
407*6ea1f93eSDaniel Fojt { "DECHANYU", "DEC-HANYU" },
408*6ea1f93eSDaniel Fojt { "DECHANZI", "GB2312" },
409*6ea1f93eSDaniel Fojt { "DECKANJI", "DEC-KANJI" },
410*6ea1f93eSDaniel Fojt { "DECKOREAN", "EUC-KR" },
411*6ea1f93eSDaniel Fojt { "ISO8859-1", "ISO-8859-1" },
412*6ea1f93eSDaniel Fojt { "ISO8859-2", "ISO-8859-2" },
413*6ea1f93eSDaniel Fojt { "ISO8859-5", "ISO-8859-5" },
414*6ea1f93eSDaniel Fojt { "ISO8859-7", "ISO-8859-7" },
415*6ea1f93eSDaniel Fojt { "ISO8859-8", "ISO-8859-8" },
416*6ea1f93eSDaniel Fojt { "ISO8859-9", "ISO-8859-9" },
417*6ea1f93eSDaniel Fojt { "SDECKANJI", "EUC-JP" },
418*6ea1f93eSDaniel Fojt { "SJIS", "SHIFT_JIS" },
419*6ea1f93eSDaniel Fojt { "eucJP", "EUC-JP" },
420*6ea1f93eSDaniel Fojt { "eucTW", "EUC-TW" }
421*6ea1f93eSDaniel Fojt # define alias_table_defined
422*6ea1f93eSDaniel Fojt # endif
423*6ea1f93eSDaniel Fojt # ifndef alias_table_defined
424*6ea1f93eSDaniel Fojt /* Just a dummy entry, to avoid a C syntax error. */
425*6ea1f93eSDaniel Fojt { "", "" }
426*6ea1f93eSDaniel Fojt # endif
427*6ea1f93eSDaniel Fojt };
428*6ea1f93eSDaniel Fojt
42944b87433SJohn Marino # endif
43044b87433SJohn Marino
431*6ea1f93eSDaniel Fojt #else
43244b87433SJohn Marino
/* On these platforms, we use a mapping from locale name to GNU canonical
   encoding name.  */
435*6ea1f93eSDaniel Fojt
/* One row of the locale table: a locale name and the GNU canonical
   name of the encoding used by that locale.

   Both fields are fixed-size arrays: up to 17 characters plus NUL for
   the locale name (e.g. "da_DK.DIS_8859-15" is exactly 17), and up to
   11 characters plus NUL for the encoding name.  Note that C accepts a
   string literal whose length equals the array size as an initializer
   and silently drops the NUL; any new, longer name therefore requires
   enlarging the corresponding array.  */
struct table_entry
{
  const char locale[17+1];     /* locale name, e.g. "en_US.ISO_8859-1" */
  const char canonical[11+1];  /* GNU canonical encoding name */
};
441*6ea1f93eSDaniel Fojt
442*6ea1f93eSDaniel Fojt /* Table of platform-dependent mappings, sorted in ascending order. */
443*6ea1f93eSDaniel Fojt static const struct table_entry locale_table[] =
444*6ea1f93eSDaniel Fojt {
445*6ea1f93eSDaniel Fojt # if defined __FreeBSD__ /* FreeBSD 4.2 */
446*6ea1f93eSDaniel Fojt { "cs_CZ.ISO_8859-2", "ISO-8859-2" },
447*6ea1f93eSDaniel Fojt { "da_DK.DIS_8859-15", "ISO-8859-15" },
448*6ea1f93eSDaniel Fojt { "da_DK.ISO_8859-1", "ISO-8859-1" },
449*6ea1f93eSDaniel Fojt { "de_AT.DIS_8859-15", "ISO-8859-15" },
450*6ea1f93eSDaniel Fojt { "de_AT.ISO_8859-1", "ISO-8859-1" },
451*6ea1f93eSDaniel Fojt { "de_CH.DIS_8859-15", "ISO-8859-15" },
452*6ea1f93eSDaniel Fojt { "de_CH.ISO_8859-1", "ISO-8859-1" },
453*6ea1f93eSDaniel Fojt { "de_DE.DIS_8859-15", "ISO-8859-15" },
454*6ea1f93eSDaniel Fojt { "de_DE.ISO_8859-1", "ISO-8859-1" },
455*6ea1f93eSDaniel Fojt { "en_AU.DIS_8859-15", "ISO-8859-15" },
456*6ea1f93eSDaniel Fojt { "en_AU.ISO_8859-1", "ISO-8859-1" },
457*6ea1f93eSDaniel Fojt { "en_CA.DIS_8859-15", "ISO-8859-15" },
458*6ea1f93eSDaniel Fojt { "en_CA.ISO_8859-1", "ISO-8859-1" },
459*6ea1f93eSDaniel Fojt { "en_GB.DIS_8859-15", "ISO-8859-15" },
460*6ea1f93eSDaniel Fojt { "en_GB.ISO_8859-1", "ISO-8859-1" },
461*6ea1f93eSDaniel Fojt { "en_US.DIS_8859-15", "ISO-8859-15" },
462*6ea1f93eSDaniel Fojt { "en_US.ISO_8859-1", "ISO-8859-1" },
463*6ea1f93eSDaniel Fojt { "es_ES.DIS_8859-15", "ISO-8859-15" },
464*6ea1f93eSDaniel Fojt { "es_ES.ISO_8859-1", "ISO-8859-1" },
465*6ea1f93eSDaniel Fojt { "fi_FI.DIS_8859-15", "ISO-8859-15" },
466*6ea1f93eSDaniel Fojt { "fi_FI.ISO_8859-1", "ISO-8859-1" },
467*6ea1f93eSDaniel Fojt { "fr_BE.DIS_8859-15", "ISO-8859-15" },
468*6ea1f93eSDaniel Fojt { "fr_BE.ISO_8859-1", "ISO-8859-1" },
469*6ea1f93eSDaniel Fojt { "fr_CA.DIS_8859-15", "ISO-8859-15" },
470*6ea1f93eSDaniel Fojt { "fr_CA.ISO_8859-1", "ISO-8859-1" },
471*6ea1f93eSDaniel Fojt { "fr_CH.DIS_8859-15", "ISO-8859-15" },
472*6ea1f93eSDaniel Fojt { "fr_CH.ISO_8859-1", "ISO-8859-1" },
473*6ea1f93eSDaniel Fojt { "fr_FR.DIS_8859-15", "ISO-8859-15" },
474*6ea1f93eSDaniel Fojt { "fr_FR.ISO_8859-1", "ISO-8859-1" },
475*6ea1f93eSDaniel Fojt { "hr_HR.ISO_8859-2", "ISO-8859-2" },
476*6ea1f93eSDaniel Fojt { "hu_HU.ISO_8859-2", "ISO-8859-2" },
477*6ea1f93eSDaniel Fojt { "is_IS.DIS_8859-15", "ISO-8859-15" },
478*6ea1f93eSDaniel Fojt { "is_IS.ISO_8859-1", "ISO-8859-1" },
479*6ea1f93eSDaniel Fojt { "it_CH.DIS_8859-15", "ISO-8859-15" },
480*6ea1f93eSDaniel Fojt { "it_CH.ISO_8859-1", "ISO-8859-1" },
481*6ea1f93eSDaniel Fojt { "it_IT.DIS_8859-15", "ISO-8859-15" },
482*6ea1f93eSDaniel Fojt { "it_IT.ISO_8859-1", "ISO-8859-1" },
483*6ea1f93eSDaniel Fojt { "ja_JP.EUC", "EUC-JP" },
484*6ea1f93eSDaniel Fojt { "ja_JP.SJIS", "SHIFT_JIS" },
485*6ea1f93eSDaniel Fojt { "ja_JP.Shift_JIS", "SHIFT_JIS" },
486*6ea1f93eSDaniel Fojt { "ko_KR.EUC", "EUC-KR" },
487*6ea1f93eSDaniel Fojt { "la_LN.ASCII", "ASCII" },
488*6ea1f93eSDaniel Fojt { "la_LN.DIS_8859-15", "ISO-8859-15" },
489*6ea1f93eSDaniel Fojt { "la_LN.ISO_8859-1", "ISO-8859-1" },
490*6ea1f93eSDaniel Fojt { "la_LN.ISO_8859-2", "ISO-8859-2" },
491*6ea1f93eSDaniel Fojt { "la_LN.ISO_8859-4", "ISO-8859-4" },
492*6ea1f93eSDaniel Fojt { "lt_LN.ASCII", "ASCII" },
493*6ea1f93eSDaniel Fojt { "lt_LN.DIS_8859-15", "ISO-8859-15" },
494*6ea1f93eSDaniel Fojt { "lt_LN.ISO_8859-1", "ISO-8859-1" },
495*6ea1f93eSDaniel Fojt { "lt_LN.ISO_8859-2", "ISO-8859-2" },
496*6ea1f93eSDaniel Fojt { "lt_LT.ISO_8859-4", "ISO-8859-4" },
497*6ea1f93eSDaniel Fojt { "nl_BE.DIS_8859-15", "ISO-8859-15" },
498*6ea1f93eSDaniel Fojt { "nl_BE.ISO_8859-1", "ISO-8859-1" },
499*6ea1f93eSDaniel Fojt { "nl_NL.DIS_8859-15", "ISO-8859-15" },
500*6ea1f93eSDaniel Fojt { "nl_NL.ISO_8859-1", "ISO-8859-1" },
501*6ea1f93eSDaniel Fojt { "no_NO.DIS_8859-15", "ISO-8859-15" },
502*6ea1f93eSDaniel Fojt { "no_NO.ISO_8859-1", "ISO-8859-1" },
503*6ea1f93eSDaniel Fojt { "pl_PL.ISO_8859-2", "ISO-8859-2" },
504*6ea1f93eSDaniel Fojt { "pt_PT.DIS_8859-15", "ISO-8859-15" },
505*6ea1f93eSDaniel Fojt { "pt_PT.ISO_8859-1", "ISO-8859-1" },
506*6ea1f93eSDaniel Fojt { "ru_RU.CP866", "CP866" },
507*6ea1f93eSDaniel Fojt { "ru_RU.ISO_8859-5", "ISO-8859-5" },
508*6ea1f93eSDaniel Fojt { "ru_RU.KOI8-R", "KOI8-R" },
509*6ea1f93eSDaniel Fojt { "ru_SU.CP866", "CP866" },
510*6ea1f93eSDaniel Fojt { "ru_SU.ISO_8859-5", "ISO-8859-5" },
511*6ea1f93eSDaniel Fojt { "ru_SU.KOI8-R", "KOI8-R" },
512*6ea1f93eSDaniel Fojt { "sl_SI.ISO_8859-2", "ISO-8859-2" },
513*6ea1f93eSDaniel Fojt { "sv_SE.DIS_8859-15", "ISO-8859-15" },
514*6ea1f93eSDaniel Fojt { "sv_SE.ISO_8859-1", "ISO-8859-1" },
515*6ea1f93eSDaniel Fojt { "uk_UA.KOI8-U", "KOI8-U" },
516*6ea1f93eSDaniel Fojt { "zh_CN.EUC", "GB2312" },
517*6ea1f93eSDaniel Fojt { "zh_TW.BIG5", "BIG5" },
518*6ea1f93eSDaniel Fojt { "zh_TW.Big5", "BIG5" }
519*6ea1f93eSDaniel Fojt # define locale_table_defined
52044b87433SJohn Marino # endif
521*6ea1f93eSDaniel Fojt # if defined __DJGPP__ /* DOS / DJGPP 2.03 */
522*6ea1f93eSDaniel Fojt /* The encodings given here may not all be correct.
523*6ea1f93eSDaniel Fojt If you find that the encoding given for your language and
524*6ea1f93eSDaniel Fojt country is not the one your DOS machine actually uses, just
525*6ea1f93eSDaniel Fojt correct it in this file, and send a mail to
526*6ea1f93eSDaniel Fojt Juan Manuel Guerrero <juan.guerrero@gmx.de>
527*6ea1f93eSDaniel Fojt and <bug-gnulib@gnu.org>. */
528*6ea1f93eSDaniel Fojt { "C", "ASCII" },
529*6ea1f93eSDaniel Fojt { "ar", "CP864" },
530*6ea1f93eSDaniel Fojt { "ar_AE", "CP864" },
531*6ea1f93eSDaniel Fojt { "ar_DZ", "CP864" },
532*6ea1f93eSDaniel Fojt { "ar_EG", "CP864" },
533*6ea1f93eSDaniel Fojt { "ar_IQ", "CP864" },
534*6ea1f93eSDaniel Fojt { "ar_IR", "CP864" },
535*6ea1f93eSDaniel Fojt { "ar_JO", "CP864" },
536*6ea1f93eSDaniel Fojt { "ar_KW", "CP864" },
537*6ea1f93eSDaniel Fojt { "ar_MA", "CP864" },
538*6ea1f93eSDaniel Fojt { "ar_OM", "CP864" },
539*6ea1f93eSDaniel Fojt { "ar_QA", "CP864" },
540*6ea1f93eSDaniel Fojt { "ar_SA", "CP864" },
541*6ea1f93eSDaniel Fojt { "ar_SY", "CP864" },
542*6ea1f93eSDaniel Fojt { "be", "CP866" },
543*6ea1f93eSDaniel Fojt { "be_BE", "CP866" },
544*6ea1f93eSDaniel Fojt { "bg", "CP866" }, /* not CP855 ?? */
545*6ea1f93eSDaniel Fojt { "bg_BG", "CP866" }, /* not CP855 ?? */
546*6ea1f93eSDaniel Fojt { "ca", "CP850" },
547*6ea1f93eSDaniel Fojt { "ca_ES", "CP850" },
548*6ea1f93eSDaniel Fojt { "cs", "CP852" },
549*6ea1f93eSDaniel Fojt { "cs_CZ", "CP852" },
550*6ea1f93eSDaniel Fojt { "da", "CP865" }, /* not CP850 ?? */
551*6ea1f93eSDaniel Fojt { "da_DK", "CP865" }, /* not CP850 ?? */
552*6ea1f93eSDaniel Fojt { "de", "CP850" },
553*6ea1f93eSDaniel Fojt { "de_AT", "CP850" },
554*6ea1f93eSDaniel Fojt { "de_CH", "CP850" },
555*6ea1f93eSDaniel Fojt { "de_DE", "CP850" },
556*6ea1f93eSDaniel Fojt { "el", "CP869" },
557*6ea1f93eSDaniel Fojt { "el_GR", "CP869" },
558*6ea1f93eSDaniel Fojt { "en", "CP850" },
559*6ea1f93eSDaniel Fojt { "en_AU", "CP850" }, /* not CP437 ?? */
560*6ea1f93eSDaniel Fojt { "en_CA", "CP850" },
561*6ea1f93eSDaniel Fojt { "en_GB", "CP850" },
562*6ea1f93eSDaniel Fojt { "en_NZ", "CP437" },
563*6ea1f93eSDaniel Fojt { "en_US", "CP437" },
564*6ea1f93eSDaniel Fojt { "en_ZA", "CP850" }, /* not CP437 ?? */
565*6ea1f93eSDaniel Fojt { "eo", "CP850" },
566*6ea1f93eSDaniel Fojt { "eo_EO", "CP850" },
567*6ea1f93eSDaniel Fojt { "es", "CP850" },
568*6ea1f93eSDaniel Fojt { "es_AR", "CP850" },
569*6ea1f93eSDaniel Fojt { "es_BO", "CP850" },
570*6ea1f93eSDaniel Fojt { "es_CL", "CP850" },
571*6ea1f93eSDaniel Fojt { "es_CO", "CP850" },
572*6ea1f93eSDaniel Fojt { "es_CR", "CP850" },
573*6ea1f93eSDaniel Fojt { "es_CU", "CP850" },
574*6ea1f93eSDaniel Fojt { "es_DO", "CP850" },
575*6ea1f93eSDaniel Fojt { "es_EC", "CP850" },
576*6ea1f93eSDaniel Fojt { "es_ES", "CP850" },
577*6ea1f93eSDaniel Fojt { "es_GT", "CP850" },
578*6ea1f93eSDaniel Fojt { "es_HN", "CP850" },
579*6ea1f93eSDaniel Fojt { "es_MX", "CP850" },
580*6ea1f93eSDaniel Fojt { "es_NI", "CP850" },
581*6ea1f93eSDaniel Fojt { "es_PA", "CP850" },
582*6ea1f93eSDaniel Fojt { "es_PE", "CP850" },
583*6ea1f93eSDaniel Fojt { "es_PY", "CP850" },
584*6ea1f93eSDaniel Fojt { "es_SV", "CP850" },
585*6ea1f93eSDaniel Fojt { "es_UY", "CP850" },
586*6ea1f93eSDaniel Fojt { "es_VE", "CP850" },
587*6ea1f93eSDaniel Fojt { "et", "CP850" },
588*6ea1f93eSDaniel Fojt { "et_EE", "CP850" },
589*6ea1f93eSDaniel Fojt { "eu", "CP850" },
590*6ea1f93eSDaniel Fojt { "eu_ES", "CP850" },
591*6ea1f93eSDaniel Fojt { "fi", "CP850" },
592*6ea1f93eSDaniel Fojt { "fi_FI", "CP850" },
593*6ea1f93eSDaniel Fojt { "fr", "CP850" },
594*6ea1f93eSDaniel Fojt { "fr_BE", "CP850" },
595*6ea1f93eSDaniel Fojt { "fr_CA", "CP850" },
596*6ea1f93eSDaniel Fojt { "fr_CH", "CP850" },
597*6ea1f93eSDaniel Fojt { "fr_FR", "CP850" },
598*6ea1f93eSDaniel Fojt { "ga", "CP850" },
599*6ea1f93eSDaniel Fojt { "ga_IE", "CP850" },
600*6ea1f93eSDaniel Fojt { "gd", "CP850" },
601*6ea1f93eSDaniel Fojt { "gd_GB", "CP850" },
602*6ea1f93eSDaniel Fojt { "gl", "CP850" },
603*6ea1f93eSDaniel Fojt { "gl_ES", "CP850" },
604*6ea1f93eSDaniel Fojt { "he", "CP862" },
605*6ea1f93eSDaniel Fojt { "he_IL", "CP862" },
606*6ea1f93eSDaniel Fojt { "hr", "CP852" },
607*6ea1f93eSDaniel Fojt { "hr_HR", "CP852" },
608*6ea1f93eSDaniel Fojt { "hu", "CP852" },
609*6ea1f93eSDaniel Fojt { "hu_HU", "CP852" },
610*6ea1f93eSDaniel Fojt { "id", "CP850" }, /* not CP437 ?? */
611*6ea1f93eSDaniel Fojt { "id_ID", "CP850" }, /* not CP437 ?? */
612*6ea1f93eSDaniel Fojt { "is", "CP861" }, /* not CP850 ?? */
613*6ea1f93eSDaniel Fojt { "is_IS", "CP861" }, /* not CP850 ?? */
614*6ea1f93eSDaniel Fojt { "it", "CP850" },
615*6ea1f93eSDaniel Fojt { "it_CH", "CP850" },
616*6ea1f93eSDaniel Fojt { "it_IT", "CP850" },
617*6ea1f93eSDaniel Fojt { "ja", "CP932" },
618*6ea1f93eSDaniel Fojt { "ja_JP", "CP932" },
619*6ea1f93eSDaniel Fojt { "kr", "CP949" }, /* not CP934 ?? */
620*6ea1f93eSDaniel Fojt { "kr_KR", "CP949" }, /* not CP934 ?? */
621*6ea1f93eSDaniel Fojt { "lt", "CP775" },
622*6ea1f93eSDaniel Fojt { "lt_LT", "CP775" },
623*6ea1f93eSDaniel Fojt { "lv", "CP775" },
624*6ea1f93eSDaniel Fojt { "lv_LV", "CP775" },
625*6ea1f93eSDaniel Fojt { "mk", "CP866" }, /* not CP855 ?? */
626*6ea1f93eSDaniel Fojt { "mk_MK", "CP866" }, /* not CP855 ?? */
627*6ea1f93eSDaniel Fojt { "mt", "CP850" },
628*6ea1f93eSDaniel Fojt { "mt_MT", "CP850" },
629*6ea1f93eSDaniel Fojt { "nb", "CP865" }, /* not CP850 ?? */
630*6ea1f93eSDaniel Fojt { "nb_NO", "CP865" }, /* not CP850 ?? */
631*6ea1f93eSDaniel Fojt { "nl", "CP850" },
632*6ea1f93eSDaniel Fojt { "nl_BE", "CP850" },
633*6ea1f93eSDaniel Fojt { "nl_NL", "CP850" },
634*6ea1f93eSDaniel Fojt { "nn", "CP865" }, /* not CP850 ?? */
635*6ea1f93eSDaniel Fojt { "nn_NO", "CP865" }, /* not CP850 ?? */
636*6ea1f93eSDaniel Fojt { "no", "CP865" }, /* not CP850 ?? */
637*6ea1f93eSDaniel Fojt { "no_NO", "CP865" }, /* not CP850 ?? */
638*6ea1f93eSDaniel Fojt { "pl", "CP852" },
639*6ea1f93eSDaniel Fojt { "pl_PL", "CP852" },
640*6ea1f93eSDaniel Fojt { "pt", "CP850" },
641*6ea1f93eSDaniel Fojt { "pt_BR", "CP850" },
642*6ea1f93eSDaniel Fojt { "pt_PT", "CP850" },
643*6ea1f93eSDaniel Fojt { "ro", "CP852" },
644*6ea1f93eSDaniel Fojt { "ro_RO", "CP852" },
645*6ea1f93eSDaniel Fojt { "ru", "CP866" },
646*6ea1f93eSDaniel Fojt { "ru_RU", "CP866" },
647*6ea1f93eSDaniel Fojt { "sk", "CP852" },
648*6ea1f93eSDaniel Fojt { "sk_SK", "CP852" },
649*6ea1f93eSDaniel Fojt { "sl", "CP852" },
650*6ea1f93eSDaniel Fojt { "sl_SI", "CP852" },
651*6ea1f93eSDaniel Fojt { "sq", "CP852" },
652*6ea1f93eSDaniel Fojt { "sq_AL", "CP852" },
653*6ea1f93eSDaniel Fojt { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */
654*6ea1f93eSDaniel Fojt { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
655*6ea1f93eSDaniel Fojt { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
656*6ea1f93eSDaniel Fojt { "sv", "CP850" },
657*6ea1f93eSDaniel Fojt { "sv_SE", "CP850" },
658*6ea1f93eSDaniel Fojt { "th", "CP874" },
659*6ea1f93eSDaniel Fojt { "th_TH", "CP874" },
660*6ea1f93eSDaniel Fojt { "tr", "CP857" },
661*6ea1f93eSDaniel Fojt { "tr_TR", "CP857" },
662*6ea1f93eSDaniel Fojt { "uk", "CP1125" },
663*6ea1f93eSDaniel Fojt { "uk_UA", "CP1125" },
664*6ea1f93eSDaniel Fojt { "zh_CN", "GBK" },
665*6ea1f93eSDaniel Fojt { "zh_TW", "CP950" } /* not CP938 ?? */
666*6ea1f93eSDaniel Fojt # define locale_table_defined
667*6ea1f93eSDaniel Fojt # endif
668*6ea1f93eSDaniel Fojt # ifndef locale_table_defined
669*6ea1f93eSDaniel Fojt /* Just a dummy entry, to avoid a C syntax error. */
670*6ea1f93eSDaniel Fojt { "", "" }
671*6ea1f93eSDaniel Fojt # endif
672*6ea1f93eSDaniel Fojt };
673*6ea1f93eSDaniel Fojt
67444b87433SJohn Marino #endif
67544b87433SJohn Marino
67644b87433SJohn Marino
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in localcharset.h.

   The result must not be freed and must not be modified.  Depending on
   the platform branch taken, it points to a string literal, to an entry
   of the alias/locale table, to a function-local static buffer (which a
   later call overwrites), or - on the Cygwin and OS/2 paths, when the
   locale environment variable carries an explicit ".ENCODING" suffix
   without an "@modifier" - directly into the environment string.

   If the canonical name cannot be determined, the result is a
   non-canonical name.  The result is never NULL and, except where the
   platform reports a name verbatim, never the empty string.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;

#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char buf[2 + 10 + 1];

      /* Usual precedence: LC_ALL, then LC_CTYPE, then LANG.  */
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier >= dot here, so the difference is non-negative;
                 the cast only makes the unsigned comparison explicit.  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
              /* Encoding name too long for buf: fall through to the
                 codepage number below.  */
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

# elif defined WINDOWS_NATIVE

  static char buf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_ALL, NULL);
  char *pdot;

  /* If they set different locales for different categories,
     'setlocale' will return a semi-colon separated list of locale
     values.  To make sure we use the correct one, we choose LC_CTYPE.  */
  if (current_locale != NULL && strchr (current_locale, ';') != NULL)
    current_locale = setlocale (LC_CTYPE, NULL);

  /* Be defensive: never hand a NULL pointer to strrchr.  */
  pdot = (current_locale != NULL ? strrchr (current_locale, '.') : NULL);
  if (pdot != NULL && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().
         When the output goes to a console window, it needs to be provided in
         GetOEMCP() encoding if the console is using a raster font, or in
         GetConsoleOutputCP() encoding if it is using a TrueType font.
         But in GUI programs and for output sent to files and pipes, GetACP()
         encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }

  /* For a UTF-8 locale, 'setlocale' may report the code page either as the
     number 65001 or as the name "utf8" (e.g. "French_France.utf8").  The
     latter would otherwise produce the bogus name "CPutf8", so map both
     spellings to the canonical name directly.  */
  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
    codeset = "UTF-8";
  else
    codeset = buf;

# elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* NULL means "not decided yet"; see the DosQueryCp fallback below.  */
  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier >= dot here, so the difference is non-negative.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* ULONG is not 'unsigned int'; convert explicitly so that the
             argument matches the %u conversion.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

# else

#  error "Add code for other platforms here."

# endif

  /* Resolve alias.  */
  {
# ifdef alias_table_defined
    /* On some platforms, UTF-8 locales are the most frequently used ones.
       Speed up the common case and slow down the less common cases by
       testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    if (strcmp (codeset, "UTF-8") == 0)
      goto done_table_lookup;
    else
#  endif
      {
        const struct table_entry * const table = alias_table;
        size_t const table_size =
          sizeof (alias_table) / sizeof (struct table_entry);
        /* The table is sorted.  Perform a binary search.  */
        size_t hi = table_size;
        size_t lo = 0;
        while (lo < hi)
          {
            /* Invariant:
               for i < lo, strcmp (table[i].alias, codeset) < 0,
               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
            int cmp = strcmp (table[mid].alias, codeset);
            if (cmp < 0)
              lo = mid + 1;
            else if (cmp > 0)
              hi = mid;
            else
              {
                /* Found an i with
                     strcmp (table[i].alias, codeset) == 0.  */
                codeset = table[mid].canonical;
                goto done_table_lookup;
              }
          }
      }
    /* The 'if (0) label: ; else' construct lets a successful lookup jump
       over the not-found handling below while keeping that handling a
       single statement that also exists when no table is compiled in.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        if (codeset[0] == '\0')
          codeset = "ASCII";
# endif
      }
  }

#else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like DJGPP) have only the C locale.  Therefore we don't use setlocale
     here; it would return "C" when it doesn't support the locale name the
     user has set.  */
# if 0
  locale = setlocale (LC_CTYPE, NULL);
# endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
          /* Guarantee a non-NULL string for the strcmp calls below.  */
          if (locale == NULL)
            locale = "";
        }
    }

  /* Map locale name to canonical encoding name.  */
  {
# ifdef locale_table_defined
    const struct table_entry * const table = locale_table;
    size_t const table_size =
      sizeof (locale_table) / sizeof (struct table_entry);
    /* The table is sorted.  Perform a binary search.  */
    size_t hi = table_size;
    size_t lo = 0;
    while (lo < hi)
      {
        /* Invariant:
           for i < lo, strcmp (table[i].locale, locale) < 0,
           for i >= hi, strcmp (table[i].locale, locale) > 0.  */
        size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
        int cmp = strcmp (table[mid].locale, locale);
        if (cmp < 0)
          lo = mid + 1;
        else if (cmp > 0)
          hi = mid;
        else
          {
            /* Found an i with
                 strcmp (table[i].locale, locale) == 0.  */
            codeset = table[mid].canonical;
            goto done_table_lookup;
          }
      }
    /* See the alias lookup above for why this is written 'if (0) ...'.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* The canonical name cannot be determined.  */
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        codeset = "ASCII";
# endif
      }
  }

#endif

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}
997