1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <sal/config.h>
21
22 #include <algorithm>
23 #include <cstring>
24
25 #include <osl/nlsupport.h>
26 #include <osl/diagnose.h>
27 #include <osl/process.h>
28 #include <rtl/string.hxx>
29 #include <rtl/ustring.hxx>
30 #include <sal/log.hxx>
31
32 #include "nlsupport.hxx"
33
34 #if defined(LINUX) || defined(__sun) || defined(NETBSD) || \
35 defined(FREEBSD) || defined(MACOSX) || defined(IOS) || defined(OPENBSD) || \
36 defined(DRAGONFLY)
37 #if !defined(MACOSX) && !defined(IOS)
38 #include <locale.h>
39 #include <langinfo.h>
40 #else
41 #include <osl/module.h>
42 #include <osl/thread.h>
43 #endif /* !MACOSX && !IOS */
44 #endif /* LINUX || __sun || NETBSD || MACOSX || IOS */
45
46 #if defined(MACOSX) || defined(IOS)
47 #include "system.hxx"
48 #endif
49
50 #include <string.h>
51
52 namespace {
53
54 struct Pair {
55 const char *key;
56 const rtl_TextEncoding value;
57 };
58
59 }
60
61 /*****************************************************************************
62 compare function for binary search
63 *****************************************************************************/
64
65 static int
pair_compare(const char * key,const Pair * pair)66 pair_compare (const char *key, const Pair *pair)
67 {
68 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
69 return result;
70 }
71
72 /*****************************************************************************
73 binary search on encoding tables
74 *****************************************************************************/
75
76 static const Pair*
pair_search(const char * key,const Pair * base,unsigned int member)77 pair_search (const char *key, const Pair *base, unsigned int member )
78 {
79 unsigned int lower = 0;
80 unsigned int upper = member;
81
82 /* check for validity of input */
83 if ( (key == nullptr) || (base == nullptr) || (member == 0) )
84 return nullptr;
85
86 /* binary search */
87 while ( lower < upper )
88 {
89 const unsigned int current = (lower + upper) / 2;
90 const int comparison = pair_compare( key, base + current );
91 if (comparison < 0)
92 upper = current;
93 else if (comparison > 0)
94 lower = current + 1;
95 else
96 return base + current;
97 }
98
99 return nullptr;
100 }
101
102 /*****************************************************************************
103 convert rtl_Locale to locale string
104 *****************************************************************************/
105
compose_locale(rtl_Locale * pLocale,char * buffer,size_t n)106 static char * compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
107 {
108 /* check if a valid locale is specified */
109 if( pLocale && pLocale->Language &&
110 (pLocale->Language->length == 2 || pLocale->Language->length == 3) )
111 {
112 size_t offset = 0;
113
114 /* convert language code to ascii */
115 {
116 rtl_String *pLanguage = nullptr;
117
118 rtl_uString2String( &pLanguage,
119 pLocale->Language->buffer, pLocale->Language->length,
120 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
121
122 if( sal::static_int_cast<sal_uInt32>(pLanguage->length) < n )
123 {
124 strcpy( buffer, pLanguage->buffer );
125 offset = pLanguage->length;
126 }
127
128 rtl_string_release( pLanguage );
129 }
130
131 /* convert country code to ascii */
132 if( pLocale->Country && (pLocale->Country->length == 2) )
133 {
134 rtl_String *pCountry = nullptr;
135
136 rtl_uString2String( &pCountry,
137 pLocale->Country->buffer, pLocale->Country->length,
138 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
139
140 if( offset + pCountry->length + 1 < n )
141 {
142 strcpy( buffer + offset++, "_" );
143 strcpy( buffer + offset, pCountry->buffer );
144 offset += pCountry->length;
145 }
146
147 rtl_string_release( pCountry );
148 }
149
150 /* convert variant to ascii - check if there is enough space for the variant string */
151 if( pLocale->Variant && pLocale->Variant->length &&
152 ( sal::static_int_cast<sal_uInt32>(pLocale->Variant->length) < n - 6 ) )
153 {
154 rtl_String *pVariant = nullptr;
155
156 rtl_uString2String( &pVariant,
157 pLocale->Variant->buffer, pLocale->Variant->length,
158 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
159
160 if( offset + pVariant->length + 1 < n )
161 {
162 strcpy( buffer + offset, pVariant->buffer );
163 }
164
165 rtl_string_release( pVariant );
166 }
167
168 return buffer;
169 }
170
171 return nullptr;
172 }
173
174 /*****************************************************************************
175 convert locale string to rtl_Locale
176 *****************************************************************************/
177
parse_locale(const char * locale)178 static rtl_Locale * parse_locale( const char * locale )
179 {
180 assert(locale != nullptr);
181
182 if (*locale == '\0' || std::strcmp(locale, "C") == 0
183 || std::strcmp(locale, "POSIX") == 0)
184 {
185 return rtl_locale_register(u"C", u"", u"");
186 }
187
188 size_t len = strlen( locale );
189
190 rtl_uString * pLanguage = nullptr;
191 rtl_uString * pCountry = nullptr;
192 rtl_uString * pVariant = nullptr;
193
194 size_t offset = std::min<size_t>(len, 2);
195
196 rtl_Locale * ret;
197
198 /* language is a two or three letter code */
199 if( (len > 3 && locale[3] == '_') || (len == 3 && locale[2] != '_') )
200 offset = 3;
201
202 /* convert language code to unicode */
203 rtl_string2UString( &pLanguage, locale, offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
204 OSL_ASSERT(pLanguage != nullptr);
205
206 /* convert country code to unicode */
207 if( len >= offset+3 && locale[offset] == '_' )
208 {
209 rtl_string2UString( &pCountry, locale + offset + 1, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
210 OSL_ASSERT(pCountry != nullptr);
211 offset += 3;
212 }
213
214 /* convert variant code to unicode - do not rely on "." as delimiter */
215 if( len > offset ) {
216 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
217 OSL_ASSERT(pVariant != nullptr);
218 }
219
220 ret = rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : u"", pVariant ? pVariant->buffer : u"" );
221
222 if (pVariant) rtl_uString_release(pVariant);
223 if (pCountry) rtl_uString_release(pCountry);
224 if (pLanguage) rtl_uString_release(pLanguage);
225
226 return ret;
227 }
228
229 #if defined(LINUX) || defined(__sun) || defined(NETBSD) || \
230 defined(FREEBSD) || defined(OPENBSD) || defined(DRAGONFLY)
231
232 /*
233 * This implementation of osl_getTextEncodingFromLocale maps
234 * from nl_langinfo_l(CODESET) to rtl_textencoding defines.
235 * nl_langinfo() is supported only on Linux, Solaris,
236 * >= NetBSD 1.6 and >= FreeBSD 4.4
237 */
238
239 #ifdef LINUX
240 #if !defined(CODESET)
241 #define CODESET _NL_CTYPE_CODESET_NAME
242 #endif
243 #endif
244
/*
 * nl_language_list[] is the table of supported encodings. Because it is
 * looked up with a binary search, the entries have to be in ascending order.
 * The keys are compared case-insensitively, so that order has to hold for
 * the case-folded spelling of the keys (keeping the list completely upper
 * or lowercase is the easiest way to guarantee this).
 */
251
252 #if defined(__sun)
253
254 /* The values in the below list can be obtained with a script like
255 * #!/bin/sh
256 * for i in `locale -a`; do
257 * LC_ALL=$i locale -k code_set_name
258 * done
259 */
260 static const Pair nl_language_list[] = {
261 { "5601", RTL_TEXTENCODING_EUC_KR }, /* ko_KR.EUC */
262 { "646", RTL_TEXTENCODING_ISO_8859_1 }, /* fake: ASCII_US */
263 { "ANSI-1251", RTL_TEXTENCODING_MS_1251 }, /* ru_RU.ANSI1251 */
264 { "BIG5", RTL_TEXTENCODING_BIG5 }, /* zh_CN.BIG5 */
265 { "BIG5-HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* zh_CN.BIG5HK */
266 { "CNS11643", RTL_TEXTENCODING_EUC_TW }, /* zh_TW.EUC */
267 { "EUCJP", RTL_TEXTENCODING_EUC_JP }, /* ja_JP.eucjp */
268 { "GB18030", RTL_TEXTENCODING_GB_18030 }, /* zh_CN.GB18030 */
269 { "GB2312", RTL_TEXTENCODING_GB_2312 }, /* zh_CN */
270 { "GBK", RTL_TEXTENCODING_GBK }, /* zh_CN.GBK */
271 { "ISO8859-1", RTL_TEXTENCODING_ISO_8859_1 },
272 { "ISO8859-10", RTL_TEXTENCODING_ISO_8859_10 },
273 { "ISO8859-13", RTL_TEXTENCODING_ISO_8859_13 }, /* lt_LT lv_LV */
274 { "ISO8859-14", RTL_TEXTENCODING_ISO_8859_14 },
275 { "ISO8859-15", RTL_TEXTENCODING_ISO_8859_15 },
276 { "ISO8859-2", RTL_TEXTENCODING_ISO_8859_2 },
277 { "ISO8859-3", RTL_TEXTENCODING_ISO_8859_3 },
278 { "ISO8859-4", RTL_TEXTENCODING_ISO_8859_4 },
279 { "ISO8859-5", RTL_TEXTENCODING_ISO_8859_5 },
280 { "ISO8859-6", RTL_TEXTENCODING_ISO_8859_6 },
281 { "ISO8859-7", RTL_TEXTENCODING_ISO_8859_7 },
282 { "ISO8859-8", RTL_TEXTENCODING_ISO_8859_8 },
283 { "ISO8859-9", RTL_TEXTENCODING_ISO_8859_9 },
284 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
285 { "KOI8-U", RTL_TEXTENCODING_KOI8_U },
286 { "PCK", RTL_TEXTENCODING_MS_932 },
287 { "SUN_EU_GREEK", RTL_TEXTENCODING_ISO_8859_7 }, /* 8859-7 + Euro */
288 { "TIS620.2533", RTL_TEXTENCODING_MS_874 }, /* th_TH.TIS620 */
289 { "UTF-8", RTL_TEXTENCODING_UTF8 }
290 };
291
292 /* XXX MS-874 is an extension to tis620, so this is not
293 * really equivalent */
294
295 #elif defined(LINUX)
296
297 const Pair nl_language_list[] = {
298 { "ANSI_X3.110-1983", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-99 NAPLPS */
299 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ISO_8859_1 }, /* fake: ASCII_US */
300 { "ASMO_449", RTL_TEXTENCODING_DONTKNOW }, /* ISO_9036 ARABIC7 */
301 { "BALTIC", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-179 */
302 { "BIG5", RTL_TEXTENCODING_BIG5 }, /* locale: zh_TW */
303 { "BIG5-HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* locale: zh_CN.BIG5HK */
304 { "BIG5HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* deprecated */
305 { "BS_4730", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-4 ISO646-GB */
306 { "BS_VIEWDATA", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-47 */
307 { "CP1250", RTL_TEXTENCODING_MS_1250 }, /* MS-EE */
308 { "CP1251", RTL_TEXTENCODING_MS_1251 }, /* MS-CYRL */
309 { "CP1252", RTL_TEXTENCODING_MS_1252 }, /* MS-ANSI */
310 { "CP1253", RTL_TEXTENCODING_MS_1253 }, /* MS-GREEK */
311 { "CP1254", RTL_TEXTENCODING_MS_1254 }, /* MS-TURK */
312 { "CP1255", RTL_TEXTENCODING_MS_1255 }, /* MS-HEBR */
313 { "CP1256", RTL_TEXTENCODING_MS_1256 }, /* MS-ARAB */
314 { "CP1257", RTL_TEXTENCODING_MS_1257 }, /* WINBALTRIM */
315 { "CSA_Z243.4-1985-1", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-121 */
316 { "CSA_Z243.4-1985-2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-122 CSA7-2 */
317 { "CSA_Z243.4-1985-GR", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-123 */
318 { "CSN_369103", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-139 */
319 { "CWI", RTL_TEXTENCODING_DONTKNOW }, /* CWI-2 CP-HU */
320 { "DEC-MCS", RTL_TEXTENCODING_DONTKNOW }, /* DEC */
321 { "DIN_66003", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-21 */
322 { "DS_2089", RTL_TEXTENCODING_DONTKNOW }, /* DS2089 ISO646-DK */
323 { "EBCDIC-AT-DE", RTL_TEXTENCODING_DONTKNOW },
324 { "EBCDIC-AT-DE-A", RTL_TEXTENCODING_DONTKNOW },
325 { "EBCDIC-CA-FR", RTL_TEXTENCODING_DONTKNOW },
326 { "EBCDIC-DK-NO", RTL_TEXTENCODING_DONTKNOW },
327 { "EBCDIC-DK-NO-A", RTL_TEXTENCODING_DONTKNOW },
328 { "EBCDIC-ES", RTL_TEXTENCODING_DONTKNOW },
329 { "EBCDIC-ES-A", RTL_TEXTENCODING_DONTKNOW },
330 { "EBCDIC-ES-S", RTL_TEXTENCODING_DONTKNOW },
331 { "EBCDIC-FI-SE", RTL_TEXTENCODING_DONTKNOW },
332 { "EBCDIC-FI-SE-A", RTL_TEXTENCODING_DONTKNOW },
333 { "EBCDIC-FR", RTL_TEXTENCODING_DONTKNOW },
334 { "EBCDIC-IS-FRISS", RTL_TEXTENCODING_DONTKNOW }, /* FRISS */
335 { "EBCDIC-IT", RTL_TEXTENCODING_DONTKNOW },
336 { "EBCDIC-PT", RTL_TEXTENCODING_DONTKNOW },
337 { "EBCDIC-UK", RTL_TEXTENCODING_DONTKNOW },
338 { "EBCDIC-US", RTL_TEXTENCODING_DONTKNOW },
339 { "ECMA-CYRILLIC", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-111 */
340 { "ES", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-17 */
341 { "ES2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-85 */
342 { "EUC-JP", RTL_TEXTENCODING_EUC_JP }, /* locale: ja_JP.eucjp */
343 { "EUC-KR", RTL_TEXTENCODING_EUC_KR }, /* locale: ko_KR.euckr */
344 { "EUC-TW", RTL_TEXTENCODING_EUC_TW }, /* locale: zh_TW.euctw */
345 { "GB18030", RTL_TEXTENCODING_GB_18030 }, /* locale: zh_CN.gb18030 */
346 { "GB2312", RTL_TEXTENCODING_GB_2312 }, /* locale: zh_CN */
347 { "GB_1988-80", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-57 */
348 { "GBK", RTL_TEXTENCODING_GBK }, /* locale: zh_CN.GBK */
349 { "GOST_19768-74", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-153 */
350 { "GREEK-CCITT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-150 */
351 { "GREEK7", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-88 */
352 { "GREEK7-OLD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-18 */
353 { "HP-ROMAN8", RTL_TEXTENCODING_DONTKNOW }, /* ROMAN8 R8 */
354 { "IBM037", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-[US|CA|WT] */
355 { "IBM038", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-INT CP038 */
356 { "IBM1004", RTL_TEXTENCODING_DONTKNOW }, /* CP1004 OS2LATIN1 */
357 { "IBM1026", RTL_TEXTENCODING_DONTKNOW }, /* CP1026 1026 */
358 { "IBM1047", RTL_TEXTENCODING_DONTKNOW }, /* CP1047 1047 */
359 { "IBM256", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-INT1 */
360 { "IBM273", RTL_TEXTENCODING_DONTKNOW }, /* CP273 */
361 { "IBM274", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-BE CP274 */
362 { "IBM275", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-BR CP275 */
363 { "IBM277", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-[DK|NO] */
364 { "IBM278", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-[FISE]*/
365 { "IBM280", RTL_TEXTENCODING_DONTKNOW }, /* CP280 EBCDIC-CP-IT*/
366 { "IBM281", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-JP-E CP281 */
367 { "IBM284", RTL_TEXTENCODING_DONTKNOW }, /* CP284 EBCDIC-CP-ES */
368 { "IBM285", RTL_TEXTENCODING_DONTKNOW }, /* CP285 EBCDIC-CP-GB */
369 { "IBM290", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-JP-KANA */
370 { "IBM297", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-FR */
371 { "IBM420", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-AR1 */
372 { "IBM423", RTL_TEXTENCODING_DONTKNOW }, /* CP423 EBCDIC-CP-GR */
373 { "IBM424", RTL_TEXTENCODING_DONTKNOW }, /* CP424 EBCDIC-CP-HE */
374 { "IBM437", RTL_TEXTENCODING_IBM_437 }, /* CP437 437 */
375 { "IBM500", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-[BE|CH] */
376 { "IBM850", RTL_TEXTENCODING_IBM_850 }, /* CP850 850 */
377 { "IBM851", RTL_TEXTENCODING_DONTKNOW }, /* CP851 851 */
378 { "IBM852", RTL_TEXTENCODING_IBM_852 }, /* CP852 852 */
379 { "IBM855", RTL_TEXTENCODING_IBM_855 }, /* CP855 855 */
380 { "IBM857", RTL_TEXTENCODING_IBM_857 }, /* CP857 857 */
381 { "IBM860", RTL_TEXTENCODING_IBM_860 }, /* CP860 860 */
382 { "IBM861", RTL_TEXTENCODING_IBM_861 }, /* CP861 861 CP-IS */
383 { "IBM862", RTL_TEXTENCODING_IBM_862 }, /* CP862 862 */
384 { "IBM863", RTL_TEXTENCODING_IBM_863 }, /* CP863 863 */
385 { "IBM864", RTL_TEXTENCODING_IBM_864 }, /* CP864 */
386 { "IBM865", RTL_TEXTENCODING_IBM_865 }, /* CP865 865 */
387 { "IBM866", RTL_TEXTENCODING_IBM_866 }, /* CP866 866 */
388 { "IBM868", RTL_TEXTENCODING_DONTKNOW }, /* CP868 CP-AR */
389 { "IBM869", RTL_TEXTENCODING_IBM_869 }, /* CP869 869 CP-GR */
390 { "IBM870", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-[ROECE|YU] */
391 { "IBM871", RTL_TEXTENCODING_DONTKNOW }, /* CP871 EBCDIC-CP-IS */
392 { "IBM875", RTL_TEXTENCODING_DONTKNOW }, /* CP875 EBCDIC-GREEK */
393 { "IBM880", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CYRILLIC */
394 { "IBM891", RTL_TEXTENCODING_DONTKNOW }, /* CP891 */
395 { "IBM903", RTL_TEXTENCODING_DONTKNOW }, /* CP903 */
396 { "IBM904", RTL_TEXTENCODING_DONTKNOW }, /* CP904 904 */
397 { "IBM905", RTL_TEXTENCODING_DONTKNOW }, /* CP905 EBCDIC-CP-TR */
398 { "IBM918", RTL_TEXTENCODING_DONTKNOW }, /* CP918 EBCDIC-AR2 */
399 { "IEC_P27-1", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-143 */
400 { "INIS", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-49 */
401 { "INIS-8", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-50 */
402 { "INIS-CYRILLIC", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-51 */
403 { "INVARIANT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-170 */
404 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 }, /* ISO-IR-100 CP819 */
405 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, /* ISO-IR-157 LATIN6 */
406 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, /* ISO-IR-179 LATIN7 */
407 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, /* LATIN8 L8 */
408 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 },
409 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 }, /* LATIN2 L2 */
410 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 }, /* LATIN3 L3 */
411 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 }, /* LATIN4 L4 */
412 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 }, /* CYRILLIC */
413 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 }, /* ECMA-114 ARABIC */
414 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 }, /* ECMA-118 GREEK8 */
415 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 }, /* ISO_8859-8 HEBREW */
416 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 }, /* ISO_8859-9 LATIN5 */
417 { "ISO-IR-90", RTL_TEXTENCODING_DONTKNOW }, /* ISO_6937-2:1983 */
418 { "ISO_10367-BOX", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-155 */
419 { "ISO_2033-1983", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-98 E13B */
420 { "ISO_5427", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-37 KOI-7 */
421 { "ISO_5427-EXT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-54 */
422 { "ISO_5428", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-55 */
423 { "ISO_646.BASIC", RTL_TEXTENCODING_ASCII_US }, /* REF */
424 { "ISO_646.IRV", RTL_TEXTENCODING_ASCII_US }, /* ISO-IR-2 IRV */
425 { "ISO_646.IRV:1983", RTL_TEXTENCODING_ISO_8859_1 }, /* fake: ASCII_US, used for "C" locale*/
426 { "ISO_6937", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-156 ISO6937*/
427 { "ISO_6937-2-25", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-152 */
428 { "ISO_6937-2-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-142 */
429 { "ISO_8859-SUPP", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-154 */
430 { "IT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-15 */
431 { "JIS_C6220-1969-JP", RTL_TEXTENCODING_DONTKNOW }, /* KATAKANA X0201-7 */
432 { "JIS_C6220-1969-RO", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-14 */
433 { "JIS_C6229-1984-A", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-91 */
434 { "JIS_C6229-1984-B", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-92 */
435 { "JIS_C6229-1984-B-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-93 */
436 { "JIS_C6229-1984-HAND", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-94 */
437 { "JIS_C6229-1984-HAND-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-95 */
438 { "JIS_C6229-1984-KANA", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-96 */
439 { "JIS_X0201", RTL_TEXTENCODING_DONTKNOW }, /* X0201 */
440 { "JUS_I.B1.002", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-141 */
441 { "JUS_I.B1.003-MAC", RTL_TEXTENCODING_DONTKNOW }, /* MACEDONIAN */
442 { "JUS_I.B1.003-SERB", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-146 SERBIAN */
443 { "KOI-8", RTL_TEXTENCODING_DONTKNOW },
444 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
445 { "KOI8-U", RTL_TEXTENCODING_KOI8_U },
446 { "KSC5636", RTL_TEXTENCODING_DONTKNOW }, /* ISO646-KR */
447 { "LATIN-GREEK", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-19 */
448 { "LATIN-GREEK-1", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-27 */
449 { "MAC-IS", RTL_TEXTENCODING_APPLE_ROMAN },
450 { "MAC-UK", RTL_TEXTENCODING_APPLE_ROMAN },
451 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, /* MAC */
452 { "MSZ_7795.3", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-86 */
453 { "NATS-DANO", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-9-1 */
454 { "NATS-DANO-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-9-2 */
455 { "NATS-SEFI", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-8-1 */
456 { "NATS-SEFI-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-8-2 */
457 { "NC_NC00-10", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-151 */
458 { "NEXTSTEP", RTL_TEXTENCODING_DONTKNOW }, /* NEXT */
459 { "NF_Z_62-010", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-69 */
460 { "NF_Z_62-010_(1973)", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-25 */
461 { "NS_4551-1", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-60 */
462 { "NS_4551-2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-61 */
463 { "PT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-16 */
464 { "PT2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-84 */
465 { "SAMI", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-158 */
466 { "SEN_850200_B", RTL_TEXTENCODING_DONTKNOW }, /* ISO646-[FI|SE] */
467 { "SEN_850200_C", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-11 */
468 { "T.101-G2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-128 */
469 { "T.61-7BIT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-102 */
470 { "T.61-8BIT", RTL_TEXTENCODING_DONTKNOW }, /* T.61 ISO-IR-103 */
471 { "TIS-620", RTL_TEXTENCODING_MS_874 }, /* locale: th_TH */
472 { "UTF-8", RTL_TEXTENCODING_UTF8 }, /* ISO-10646/UTF-8 */
473 { "VIDEOTEX-SUPPL", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-70 */
474 { "WIN-SAMI-2", RTL_TEXTENCODING_DONTKNOW } /* WS2 */
475 };
476
477 #elif defined(FREEBSD) || defined(DRAGONFLY)
478
479 static const Pair nl_language_list[] = {
480 { "ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
481 { "BIG5", RTL_TEXTENCODING_BIG5 }, /* China - Traditional Chinese */
482 { "CP1251", RTL_TEXTENCODING_MS_1251 }, /* MS-CYRL */
483 { "CP866", RTL_TEXTENCODING_IBM_866 }, /* CP866 866 */
484 { "EUCCN", RTL_TEXTENCODING_EUC_CN }, /* China - Simplified Chinese */
485 { "EUCJP", RTL_TEXTENCODING_EUC_JP }, /* Japan */
486 { "EUCKR", RTL_TEXTENCODING_EUC_KR }, /* Korea */
487 { "ISO8859-1", RTL_TEXTENCODING_ISO_8859_1 }, /* Western */
488 { "ISO8859-15", RTL_TEXTENCODING_ISO_8859_15 }, /* Western Updated (w/Euro sign) */
489 { "ISO8859-2", RTL_TEXTENCODING_ISO_8859_2 }, /* Central European */
490 { "ISO8859-4", RTL_TEXTENCODING_ISO_8859_4 }, /* LATIN4 L4 */
491 { "ISO8859-5", RTL_TEXTENCODING_ISO_8859_5 }, /* Cyrillic */
492 { "ISO8859-7", RTL_TEXTENCODING_ISO_8859_7 }, /* Greek */
493 { "ISO8859-9", RTL_TEXTENCODING_ISO_8859_9 }, /* Turkish */
494 { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, /* KOI8-R */
495 { "KOI8-U", RTL_TEXTENCODING_KOI8_U }, /* KOI8-U */
496 { "SJIS", RTL_TEXTENCODING_SHIFT_JIS }, /* Japan */
497 { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
498 { "UTF-8", RTL_TEXTENCODING_UTF8 } /* ISO-10646/UTF-8 */
499 };
500
501 #elif defined(NETBSD)
502
503 static const Pair nl_language_list[] = {
504 { "ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
505 { "BIG5", RTL_TEXTENCODING_BIG5 }, /* China - Traditional Chinese */
506 { "Big5", RTL_TEXTENCODING_BIG5 }, /* China - Traditional Chinese */
507 { "Big5-HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* locale: zh_CN.BIG5HK */
508 { "Big5HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* deprecated */
509 { "CP1251", RTL_TEXTENCODING_MS_1251 }, /* MS-CYRL */
510 { "CP866", RTL_TEXTENCODING_IBM_866 }, /* CP866 866 */
511 { "CTEXT", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
512 { "eucCN", RTL_TEXTENCODING_EUC_CN }, /* China - Simplified Chinese */
513 { "eucJP", RTL_TEXTENCODING_EUC_JP }, /* Japan */
514 { "eucKR", RTL_TEXTENCODING_EUC_KR }, /* Korea */
515 { "eucTW", RTL_TEXTENCODING_EUC_TW }, /* China - Traditional Chinese */
516 { "GB18030", RTL_TEXTENCODING_GB_18030 }, /* locale: zh_CN.gb18030 */
517 { "GB2312", RTL_TEXTENCODING_GB_2312 }, /* locale: zh_CN */
518 { "ISO-2022-JP", RTL_TEXTENCODING_DONTKNOW }, /* */
519 { "ISO-2022-JP-2", RTL_TEXTENCODING_DONTKNOW }, /* */
520 { "ISO8859-1", RTL_TEXTENCODING_ISO_8859_1 }, /* Western */
521 { "ISO8859-13", RTL_TEXTENCODING_ISO_8859_13 }, /* ISO-IR-179 LATIN7 */
522 { "ISO8859-15", RTL_TEXTENCODING_ISO_8859_15 }, /* Western Updated (w/Euro sign) */
523 { "ISO8859-2", RTL_TEXTENCODING_ISO_8859_2 }, /* Central European */
524 { "ISO8859-4", RTL_TEXTENCODING_ISO_8859_4 }, /* LATIN4 L4 */
525 { "ISO8859-5", RTL_TEXTENCODING_ISO_8859_5 }, /* Cyrillic */
526 { "ISO8859-7", RTL_TEXTENCODING_ISO_8859_7 }, /* Greek */
527 { "ISO8859-9", RTL_TEXTENCODING_ISO_8859_9 }, /* Turkish */
528 { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, /* KOI8-R */
529 { "KOI8-U", RTL_TEXTENCODING_KOI8_U }, /* KOI8-U */
530 { "PT154", RTL_TEXTENCODING_PT154 }, /* */
531 { "SJIS", RTL_TEXTENCODING_SHIFT_JIS }, /* Japan */
532 { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
533 { "UTF-8", RTL_TEXTENCODING_UTF8 } /* ISO-10646/UTF-8 */
534 };
535
536 #elif defined(OPENBSD)
537
538 static const Pair nl_language_list[] = {
539 { "ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
540 { "BIG5", RTL_TEXTENCODING_BIG5 }, /* China - Traditional Chinese */
541 { "CP1251", RTL_TEXTENCODING_MS_1251 }, /* MS-CYRL */
542 { "CP866", RTL_TEXTENCODING_IBM_866 }, /* CP866 866 */
543 { "EUCCN", RTL_TEXTENCODING_EUC_CN }, /* China - Simplified Chinese */
544 { "EUCJP", RTL_TEXTENCODING_EUC_JP }, /* Japan */
545 { "EUCKR", RTL_TEXTENCODING_EUC_KR }, /* Korea */
546 { "ISO8859-1", RTL_TEXTENCODING_ISO_8859_1 }, /* Western */
547 { "ISO8859-15", RTL_TEXTENCODING_ISO_8859_15 }, /* Western Updated (w/Euro sign) */
548 { "ISO8859-2", RTL_TEXTENCODING_ISO_8859_2 }, /* Central European */
549 { "ISO8859-4", RTL_TEXTENCODING_ISO_8859_4 }, /* LATIN4 L4 */
550 { "ISO8859-5", RTL_TEXTENCODING_ISO_8859_5 }, /* Cyrillic */
551 { "ISO8859-7", RTL_TEXTENCODING_ISO_8859_7 }, /* Greek */
552 { "ISO8859-9", RTL_TEXTENCODING_ISO_8859_9 }, /* Turkish */
553 { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, /* KOI8-R */
554 { "KOI8-U", RTL_TEXTENCODING_KOI8_U }, /* KOI8-U */
555 { "SJIS", RTL_TEXTENCODING_SHIFT_JIS }, /* Japan */
556 { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
557 { "UTF-8", RTL_TEXTENCODING_UTF8 } /* ISO-10646/UTF-8 */
558 };
559
560 #endif /* ifdef __sun LINUX FREEBSD NETBSD OPENBSD */
561
562 /*****************************************************************************
563 return the text encoding corresponding to the given locale
564 *****************************************************************************/
565
osl_getTextEncodingFromLocale(rtl_Locale * pLocale)566 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
567 {
568 const Pair *language=nullptr;
569
570 char locale_buf[64] = "";
571 char codeset_buf[64];
572
573 char *codeset = nullptr;
574
575 /* default to process locale if pLocale == NULL */
576 if( pLocale == nullptr )
577 osl_getProcessLocale( &pLocale );
578
579 /* convert rtl_Locale to locale string */
580 compose_locale( pLocale, locale_buf, 64 );
581
582 locale_t ctype_locale = newlocale(
583 LC_CTYPE_MASK, locale_buf, static_cast<locale_t>(0));
584 if (ctype_locale == static_cast<locale_t>(0))
585 {
586 return RTL_TEXTENCODING_DONTKNOW;
587 }
588
589 /* get the charset as indicated by the LC_CTYPE locale */
590 #if defined(NETBSD) && !defined(CODESET)
591 codeset = NULL;
592 #else
593 codeset = nl_langinfo_l(CODESET, ctype_locale);
594 // per SUSv4, the return value of nl_langinfo_l can be invalidated by a
595 // subsequent call to nl_langinfo (not nl_langinfo_l) in any thread, but
596 // we cannot guard against that (at least, no code in LO itself should
597 // call nl_langinfo)
598 #endif
599
600 if ( codeset != nullptr )
601 {
602 /* get codeset into mt save memory */
603 strncpy( codeset_buf, codeset, sizeof(codeset_buf) );
604 codeset_buf[sizeof(codeset_buf) - 1] = 0;
605 codeset = codeset_buf;
606 }
607
608 freelocale(ctype_locale);
609
610 /* search the codeset in our language list */
611 if ( codeset != nullptr )
612 {
613 language = pair_search (codeset, nl_language_list, SAL_N_ELEMENTS( nl_language_list ) );
614 }
615
616 OSL_ASSERT( language && ( RTL_TEXTENCODING_DONTKNOW != language->value ) );
617
618 /* a matching item in our list provides a mapping from codeset to
619 * rtl-codeset */
620 if ( language != nullptr )
621 return language->value;
622
623 return RTL_TEXTENCODING_DONTKNOW;
624 }
625
626 /*****************************************************************************
627 return the current process locale
628 *****************************************************************************/
629
imp_getProcessLocale(rtl_Locale ** ppLocale)630 void imp_getProcessLocale( rtl_Locale ** ppLocale )
631 {
632 char const * locale = getenv("LC_ALL");
633 if (locale == nullptr || *locale == '\0') {
634 locale = getenv("LC_CTYPE");
635 if (locale == nullptr || *locale == '\0') {
636 locale = getenv("LANG");
637 if (locale == nullptr || *locale == '\0') {
638 locale = "C";
639 }
640 }
641 }
642 // coverity[overrun-buffer-val : FALSE] - coverity gets this very wrong
643 *ppLocale = parse_locale(locale);
644 }
645
646 #else /* ifdef LINUX || __sun || MACOSX || NETBSD */
647
648 /*
649 * This implementation of osl_getTextEncodingFromLocale maps
650 * from the ISO language codes.
651 */
652
653 const Pair full_locale_list[] = {
654 { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP },
655 { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP },
656 { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR },
657 { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN },
658 { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW }
659 };
660
661 const Pair locale_extension_list[] = {
662 { "big5", RTL_TEXTENCODING_BIG5 },
663 { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS },
664 { "gb18030", RTL_TEXTENCODING_GB_18030 },
665 { "euc", RTL_TEXTENCODING_EUC_JP },
666 { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1 },
667 { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10 },
668 { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13 },
669 { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14 },
670 { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15 },
671 { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2 },
672 { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3 },
673 { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4 },
674 { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5 },
675 { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6 },
676 { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7 },
677 { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8 },
678 { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9 },
679 { "koi8-r", RTL_TEXTENCODING_KOI8_R },
680 { "koi8-u", RTL_TEXTENCODING_KOI8_U },
681 { "pck", RTL_TEXTENCODING_MS_932 },
682 #if (0)
683 { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW },
684 #endif
685 { "utf-16", RTL_TEXTENCODING_UNICODE },
686 { "utf-7", RTL_TEXTENCODING_UTF7 },
687 { "utf-8", RTL_TEXTENCODING_UTF8 }
688 };
689
690 const Pair iso_language_list[] = {
691 { "af", RTL_TEXTENCODING_ISO_8859_1 },
692 { "ar", RTL_TEXTENCODING_ISO_8859_6 },
693 { "az", RTL_TEXTENCODING_ISO_8859_9 },
694 { "be", RTL_TEXTENCODING_ISO_8859_5 },
695 { "bg", RTL_TEXTENCODING_ISO_8859_5 },
696 { "ca", RTL_TEXTENCODING_ISO_8859_1 },
697 { "cs", RTL_TEXTENCODING_ISO_8859_2 },
698 { "da", RTL_TEXTENCODING_ISO_8859_1 },
699 { "de", RTL_TEXTENCODING_ISO_8859_1 },
700 { "el", RTL_TEXTENCODING_ISO_8859_7 },
701 { "en", RTL_TEXTENCODING_ISO_8859_1 },
702 { "es", RTL_TEXTENCODING_ISO_8859_1 },
703 { "et", RTL_TEXTENCODING_ISO_8859_4 },
704 { "eu", RTL_TEXTENCODING_ISO_8859_1 },
705 { "fa", RTL_TEXTENCODING_ISO_8859_6 },
706 { "fi", RTL_TEXTENCODING_ISO_8859_1 },
707 { "fo", RTL_TEXTENCODING_ISO_8859_1 },
708 { "fr", RTL_TEXTENCODING_ISO_8859_1 },
709 { "gr", RTL_TEXTENCODING_ISO_8859_7 },
710 { "he", RTL_TEXTENCODING_ISO_8859_8 },
711 { "hi", RTL_TEXTENCODING_DONTKNOW },
712 { "hr", RTL_TEXTENCODING_ISO_8859_2 },
713 { "hu", RTL_TEXTENCODING_ISO_8859_2 },
714 { "hy", RTL_TEXTENCODING_DONTKNOW },
715 { "id", RTL_TEXTENCODING_ISO_8859_1 },
716 { "is", RTL_TEXTENCODING_ISO_8859_1 },
717 { "it", RTL_TEXTENCODING_ISO_8859_1 },
718 { "iw", RTL_TEXTENCODING_ISO_8859_8 },
719 { "ja", RTL_TEXTENCODING_EUC_JP },
720 { "ka", RTL_TEXTENCODING_DONTKNOW },
721 { "kk", RTL_TEXTENCODING_ISO_8859_5 },
722 { "ko", RTL_TEXTENCODING_EUC_KR },
723 { "lt", RTL_TEXTENCODING_ISO_8859_4 },
724 { "lv", RTL_TEXTENCODING_ISO_8859_4 },
725 { "mk", RTL_TEXTENCODING_ISO_8859_5 },
726 { "mr", RTL_TEXTENCODING_DONTKNOW },
727 { "ms", RTL_TEXTENCODING_ISO_8859_1 },
728 { "nl", RTL_TEXTENCODING_ISO_8859_1 },
729 { "no", RTL_TEXTENCODING_ISO_8859_1 },
730 { "pl", RTL_TEXTENCODING_ISO_8859_2 },
731 { "pt", RTL_TEXTENCODING_ISO_8859_1 },
732 { "ro", RTL_TEXTENCODING_ISO_8859_2 },
733 { "ru", RTL_TEXTENCODING_ISO_8859_5 },
734 { "sa", RTL_TEXTENCODING_DONTKNOW },
735 { "sk", RTL_TEXTENCODING_ISO_8859_2 },
736 { "sl", RTL_TEXTENCODING_ISO_8859_2 },
737 { "sq", RTL_TEXTENCODING_ISO_8859_2 },
738 { "sv", RTL_TEXTENCODING_ISO_8859_1 },
739 { "sw", RTL_TEXTENCODING_ISO_8859_1 },
740 { "ta", RTL_TEXTENCODING_DONTKNOW },
741 { "th", RTL_TEXTENCODING_DONTKNOW },
742 { "tr", RTL_TEXTENCODING_ISO_8859_9 },
743 { "tt", RTL_TEXTENCODING_ISO_8859_5 },
744 { "uk", RTL_TEXTENCODING_ISO_8859_5 },
745 { "ur", RTL_TEXTENCODING_ISO_8859_6 },
746 { "uz", RTL_TEXTENCODING_ISO_8859_9 },
747 { "vi", RTL_TEXTENCODING_DONTKNOW },
748 { "zh", RTL_TEXTENCODING_BIG5 }
749 };
750
751 /*****************************************************************************
752 return the text encoding corresponding to the given locale
753 *****************************************************************************/
754
osl_getTextEncodingFromLocale(rtl_Locale * pLocale)755 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
756 {
757 const Pair *language = nullptr;
758 char locale_buf[64] = "";
759
760 /* default to process locale if pLocale == NULL */
761 if( nullptr == pLocale )
762 osl_getProcessLocale( &pLocale );
763
764 /* convert rtl_Locale to locale string */
765 if( compose_locale( pLocale, locale_buf, 64 ) )
766 {
767 /* check special handling list (EUC) first */
768 language = pair_search( locale_buf, full_locale_list, SAL_N_ELEMENTS( full_locale_list ) );
769
770 if( nullptr == language )
771 {
772 /*
773 * check if there is a charset qualifier at the end of the given locale string
774 * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
775 * charset to use
776 */
777 char* cp = strrchr( locale_buf, '.' );
778
779 if( nullptr != cp )
780 {
781 language = pair_search( cp + 1, locale_extension_list, SAL_N_ELEMENTS( locale_extension_list ) );
782 }
783 }
784
785 /* use iso language code to determine the charset */
786 if( nullptr == language )
787 {
788 /* iso lang codes have 2 characters */
789 locale_buf[2] = '\0';
790
791 language = pair_search( locale_buf, iso_language_list, SAL_N_ELEMENTS( iso_language_list ) );
792 }
793 }
794
795 /* a matching item in our list provides a mapping from codeset to
796 * rtl-codeset */
797 if ( language != nullptr )
798 return language->value;
799
800 return RTL_TEXTENCODING_DONTKNOW;
801 }
802
803 #if defined(MACOSX) || defined(IOS)
804
805 /*****************************************************************************
806 return the current process locale
807 *****************************************************************************/
808
imp_getProcessLocale(rtl_Locale ** ppLocale)809 void imp_getProcessLocale( rtl_Locale ** ppLocale )
810 {
811 OUString loc16(macosx_getLocale());
812 OString locale;
813 if (!loc16.convertToString(
814 &locale, RTL_TEXTENCODING_UTF8,
815 (RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
816 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR)))
817 {
818 SAL_INFO("sal.osl", "Cannot convert \"" << loc16 << "\" to UTF-8");
819 }
820
821 /* handle the case where OS specific method of finding locale fails */
822 if ( locale.isEmpty() )
823 {
824 /* simulate behavior of setlocale */
825 locale = getenv( "LC_ALL" );
826
827 if( locale.isEmpty() )
828 locale = getenv( "LC_CTYPE" );
829
830 if( locale.isEmpty() )
831 locale = getenv( "LANG" );
832
833 if( locale.isEmpty() )
834 locale = "C";
835 }
836
837 /* return the locale */
838 *ppLocale = parse_locale( locale.getStr() );
839 }
840 #else
841 /*****************************************************************************
842 return the current process locale
843 *****************************************************************************/
844
imp_getProcessLocale(rtl_Locale ** ppLocale)845 void imp_getProcessLocale( rtl_Locale ** ppLocale )
846 {
847 #ifdef ANDROID
848 /* No locale environment variables on Android, so why even bother
849 * with getenv().
850 */
851 char const * locale = "en-US.UTF-8";
852 #else
853 /* simulate behavior off setlocale */
854 char * locale = getenv( "LC_ALL" );
855
856 if( NULL == locale )
857 locale = getenv( "LC_CTYPE" );
858
859 if( NULL == locale )
860 locale = getenv( "LANG" );
861
862 if( NULL == locale )
863 locale = "C";
864
865 #endif
866 *ppLocale = parse_locale( locale );
867 }
868 #endif
869
870 #endif /* ifdef LINUX || __sun || MACOSX || NETBSD || AIX */
871
872 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
873