1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <cstring>
24 
25 #include <osl/nlsupport.h>
26 #include <osl/diagnose.h>
27 #include <osl/process.h>
28 #include <rtl/string.hxx>
29 #include <rtl/ustring.hxx>
30 #include <sal/log.hxx>
31 
32 #include "nlsupport.hxx"
33 
34 #if defined(LINUX) || defined(__sun) || defined(NETBSD) || \
35     defined(FREEBSD) || defined(MACOSX)  || defined(IOS) || defined(OPENBSD) || \
36     defined(DRAGONFLY)
37 #if !defined(MACOSX) && !defined(IOS)
38 #include <locale.h>
39 #include <langinfo.h>
40 #else
41 #include <osl/module.h>
42 #include <osl/thread.h>
43 #endif  /* !MACOSX && !IOS */
44 #endif  /* LINUX || __sun || NETBSD || MACOSX || IOS */
45 
46 #if defined(MACOSX) || defined(IOS)
47 #include "system.hxx"
48 #endif
49 
50 #include <string.h>
51 
52 namespace {
53 
54 struct Pair {
55     const char              *key;
56     const rtl_TextEncoding   value;
57 };
58 
59 }
60 
61 /*****************************************************************************
62  compare function for binary search
63  *****************************************************************************/
64 
65 static int
pair_compare(const char * key,const Pair * pair)66 pair_compare (const char *key, const Pair *pair)
67 {
68     int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
69     return result;
70 }
71 
72 /*****************************************************************************
73  binary search on encoding tables
74  *****************************************************************************/
75 
76 static const Pair*
pair_search(const char * key,const Pair * base,unsigned int member)77 pair_search (const char *key, const Pair *base, unsigned int member )
78 {
79     unsigned int lower = 0;
80     unsigned int upper = member;
81 
82     /* check for validity of input */
83     if ( (key == nullptr) || (base == nullptr) || (member == 0) )
84         return nullptr;
85 
86     /* binary search */
87     while ( lower < upper )
88     {
89         const unsigned int current = (lower + upper) / 2;
90         const int comparison = pair_compare( key, base + current );
91         if (comparison < 0)
92             upper = current;
93         else if (comparison > 0)
94             lower = current + 1;
95         else
96             return base + current;
97     }
98 
99     return nullptr;
100 }
101 
102 /*****************************************************************************
103  convert rtl_Locale to locale string
104  *****************************************************************************/
105 
compose_locale(rtl_Locale * pLocale,char * buffer,size_t n)106 static char * compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
107 {
108     /* check if a valid locale is specified */
109     if( pLocale && pLocale->Language &&
110             (pLocale->Language->length == 2 || pLocale->Language->length == 3) )
111     {
112         size_t offset = 0;
113 
114         /* convert language code to ascii */
115         {
116             rtl_String *pLanguage = nullptr;
117 
118             rtl_uString2String( &pLanguage,
119                 pLocale->Language->buffer, pLocale->Language->length,
120                 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
121 
122             if( sal::static_int_cast<sal_uInt32>(pLanguage->length) < n )
123             {
124                 strcpy( buffer, pLanguage->buffer );
125                 offset = pLanguage->length;
126             }
127 
128             rtl_string_release( pLanguage );
129         }
130 
131         /* convert country code to ascii */
132         if( pLocale->Country && (pLocale->Country->length == 2) )
133         {
134             rtl_String *pCountry = nullptr;
135 
136             rtl_uString2String( &pCountry,
137                 pLocale->Country->buffer, pLocale->Country->length,
138                 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
139 
140             if( offset + pCountry->length + 1 < n )
141             {
142                 strcpy( buffer + offset++, "_" );
143                 strcpy( buffer + offset, pCountry->buffer );
144                 offset += pCountry->length;
145             }
146 
147             rtl_string_release( pCountry );
148         }
149 
150         /* convert variant to ascii - check if there is enough space for the variant string */
151         if( pLocale->Variant && pLocale->Variant->length &&
152             ( sal::static_int_cast<sal_uInt32>(pLocale->Variant->length) < n - 6 ) )
153         {
154             rtl_String *pVariant = nullptr;
155 
156             rtl_uString2String( &pVariant,
157                 pLocale->Variant->buffer, pLocale->Variant->length,
158                 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
159 
160             if( offset + pVariant->length + 1 < n )
161             {
162                 strcpy( buffer + offset, pVariant->buffer );
163             }
164 
165             rtl_string_release( pVariant );
166         }
167 
168         return buffer;
169     }
170 
171     return nullptr;
172 }
173 
174 /*****************************************************************************
175  convert locale string to rtl_Locale
176  *****************************************************************************/
177 
parse_locale(const char * locale)178 static rtl_Locale * parse_locale( const char * locale )
179 {
180     assert(locale != nullptr);
181 
182     if (*locale == '\0' || std::strcmp(locale, "C") == 0
183         || std::strcmp(locale, "POSIX") == 0)
184     {
185         return rtl_locale_register(u"C", u"", u"");
186     }
187 
188     size_t len = strlen( locale );
189 
190     rtl_uString * pLanguage = nullptr;
191     rtl_uString * pCountry  = nullptr;
192     rtl_uString * pVariant  = nullptr;
193 
194     size_t offset = std::min<size_t>(len, 2);
195 
196     rtl_Locale * ret;
197 
198     /* language is a two or three letter code */
199     if( (len > 3 && locale[3] == '_') || (len == 3 && locale[2] != '_') )
200         offset = 3;
201 
202     /* convert language code to unicode */
203     rtl_string2UString( &pLanguage, locale, offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
204     OSL_ASSERT(pLanguage != nullptr);
205 
206     /* convert country code to unicode */
207     if( len >= offset+3 && locale[offset] == '_' )
208     {
209         rtl_string2UString( &pCountry, locale + offset + 1, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
210         OSL_ASSERT(pCountry != nullptr);
211         offset += 3;
212     }
213 
214     /* convert variant code to unicode - do not rely on "." as delimiter */
215     if( len > offset ) {
216         rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
217         OSL_ASSERT(pVariant != nullptr);
218     }
219 
220     ret =  rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : u"", pVariant ? pVariant->buffer : u"" );
221 
222     if (pVariant) rtl_uString_release(pVariant);
223     if (pCountry) rtl_uString_release(pCountry);
224     if (pLanguage) rtl_uString_release(pLanguage);
225 
226     return ret;
227 }
228 
229 #if defined(LINUX) || defined(__sun) || defined(NETBSD) || \
230     defined(FREEBSD) || defined(OPENBSD) || defined(DRAGONFLY)
231 
232 /*
233  * This implementation of osl_getTextEncodingFromLocale maps
234  * from nl_langinfo_l(CODESET) to rtl_textencoding defines.
235  * nl_langinfo() is supported only on Linux, Solaris,
236  * >= NetBSD 1.6 and >= FreeBSD 4.4
237  */
238 
239 #ifdef LINUX
240 #if !defined(CODESET)
241 #define CODESET _NL_CTYPE_CODESET_NAME
242 #endif
243 #endif
244 
/*
 * nl_language_list[] is the table of supported encodings. Because it is
 * searched with a binary search, the entries have to be in ascending order.
 * The names are compared ignoring ASCII case, so "ascending" means
 * ascending under that case-insensitive comparison.
 */
251 
252 #if defined(__sun)
253 
254 /* The values in the below list can be obtained with a script like
255  *  #!/bin/sh
256  *  for i in `locale -a`; do
257  *    LC_ALL=$i locale -k code_set_name
258  *  done
259  */
260 static const Pair nl_language_list[] = {
261     { "5601",           RTL_TEXTENCODING_EUC_KR         }, /* ko_KR.EUC */
262     { "646",            RTL_TEXTENCODING_ISO_8859_1     }, /* fake: ASCII_US */
263     { "ANSI-1251",      RTL_TEXTENCODING_MS_1251        }, /* ru_RU.ANSI1251 */
264     { "BIG5",           RTL_TEXTENCODING_BIG5           }, /* zh_CN.BIG5 */
265     { "BIG5-HKSCS",     RTL_TEXTENCODING_BIG5_HKSCS     }, /* zh_CN.BIG5HK */
266     { "CNS11643",       RTL_TEXTENCODING_EUC_TW         }, /* zh_TW.EUC */
267     { "EUCJP",          RTL_TEXTENCODING_EUC_JP         }, /* ja_JP.eucjp */
268     { "GB18030",        RTL_TEXTENCODING_GB_18030       }, /* zh_CN.GB18030 */
269     { "GB2312",         RTL_TEXTENCODING_GB_2312        }, /* zh_CN */
270     { "GBK",            RTL_TEXTENCODING_GBK            }, /* zh_CN.GBK */
271     { "ISO8859-1",      RTL_TEXTENCODING_ISO_8859_1     },
272     { "ISO8859-10",     RTL_TEXTENCODING_ISO_8859_10    },
273     { "ISO8859-13",     RTL_TEXTENCODING_ISO_8859_13    }, /* lt_LT lv_LV */
274     { "ISO8859-14",     RTL_TEXTENCODING_ISO_8859_14    },
275     { "ISO8859-15",     RTL_TEXTENCODING_ISO_8859_15    },
276     { "ISO8859-2",      RTL_TEXTENCODING_ISO_8859_2     },
277     { "ISO8859-3",      RTL_TEXTENCODING_ISO_8859_3     },
278     { "ISO8859-4",      RTL_TEXTENCODING_ISO_8859_4     },
279     { "ISO8859-5",      RTL_TEXTENCODING_ISO_8859_5     },
280     { "ISO8859-6",      RTL_TEXTENCODING_ISO_8859_6     },
281     { "ISO8859-7",      RTL_TEXTENCODING_ISO_8859_7     },
282     { "ISO8859-8",      RTL_TEXTENCODING_ISO_8859_8     },
283     { "ISO8859-9",      RTL_TEXTENCODING_ISO_8859_9     },
284     { "KOI8-R",         RTL_TEXTENCODING_KOI8_R         },
285     { "KOI8-U",         RTL_TEXTENCODING_KOI8_U         },
286     { "PCK",            RTL_TEXTENCODING_MS_932         },
287     { "SUN_EU_GREEK",   RTL_TEXTENCODING_ISO_8859_7     }, /* 8859-7 + Euro */
288     { "TIS620.2533",    RTL_TEXTENCODING_MS_874         }, /* th_TH.TIS620 */
289     { "UTF-8",          RTL_TEXTENCODING_UTF8           }
290 };
291 
292 /* XXX MS-874 is an extension to tis620, so this is not
293  * really equivalent */
294 
295 #elif defined(LINUX)
296 
297 const Pair nl_language_list[] = {
298     { "ANSI_X3.110-1983",           RTL_TEXTENCODING_DONTKNOW   },  /* ISO-IR-99 NAPLPS */
299     { "ANSI_X3.4-1968",             RTL_TEXTENCODING_ISO_8859_1 },  /* fake: ASCII_US */
300     { "ASMO_449",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO_9036 ARABIC7 */
301     { "BALTIC",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-179 */
302     { "BIG5",                       RTL_TEXTENCODING_BIG5 },        /* locale: zh_TW */
303     { "BIG5-HKSCS",                 RTL_TEXTENCODING_BIG5_HKSCS },  /* locale: zh_CN.BIG5HK */
304     { "BIG5HKSCS",                  RTL_TEXTENCODING_BIG5_HKSCS },  /* deprecated */
305     { "BS_4730",                    RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-4 ISO646-GB */
306     { "BS_VIEWDATA",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-47 */
307     { "CP1250",                     RTL_TEXTENCODING_MS_1250 },     /* MS-EE */
308     { "CP1251",                     RTL_TEXTENCODING_MS_1251 },     /* MS-CYRL */
309     { "CP1252",                     RTL_TEXTENCODING_MS_1252 },     /* MS-ANSI */
310     { "CP1253",                     RTL_TEXTENCODING_MS_1253 },     /* MS-GREEK */
311     { "CP1254",                     RTL_TEXTENCODING_MS_1254 },     /* MS-TURK */
312     { "CP1255",                     RTL_TEXTENCODING_MS_1255 },     /* MS-HEBR */
313     { "CP1256",                     RTL_TEXTENCODING_MS_1256 },     /* MS-ARAB */
314     { "CP1257",                     RTL_TEXTENCODING_MS_1257 },     /* WINBALTRIM */
315     { "CSA_Z243.4-1985-1",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-121 */
316     { "CSA_Z243.4-1985-2",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-122 CSA7-2 */
317     { "CSA_Z243.4-1985-GR",         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-123 */
318     { "CSN_369103",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-139 */
319     { "CWI",                        RTL_TEXTENCODING_DONTKNOW },    /* CWI-2 CP-HU */
320     { "DEC-MCS",                    RTL_TEXTENCODING_DONTKNOW },    /* DEC */
321     { "DIN_66003",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-21 */
322     { "DS_2089",                    RTL_TEXTENCODING_DONTKNOW },    /* DS2089 ISO646-DK */
323     { "EBCDIC-AT-DE",               RTL_TEXTENCODING_DONTKNOW },
324     { "EBCDIC-AT-DE-A",             RTL_TEXTENCODING_DONTKNOW },
325     { "EBCDIC-CA-FR",               RTL_TEXTENCODING_DONTKNOW },
326     { "EBCDIC-DK-NO",               RTL_TEXTENCODING_DONTKNOW },
327     { "EBCDIC-DK-NO-A",             RTL_TEXTENCODING_DONTKNOW },
328     { "EBCDIC-ES",                  RTL_TEXTENCODING_DONTKNOW },
329     { "EBCDIC-ES-A",                RTL_TEXTENCODING_DONTKNOW },
330     { "EBCDIC-ES-S",                RTL_TEXTENCODING_DONTKNOW },
331     { "EBCDIC-FI-SE",               RTL_TEXTENCODING_DONTKNOW },
332     { "EBCDIC-FI-SE-A",             RTL_TEXTENCODING_DONTKNOW },
333     { "EBCDIC-FR",                  RTL_TEXTENCODING_DONTKNOW },
334     { "EBCDIC-IS-FRISS",            RTL_TEXTENCODING_DONTKNOW },    /*  FRISS */
335     { "EBCDIC-IT",                  RTL_TEXTENCODING_DONTKNOW },
336     { "EBCDIC-PT",                  RTL_TEXTENCODING_DONTKNOW },
337     { "EBCDIC-UK",                  RTL_TEXTENCODING_DONTKNOW },
338     { "EBCDIC-US",                  RTL_TEXTENCODING_DONTKNOW },
339     { "ECMA-CYRILLIC",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-111 */
340     { "ES",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-17 */
341     { "ES2",                        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-85 */
342     { "EUC-JP",                     RTL_TEXTENCODING_EUC_JP },      /* locale: ja_JP.eucjp */
343     { "EUC-KR",                     RTL_TEXTENCODING_EUC_KR },      /* locale: ko_KR.euckr */
344     { "EUC-TW",                     RTL_TEXTENCODING_EUC_TW },      /* locale: zh_TW.euctw */
345     { "GB18030",                    RTL_TEXTENCODING_GB_18030 },    /* locale: zh_CN.gb18030 */
346     { "GB2312",                     RTL_TEXTENCODING_GB_2312 },     /* locale: zh_CN */
347     { "GB_1988-80",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-57 */
348     { "GBK",                        RTL_TEXTENCODING_GBK },         /* locale: zh_CN.GBK */
349     { "GOST_19768-74",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-153 */
350     { "GREEK-CCITT",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-150 */
351     { "GREEK7",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-88 */
352     { "GREEK7-OLD",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-18 */
353     { "HP-ROMAN8",                  RTL_TEXTENCODING_DONTKNOW },    /* ROMAN8 R8 */
354     { "IBM037",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-[US|CA|WT] */
355     { "IBM038",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-INT CP038 */
356     { "IBM1004",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1004 OS2LATIN1 */
357     { "IBM1026",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1026 1026 */
358     { "IBM1047",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1047 1047 */
359     { "IBM256",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-INT1 */
360     { "IBM273",                     RTL_TEXTENCODING_DONTKNOW },    /* CP273 */
361     { "IBM274",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-BE CP274 */
362     { "IBM275",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-BR CP275 */
363     { "IBM277",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[DK|NO] */
364     { "IBM278",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[FISE]*/
365     { "IBM280",                     RTL_TEXTENCODING_DONTKNOW },    /* CP280 EBCDIC-CP-IT*/
366     { "IBM281",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-JP-E CP281 */
367     { "IBM284",                     RTL_TEXTENCODING_DONTKNOW },    /* CP284 EBCDIC-CP-ES */
368     { "IBM285",                     RTL_TEXTENCODING_DONTKNOW },    /* CP285 EBCDIC-CP-GB */
369     { "IBM290",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-JP-KANA */
370     { "IBM297",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-FR */
371     { "IBM420",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-AR1 */
372     { "IBM423",                     RTL_TEXTENCODING_DONTKNOW },    /* CP423 EBCDIC-CP-GR */
373     { "IBM424",                     RTL_TEXTENCODING_DONTKNOW },    /* CP424 EBCDIC-CP-HE */
374     { "IBM437",                     RTL_TEXTENCODING_IBM_437 },     /* CP437 437 */
375     { "IBM500",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[BE|CH] */
376     { "IBM850",                     RTL_TEXTENCODING_IBM_850 },     /* CP850 850 */
377     { "IBM851",                     RTL_TEXTENCODING_DONTKNOW },    /* CP851 851 */
378     { "IBM852",                     RTL_TEXTENCODING_IBM_852 },     /* CP852 852 */
379     { "IBM855",                     RTL_TEXTENCODING_IBM_855 },     /* CP855 855 */
380     { "IBM857",                     RTL_TEXTENCODING_IBM_857 },     /* CP857 857 */
381     { "IBM860",                     RTL_TEXTENCODING_IBM_860 },     /* CP860 860 */
382     { "IBM861",                     RTL_TEXTENCODING_IBM_861 },     /* CP861 861 CP-IS */
383     { "IBM862",                     RTL_TEXTENCODING_IBM_862 },     /* CP862 862 */
384     { "IBM863",                     RTL_TEXTENCODING_IBM_863 },     /* CP863 863 */
385     { "IBM864",                     RTL_TEXTENCODING_IBM_864 },     /* CP864 */
386     { "IBM865",                     RTL_TEXTENCODING_IBM_865 },     /* CP865 865 */
387     { "IBM866",                     RTL_TEXTENCODING_IBM_866 },     /* CP866 866 */
388     { "IBM868",                     RTL_TEXTENCODING_DONTKNOW },    /* CP868 CP-AR */
389     { "IBM869",                     RTL_TEXTENCODING_IBM_869 },     /* CP869 869 CP-GR */
390     { "IBM870",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-[ROECE|YU] */
391     { "IBM871",                     RTL_TEXTENCODING_DONTKNOW },    /* CP871 EBCDIC-CP-IS */
392     { "IBM875",                     RTL_TEXTENCODING_DONTKNOW },    /* CP875 EBCDIC-GREEK */
393     { "IBM880",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CYRILLIC */
394     { "IBM891",                     RTL_TEXTENCODING_DONTKNOW },    /* CP891 */
395     { "IBM903",                     RTL_TEXTENCODING_DONTKNOW },    /* CP903 */
396     { "IBM904",                     RTL_TEXTENCODING_DONTKNOW },    /* CP904 904 */
397     { "IBM905",                     RTL_TEXTENCODING_DONTKNOW },    /* CP905 EBCDIC-CP-TR */
398     { "IBM918",                     RTL_TEXTENCODING_DONTKNOW },    /* CP918 EBCDIC-AR2 */
399     { "IEC_P27-1",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-143 */
400     { "INIS",                       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-49 */
401     { "INIS-8",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-50 */
402     { "INIS-CYRILLIC",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-51 */
403     { "INVARIANT",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-170 */
404     { "ISO-8859-1",                 RTL_TEXTENCODING_ISO_8859_1 },  /* ISO-IR-100 CP819 */
405     { "ISO-8859-10",                RTL_TEXTENCODING_ISO_8859_10 }, /* ISO-IR-157 LATIN6 */
406     { "ISO-8859-13",                RTL_TEXTENCODING_ISO_8859_13 }, /* ISO-IR-179 LATIN7 */
407     { "ISO-8859-14",                RTL_TEXTENCODING_ISO_8859_14 }, /* LATIN8 L8 */
408     { "ISO-8859-15",                RTL_TEXTENCODING_ISO_8859_15 },
409     { "ISO-8859-2",                 RTL_TEXTENCODING_ISO_8859_2 },  /* LATIN2 L2 */
410     { "ISO-8859-3",                 RTL_TEXTENCODING_ISO_8859_3 },  /* LATIN3 L3 */
411     { "ISO-8859-4",                 RTL_TEXTENCODING_ISO_8859_4 },  /* LATIN4 L4 */
412     { "ISO-8859-5",                 RTL_TEXTENCODING_ISO_8859_5 },  /* CYRILLIC */
413     { "ISO-8859-6",                 RTL_TEXTENCODING_ISO_8859_6 },  /* ECMA-114 ARABIC */
414     { "ISO-8859-7",                 RTL_TEXTENCODING_ISO_8859_7 },  /* ECMA-118 GREEK8 */
415     { "ISO-8859-8",                 RTL_TEXTENCODING_ISO_8859_8 },  /* ISO_8859-8 HEBREW */
416     { "ISO-8859-9",                 RTL_TEXTENCODING_ISO_8859_9 },  /* ISO_8859-9 LATIN5 */
417     { "ISO-IR-90",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO_6937-2:1983 */
418     { "ISO_10367-BOX",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-155 */
419     { "ISO_2033-1983",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-98 E13B */
420     { "ISO_5427",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-37 KOI-7 */
421     { "ISO_5427-EXT",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-54  */
422     { "ISO_5428",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-55 */
423     { "ISO_646.BASIC",              RTL_TEXTENCODING_ASCII_US },    /* REF */
424     { "ISO_646.IRV",                RTL_TEXTENCODING_ASCII_US },    /* ISO-IR-2 IRV */
425     { "ISO_646.IRV:1983",           RTL_TEXTENCODING_ISO_8859_1 },  /* fake: ASCII_US, used for "C" locale*/
426     { "ISO_6937",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-156 ISO6937*/
427     { "ISO_6937-2-25",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-152 */
428     { "ISO_6937-2-ADD",             RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-142 */
429     { "ISO_8859-SUPP",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-154 */
430     { "IT",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-15  */
431     { "JIS_C6220-1969-JP",          RTL_TEXTENCODING_DONTKNOW },    /* KATAKANA X0201-7 */
432     { "JIS_C6220-1969-RO",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-14 */
433     { "JIS_C6229-1984-A",           RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-91 */
434     { "JIS_C6229-1984-B",           RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-92 */
435     { "JIS_C6229-1984-B-ADD",       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-93 */
436     { "JIS_C6229-1984-HAND",        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-94 */
437     { "JIS_C6229-1984-HAND-ADD",    RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-95 */
438     { "JIS_C6229-1984-KANA",        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-96 */
439     { "JIS_X0201",                  RTL_TEXTENCODING_DONTKNOW },    /* X0201 */
440     { "JUS_I.B1.002",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-141 */
441     { "JUS_I.B1.003-MAC",           RTL_TEXTENCODING_DONTKNOW },    /* MACEDONIAN */
442     { "JUS_I.B1.003-SERB",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-146 SERBIAN */
443     { "KOI-8",                      RTL_TEXTENCODING_DONTKNOW },
444     { "KOI8-R",                     RTL_TEXTENCODING_KOI8_R },
445     { "KOI8-U",                     RTL_TEXTENCODING_KOI8_U },
446     { "KSC5636",                    RTL_TEXTENCODING_DONTKNOW },    /* ISO646-KR */
447     { "LATIN-GREEK",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-19 */
448     { "LATIN-GREEK-1",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-27 */
449     { "MAC-IS",                     RTL_TEXTENCODING_APPLE_ROMAN },
450     { "MAC-UK",                     RTL_TEXTENCODING_APPLE_ROMAN },
451     { "MACINTOSH",                  RTL_TEXTENCODING_APPLE_ROMAN }, /* MAC */
452     { "MSZ_7795.3",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-86 */
453     { "NATS-DANO",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-9-1 */
454     { "NATS-DANO-ADD",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-9-2 */
455     { "NATS-SEFI",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-8-1 */
456     { "NATS-SEFI-ADD",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-8-2 */
457     { "NC_NC00-10",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-151 */
458     { "NEXTSTEP",                   RTL_TEXTENCODING_DONTKNOW },    /* NEXT */
459     { "NF_Z_62-010",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-69 */
460     { "NF_Z_62-010_(1973)",         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-25 */
461     { "NS_4551-1",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-60 */
462     { "NS_4551-2",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-61 */
463     { "PT",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-16 */
464     { "PT2",                        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-84 */
465     { "SAMI",                       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-158 */
466     { "SEN_850200_B",               RTL_TEXTENCODING_DONTKNOW },    /* ISO646-[FI|SE] */
467     { "SEN_850200_C",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-11 */
468     { "T.101-G2",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-128 */
469     { "T.61-7BIT",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-102 */
470     { "T.61-8BIT",                  RTL_TEXTENCODING_DONTKNOW },    /* T.61 ISO-IR-103 */
471     { "TIS-620",                    RTL_TEXTENCODING_MS_874 },     /* locale: th_TH */
472     { "UTF-8",                      RTL_TEXTENCODING_UTF8 },        /* ISO-10646/UTF-8 */
473     { "VIDEOTEX-SUPPL",             RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-70 */
474     { "WIN-SAMI-2",                 RTL_TEXTENCODING_DONTKNOW }     /* WS2 */
475 };
476 
477 #elif defined(FREEBSD) || defined(DRAGONFLY)
478 
479 static const Pair nl_language_list[] = {
480     { "ASCII",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
481     { "BIG5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
482     { "CP1251",        RTL_TEXTENCODING_MS_1251        }, /* MS-CYRL */
483     { "CP866",         RTL_TEXTENCODING_IBM_866        }, /* CP866 866 */
484     { "EUCCN",         RTL_TEXTENCODING_EUC_CN         }, /* China - Simplified Chinese */
485     { "EUCJP",         RTL_TEXTENCODING_EUC_JP         }, /* Japan */
486     { "EUCKR",         RTL_TEXTENCODING_EUC_KR         }, /* Korea */
487     { "ISO8859-1",     RTL_TEXTENCODING_ISO_8859_1     }, /* Western */
488     { "ISO8859-15",    RTL_TEXTENCODING_ISO_8859_15    }, /* Western Updated (w/Euro sign) */
489     { "ISO8859-2",     RTL_TEXTENCODING_ISO_8859_2     }, /* Central European */
490     { "ISO8859-4",     RTL_TEXTENCODING_ISO_8859_4     }, /* LATIN4 L4 */
491     { "ISO8859-5",     RTL_TEXTENCODING_ISO_8859_5     }, /* Cyrillic */
492     { "ISO8859-7",     RTL_TEXTENCODING_ISO_8859_7     }, /* Greek */
493     { "ISO8859-9",     RTL_TEXTENCODING_ISO_8859_9     }, /* Turkish */
494     { "KOI8-R",        RTL_TEXTENCODING_KOI8_R         }, /* KOI8-R */
495     { "KOI8-U",        RTL_TEXTENCODING_KOI8_U         }, /* KOI8-U */
496     { "SJIS",          RTL_TEXTENCODING_SHIFT_JIS      }, /* Japan */
497     { "US-ASCII",      RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
498     { "UTF-8",         RTL_TEXTENCODING_UTF8           }  /* ISO-10646/UTF-8 */
499 };
500 
501 #elif defined(NETBSD)
502 
503 static const Pair nl_language_list[] = {
504     { "ASCII",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
505     { "BIG5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
506     { "Big5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
507     { "Big5-HKSCS",    RTL_TEXTENCODING_BIG5_HKSCS     }, /* locale: zh_CN.BIG5HK */
508     { "Big5HKSCS",     RTL_TEXTENCODING_BIG5_HKSCS     }, /* deprecated */
509     { "CP1251",        RTL_TEXTENCODING_MS_1251        }, /* MS-CYRL */
510     { "CP866",         RTL_TEXTENCODING_IBM_866        }, /* CP866 866 */
511     { "CTEXT",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
512     { "eucCN",         RTL_TEXTENCODING_EUC_CN         }, /* China - Simplified Chinese */
513     { "eucJP",         RTL_TEXTENCODING_EUC_JP         }, /* Japan */
514     { "eucKR",         RTL_TEXTENCODING_EUC_KR         }, /* Korea */
515     { "eucTW",         RTL_TEXTENCODING_EUC_TW         }, /* China - Traditional Chinese */
516     { "GB18030",       RTL_TEXTENCODING_GB_18030       }, /* locale: zh_CN.gb18030 */
517     { "GB2312",        RTL_TEXTENCODING_GB_2312        }, /* locale: zh_CN */
518     { "ISO-2022-JP",   RTL_TEXTENCODING_DONTKNOW       }, /* */
519     { "ISO-2022-JP-2", RTL_TEXTENCODING_DONTKNOW       }, /* */
520     { "ISO8859-1",     RTL_TEXTENCODING_ISO_8859_1     }, /* Western */
521     { "ISO8859-13",    RTL_TEXTENCODING_ISO_8859_13    }, /* ISO-IR-179 LATIN7 */
522     { "ISO8859-15",    RTL_TEXTENCODING_ISO_8859_15    }, /* Western Updated (w/Euro sign) */
523     { "ISO8859-2",     RTL_TEXTENCODING_ISO_8859_2     }, /* Central European */
524     { "ISO8859-4",     RTL_TEXTENCODING_ISO_8859_4     }, /* LATIN4 L4 */
525     { "ISO8859-5",     RTL_TEXTENCODING_ISO_8859_5     }, /* Cyrillic */
526     { "ISO8859-7",     RTL_TEXTENCODING_ISO_8859_7     }, /* Greek */
527     { "ISO8859-9",     RTL_TEXTENCODING_ISO_8859_9     }, /* Turkish */
528     { "KOI8-R",        RTL_TEXTENCODING_KOI8_R         }, /* KOI8-R */
529     { "KOI8-U",        RTL_TEXTENCODING_KOI8_U         }, /* KOI8-U */
530     { "PT154",         RTL_TEXTENCODING_PT154          }, /* */
531     { "SJIS",          RTL_TEXTENCODING_SHIFT_JIS      }, /* Japan */
532     { "US-ASCII",      RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
533     { "UTF-8",         RTL_TEXTENCODING_UTF8           }  /* ISO-10646/UTF-8 */
534 };
535 
536 #elif defined(OPENBSD)
537 
/*
 * Codeset-name to rtl text encoding table selected when OPENBSD is defined.
 * osl_getTextEncodingFromLocale() looks up the string returned by
 * nl_langinfo_l(CODESET, ...) in this table via pair_search().
 *
 * NOTE(review): the keys are listed in ascending order and the lookup is
 * described as a binary search elsewhere in this file, so new entries
 * presumably have to be inserted at their sorted position - confirm against
 * pair_search().
 */
static const Pair nl_language_list[] = {
    { "ASCII",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
    { "BIG5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
    { "CP1251",        RTL_TEXTENCODING_MS_1251        }, /* MS-CYRL */
    { "CP866",         RTL_TEXTENCODING_IBM_866        }, /* CP866 866 */
    { "EUCCN",         RTL_TEXTENCODING_EUC_CN         }, /* China - Simplified Chinese */
    { "EUCJP",         RTL_TEXTENCODING_EUC_JP         }, /* Japan */
    { "EUCKR",         RTL_TEXTENCODING_EUC_KR         }, /* Korea */
    { "ISO8859-1",     RTL_TEXTENCODING_ISO_8859_1     }, /* Western */
    { "ISO8859-15",    RTL_TEXTENCODING_ISO_8859_15    }, /* Western Updated (w/Euro sign) */
    { "ISO8859-2",     RTL_TEXTENCODING_ISO_8859_2     }, /* Central European */
    { "ISO8859-4",     RTL_TEXTENCODING_ISO_8859_4     }, /* LATIN4 L4 */
    { "ISO8859-5",     RTL_TEXTENCODING_ISO_8859_5     }, /* Cyrillic */
    { "ISO8859-7",     RTL_TEXTENCODING_ISO_8859_7     }, /* Greek */
    { "ISO8859-9",     RTL_TEXTENCODING_ISO_8859_9     }, /* Turkish */
    { "KOI8-R",        RTL_TEXTENCODING_KOI8_R         }, /* KOI8-R */
    { "KOI8-U",        RTL_TEXTENCODING_KOI8_U         }, /* KOI8-U */
    { "SJIS",          RTL_TEXTENCODING_SHIFT_JIS      }, /* Japan */
    { "US-ASCII",      RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
    { "UTF-8",         RTL_TEXTENCODING_UTF8           }  /* ISO-10646/UTF-8 */
};
559 
560 #endif /* ifdef __sun LINUX FREEBSD NETBSD OPENBSD */
561 
562 /*****************************************************************************
563  return the text encoding corresponding to the given locale
564  *****************************************************************************/
565 
osl_getTextEncodingFromLocale(rtl_Locale * pLocale)566 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
567 {
568     const Pair *language=nullptr;
569 
570     char  locale_buf[64] = "";
571     char  codeset_buf[64];
572 
573     char *codeset      = nullptr;
574 
575     /* default to process locale if pLocale == NULL */
576     if( pLocale == nullptr )
577         osl_getProcessLocale( &pLocale );
578 
579     /* convert rtl_Locale to locale string */
580     compose_locale( pLocale, locale_buf, 64 );
581 
582     locale_t ctype_locale = newlocale(
583         LC_CTYPE_MASK, locale_buf, static_cast<locale_t>(0));
584     if (ctype_locale == static_cast<locale_t>(0))
585     {
586         return RTL_TEXTENCODING_DONTKNOW;
587     }
588 
589     /* get the charset as indicated by the LC_CTYPE locale */
590 #if defined(NETBSD) && !defined(CODESET)
591     codeset = NULL;
592 #else
593     codeset = nl_langinfo_l(CODESET, ctype_locale);
594         // per SUSv4, the return value of nl_langinfo_l can be invalidated by a
595         // subsequent call to nl_langinfo (not nl_langinfo_l) in any thread, but
596         // we cannot guard against that (at least, no code in LO itself should
597         // call nl_langinfo)
598 #endif
599 
600     if ( codeset != nullptr )
601     {
602         /* get codeset into mt save memory */
603         strncpy( codeset_buf, codeset, sizeof(codeset_buf) );
604         codeset_buf[sizeof(codeset_buf) - 1] = 0;
605         codeset = codeset_buf;
606     }
607 
608     freelocale(ctype_locale);
609 
610     /* search the codeset in our language list */
611     if ( codeset != nullptr )
612     {
613         language = pair_search (codeset, nl_language_list, SAL_N_ELEMENTS( nl_language_list ) );
614     }
615 
616     OSL_ASSERT( language && ( RTL_TEXTENCODING_DONTKNOW != language->value ) );
617 
618     /* a matching item in our list provides a mapping from codeset to
619      * rtl-codeset */
620     if ( language != nullptr )
621         return language->value;
622 
623     return RTL_TEXTENCODING_DONTKNOW;
624 }
625 
626 /*****************************************************************************
627  return the current process locale
628  *****************************************************************************/
629 
imp_getProcessLocale(rtl_Locale ** ppLocale)630 void imp_getProcessLocale( rtl_Locale ** ppLocale )
631 {
632     char const * locale = getenv("LC_ALL");
633     if (locale == nullptr || *locale == '\0') {
634         locale = getenv("LC_CTYPE");
635         if (locale == nullptr || *locale == '\0') {
636             locale = getenv("LANG");
637             if (locale == nullptr || *locale == '\0') {
638                 locale = "C";
639             }
640         }
641     }
642     // coverity[overrun-buffer-val : FALSE] - coverity gets this very wrong
643     *ppLocale = parse_locale(locale);
644 }
645 
646 #else /* ifdef LINUX || __sun || MACOSX || NETBSD */
647 
648 /*
649  * This implementation of osl_getTextEncodingFromLocale maps
650  * from the ISO language codes.
651  */
652 
653 const Pair full_locale_list[] = {
654     { "ja_JP.eucJP",  RTL_TEXTENCODING_EUC_JP      },
655     { "ja_JP.EUC",    RTL_TEXTENCODING_EUC_JP      },
656     { "ko_KR.EUC",    RTL_TEXTENCODING_EUC_KR      },
657     { "zh_CN.EUC",    RTL_TEXTENCODING_EUC_CN      },
658     { "zh_TW.EUC",    RTL_TEXTENCODING_EUC_TW      }
659 };
660 
661 const Pair locale_extension_list[] = {
662     { "big5",         RTL_TEXTENCODING_BIG5        },
663     { "big5hk",       RTL_TEXTENCODING_BIG5_HKSCS  },
664     { "gb18030",      RTL_TEXTENCODING_GB_18030    },
665     { "euc",          RTL_TEXTENCODING_EUC_JP      },
666     { "iso8859-1",    RTL_TEXTENCODING_ISO_8859_1  },
667     { "iso8859-10",   RTL_TEXTENCODING_ISO_8859_10 },
668     { "iso8859-13",   RTL_TEXTENCODING_ISO_8859_13 },
669     { "iso8859-14",   RTL_TEXTENCODING_ISO_8859_14 },
670     { "iso8859-15",   RTL_TEXTENCODING_ISO_8859_15 },
671     { "iso8859-2",    RTL_TEXTENCODING_ISO_8859_2  },
672     { "iso8859-3",    RTL_TEXTENCODING_ISO_8859_3  },
673     { "iso8859-4",    RTL_TEXTENCODING_ISO_8859_4  },
674     { "iso8859-5",    RTL_TEXTENCODING_ISO_8859_5  },
675     { "iso8859-6",    RTL_TEXTENCODING_ISO_8859_6  },
676     { "iso8859-7",    RTL_TEXTENCODING_ISO_8859_7  },
677     { "iso8859-8",    RTL_TEXTENCODING_ISO_8859_8  },
678     { "iso8859-9",    RTL_TEXTENCODING_ISO_8859_9  },
679     { "koi8-r",       RTL_TEXTENCODING_KOI8_R      },
680     { "koi8-u",       RTL_TEXTENCODING_KOI8_U      },
681     { "pck",          RTL_TEXTENCODING_MS_932      },
682 #if (0)
683     { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW    },
684 #endif
685     { "utf-16",       RTL_TEXTENCODING_UNICODE     },
686     { "utf-7",        RTL_TEXTENCODING_UTF7        },
687     { "utf-8",        RTL_TEXTENCODING_UTF8        }
688 };
689 
/*
 * Last-resort table: two-letter language codes mapped to a default text
 * encoding. osl_getTextEncodingFromLocale() consults it with the first two
 * characters of the locale string when neither the full locale nor its
 * charset qualifier produced a match. Languages listed with
 * RTL_TEXTENCODING_DONTKNOW are known but have no default encoding assigned
 * here.
 *
 * NOTE(review): the keys are listed in ascending order and the lookup is
 * described as a binary search elsewhere in this file, so new entries
 * presumably have to be inserted at their sorted position - confirm against
 * pair_search().
 */
const Pair iso_language_list[] = {
    { "af",  RTL_TEXTENCODING_ISO_8859_1 },
    { "ar",  RTL_TEXTENCODING_ISO_8859_6 },
    { "az",  RTL_TEXTENCODING_ISO_8859_9 },
    { "be",  RTL_TEXTENCODING_ISO_8859_5 },
    { "bg",  RTL_TEXTENCODING_ISO_8859_5 },
    { "ca",  RTL_TEXTENCODING_ISO_8859_1 },
    { "cs",  RTL_TEXTENCODING_ISO_8859_2 },
    { "da",  RTL_TEXTENCODING_ISO_8859_1 },
    { "de",  RTL_TEXTENCODING_ISO_8859_1 },
    { "el",  RTL_TEXTENCODING_ISO_8859_7 },
    { "en",  RTL_TEXTENCODING_ISO_8859_1 },
    { "es",  RTL_TEXTENCODING_ISO_8859_1 },
    { "et",  RTL_TEXTENCODING_ISO_8859_4 },
    { "eu",  RTL_TEXTENCODING_ISO_8859_1 },
    { "fa",  RTL_TEXTENCODING_ISO_8859_6 },
    { "fi",  RTL_TEXTENCODING_ISO_8859_1 },
    { "fo",  RTL_TEXTENCODING_ISO_8859_1 },
    { "fr",  RTL_TEXTENCODING_ISO_8859_1 },
    { "gr",  RTL_TEXTENCODING_ISO_8859_7 },
    { "he",  RTL_TEXTENCODING_ISO_8859_8 },
    { "hi",  RTL_TEXTENCODING_DONTKNOW },
    { "hr",  RTL_TEXTENCODING_ISO_8859_2 },
    { "hu",  RTL_TEXTENCODING_ISO_8859_2 },
    { "hy",  RTL_TEXTENCODING_DONTKNOW },
    { "id",  RTL_TEXTENCODING_ISO_8859_1 },
    { "is",  RTL_TEXTENCODING_ISO_8859_1 },
    { "it",  RTL_TEXTENCODING_ISO_8859_1 },
    { "iw",  RTL_TEXTENCODING_ISO_8859_8 },
    { "ja",  RTL_TEXTENCODING_EUC_JP },
    { "ka",  RTL_TEXTENCODING_DONTKNOW },
    { "kk",  RTL_TEXTENCODING_ISO_8859_5 },
    { "ko",  RTL_TEXTENCODING_EUC_KR },
    { "lt",  RTL_TEXTENCODING_ISO_8859_4 },
    { "lv",  RTL_TEXTENCODING_ISO_8859_4 },
    { "mk",  RTL_TEXTENCODING_ISO_8859_5 },
    { "mr",  RTL_TEXTENCODING_DONTKNOW },
    { "ms",  RTL_TEXTENCODING_ISO_8859_1 },
    { "nl",  RTL_TEXTENCODING_ISO_8859_1 },
    { "no",  RTL_TEXTENCODING_ISO_8859_1 },
    { "pl",  RTL_TEXTENCODING_ISO_8859_2 },
    { "pt",  RTL_TEXTENCODING_ISO_8859_1 },
    { "ro",  RTL_TEXTENCODING_ISO_8859_2 },
    { "ru",  RTL_TEXTENCODING_ISO_8859_5 },
    { "sa",  RTL_TEXTENCODING_DONTKNOW },
    { "sk",  RTL_TEXTENCODING_ISO_8859_2 },
    { "sl",  RTL_TEXTENCODING_ISO_8859_2 },
    { "sq",  RTL_TEXTENCODING_ISO_8859_2 },
    { "sv",  RTL_TEXTENCODING_ISO_8859_1 },
    { "sw",  RTL_TEXTENCODING_ISO_8859_1 },
    { "ta",  RTL_TEXTENCODING_DONTKNOW },
    { "th",  RTL_TEXTENCODING_DONTKNOW },
    { "tr",  RTL_TEXTENCODING_ISO_8859_9 },
    { "tt",  RTL_TEXTENCODING_ISO_8859_5 },
    { "uk",  RTL_TEXTENCODING_ISO_8859_5 },
    { "ur",  RTL_TEXTENCODING_ISO_8859_6 },
    { "uz",  RTL_TEXTENCODING_ISO_8859_9 },
    { "vi",  RTL_TEXTENCODING_DONTKNOW },
    { "zh",  RTL_TEXTENCODING_BIG5 }
};
750 
751 /*****************************************************************************
752  return the text encoding corresponding to the given locale
753  *****************************************************************************/
754 
osl_getTextEncodingFromLocale(rtl_Locale * pLocale)755 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
756 {
757     const Pair *language = nullptr;
758     char locale_buf[64] = "";
759 
760     /* default to process locale if pLocale == NULL */
761     if( nullptr == pLocale )
762         osl_getProcessLocale( &pLocale );
763 
764     /* convert rtl_Locale to locale string */
765     if( compose_locale( pLocale, locale_buf, 64 ) )
766     {
767         /* check special handling list (EUC) first */
768         language = pair_search( locale_buf, full_locale_list, SAL_N_ELEMENTS( full_locale_list ) );
769 
770         if( nullptr == language )
771         {
772             /*
773              *  check if there is a charset qualifier at the end of the given locale string
774              *  e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
775              *  charset to use
776              */
777             char* cp = strrchr( locale_buf, '.' );
778 
779             if( nullptr != cp )
780             {
781                 language = pair_search( cp + 1, locale_extension_list, SAL_N_ELEMENTS( locale_extension_list ) );
782             }
783         }
784 
785         /* use iso language code to determine the charset */
786         if( nullptr == language )
787         {
788             /* iso lang codes have 2 characters */
789             locale_buf[2] = '\0';
790 
791             language = pair_search( locale_buf, iso_language_list, SAL_N_ELEMENTS( iso_language_list ) );
792         }
793     }
794 
795     /* a matching item in our list provides a mapping from codeset to
796      * rtl-codeset */
797     if ( language != nullptr )
798         return language->value;
799 
800     return RTL_TEXTENCODING_DONTKNOW;
801 }
802 
803 #if defined(MACOSX) || defined(IOS)
804 
805 /*****************************************************************************
806  return the current process locale
807  *****************************************************************************/
808 
imp_getProcessLocale(rtl_Locale ** ppLocale)809 void imp_getProcessLocale( rtl_Locale ** ppLocale )
810 {
811     OUString loc16(macosx_getLocale());
812     OString locale;
813     if (!loc16.convertToString(
814             &locale, RTL_TEXTENCODING_UTF8,
815             (RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
816              | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR)))
817     {
818         SAL_INFO("sal.osl", "Cannot convert \"" << loc16 << "\" to UTF-8");
819     }
820 
821     /* handle the case where OS specific method of finding locale fails */
822     if ( locale.isEmpty() )
823     {
824         /* simulate behavior of setlocale */
825         locale = getenv( "LC_ALL" );
826 
827         if( locale.isEmpty() )
828             locale = getenv( "LC_CTYPE" );
829 
830         if( locale.isEmpty() )
831             locale = getenv( "LANG" );
832 
833         if( locale.isEmpty() )
834             locale = "C";
835     }
836 
837     /* return the locale */
838     *ppLocale = parse_locale( locale.getStr() );
839 }
840 #else
841 /*****************************************************************************
842  return the current process locale
843  *****************************************************************************/
844 
imp_getProcessLocale(rtl_Locale ** ppLocale)845 void imp_getProcessLocale( rtl_Locale ** ppLocale )
846 {
847 #ifdef ANDROID
848     /* No locale environment variables on Android, so why even bother
849      * with getenv().
850      */
851    char const * locale = "en-US.UTF-8";
852 #else
853     /* simulate behavior off setlocale */
854     char * locale = getenv( "LC_ALL" );
855 
856     if( NULL == locale )
857         locale = getenv( "LC_CTYPE" );
858 
859     if( NULL == locale )
860         locale = getenv( "LANG" );
861 
862     if( NULL == locale )
863         locale = "C";
864 
865 #endif
866     *ppLocale = parse_locale( locale );
867 }
868 #endif
869 
870 #endif /* ifdef LINUX || __sun || MACOSX || NETBSD || AIX */
871 
872 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
873