1 /***
2 *getqloc.c - get qualified locale
3 *
4 *       Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 *       defines __acrt_get_qualified_locale - get complete locale information
8 *
9 *******************************************************************************/
10 #include <corecrt_internal.h>
11 #include <locale.h>
12 #include <stdlib.h>
13 
14 extern "C" {
15 
16 
17 
//  Bit flags accumulated in __crt_qualified_locale_data::iLocState while the
//  system locales are being enumerated.
//
//  Quality of the language/country match found so far:
#define __LOC_DEFAULT  0x001   //  matched the default language locale for the country
#define __LOC_PRIMARY  0x002   //  matched the primary language locale for the country
#define __LOC_FULL     0x004   //  language and country both matched exactly
//  Status of the requested language itself:
#define __LOC_LANGUAGE 0x100   //  language default seen
#define __LOC_EXISTS   0x200   //  language is installed
24 
25 
26 
27 //  non-NLS language string table
28 //  The three letter Windows names are non-standard and very limited and should not be used.
29 extern __crt_locale_string_table const __acrt_rg_language[]
30 {
31     { L"american",                    L"ENU" },
32     { L"american english",            L"ENU" },
33     { L"american-english",            L"ENU" },
34     { L"australian",                  L"ENA" },
35     { L"belgian",                     L"NLB" },
36     { L"canadian",                    L"ENC" },
37     { L"chh",                         L"ZHH" },
38     { L"chi",                         L"ZHI" },
39     { L"chinese",                     L"CHS" },
40     { L"chinese-hongkong",            L"ZHH" },
41     { L"chinese-simplified",          L"CHS" },
42     { L"chinese-singapore",           L"ZHI" },
43     { L"chinese-traditional",         L"CHT" },
44     { L"dutch-belgian",               L"NLB" },
45     { L"english-american",            L"ENU" },
46     { L"english-aus",                 L"ENA" },
47     { L"english-belize",              L"ENL" },
48     { L"english-can",                 L"ENC" },
49     { L"english-caribbean",           L"ENB" },
50     { L"english-ire",                 L"ENI" },
51     { L"english-jamaica",             L"ENJ" },
52     { L"english-nz",                  L"ENZ" },
53     { L"english-south africa",        L"ENS" },
54     { L"english-trinidad y tobago",   L"ENT" },
55     { L"english-uk",                  L"ENG" },
56     { L"english-us",                  L"ENU" },
57     { L"english-usa",                 L"ENU" },
58     { L"french-belgian",              L"FRB" },
59     { L"french-canadian",             L"FRC" },
60     { L"french-luxembourg",           L"FRL" },
61     { L"french-swiss",                L"FRS" },
62     { L"german-austrian",             L"DEA" },
63     { L"german-lichtenstein",         L"DEC" },
64     { L"german-luxembourg",           L"DEL" },
65     { L"german-swiss",                L"DES" },
66     { L"irish-english",               L"ENI" },
67     { L"italian-swiss",               L"ITS" },
68     { L"norwegian",                   L"NOR" },
69     { L"norwegian-bokmal",            L"NOR" },
70     { L"norwegian-nynorsk",           L"NON" },
71     { L"portuguese-brazilian",        L"PTB" },
72     { L"spanish-argentina",           L"ESS" },
73     { L"spanish-bolivia",             L"ESB" },
74     { L"spanish-chile",               L"ESL" },
75     { L"spanish-colombia",            L"ESO" },
76     { L"spanish-costa rica",          L"ESC" },
77     { L"spanish-dominican republic",  L"ESD" },
78     { L"spanish-ecuador",             L"ESF" },
79     { L"spanish-el salvador",         L"ESE" },
80     { L"spanish-guatemala",           L"ESG" },
81     { L"spanish-honduras",            L"ESH" },
82     { L"spanish-mexican",             L"ESM" },
83     { L"spanish-modern",              L"ESN" },
84     { L"spanish-nicaragua",           L"ESI" },
85     { L"spanish-panama",              L"ESA" },
86     { L"spanish-paraguay",            L"ESZ" },
87     { L"spanish-peru",                L"ESR" },
88     { L"spanish-puerto rico",         L"ESU" },
89     { L"spanish-uruguay",             L"ESY" },
90     { L"spanish-venezuela",           L"ESV" },
91     { L"swedish-finland",             L"SVF" },
92     { L"swiss",                       L"DES" },
93     { L"uk",                          L"ENG" },
94     { L"us",                          L"ENU" },
95     { L"usa",                         L"ENU" }
96 };
97 
98 //  non-NLS country/region string table
99 //  The three letter Windows names are non-standard and very limited and should not be used.
100 extern __crt_locale_string_table const __acrt_rg_country[]
101 {
102     { L"america",                     L"USA" },
103     { L"britain",                     L"GBR" },
104     { L"china",                       L"CHN" },
105     { L"czech",                       L"CZE" },
106     { L"england",                     L"GBR" },
107     { L"great britain",               L"GBR" },
108     { L"holland",                     L"NLD" },
109     { L"hong-kong",                   L"HKG" },
110     { L"new-zealand",                 L"NZL" },
111     { L"nz",                          L"NZL" },
112     { L"pr china",                    L"CHN" },
113     { L"pr-china",                    L"CHN" },
114     { L"puerto-rico",                 L"PRI" },
115     { L"slovak",                      L"SVK" },
116     { L"south africa",                L"ZAF" },
117     { L"south korea",                 L"KOR" },
118     { L"south-africa",                L"ZAF" },
119     { L"south-korea",                 L"KOR" },
120     { L"trinidad & tobago",           L"TTO" },
121     { L"uk",                          L"GBR" },
122     { L"united-kingdom",              L"GBR" },
123     { L"united-states",               L"USA" },
124     { L"us",                          L"USA" },
125 };
126 
127 // Number of entries in the language and country tables
128 extern size_t const __acrt_rg_language_count{_countof(__acrt_rg_language)};
129 extern size_t const __acrt_rg_country_count {_countof(__acrt_rg_country )};
130 
131 
132 
133 
134 //  function prototypes
135 BOOL __cdecl __acrt_get_qualified_locale(const __crt_locale_strings*, UINT*, __crt_locale_strings*);
136 static BOOL TranslateName(const __crt_locale_string_table *, int, const wchar_t **);
137 
138 static void GetLocaleNameFromLangCountry (__crt_qualified_locale_data* _psetloc_data);
139 static BOOL CALLBACK LangCountryEnumProcEx(_In_z_ LPWSTR, DWORD, LPARAM);
140 
141 static void GetLocaleNameFromLanguage (__crt_qualified_locale_data* _psetloc_data);
142 static BOOL CALLBACK LanguageEnumProcEx(_In_z_ LPWSTR, DWORD, LPARAM);
143 
144 static void GetLocaleNameFromDefault (__crt_qualified_locale_data* _psetloc_data);
145 
146 static int ProcessCodePage (LPCWSTR lpCodePageStr, __crt_qualified_locale_data* _psetloc_data);
147 static BOOL TestDefaultCountry(LPCWSTR localeName);
148 static BOOL TestDefaultLanguage (LPCWSTR localeName, BOOL bTestPrimary, __crt_qualified_locale_data* _psetloc_data);
149 
150 static int GetPrimaryLen(LPCWSTR);
151 
152 
153 /***
154 *BOOL __acrt_get_qualified_locale - return fully qualified locale
155 *
156 *Purpose:
157 *       get default locale, qualify partially complete locales
158 *
159 *Entry:
160 *       lpInStr - input strings to be qualified
161 *       lpOutStr - pointer to string locale names and codepage output
162 *
163 *Exit:
164 *       TRUE if success, qualified locale is valid
165 *       FALSE if failure
166 *
167 *Exceptions:
168 *
169 *******************************************************************************/
170 BOOL __cdecl __acrt_get_qualified_locale(const __crt_locale_strings* lpInStr, UINT* lpOutCodePage, __crt_locale_strings* lpOutStr)
171 {
172     int iCodePage;
173     __crt_qualified_locale_data* _psetloc_data = &__acrt_getptd()->_setloc_data;
174     _psetloc_data->_cacheLocaleName[0] = L'\x0'; // Initialize to invariant localename
175 
176     //  initialize pointer to call locale info routine based on operating system
177 
178     _psetloc_data->iLocState = 0;
179     _psetloc_data->pchLanguage = lpInStr->szLanguage;
180     _psetloc_data->pchCountry = lpInStr->szCountry;
181 
182     //  if country defined
183     //  convert non-NLS country strings to three-letter abbreviations
184     if (*_psetloc_data->pchCountry)
185         TranslateName(__acrt_rg_country, static_cast<int>(__acrt_rg_country_count - 1),
186                       &_psetloc_data->pchCountry);
187 
188 
189     //  if language defined ...
190     if (*_psetloc_data->pchLanguage)
191     {
192         //  and country defined
193         if (*_psetloc_data->pchCountry)
194         {
195             //  both language and country strings defined
196             //  get locale info using language and country
197             GetLocaleNameFromLangCountry(_psetloc_data);
198         }
199         else
200         {
201             //  language string defined, but country string undefined
202             //  get locale info using language only
203             GetLocaleNameFromLanguage(_psetloc_data);
204         }
205 
206         // still not done?
207         if (_psetloc_data->iLocState == 0)
208         {
209             //  first attempt failed, try substituting the language name
210             //  convert non-NLS language strings to three-letter abbrevs
211             if (TranslateName(__acrt_rg_language, static_cast<int>(__acrt_rg_language_count - 1),
212                               &_psetloc_data->pchLanguage))
213             {
214                 if (*_psetloc_data->pchCountry)
215                 {
216                     //  get locale info using language and country
217                     GetLocaleNameFromLangCountry(_psetloc_data);
218                 }
219                 else
220                 {
221                     //  get locale info using language only
222                     GetLocaleNameFromLanguage(_psetloc_data);
223                 }
224             }
225         }
226     }
227     else
228     {
229         //  language is an empty string, use the User Default locale name
230         GetLocaleNameFromDefault(_psetloc_data);
231     }
232 
233     //  test for error in locale processing
234     if (_psetloc_data->iLocState == 0)
235         return FALSE;
236 
237     //  process codepage value
238     if (lpInStr == nullptr || *lpInStr->szLanguage || *lpInStr->szCodePage )
239     {
240         // If there's no input, then get the current codepage
241         // If they explicitly chose a language, use that default codepage
242         // If they explicilty set a codepage, then use that
243         iCodePage = ProcessCodePage(lpInStr ? lpInStr->szCodePage : nullptr, _psetloc_data);
244     }
245     else
246     {
247         // No language or codepage means that they want to set to the
248         // user default settings, get that codepage (could be UTF-8)
249         iCodePage = GetACP();
250     }
251 
252     //  verify codepage validity
253     //  CP_UTF7 is unexpected and has never been previously permitted.
254     //  CP_UTF8 is the current preferred codepage
255     if (!iCodePage || iCodePage == CP_UTF7 || !IsValidCodePage((WORD)iCodePage))
256         return FALSE;
257 
258     //  set codepage
259     if (lpOutCodePage)
260     {
261         *lpOutCodePage = (UINT)iCodePage;
262     }
263 
264     //  set locale name and codepage results
265     if (lpOutStr)
266     {
267         lpOutStr->szLocaleName[0] = L'\x0'; // Init the locale name to empty string
268 
269         _ERRCHECK(wcsncpy_s(lpOutStr->szLocaleName, _countof(lpOutStr->szLocaleName), _psetloc_data->_cacheLocaleName, wcslen(_psetloc_data->_cacheLocaleName) + 1));
270 
271         // Get and store the English language lang name, to be returned to user
272         if (__acrt_GetLocaleInfoEx(lpOutStr->szLocaleName, LOCALE_SENGLISHLANGUAGENAME,
273                                     lpOutStr->szLanguage, MAX_LANG_LEN) == 0)
274             return FALSE;
275 
276         // Get and store the English language country name, to be returned to user
277         if (__acrt_GetLocaleInfoEx(lpOutStr->szLocaleName, LOCALE_SENGLISHCOUNTRYNAME,
278                                     lpOutStr->szCountry, MAX_CTRY_LEN) == 0)
279             return FALSE;
280 
281         // Special case: Both '.' and '_' are separators in string passed to setlocale,
282         // so if found in Country we use abbreviated name instead.
283         if (wcschr(lpOutStr->szCountry, L'_') || wcschr(lpOutStr->szCountry, L'.'))
284             if (__acrt_GetLocaleInfoEx(lpOutStr->szLocaleName, LOCALE_SABBREVCTRYNAME,
285                                     lpOutStr->szCountry, MAX_CTRY_LEN) == 0)
286                 return FALSE;
287 
288         if (iCodePage == CP_UTF8)
289         {
290             // We want UTF-8 to look like utf8, not 65001
291             _ERRCHECK(wcsncpy_s(lpOutStr->szCodePage, _countof(lpOutStr->szCodePage), L"utf8", 5));
292         }
293         else
294         {
295             _itow_s((int)iCodePage, (wchar_t *)lpOutStr->szCodePage, MAX_CP_LEN, 10);
296         }
297     }
298 
299     return TRUE;
300 }
301 
302 /***
303 *BOOL TranslateName - convert known non-NLS string to NLS equivalent
304 *
305 *Purpose:
306 *   Provide compatibility with existing code for non-NLS strings
307 *
308 *Entry:
309 *   lpTable  - pointer to __crt_locale_string_table used for translation
310 *   high     - maximum index of table (size - 1)
311 *   ppchName - pointer to pointer of string to translate
312 *
313 *Exit:
314 *   ppchName - pointer to pointer of string possibly translated
315 *   TRUE if string translated, FALSE if unchanged
316 *
317 *Exceptions:
318 *
319 *******************************************************************************/
320 static BOOL TranslateName (
321     const __crt_locale_string_table * lpTable,
322     int               high,
323     const wchar_t**   ppchName)
324 {
325     int     i;
326     int     cmp = 1;
327     int     low = 0;
328 
329     //  typical binary search - do until no more to search or match
330     while (low <= high && cmp != 0)
331     {
332         i = (low + high) / 2;
333         cmp = _wcsicmp(*ppchName, (const wchar_t *)(*(lpTable + i)).szName);
334 
335         if (cmp == 0)
336             *ppchName = (*(lpTable + i)).chAbbrev;
337         else if (cmp < 0)
338             high = i - 1;
339         else
340             low = i + 1;
341     }
342 
343     return !cmp;
344 }
345 
346 /***
347 *void GetLocaleNameFromLangCountry - get locale names from language and country strings
348 *
349 *Purpose:
350 *   Match the best locale names to the language and country string given.
351 *   After global variables are initialized, the LangCountryEnumProcEx
352 *   routine is registered as an EnumSystemLocalesEx callback to actually
353 *   perform the matching as the locale names are enumerated.
354 *
355 *
356 *WARNING:
357 *   This depends on an exact match with a localized string that can change!
358 *   It is strongly recommended that locales be selected with valid BCP-47
359 *   tags instead of the English names.
360 *
361 *   This API is also very brute-force and resource intensive, reading in all
362 *   of the locales, forcing them to be cached, and looking up their names.
363 *
364 *WARNING:
365 *   In the event of a 2 or 3 letter friendly name (Asu, Edo, Ewe, Yi, ...)
366 *   then this function will fail
367 *
368 *Entry:
369 *   pchLanguage     - language string
370 *   bAbbrevLanguage - language string is a three-letter abbreviation
371 *   pchCountry      - country string
372 *   bAbbrevCountry  - country string ia a three-letter abbreviation
373 *   iPrimaryLen     - length of language string with primary name
374 *
375 *Exit:
376 *   localeName - locale name of given language and country
377 *
378 *Exceptions:
379 *
380 *******************************************************************************/
381 static void GetLocaleNameFromLangCountry (__crt_qualified_locale_data* _psetloc_data)
382 {
383     //  initialize static variables for callback use
384     _psetloc_data->bAbbrevLanguage = wcslen(_psetloc_data->pchLanguage) == 3;
385     _psetloc_data->bAbbrevCountry = wcslen(_psetloc_data->pchCountry) == 3;
386 
387     _psetloc_data->iPrimaryLen = _psetloc_data->bAbbrevLanguage ?
388                              2 : GetPrimaryLen(_psetloc_data->pchLanguage);
389 
390     // Enumerate all locales that come with the operating system,
391     // including replacement locales, but excluding alternate sorts.
392     __acrt_EnumSystemLocalesEx(LangCountryEnumProcEx, LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL, 0, nullptr);
393 
394     //  locale value is invalid if the language was not installed or the language
395     //  was not available for the country specified
396     if (!(_psetloc_data->iLocState & __LOC_LANGUAGE) ||
397         !(_psetloc_data->iLocState & __LOC_EXISTS) ||
398         !(_psetloc_data->iLocState & (__LOC_FULL |
399                                     __LOC_PRIMARY |
400                                     __LOC_DEFAULT)))
401         _psetloc_data->iLocState = 0;
402 }
403 
404 /***
405 *BOOL CALLBACK LangCountryEnumProcEx - callback routine for GetLocaleNameFromLangCountry
406 *
407 *Purpose:
408 *   Determine if locale name given matches the language in pchLanguage
409 *   and country in pchCountry.
410 *
411 *Entry:
412 *   lpLocaleString   - pointer to locale name string string
413 *   pchCountry     - pointer to country name
414 *   bAbbrevCountry - set if country is three-letter abbreviation
415 *
416 *Exit:
417 *   iLocState   - status of match
418 *       __LOC_FULL - both language and country match (best match)
419 *       __LOC_PRIMARY - primary language and country match (better)
420 *       __LOC_DEFAULT - default language and country match (good)
421 *       __LOC_LANGUAGE - default primary language exists
422 *       __LOC_EXISTS - full match of language string exists
423 *       (Overall match occurs for the best of FULL/PRIMARY/DEFAULT
424 *        and LANGUAGE/EXISTS both set.)
425 *   localeName - lpLocaleString matched
426 *   FALSE if match occurred to terminate enumeration, else TRUE.
427 *
428 *Exceptions:
429 *
430 *******************************************************************************/
431 static BOOL CALLBACK LangCountryEnumProcEx(LPWSTR lpLocaleString, DWORD dwFlags, LPARAM lParam)
432 {
433     UNREFERENCED_PARAMETER(dwFlags);
434     UNREFERENCED_PARAMETER(lParam);
435 
436     __crt_qualified_locale_data* _psetloc_data = &__acrt_getptd()->_setloc_data;
437     wchar_t  rgcInfo[MAX_LANG_LEN]; // MAX_LANG_LEN == MAX_CTRY_LEN == 64
438 
439     //  test locale country against input value
440     if (__acrt_GetLocaleInfoEx(lpLocaleString,
441                                _psetloc_data->bAbbrevCountry ? LOCALE_SABBREVCTRYNAME : LOCALE_SENGLISHCOUNTRYNAME,
442                                rgcInfo, _countof(rgcInfo)) == 0)
443     {
444         //  set error condition and exit
445         _psetloc_data->iLocState = 0;
446         return TRUE;
447     }
448 
449     //  if country names matched
450     if (_wcsicmp(_psetloc_data->pchCountry, rgcInfo) == 0)
451     {
452         //  test for language match
453         if (__acrt_GetLocaleInfoEx(lpLocaleString,
454                                    _psetloc_data->bAbbrevLanguage ?
455                                    LOCALE_SABBREVLANGNAME : LOCALE_SENGLISHLANGUAGENAME,
456                                    rgcInfo, _countof(rgcInfo)) == 0)
457         {
458             //  set error condition and exit
459             _psetloc_data->iLocState = 0;
460             return TRUE;
461         }
462 
463         if (_wcsicmp(_psetloc_data->pchLanguage, rgcInfo) == 0)
464         {
465             //  language matched also - set state and value
466             //  this is the best match
467             _psetloc_data->iLocState |= (__LOC_FULL |
468                                        __LOC_LANGUAGE |
469                                        __LOC_EXISTS);
470 
471             _ERRCHECK(wcsncpy_s(_psetloc_data->_cacheLocaleName, _countof(_psetloc_data->_cacheLocaleName), lpLocaleString, wcslen(lpLocaleString) + 1));
472         }
473         //  test if match already for primary langauage
474         else if (!(_psetloc_data->iLocState & __LOC_PRIMARY))
475         {
476             //  if not, use _psetloc_data->iPrimaryLen to partial match language string
477             if (_psetloc_data->iPrimaryLen && !_wcsnicmp(_psetloc_data->pchLanguage, rgcInfo, _psetloc_data->iPrimaryLen))
478             {
479                 //  primary language matched - set locale name
480                 _psetloc_data->iLocState |= __LOC_PRIMARY;
481                 _ERRCHECK(wcsncpy_s(_psetloc_data->_cacheLocaleName, _countof(_psetloc_data->_cacheLocaleName), lpLocaleString, wcslen(lpLocaleString) + 1));
482             }
483 
484             //  test if default language already defined
485             else if (!(_psetloc_data->iLocState & __LOC_DEFAULT))
486             {
487                 //  if not, test if locale language is default for country
488                 if (TestDefaultCountry(lpLocaleString))
489                 {
490                     //  default language for country - set state, value
491                     _psetloc_data->iLocState |= __LOC_DEFAULT;
492                     _ERRCHECK(wcsncpy_s(_psetloc_data->_cacheLocaleName, _countof(_psetloc_data->_cacheLocaleName), lpLocaleString, wcslen(lpLocaleString) + 1));
493                 }
494             }
495         }
496     }
497 
498     //  test if input language both exists and default primary language defined
499     if ((_psetloc_data->iLocState & (__LOC_LANGUAGE | __LOC_EXISTS)) !=
500                       (__LOC_LANGUAGE | __LOC_EXISTS))
501     {
502         //  test language match to determine whether it is installed
503         if (__acrt_GetLocaleInfoEx(lpLocaleString, _psetloc_data->bAbbrevLanguage ? LOCALE_SABBREVLANGNAME
504                                                                            : LOCALE_SENGLISHLANGUAGENAME,
505                            rgcInfo, _countof(rgcInfo)) == 0)
506         {
507             //  set error condition and exit
508             _psetloc_data->iLocState = 0;
509             return TRUE;
510         }
511 
512         // the input language matches
513         if (_wcsicmp(_psetloc_data->pchLanguage, rgcInfo) == 0)
514         {
515             //  language matched - set bit for existance
516             _psetloc_data->iLocState |= __LOC_EXISTS;
517 
518             if (_psetloc_data->bAbbrevLanguage)
519             {
520                 //  abbreviation - set state
521                 //  also set language locale name if not set already
522                 _psetloc_data->iLocState |= __LOC_LANGUAGE;
523                 if (!_psetloc_data->_cacheLocaleName[0])
524                     _ERRCHECK(wcsncpy_s(_psetloc_data->_cacheLocaleName, _countof(_psetloc_data->_cacheLocaleName), lpLocaleString, wcslen(lpLocaleString) + 1));
525             }
526 
527             //  test if language is primary only (no sublanguage)
528             else if (_psetloc_data->iPrimaryLen && ((int)wcslen(_psetloc_data->pchLanguage) == _psetloc_data->iPrimaryLen))
529             {
530                 //  primary language only - test if default locale name
531                 if (TestDefaultLanguage(lpLocaleString, TRUE, _psetloc_data))
532                 {
533                     //  default primary language - set state
534                     //  also set locale name if not set already
535                     _psetloc_data->iLocState |= __LOC_LANGUAGE;
536                     if (!_psetloc_data->_cacheLocaleName[0])
537                         _ERRCHECK(wcsncpy_s(_psetloc_data->_cacheLocaleName, _countof(_psetloc_data->_cacheLocaleName), lpLocaleString, wcslen(lpLocaleString) + 1));
538                 }
539             }
540             else
541             {
542                 //  language with sublanguage - set state
543                 //  also set locale name if not set already
544                 _psetloc_data->iLocState |= __LOC_LANGUAGE;
545                 if (!_psetloc_data->_cacheLocaleName[0])
546                     _ERRCHECK(wcsncpy_s(_psetloc_data->_cacheLocaleName, _countof(_psetloc_data->_cacheLocaleName), lpLocaleString, wcslen(lpLocaleString) + 1));
547             }
548         }
549     }
550 
551     //  if LOCALE_FULL set, return FALSE to stop enumeration,
552     //  else return TRUE to continue
553     return (_psetloc_data->iLocState & __LOC_FULL) == 0;
554 }
555 
556 /***
557 *void GetLocaleNameFromLanguage - get locale name from language string
558 *
559 *Purpose:
560 *   Match the best locale name to the language string given.  After global
561 *   variables are initialized, the LanguageEnumProcEx routine is
562 *   registered as an EnumSystemLocalesEx callback to actually perform
563 *   the matching as the locale names are enumerated.
564 *
565 *WARNING:
566 *   This depends on an exact match with a localized string that can change!
567 *   It is strongly recommended that locales be selected with valid BCP-47
568 *   tags instead of the English names.
569 *
570 *   This API is also very brute-force and resource intensive, reading in all
571 *   of the locales, forcing them to be cached, and looking up their names.
572 *
573 *WARNING:
574 *   In the event of a 3 letter BCP-47 tag that happens to match a Windows
575 *   propriatary language code, this function will return the wrong answer!
576 *
577 *WARNING:
578 *   In the event of a 2 or 3 letter friendly name (Asu, Edo, Ewe, Yi, ...)
579 *   then this function will fail
580 *
581 *Entry:
582 *   pchLanguage     - language string
583 *   bAbbrevLanguage - language string is a three-letter abbreviation
584 *   iPrimaryLen     - length of language string with primary name
585 *
586 *Exit:
587 *   localeName - locale name of language with default country
588 *
589 *Exceptions:
590 *
591 *******************************************************************************/
592 static void GetLocaleNameFromLanguage (__crt_qualified_locale_data* _psetloc_data)
593 {
594     //  initialize static variables for callback use
595     _psetloc_data->bAbbrevLanguage = wcslen(_psetloc_data->pchLanguage) == 3;
596     _psetloc_data->iPrimaryLen = _psetloc_data->bAbbrevLanguage ? 2 : GetPrimaryLen(_psetloc_data->pchLanguage);
597 
598     // Enumerate all locales that come with the operating system, including replacement locales,
599     // but excluding alternate sorts.
600     __acrt_EnumSystemLocalesEx(LanguageEnumProcEx, LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL, 0, nullptr);
601 
602     //  locale value is invalid if the language was not installed
603     //  or the language was not available for the country specified
604     if ((_psetloc_data->iLocState & __LOC_FULL) == 0)
605         _psetloc_data->iLocState = 0;
606 }
607 
608 /***
609 *BOOL CALLBACK LanguageEnumProcEx - callback routine for GetLocaleNameFromLanguage
610 *
611 *Purpose:
612 *   Determine if locale name given matches the default country for the
613 *   language in pchLanguage.
614 *
615 *Entry:
616 *   lpLocaleString    - pointer to string with locale name
617 *   dwFlags     - not used
618 *   lParam      - not used
619 *
620 *Exit:
621 *   localeName - locale name matched
622 *   FALSE if match occurred to terminate enumeration, else TRUE.
623 *
624 *Exceptions:
625 *
626 *******************************************************************************/
627 static BOOL CALLBACK LanguageEnumProcEx (LPWSTR lpLocaleString, DWORD dwFlags, LPARAM lParam)
628 {
629     UNREFERENCED_PARAMETER(dwFlags);
630     UNREFERENCED_PARAMETER(lParam);
631 
632     __crt_qualified_locale_data* _psetloc_data = &__acrt_getptd()->_setloc_data;
633     wchar_t    rgcInfo[120];
634 
635     //  test locale for language specified
636     if (__acrt_GetLocaleInfoEx(lpLocaleString, _psetloc_data->bAbbrevLanguage ? LOCALE_SABBREVLANGNAME
637                                                                               : LOCALE_SENGLISHLANGUAGENAME,
638                                rgcInfo, _countof(rgcInfo)) == 0)
639     {
640         //  set error condition and exit
641         _psetloc_data->iLocState = 0;
642         return TRUE;
643     }
644 
645     if (_wcsicmp(_psetloc_data->pchLanguage, rgcInfo) == 0)
646     {
647         //  language matches
648         _ERRCHECK(wcsncpy_s(_psetloc_data->_cacheLocaleName, _countof(_psetloc_data->_cacheLocaleName), lpLocaleString, wcslen(lpLocaleString) + 1));
649 
650         _psetloc_data->iLocState |= __LOC_FULL;
651     }
652 
653     return (_psetloc_data->iLocState & __LOC_FULL) == 0;
654 }
655 
656 
657 /***
658 *void GetLocaleNameFromDefault - get default locale names
659 *
660 *Purpose:
661 *   Set both language and country locale names to the user default.
662 *
663 *Entry:
664 *   None.
665 *
666 *Exceptions:
667 *
668 *******************************************************************************/
669 static void GetLocaleNameFromDefault (__crt_qualified_locale_data* _psetloc_data)
670 {
671     wchar_t localeName[LOCALE_NAME_MAX_LENGTH];
672     _psetloc_data->iLocState |= (__LOC_FULL | __LOC_LANGUAGE);
673 
674     // Store the default user locale name. The returned buffer size includes the
675     // terminating null character, so only store if the size returned is > 1
676     if (__acrt_GetUserDefaultLocaleName(localeName, LOCALE_NAME_MAX_LENGTH) > 1)
677     {
678         _ERRCHECK(wcsncpy_s(_psetloc_data->_cacheLocaleName, _countof(_psetloc_data->_cacheLocaleName), localeName, wcslen(localeName) + 1));
679     }
680 }
681 
682 /***
683 *int ProcessCodePage - convert codepage string to numeric value
684 *
685 *Purpose:
686 *   Process codepage string consisting of a decimal string, or the
687 *   special case strings "ACP" and "OCP", for ANSI and OEM codepages,
688 *   respectively.  Null pointer or string returns the ANSI codepage.
689 *
690 *Entry:
691 *   lpCodePageStr - pointer to codepage string
692 *
693 *Exit:
694 *   Returns numeric value of codepage, zero if GetLocaleInfoEx failed.
695 *   (which then would mean caller aborts and locale is not set)
696 *
697 *Exceptions:
698 *
699 *******************************************************************************/
700 static int ProcessCodePage (LPCWSTR lpCodePageStr, __crt_qualified_locale_data* _psetloc_data)
701 {
702     int iCodePage;
703 
704     if (!lpCodePageStr || !*lpCodePageStr || wcscmp(lpCodePageStr, L"ACP") == 0)
705     {
706         //  get ANSI codepage for the country locale name
707         //  CONSIDER: If system is running UTF-8 ACP, then always return UTF-8?
708         if (__acrt_GetLocaleInfoEx(_psetloc_data->_cacheLocaleName, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
709                                  (LPWSTR) &iCodePage, sizeof(iCodePage) / sizeof(wchar_t)) == 0)
710             return 0;
711 
712         // Locales with no code page ("Unicode only locales") should return UTF-8
713         // (0, 1 & 2 are Unicode-Only ACP, OEMCP & MacCP flags)
714         if (iCodePage < 3)
715         {
716             return CP_UTF8;
717         }
718 
719     }
720     else if (_wcsicmp(lpCodePageStr, L"utf8") == 0 ||
721              _wcsicmp(lpCodePageStr, L"utf-8") == 0)
722     {
723         // Use UTF-8
724         return CP_UTF8;
725     }
726     else if (wcscmp(lpCodePageStr, L"OCP") == 0)
727     {
728         //  get OEM codepage for the country locale name
729         //  CONSIDER: If system is running UTF-8 ACP, then always return UTF-8?
730         if (__acrt_GetLocaleInfoEx(_psetloc_data->_cacheLocaleName, LOCALE_IDEFAULTCODEPAGE | LOCALE_RETURN_NUMBER,
731                                  (LPWSTR) &iCodePage, sizeof(iCodePage) / sizeof(wchar_t)) == 0)
732             return 0;
733 
734         // Locales with no code page ("unicode only locales") should return UTF-8
735         // (0, 1 & 2 are Unicode-Only ACP, OEMCP & MacCP flags)
736         if (iCodePage < 3)
737         {
738             return CP_UTF8;
739         }
740     }
741     else
742     {
743          // convert decimal string to numeric value
744          iCodePage = (int)_wtol(lpCodePageStr);
745     }
746 
747     return iCodePage;
748 }
749 
750 /***
751 *BOOL TestDefaultCountry - determine if default locale for country
752 *
753 *Purpose:
754 *   Determine if the locale of the given locale name has the default sublanguage.
755 *   This is determined by checking if the given language is neutral.
756 *
757 *Entry:
758 *   localeName - name of locale to test
759 *
760 *Exit:
761 *   Returns TRUE if default sublanguage, else FALSE.
762 *
763 *Exceptions:
764 *
765 *******************************************************************************/
766 static BOOL TestDefaultCountry (LPCWSTR localeName)
767 {
768     wchar_t sIso639LangName[9]; // The maximum length for LOCALE_SISO3166CTRYNAME
769                                 // is 9 including nullptr
770 
771     //  Get 2-letter ISO Standard 639 or 3-letter ISO 639-2 value
772     if (__acrt_GetLocaleInfoEx(localeName, LOCALE_SISO639LANGNAME,
773                                 sIso639LangName, _countof(sIso639LangName)) == 0)
774         return FALSE;
775 
776     // Determine if this is a neutral language
777     if (wcsncmp(sIso639LangName, localeName, _countof(sIso639LangName)) == 0)
778         return TRUE;
779 
780     return FALSE;
781 }
782 
783 /***
784 *BOOL TestDefaultLanguage - determine if default locale for language
785 *
786 *Purpose:
787 *   Determines if the given locale name has the default sublanguage.
788 *   If bTestPrimary is set, also allow TRUE when string contains an
789 *   implicit sublanguage.
790 *
791 *Entry:
792 *   localeName         - locale name of locale to test
793 *   bTestPrimary - set if testing if language is primary
794 *
795 *Exit:
796 *   Returns TRUE if sublanguage is default for locale tested.
797 *   If bTestPrimary set, TRUE is language has implied sublanguge.
798 *
799 *Exceptions:
800 *
801 *******************************************************************************/
802 static BOOL TestDefaultLanguage(LPCWSTR localeName, BOOL bTestPrimary, __crt_qualified_locale_data* _psetloc_data)
803 {
804     if (!TestDefaultCountry (localeName))
805     {
806         //  test if string contains an implicit sublanguage by
807         //  having a character other than upper/lowercase letters.
808         if (bTestPrimary && GetPrimaryLen(_psetloc_data->pchLanguage) == (int)wcslen(_psetloc_data->pchLanguage))
809             return FALSE;
810     }
811 
812     return TRUE;
813 }
814 
/***
*int GetPrimaryLen - get length of primary language name
*
*Purpose:
*   Determine primary language string length by counting the leading
*   characters in the ranges 'A'-'Z' and 'a'-'z'.
*
*Entry:
*   pchLanguage - string to scan (may be a null pointer)
*
*Exit:
*   Returns length of primary language string; zero for a null pointer
*   or when the first character is not a letter.
*
*Exceptions:
*
*******************************************************************************/
static int GetPrimaryLen(LPCWSTR pchLanguage)
{
    int count = 0;

    if (!pchLanguage)
    {
        return 0;
    }

    //  the terminating null is not a letter, so the scan always stops
    for (;;)
    {
        const wchar_t current = pchLanguage[count];
        const int isUpper = (current >= L'A' && current <= L'Z');
        const int isLower = (current >= L'a' && current <= L'z');

        if (!isUpper && !isLower)
        {
            break;
        }

        ++count;
    }

    return count;
}
848 
849 
850 
851 } // extern "C"
852