1 /*
2  **********************************************************************
3  *   Copyright (C) 1996-2016, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  *
7  * Provides functionality for mapping between
8  * LCID and Posix IDs or ICU locale to codepage
9  *
10  * Note: All classes and code in this file are
11  *       intended for internal use only.
12  *
13  * Methods of interest:
14  *   unsigned long convertToLCID(const char*);
15  *   const char* convertToPosix(unsigned long);
16  *
17  * Kathleen Wilson, 4/30/96
18  *
19  *  Date        Name        Description
20  *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
21  *                          setId() method and safety check against
22  *                          MAX_ID_LENGTH.
23  * 04/23/99     stephen     Added C wrapper for convertToPosix.
24  * 09/18/00     george      Removed the memory leaks.
25  * 08/23/01     george      Convert to C
26  */
27 
28 #include "locmap.h"
29 #include "cstring.h"
30 #include "cmemory.h"
31 
32 #if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
33 /*
34  * TODO: It seems like we should widen this to
35  * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
36  * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
37  * but those use gcc and won't have defined(_MSC_VER).
38  * We might need to #include some Windows header and test for some version macro from there.
39  * Or call some Windows function and see what it returns.
40  */
41 #define USE_WINDOWS_LOCALE_API
42 #endif
43 
44 #ifdef USE_WINDOWS_LOCALE_API
45 #include <windows.h>
46 #include <winnls.h>
47 #endif
48 
49 /*
50  * Note:
51  * The mapping from Win32 locale ID numbers to POSIX locale strings should
52  * be the faster one.
53  *
54  * Many LCID values come from winnt.h
55  * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
56  */
57 
58 /*
59 ////////////////////////////////////////////////
60 //
61 // Internal Classes for LCID <--> POSIX Mapping
62 //
63 /////////////////////////////////////////////////
64 */
65 
/* One row of a mapping table: a Windows LCID paired with a POSIX locale ID. */
typedef struct ILcidPosixElement
{
    uint32_t const hostID;          /* LCID in host format, e.g. 0x0409 */
    char const * const posixID;     /* matching POSIX ID string, e.g. "en_US" */
} ILcidPosixElement;
71 
/* A counted view of one ILcidPosixElement table. */
typedef struct ILcidPosixMap
{
    uint32_t const numRegions;                          /* number of rows in regionMaps */
    struct ILcidPosixElement const * const regionMaps;  /* first row of the table */
} ILcidPosixMap;
77 
78 
79 /*
80 /////////////////////////////////////////////////
81 //
82 // Easy macros to make the LCID <--> POSIX Mapping
83 //
84 /////////////////////////////////////////////////
85 */
86 
/**
 * Defines the two-row table locmap_<lang> for a language that has a single
 * LCID: row 0 is the language alone (its ID is LANGUAGE_LCID(lcid)), row 1
 * is the full language_TERRITORY entry. The expansion ends with its own
 * semicolon, so invocations are written without one.
 * @param lcid LCID in host format such as 0x044d
 * @param lang posix ID of just the language such as 'de'
 * @param locale posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(lcid, lang, locale) \
static const ILcidPosixElement locmap_ ## lang [] = { \
    { LANGUAGE_LCID(lcid), #lang },     /* parent locale */ \
    { lcid, #locale }, \
};
100 
/**
 * Opens the definition of the table locmap_<tag>; the caller supplies the
 * brace-enclosed initializer and the closing semicolon.
 * @param tag the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(tag) \
static const ILcidPosixElement locmap_ ## tag [] =
107 
108 
/**
 * Builds the ILcidPosixMap initializer {row count, table} for locmap_<tag>.
 * The argument must be the same token that was used to define the table,
 * and the table's first row is expected to be the language-only entry.
 * @param tag the POSIX ID this entry's table was declared with.
 */
#define ILCID_POSIX_MAP(tag) \
    { UPRV_LENGTHOF(locmap_ ## tag), locmap_ ## tag }
117 
118 /*
119 ////////////////////////////////////////////
120 //
121 // Create the table of LCID to POSIX Mapping
122 // None of it should be dynamically created.
123 //
124 // Keep static locale variables inside the function so that
125 // it can be created properly during static init.
126 //
// Note: This table should be updated periodically. Check the National Language Support API Reference Website.
128 //       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
129 //       maintained for support of older Windows version.
130 //       Update: Windows 7 (091130)
131 //
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
133 //       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
134 //       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
135 //       to support other keywords in this mapping data, we must update the implementation.
136 ////////////////////////////////////////////
137 */
138 
139 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
140 
ILCID_POSIX_SUBTABLE(ar)141 ILCID_POSIX_SUBTABLE(ar) {
142     {0x01,   "ar"},
143     {0x3801, "ar_AE"},
144     {0x3c01, "ar_BH"},
145     {0x1401, "ar_DZ"},
146     {0x0c01, "ar_EG"},
147     {0x0801, "ar_IQ"},
148     {0x2c01, "ar_JO"},
149     {0x3401, "ar_KW"},
150     {0x3001, "ar_LB"},
151     {0x1001, "ar_LY"},
152     {0x1801, "ar_MA"},
153     {0x1801, "ar_MO"},
154     {0x2001, "ar_OM"},
155     {0x4001, "ar_QA"},
156     {0x0401, "ar_SA"},
157     {0x2801, "ar_SY"},
158     {0x1c01, "ar_TN"},
159     {0x2401, "ar_YE"}
160 };
161 
162 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
163 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
164 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
165 
ILCID_POSIX_SUBTABLE(az)166 ILCID_POSIX_SUBTABLE(az) {
167     {0x2c,   "az"},
168     {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
169     {0x742c, "az_Cyrl"},  /* Cyrillic based */
170     {0x042c, "az_Latn_AZ"}, /* Latin based */
171     {0x782c, "az_Latn"}, /* Latin based */
172     {0x042c, "az_AZ"} /* Latin based */
173 };
174 
175 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
176 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
177 
178 /*ILCID_POSIX_SUBTABLE(ber) {
179     {0x5f,   "ber"},
180     {0x045f, "ber_Arab_DZ"},
181     {0x045f, "ber_Arab"},
182     {0x085f, "ber_Latn_DZ"},
183     {0x085f, "ber_Latn"}
184 };*/
185 
186 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
187 
188 ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG)
189 
ILCID_POSIX_SUBTABLE(bn)190 ILCID_POSIX_SUBTABLE(bn) {
191     {0x45,   "bn"},
192     {0x0845, "bn_BD"},
193     {0x0445, "bn_IN"}
194 };
195 
ILCID_POSIX_SUBTABLE(bo)196 ILCID_POSIX_SUBTABLE(bo) {
197     {0x51,   "bo"},
198     {0x0851, "bo_BT"},
199     {0x0451, "bo_CN"},
200     {0x0c51, "dz_BT"}
201 };
202 
203 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
204 
ILCID_POSIX_SUBTABLE(ca)205 ILCID_POSIX_SUBTABLE(ca) {
206     {0x03,   "ca"},
207     {0x0403, "ca_ES"},
208     {0x0803, "ca_ES_VALENCIA"}
209 };
210 
211 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
212 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
213 
ILCID_POSIX_SUBTABLE(ckb)214 ILCID_POSIX_SUBTABLE(ckb) {
215     {0x92,   "ckb"},
216     {0x92,   "ku"},
217     {0x7c92, "ckb_Arab"},
218     {0x7c92, "ku_Arab"},
219     {0x0492, "ckb_Arab_IQ"},
220     {0x0492, "ku_Arab_IQ"}
221 };
222 
223 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
224 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
225 
226 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
227 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
228 
ILCID_POSIX_SUBTABLE(de)229 ILCID_POSIX_SUBTABLE(de) {
230     {0x07,   "de"},
231     {0x0c07, "de_AT"},
232     {0x0807, "de_CH"},
233     {0x0407, "de_DE"},
234     {0x1407, "de_LI"},
235     {0x1007, "de_LU"},
236     {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
237     {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
238 };
239 
240 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
241 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
242 
ILCID_POSIX_SUBTABLE(en)243 ILCID_POSIX_SUBTABLE(en) {
244     {0x09,   "en"},
245     {0x0c09, "en_AU"},
246     {0x2809, "en_BZ"},
247     {0x1009, "en_CA"},
248     {0x0809, "en_GB"},
249     {0x3c09, "en_HK"},
250     {0x3809, "en_ID"},
251     {0x1809, "en_IE"},
252     {0x4009, "en_IN"},
253     {0x2009, "en_JM"},
254     {0x4409, "en_MY"},
255     {0x1409, "en_NZ"},
256     {0x3409, "en_PH"},
257     {0x4809, "en_SG"},
258     {0x2C09, "en_TT"},
259     {0x0409, "en_US"},
260     {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
261     {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). */
262     {0x1c09, "en_ZA"},
263     {0x3009, "en_ZW"},
264     {0x2409, "en_029"},
265     {0x0409, "en_AS"},  /* Alias for en_US. Leave last. */
266     {0x0409, "en_GU"},  /* Alias for en_US. Leave last. */
267     {0x0409, "en_MH"},  /* Alias for en_US. Leave last. */
268     {0x0409, "en_MP"},  /* Alias for en_US. Leave last. */
269     {0x0409, "en_UM"}   /* Alias for en_US. Leave last. */
270 };
271 
ILCID_POSIX_SUBTABLE(en_US_POSIX)272 ILCID_POSIX_SUBTABLE(en_US_POSIX) {
273     {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
274 };
275 
ILCID_POSIX_SUBTABLE(es)276 ILCID_POSIX_SUBTABLE(es) {
277     {0x0a,   "es"},
278     {0x2c0a, "es_AR"},
279     {0x400a, "es_BO"},
280     {0x340a, "es_CL"},
281     {0x240a, "es_CO"},
282     {0x140a, "es_CR"},
283     {0x5c0a, "es_CU"},
284     {0x1c0a, "es_DO"},
285     {0x300a, "es_EC"},
286     {0x0c0a, "es_ES"},      /*Modern sort.*/
287     {0x100a, "es_GT"},
288     {0x480a, "es_HN"},
289     {0x080a, "es_MX"},
290     {0x4c0a, "es_NI"},
291     {0x180a, "es_PA"},
292     {0x280a, "es_PE"},
293     {0x500a, "es_PR"},
294     {0x3c0a, "es_PY"},
295     {0x440a, "es_SV"},
296     {0x540a, "es_US"},
297     {0x380a, "es_UY"},
298     {0x200a, "es_VE"},
299     {0x580a, "es_419"},
300     {0x040a, "es_ES@collation=traditional"},
301     {0x040a, "es@collation=traditional"}
302 };
303 
304 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
305 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
306 
307 /* ISO-639 doesn't distinguish between Persian and Dari.*/
ILCID_POSIX_SUBTABLE(fa)308 ILCID_POSIX_SUBTABLE(fa) {
309     {0x29,   "fa"},
310     {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
311     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
312 };
313 
314 /* duplicate for roundtripping */
ILCID_POSIX_SUBTABLE(fa_AF)315 ILCID_POSIX_SUBTABLE(fa_AF) {
316     {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
317     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
318 };
319 
ILCID_POSIX_SUBTABLE(ff)320 ILCID_POSIX_SUBTABLE(ff) {
321     {0x67,   "ff"},
322     {0x7c67, "ff_Latn"},
323     {0x0867, "ff_Latn_SN"},
324     {0x0467, "ff_NG"}
325 };
326 
327 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
328 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
329 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
330 
ILCID_POSIX_SUBTABLE(fr)331 ILCID_POSIX_SUBTABLE(fr) {
332     {0x0c,   "fr"},
333     {0x080c, "fr_BE"},
334     {0x0c0c, "fr_CA"},
335     {0x240c, "fr_CD"},
336     {0x240c, "fr_CG"},
337     {0x100c, "fr_CH"},
338     {0x300c, "fr_CI"},
339     {0x2c0c, "fr_CM"},
340     {0x040c, "fr_FR"},
341     {0x3c0c, "fr_HT"},
342     {0x140c, "fr_LU"},
343     {0x380c, "fr_MA"},
344     {0x180c, "fr_MC"},
345     {0x340c, "fr_ML"},
346     {0x200c, "fr_RE"},
347     {0x280c, "fr_SN"},
348     {0xe40c, "fr_015"},
349     {0x1c0c, "fr_029"}
350 };
351 
352 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
353 
354 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
355 
ILCID_POSIX_SUBTABLE(ga)356 ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
357     {0x3c,   "ga"},
358     {0x083c, "ga_IE"},
359     {0x043c, "gd_GB"}
360 };
361 
ILCID_POSIX_SUBTABLE(gd)362 ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
363     {0x91,   "gd"},
364     {0x0491, "gd_GB"}
365 };
366 
367 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
368 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
369 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
370 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
371 
ILCID_POSIX_SUBTABLE(ha)372 ILCID_POSIX_SUBTABLE(ha) {
373     {0x68,   "ha"},
374     {0x7c68, "ha_Latn"},
375     {0x0468, "ha_Latn_NG"},
376 };
377 
378 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
379 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
380 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
381 
382 /* This LCID is really four different locales.*/
ILCID_POSIX_SUBTABLE(hr)383 ILCID_POSIX_SUBTABLE(hr) {
384     {0x1a,   "hr"},
385     {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
386     {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
387     {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
388     {0x781a, "bs"},     /* Bosnian */
389     {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
390     {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
391     {0x101a, "hr_BA"},  /* Croatian in Bosnia */
392     {0x041a, "hr_HR"},  /* Croatian*/
393     {0x2c1a, "sr_Latn_ME"},
394     {0x241a, "sr_Latn_RS"},
395     {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
396     {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
397     {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
398     {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
399     {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
400     {0x301a, "sr_Cyrl_ME"},
401     {0x281a, "sr_Cyrl_RS"},
402     {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
403     {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
404 };
405 
ILCID_POSIX_SUBTABLE(hsb)406 ILCID_POSIX_SUBTABLE(hsb) {
407     {0x2E,   "hsb"},
408     {0x042E, "hsb_DE"},
409     {0x082E, "dsb_DE"},
410     {0x7C2E, "dsb"},
411 };
412 
413 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
414 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
415 ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
416 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
417 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
418 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
419 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
420 
ILCID_POSIX_SUBTABLE(it)421 ILCID_POSIX_SUBTABLE(it) {
422     {0x10,   "it"},
423     {0x0810, "it_CH"},
424     {0x0410, "it_IT"}
425 };
426 
ILCID_POSIX_SUBTABLE(iu)427 ILCID_POSIX_SUBTABLE(iu) {
428     {0x5d,   "iu"},
429     {0x045d, "iu_Cans_CA"},
430     {0x785d, "iu_Cans"},
431     {0x085d, "iu_Latn_CA"},
432     {0x7c5d, "iu_Latn"}
433 };
434 
435 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
436 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
437 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
438 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
439 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
440 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
441 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
442 
ILCID_POSIX_SUBTABLE(ko)443 ILCID_POSIX_SUBTABLE(ko) {
444     {0x12,   "ko"},
445     {0x0812, "ko_KP"},
446     {0x0412, "ko_KR"}
447 };
448 
449 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
450 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)
451 
ILCID_POSIX_SUBTABLE(ks)452 ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
453     {0x60,   "ks"},
454     {0x0860, "ks_IN"},              /* Documentation doesn't mention script */
455     {0x0460, "ks_Arab_IN"},
456     {0x0860, "ks_Deva_IN"}
457 };
458 
459 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
460 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT)   /* TODO: Verify the country */
461 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
462 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
463 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
464 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
465 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
466 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
467 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
468 
ILCID_POSIX_SUBTABLE(mn)469 ILCID_POSIX_SUBTABLE(mn) {
470     {0x50,   "mn"},
471     {0x0450, "mn_MN"},
472     {0x7c50, "mn_Mong"},
473     {0x0850, "mn_Mong_CN"},
474     {0x0850, "mn_CN"},
475     {0x7850, "mn_Cyrl"},
476     {0x0c50, "mn_Mong_MN"}
477 };
478 
479 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
480 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
481 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
482 
ILCID_POSIX_SUBTABLE(ms)483 ILCID_POSIX_SUBTABLE(ms) {
484     {0x3e,   "ms"},
485     {0x083e, "ms_BN"},   /* Brunei Darussalam*/
486     {0x043e, "ms_MY"}    /* Malaysia*/
487 };
488 
489 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
490 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
491 
ILCID_POSIX_SUBTABLE(ne)492 ILCID_POSIX_SUBTABLE(ne) {
493     {0x61,   "ne"},
494     {0x0861, "ne_IN"},   /* India*/
495     {0x0461, "ne_NP"}    /* Nepal*/
496 };
497 
ILCID_POSIX_SUBTABLE(nl)498 ILCID_POSIX_SUBTABLE(nl) {
499     {0x13,   "nl"},
500     {0x0813, "nl_BE"},
501     {0x0413, "nl_NL"}
502 };
503 
504 /* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
ILCID_POSIX_SUBTABLE(no)505 ILCID_POSIX_SUBTABLE(no) {
506     {0x14,   "no"},     /* really nb_NO */
507     {0x7c14, "nb"},     /* really nb */
508     {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
509     {0x0414, "no_NO"},  /* really nb_NO */
510     {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
511     {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
512     {0x0814, "no_NO_NY"}/* really nn_NO */
513 };
514 
515 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
516 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
517 
ILCID_POSIX_SUBTABLE(om)518 ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
519     {0x72,   "om"},
520     {0x0472, "om_ET"},
521     {0x0472, "gaz_ET"}
522 };
523 
524 /* Declared as or_IN to get around compiler errors*/
ILCID_POSIX_SUBTABLE(or_IN)525 ILCID_POSIX_SUBTABLE(or_IN) {
526     {0x48,   "or"},
527     {0x0448, "or_IN"},
528 };
529 
530 
ILCID_POSIX_SUBTABLE(pa)531 ILCID_POSIX_SUBTABLE(pa) {
532     {0x46,   "pa"},
533     {0x0446, "pa_IN"},
534     {0x0846, "pa_PK"},
535     {0x0846, "pa_Arab_PK"}
536 };
537 
538 ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
539 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
540 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
541 
ILCID_POSIX_SUBTABLE(pt)542 ILCID_POSIX_SUBTABLE(pt) {
543     {0x16,   "pt"},
544     {0x0416, "pt_BR"},
545     {0x0816, "pt_PT"}
546 };
547 
ILCID_POSIX_SUBTABLE(qu)548 ILCID_POSIX_SUBTABLE(qu) {
549     {0x6b,   "qu"},
550     {0x046b, "qu_BO"},
551     {0x086b, "qu_EC"},
552     {0x0C6b, "qu_PE"},
553     {0x046b, "quz_BO"},
554     {0x086b, "quz_EC"},
555     {0x0C6b, "quz_PE"}
556 };
557 
ILCID_POSIX_SUBTABLE(quc)558 ILCID_POSIX_SUBTABLE(quc) {
559     {0x93,   "quc"},
560     {0x0493, "quc_CO"},
561     /*
562         "quc_Latn_GT" is an exceptional case. Language ID of "quc"
563         is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
564         under the group of "qut". "qut" is a retired ISO 639-3 language
565         code for West Central Quiche, and merged to "quc".
566         It looks Windows previously reserved "qut" for K'iche', but,
567         decided to use "quc" when adding a locale for K'iche' (Guatemala).
568 
569         This data structure used here assumes language ID bits in
570         LCID is unique for alphabetic language code. But this is not true
571         for "quc_Latn_GT". If we don't have the data below, LCID look up
572         by alphabetic locale ID (POSIX) will fail. The same entry is found
573         under "qut" below, which is required for reverse look up.
574     */
575     {0x0486, "quc_Latn_GT"}
576 };
577 
ILCID_POSIX_SUBTABLE(qut)578 ILCID_POSIX_SUBTABLE(qut) {
579     {0x86,   "qut"},
580     {0x0486, "qut_GT"},
581     /*
582         See the note in "quc" above.
583     */
584     {0x0486, "quc_Latn_GT"}
585 };
586 
587 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
588 
ILCID_POSIX_SUBTABLE(ro)589 ILCID_POSIX_SUBTABLE(ro) {
590     {0x18,   "ro"},
591     {0x0418, "ro_RO"},
592     {0x0818, "ro_MD"}
593 };
594 
ILCID_POSIX_SUBTABLE(root)595 ILCID_POSIX_SUBTABLE(root) {
596     {0x00,   "root"}
597 };
598 
ILCID_POSIX_SUBTABLE(ru)599 ILCID_POSIX_SUBTABLE(ru) {
600     {0x19,   "ru"},
601     {0x0419, "ru_RU"},
602     {0x0819, "ru_MD"}
603 };
604 
605 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
606 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
607 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
608 
ILCID_POSIX_SUBTABLE(sd)609 ILCID_POSIX_SUBTABLE(sd) {
610     {0x59,   "sd"},
611     {0x0459, "sd_IN"},
612     {0x0459, "sd_Deva_IN"},
613     {0x0859, "sd_PK"}
614 };
615 
ILCID_POSIX_SUBTABLE(se)616 ILCID_POSIX_SUBTABLE(se) {
617     {0x3b,   "se"},
618     {0x0c3b, "se_FI"},
619     {0x043b, "se_NO"},
620     {0x083b, "se_SE"},
621     {0x783b, "sma"},
622     {0x183b, "sma_NO"},
623     {0x1c3b, "sma_SE"},
624     {0x7c3b, "smj"},
625     {0x703b, "smn"},
626     {0x743b, "sms"},
627     {0x103b, "smj_NO"},
628     {0x143b, "smj_SE"},
629     {0x243b, "smn_FI"},
630     {0x203b, "sms_FI"},
631 };
632 
633 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
634 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
635 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
636 
ILCID_POSIX_SUBTABLE(so)637 ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */
638     {0x77,   "so"},
639     {0x0477, "so_ET"},
640     {0x0477, "so_SO"}
641 };
642 
643 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
644 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
645 
ILCID_POSIX_SUBTABLE(sv)646 ILCID_POSIX_SUBTABLE(sv) {
647     {0x1d,   "sv"},
648     {0x081d, "sv_FI"},
649     {0x041d, "sv_SE"}
650 };
651 
652 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
653 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
654 
ILCID_POSIX_SUBTABLE(ta)655 ILCID_POSIX_SUBTABLE(ta) {
656     {0x49,   "ta"},
657     {0x0449, "ta_IN"},
658     {0x0849, "ta_LK"}
659 };
660 
661 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
662 
663 /* Cyrillic based by default */
ILCID_POSIX_SUBTABLE(tg)664 ILCID_POSIX_SUBTABLE(tg) {
665     {0x28,   "tg"},
666     {0x7c28, "tg_Cyrl"},
667     {0x0428, "tg_Cyrl_TJ"}
668 };
669 
670 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
671 
ILCID_POSIX_SUBTABLE(ti)672 ILCID_POSIX_SUBTABLE(ti) {
673     {0x73,   "ti"},
674     {0x0873, "ti_ER"},
675     {0x0473, "ti_ET"}
676 };
677 
678 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
679 
ILCID_POSIX_SUBTABLE(tn)680 ILCID_POSIX_SUBTABLE(tn) {
681     {0x32,   "tn"},
682     {0x0832, "tn_BW"},
683     {0x0432, "tn_ZA"}
684 };
685 
686 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
687 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
688 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
689 
ILCID_POSIX_SUBTABLE(tzm)690 ILCID_POSIX_SUBTABLE(tzm) {
691     {0x5f,   "tzm"},
692     {0x7c5f, "tzm_Latn"},
693     {0x085f, "tzm_Latn_DZ"},
694     {0x105f, "tzm_Tfng_MA"},
695     {0x045f, "tzm_Arab_MA"},
696     {0x045f, "tmz"}
697 };
698 
ILCID_POSIX_SUBTABLE(ug)699 ILCID_POSIX_SUBTABLE(ug) {
700     {0x80,   "ug"},
701     {0x0480, "ug_CN"},
702     {0x0480, "ug_Arab_CN"}
703 };
704 
705 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
706 
ILCID_POSIX_SUBTABLE(ur)707 ILCID_POSIX_SUBTABLE(ur) {
708     {0x20,   "ur"},
709     {0x0820, "ur_IN"},
710     {0x0420, "ur_PK"}
711 };
712 
ILCID_POSIX_SUBTABLE(uz)713 ILCID_POSIX_SUBTABLE(uz) {
714     {0x43,   "uz"},
715     {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
716     {0x7843, "uz_Cyrl"},  /* Cyrillic based */
717     {0x0843, "uz_UZ"},  /* Cyrillic based */
718     {0x0443, "uz_Latn_UZ"}, /* Latin based */
719     {0x7c43, "uz_Latn"} /* Latin based */
720 };
721 
ILCID_POSIX_SUBTABLE(ve)722 ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
723     {0x33,   "ve"},
724     {0x0433, "ve_ZA"},
725     {0x0433, "ven_ZA"}
726 };
727 
728 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
729 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
730 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
731 ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
732 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
733 
ILCID_POSIX_SUBTABLE(zh)734 ILCID_POSIX_SUBTABLE(zh) {
735     {0x0004, "zh_Hans"},
736     {0x7804, "zh"},
737     {0x0804, "zh_CN"},
738     {0x0804, "zh_Hans_CN"},
739     {0x0c04, "zh_Hant_HK"},
740     {0x0c04, "zh_HK"},
741     {0x1404, "zh_Hant_MO"},
742     {0x1404, "zh_MO"},
743     {0x1004, "zh_Hans_SG"},
744     {0x1004, "zh_SG"},
745     {0x0404, "zh_Hant_TW"},
746     {0x7c04, "zh_Hant"},
747     {0x0404, "zh_TW"},
748     {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
749     {0x30404,"zh_TW"},          /* Bopomofo order */
750     {0x20004,"zh@collation=stroke"},
751     {0x20404,"zh_Hant@collation=stroke"},
752     {0x20404,"zh_Hant_TW@collation=stroke"},
753     {0x20404,"zh_TW@collation=stroke"},
754     {0x20804,"zh_Hans@collation=stroke"},
755     {0x20804,"zh_Hans_CN@collation=stroke"},
756     {0x20804,"zh_CN@collation=stroke"}
757 };
758 
759 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
760 
761 /* This must be static and grouped by LCID. */
762 static const ILcidPosixMap gPosixIDmap[] = {
763     ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
764     ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
765     ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
766     ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
767     ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
768     ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
769     ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
770     ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
771 /*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
772     ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
773     ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
774     ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
775     ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
776     ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
777     ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
778     ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
779     ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
780     ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
781     ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
782     ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
783     ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
784     ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
785     ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
786     ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
787     ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
788     ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
789     ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
790     ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
791     ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
792     ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
793     ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
794     ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
795     ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
796     ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
797     ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
798     ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
799     ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
800     ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
801     ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
802     ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
803     ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
804     ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
805     ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
806     ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
807     ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
808     ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
809     ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
810     ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
811     ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
812     ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
813     ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
814     ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
815     ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
816     ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
817     ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
818     ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
819     ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
820     ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
821     ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
822     ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
823     ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
824     ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
825     ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
826     ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
827     ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
828     ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
829     ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
830     ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
831     ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
832     ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
833     ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
834     ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
835     ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
836     ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
837     ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
838     ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
839     ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
840     ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
841     ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
842     ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
843     ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
844     ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
845     ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
846     ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
847     ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
848     ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
849 /*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
850     ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
851     ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
852 /*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
853     ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
854     ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
855     ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
856     ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
857     ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
858     ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
859     ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
860     ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
861     ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
862     ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
863     ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
864     ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
865     ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
866     ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
867     ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
868     ILCID_POSIX_MAP(root),  /*  root                          0x00 */
869     ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
870     ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
871     ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
872     ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
873     ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
874     ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
875 /*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
876     ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
877     ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
878     ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
879     ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
880     ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
881 /*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
882     ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
883     ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
884     ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
885     ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
886     ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
887     ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
888     ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
889     ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
890     ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
891     ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
892     ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
893     ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
894     ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
895     ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
896     ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
897     ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
898     ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
899     ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
900     ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
901     ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
902     ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
903     ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
904     ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
905     ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
906     ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
907     ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
908     ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
909 };
910 
/* Number of entries in gPosixIDmap; used as the bound when scanning or
   searching that table in uprv_convertToPosix() and uprv_convertToLCID(). */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
912 
/**
 * Counts how many leading characters two NUL-terminated IDs share.
 *
 * Internal helper for getHostID(); it is not meant to be called directly.
 *
 * @param id1 first NUL-terminated ID.
 * @param id2 second NUL-terminated ID.
 * @return the length of the common prefix, i.e. the index of the first
 *         position at which the two strings differ or at which id1 ends.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t common = 0;

    for (;;) {
        const char c = id1[common];
        /* Stop at the end of id1 or at the first mismatch (which also
           covers id2 ending first, since its NUL differs from c). */
        if (c == 0 || c != id2[common]) {
            return common;
        }
        ++common;
    }
}
929 
930 /**
931  * Searches for a Windows LCID
932  *
933  * @param posixid the Posix style locale id.
934  * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
935  *               no equivalent Windows LCID.
936  * @return the LCID
937  */
938 static uint32_t
getHostID(const ILcidPosixMap * this_0,const char * posixID,UErrorCode * status)939 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
940 {
941     int32_t bestIdx = 0;
942     int32_t bestIdxDiff = 0;
943     int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
944     uint32_t idx;
945 
946     for (idx = 0; idx < this_0->numRegions; idx++ ) {
947         int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
948         if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
949             if (posixIDlen == sameChars) {
950                 /* Exact match */
951                 return this_0->regionMaps[idx].hostID;
952             }
953             bestIdxDiff = sameChars;
954             bestIdx = idx;
955         }
956     }
957     /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
958     /* We also have to make sure that sid and si and similar string subsets don't match. */
959     if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
960         && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
961     {
962         *status = U_USING_FALLBACK_WARNING;
963         return this_0->regionMaps[bestIdx].hostID;
964     }
965 
966     /*no match found */
967     *status = U_ILLEGAL_ARGUMENT_ERROR;
968     return this_0->regionMaps->hostID;
969 }
970 
971 static const char*
getPosixID(const ILcidPosixMap * this_0,uint32_t hostID)972 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
973 {
974     uint32_t i;
975     for (i = 0; i <= this_0->numRegions; i++)
976     {
977         if (this_0->regionMaps[i].hostID == hostID)
978         {
979             return this_0->regionMaps[i].posixID;
980         }
981     }
982 
983     /* If you get here, then no matching region was found,
984        so return the language id with the wild card region. */
985     return this_0->regionMaps[0].posixID;
986 }
987 
988 /*
989 //////////////////////////////////////
990 //
991 // LCID --> POSIX
992 //
993 /////////////////////////////////////
994 */
#ifdef USE_WINDOWS_LOCALE_API
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * The three-letter tag at the start of buffer is rewritten in place to the
 * two-letter one, and the rest of the string (including its terminating
 * NUL) is shifted left by one character. The shift uses uprv_memmove()
 * because source and destination overlap, which strcat-style copying does
 * not permit.
 *
 * buffer must be a writable, NUL-terminated char array; it is evaluated
 * more than once, so pass an expression without side effects.
 * len is the length reported for buffer; nothing is done when it is
 * below 3.
 *
 * The macro expands to a single statement and needs a trailing semicolon.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)

#endif
1013 U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid,char * posixID,int32_t posixIDCapacity,UErrorCode * status)1014 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1015 {
1016     uint16_t langID;
1017     uint32_t localeIndex;
1018     UBool bLookup = TRUE;
1019     const char *pPosixID = NULL;
1020 
1021 #ifdef USE_WINDOWS_LOCALE_API
1022     int32_t tmpLen = 0;
1023     char locName[157];  /* ULOC_FULLNAME_CAPACITY */
1024 
1025     tmpLen = GetLocaleInfoA(hostid, LOCALE_SNAME, (LPSTR)locName, UPRV_LENGTHOF(locName));
1026     if (tmpLen > 1) {
1027         /* Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1028            In such case, we need special mapping data found in the hardcoded table
1029            in this source file. */
1030         char *p = uprv_strchr(locName, '_');
1031         if (p) {
1032             /* Keep the base locale, without variant */
1033             *p = 0;
1034             tmpLen = uprv_strlen(locName);
1035         } else {
1036             /* No hardcoded table lookup necessary */
1037             bLookup = FALSE;
1038         }
1039         /* Change the tag separator from '-' to '_' */
1040         p = locName;
1041         while (*p) {
1042             if (*p == '-') {
1043                 *p = '_';
1044             }
1045             p++;
1046         }
1047         FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1048         pPosixID = locName;
1049     }
1050 #endif
1051     if (bLookup) {
1052         const char *pCandidate = NULL;
1053         langID = LANGUAGE_LCID(hostid);
1054 
1055         for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1056             if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1057                 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1058                 break;
1059             }
1060         }
1061 
1062         /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1063            If a match in the hardcoded table is longer than the Windows locale name without
1064            variant, we use the one as the result */
1065         if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1066             pPosixID = pCandidate;
1067         }
1068     }
1069 
1070     if (pPosixID) {
1071         int32_t resLen = uprv_strlen(pPosixID);
1072         int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1073         uprv_memcpy(posixID, pPosixID, copyLen);
1074         if (resLen < posixIDCapacity) {
1075             posixID[resLen] = 0;
1076             if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1077                 *status = U_ZERO_ERROR;
1078             }
1079         } else if (resLen == posixIDCapacity) {
1080             *status = U_STRING_NOT_TERMINATED_WARNING;
1081         } else {
1082             *status = U_BUFFER_OVERFLOW_ERROR;
1083         }
1084         return resLen;
1085     }
1086 
1087     /* no match found */
1088     *status = U_ILLEGAL_ARGUMENT_ERROR;
1089     return -1;
1090 }
1091 
1092 /*
1093 //////////////////////////////////////
1094 //
1095 // POSIX --> LCID
1096 // This should only be called from uloc_getLCID.
1097 // The locale ID must be in canonical form.
1098 // langID is separate so that this file doesn't depend on the uloc_* API.
1099 //
1100 /////////////////////////////////////
1101 */
1102 
1103 U_CAPI uint32_t
uprv_convertToLCID(const char * langID,const char * posixID,UErrorCode * status)1104 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1105 {
1106 
1107     uint32_t   low    = 0;
1108     uint32_t   high   = gLocaleCount;
1109     uint32_t   mid;
1110     uint32_t   oldmid = 0;
1111     int32_t    compVal;
1112 
1113     uint32_t   value         = 0;
1114     uint32_t   fallbackValue = (uint32_t)-1;
1115     UErrorCode myStatus;
1116     uint32_t   idx;
1117 
1118     /* Check for incomplete id. */
1119     if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1120         return 0;
1121     }
1122 
1123     /*Binary search for the map entry for normal cases */
1124 
1125     while (high > low)  /*binary search*/{
1126 
1127         mid = (high+low) >> 1; /*Finds median*/
1128 
1129         if (mid == oldmid)
1130             break;
1131 
1132         compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1133         if (compVal < 0){
1134             high = mid;
1135         }
1136         else if (compVal > 0){
1137             low = mid;
1138         }
1139         else /*we found it*/{
1140             return getHostID(&gPosixIDmap[mid], posixID, status);
1141         }
1142         oldmid = mid;
1143     }
1144 
1145     /*
1146      * Sometimes we can't do a binary search on posixID because some LCIDs
1147      * go to different locales.  We hit one of those special cases.
1148      */
1149     for (idx = 0; idx < gLocaleCount; idx++ ) {
1150         myStatus = U_ZERO_ERROR;
1151         value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1152         if (myStatus == U_ZERO_ERROR) {
1153             return value;
1154         }
1155         else if (myStatus == U_USING_FALLBACK_WARNING) {
1156             fallbackValue = value;
1157         }
1158     }
1159 
1160     if (fallbackValue != (uint32_t)-1) {
1161         *status = U_USING_FALLBACK_WARNING;
1162         return fallbackValue;
1163     }
1164 
1165     /* no match found */
1166     *status = U_ILLEGAL_ARGUMENT_ERROR;
1167     return 0;   /* return international (root) */
1168 }
1169 
1170