1 /*
2 **********************************************************************
3 * Copyright (C) 1996-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * Provides functionality for mapping between
8 * LCID and Posix IDs or ICU locale to codepage
9 *
10 * Note: All classes and code in this file are
11 * intended for internal use only.
12 *
13 * Methods of interest:
14 * unsigned long convertToLCID(const char*);
15 * const char* convertToPosix(unsigned long);
16 *
17 * Kathleen Wilson, 4/30/96
18 *
19 * Date Name Description
20 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
21 * setId() method and safety check against
22 * MAX_ID_LENGTH.
23 * 04/23/99 stephen Added C wrapper for convertToPosix.
24 * 09/18/00 george Removed the memory leaks.
25 * 08/23/01 george Convert to C
26 */
27
28 #include "locmap.h"
29 #include "cstring.h"
30 #include "cmemory.h"
31
32 #if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
33 /*
34 * TODO: It seems like we should widen this to
35 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
36 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
37 * but those use gcc and won't have defined(_MSC_VER).
38 * We might need to #include some Windows header and test for some version macro from there.
39 * Or call some Windows function and see what it returns.
40 */
41 #define USE_WINDOWS_LOCALE_API
42 #endif
43
44 #ifdef USE_WINDOWS_LOCALE_API
45 #include <windows.h>
46 #include <winnls.h>
47 #endif
48
49 /*
50 * Note:
51 * The mapping from Win32 locale ID numbers to POSIX locale strings should
52 * be the faster one.
53 *
54 * Many LCID values come from winnt.h
55 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
56 */
57
58 /*
59 ////////////////////////////////////////////////
60 //
61 // Internal Classes for LCID <--> POSIX Mapping
62 //
63 /////////////////////////////////////////////////
64 */
65
/* One row of an LCID <--> POSIX table: a host LCID paired with a POSIX locale ID. */
typedef struct ILcidPosixElement {
    uint32_t const hostID;          /* LCID in host format, e.g. 0x0407 */
    char const * const posixID;     /* POSIX locale ID string, e.g. "de_DE" */
} ILcidPosixElement;
71
/* A table of ILcidPosixElement rows together with its row count. */
typedef struct ILcidPosixMap {
    uint32_t const numRegions;                          /* number of rows in regionMaps */
    struct ILcidPosixElement const * const regionMaps;  /* first row of the table */
} ILcidPosixMap;
77
78
79 /*
80 /////////////////////////////////////////////////
81 //
82 // Easy macros to make the LCID <--> POSIX Mapping
83 //
84 /////////////////////////////////////////////////
85 */
86
/**
 * Defines the two-row table locmap_<languageID> for the standard
 * one language/one country case.
 * Row 0 is the bare language (the parent locale), keyed by LANGUAGE_LCID(hostID);
 * row 1 is the language with its country, keyed by hostID itself.
 * The expansion already ends with a semicolon, so none is needed at the use site.
 * @param hostID     LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID    posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
    static const ILcidPosixElement locmap_##languageID[] = { \
        { LANGUAGE_LCID(hostID), #languageID }, /* parent locale */ \
        { hostID, #posixID }, \
    };
100
/**
 * Opens the definition of the table locmap_<id>. The macro supplies everything
 * up to and including the '='; the caller follows it with the brace-enclosed
 * list of {LCID, "posix ID"} rows and the closing semicolon.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
    static const ILcidPosixElement locmap_##id[] =
107
108
/**
 * Produces one ILcidPosixMap initializer, {row count, table}, for the table
 * named locmap_<_posixID>. The table must already be defined under exactly
 * that name, and its first row is expected to be just the language.
 * @param _posixID the POSIX ID that was used to name the table.
 */
#define ILCID_POSIX_MAP(_posixID) \
    { UPRV_LENGTHOF(locmap_##_posixID), locmap_##_posixID }
117
118 /*
119 ////////////////////////////////////////////
120 //
121 // Create the table of LCID to POSIX Mapping
122 // None of it should be dynamically created.
123 //
124 // Keep static locale variables inside the function so that
125 // it can be created properly during static init.
126 //
// Note: This table should be updated periodically. Check the National Language Support API Reference Website.
128 // Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
129 // maintained for support of older Windows version.
130 // Update: Windows 7 (091130)
131 //
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
133 // @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
134 // called from uloc_getLCID(), keywords other than collation are already removed. If we really need
135 // to support other keywords in this mapping data, we must update the implementation.
136 ////////////////////////////////////////////
137 */
138
139 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
140
ILCID_POSIX_SUBTABLE(ar)141 ILCID_POSIX_SUBTABLE(ar) {
142 {0x01, "ar"},
143 {0x3801, "ar_AE"},
144 {0x3c01, "ar_BH"},
145 {0x1401, "ar_DZ"},
146 {0x0c01, "ar_EG"},
147 {0x0801, "ar_IQ"},
148 {0x2c01, "ar_JO"},
149 {0x3401, "ar_KW"},
150 {0x3001, "ar_LB"},
151 {0x1001, "ar_LY"},
152 {0x1801, "ar_MA"},
153 {0x1801, "ar_MO"},
154 {0x2001, "ar_OM"},
155 {0x4001, "ar_QA"},
156 {0x0401, "ar_SA"},
157 {0x2801, "ar_SY"},
158 {0x1c01, "ar_TN"},
159 {0x2401, "ar_YE"}
160 };
161
162 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
163 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
164 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
165
ILCID_POSIX_SUBTABLE(az)166 ILCID_POSIX_SUBTABLE(az) {
167 {0x2c, "az"},
168 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
169 {0x742c, "az_Cyrl"}, /* Cyrillic based */
170 {0x042c, "az_Latn_AZ"}, /* Latin based */
171 {0x782c, "az_Latn"}, /* Latin based */
172 {0x042c, "az_AZ"} /* Latin based */
173 };
174
175 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
176 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
177
178 /*ILCID_POSIX_SUBTABLE(ber) {
179 {0x5f, "ber"},
180 {0x045f, "ber_Arab_DZ"},
181 {0x045f, "ber_Arab"},
182 {0x085f, "ber_Latn_DZ"},
183 {0x085f, "ber_Latn"}
184 };*/
185
186 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
187
188 ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG)
189
ILCID_POSIX_SUBTABLE(bn)190 ILCID_POSIX_SUBTABLE(bn) {
191 {0x45, "bn"},
192 {0x0845, "bn_BD"},
193 {0x0445, "bn_IN"}
194 };
195
ILCID_POSIX_SUBTABLE(bo)196 ILCID_POSIX_SUBTABLE(bo) {
197 {0x51, "bo"},
198 {0x0851, "bo_BT"},
199 {0x0451, "bo_CN"},
200 {0x0c51, "dz_BT"}
201 };
202
203 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
204
ILCID_POSIX_SUBTABLE(ca)205 ILCID_POSIX_SUBTABLE(ca) {
206 {0x03, "ca"},
207 {0x0403, "ca_ES"},
208 {0x0803, "ca_ES_VALENCIA"}
209 };
210
211 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
212 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
213
ILCID_POSIX_SUBTABLE(ckb)214 ILCID_POSIX_SUBTABLE(ckb) {
215 {0x92, "ckb"},
216 {0x92, "ku"},
217 {0x7c92, "ckb_Arab"},
218 {0x7c92, "ku_Arab"},
219 {0x0492, "ckb_Arab_IQ"},
220 {0x0492, "ku_Arab_IQ"}
221 };
222
223 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
224 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
225
226 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
227 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
228
ILCID_POSIX_SUBTABLE(de)229 ILCID_POSIX_SUBTABLE(de) {
230 {0x07, "de"},
231 {0x0c07, "de_AT"},
232 {0x0807, "de_CH"},
233 {0x0407, "de_DE"},
234 {0x1407, "de_LI"},
235 {0x1007, "de_LU"},
236 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
237 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
238 };
239
240 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
241 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
242
ILCID_POSIX_SUBTABLE(en)243 ILCID_POSIX_SUBTABLE(en) {
244 {0x09, "en"},
245 {0x0c09, "en_AU"},
246 {0x2809, "en_BZ"},
247 {0x1009, "en_CA"},
248 {0x0809, "en_GB"},
249 {0x3c09, "en_HK"},
250 {0x3809, "en_ID"},
251 {0x1809, "en_IE"},
252 {0x4009, "en_IN"},
253 {0x2009, "en_JM"},
254 {0x4409, "en_MY"},
255 {0x1409, "en_NZ"},
256 {0x3409, "en_PH"},
257 {0x4809, "en_SG"},
258 {0x2C09, "en_TT"},
259 {0x0409, "en_US"},
260 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
261 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
262 {0x1c09, "en_ZA"},
263 {0x3009, "en_ZW"},
264 {0x2409, "en_029"},
265 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
266 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
267 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
268 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
269 {0x0409, "en_UM"} /* Alias for en_US. Leave last. */
270 };
271
ILCID_POSIX_SUBTABLE(en_US_POSIX)272 ILCID_POSIX_SUBTABLE(en_US_POSIX) {
273 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
274 };
275
ILCID_POSIX_SUBTABLE(es)276 ILCID_POSIX_SUBTABLE(es) {
277 {0x0a, "es"},
278 {0x2c0a, "es_AR"},
279 {0x400a, "es_BO"},
280 {0x340a, "es_CL"},
281 {0x240a, "es_CO"},
282 {0x140a, "es_CR"},
283 {0x5c0a, "es_CU"},
284 {0x1c0a, "es_DO"},
285 {0x300a, "es_EC"},
286 {0x0c0a, "es_ES"}, /*Modern sort.*/
287 {0x100a, "es_GT"},
288 {0x480a, "es_HN"},
289 {0x080a, "es_MX"},
290 {0x4c0a, "es_NI"},
291 {0x180a, "es_PA"},
292 {0x280a, "es_PE"},
293 {0x500a, "es_PR"},
294 {0x3c0a, "es_PY"},
295 {0x440a, "es_SV"},
296 {0x540a, "es_US"},
297 {0x380a, "es_UY"},
298 {0x200a, "es_VE"},
299 {0x580a, "es_419"},
300 {0x040a, "es_ES@collation=traditional"},
301 {0x040a, "es@collation=traditional"}
302 };
303
304 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
305 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
306
307 /* ISO-639 doesn't distinguish between Persian and Dari.*/
ILCID_POSIX_SUBTABLE(fa)308 ILCID_POSIX_SUBTABLE(fa) {
309 {0x29, "fa"},
310 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
311 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
312 };
313
314 /* duplicate for roundtripping */
ILCID_POSIX_SUBTABLE(fa_AF)315 ILCID_POSIX_SUBTABLE(fa_AF) {
316 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
317 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
318 };
319
ILCID_POSIX_SUBTABLE(ff)320 ILCID_POSIX_SUBTABLE(ff) {
321 {0x67, "ff"},
322 {0x7c67, "ff_Latn"},
323 {0x0867, "ff_Latn_SN"},
324 {0x0467, "ff_NG"}
325 };
326
327 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
328 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
329 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
330
ILCID_POSIX_SUBTABLE(fr)331 ILCID_POSIX_SUBTABLE(fr) {
332 {0x0c, "fr"},
333 {0x080c, "fr_BE"},
334 {0x0c0c, "fr_CA"},
335 {0x240c, "fr_CD"},
336 {0x240c, "fr_CG"},
337 {0x100c, "fr_CH"},
338 {0x300c, "fr_CI"},
339 {0x2c0c, "fr_CM"},
340 {0x040c, "fr_FR"},
341 {0x3c0c, "fr_HT"},
342 {0x140c, "fr_LU"},
343 {0x380c, "fr_MA"},
344 {0x180c, "fr_MC"},
345 {0x340c, "fr_ML"},
346 {0x200c, "fr_RE"},
347 {0x280c, "fr_SN"},
348 {0xe40c, "fr_015"},
349 {0x1c0c, "fr_029"}
350 };
351
352 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
353
354 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
355
ILCID_POSIX_SUBTABLE(ga)356 ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
357 {0x3c, "ga"},
358 {0x083c, "ga_IE"},
359 {0x043c, "gd_GB"}
360 };
361
ILCID_POSIX_SUBTABLE(gd)362 ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
363 {0x91, "gd"},
364 {0x0491, "gd_GB"}
365 };
366
367 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
368 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
369 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
370 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
371
ILCID_POSIX_SUBTABLE(ha)372 ILCID_POSIX_SUBTABLE(ha) {
373 {0x68, "ha"},
374 {0x7c68, "ha_Latn"},
375 {0x0468, "ha_Latn_NG"},
376 };
377
378 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
379 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
380 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
381
382 /* This LCID is really four different locales.*/
ILCID_POSIX_SUBTABLE(hr)383 ILCID_POSIX_SUBTABLE(hr) {
384 {0x1a, "hr"},
385 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
386 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
387 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
388 {0x781a, "bs"}, /* Bosnian */
389 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
390 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
391 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
392 {0x041a, "hr_HR"}, /* Croatian*/
393 {0x2c1a, "sr_Latn_ME"},
394 {0x241a, "sr_Latn_RS"},
395 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
396 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
397 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
398 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
399 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
400 {0x301a, "sr_Cyrl_ME"},
401 {0x281a, "sr_Cyrl_RS"},
402 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
403 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
404 };
405
ILCID_POSIX_SUBTABLE(hsb)406 ILCID_POSIX_SUBTABLE(hsb) {
407 {0x2E, "hsb"},
408 {0x042E, "hsb_DE"},
409 {0x082E, "dsb_DE"},
410 {0x7C2E, "dsb"},
411 };
412
413 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
414 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
415 ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
416 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
417 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
418 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
419 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
420
ILCID_POSIX_SUBTABLE(it)421 ILCID_POSIX_SUBTABLE(it) {
422 {0x10, "it"},
423 {0x0810, "it_CH"},
424 {0x0410, "it_IT"}
425 };
426
ILCID_POSIX_SUBTABLE(iu)427 ILCID_POSIX_SUBTABLE(iu) {
428 {0x5d, "iu"},
429 {0x045d, "iu_Cans_CA"},
430 {0x785d, "iu_Cans"},
431 {0x085d, "iu_Latn_CA"},
432 {0x7c5d, "iu_Latn"}
433 };
434
435 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
436 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
437 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
438 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
439 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
440 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
441 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
442
ILCID_POSIX_SUBTABLE(ko)443 ILCID_POSIX_SUBTABLE(ko) {
444 {0x12, "ko"},
445 {0x0812, "ko_KP"},
446 {0x0412, "ko_KR"}
447 };
448
449 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
450 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
451
ILCID_POSIX_SUBTABLE(ks)452 ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
453 {0x60, "ks"},
454 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
455 {0x0460, "ks_Arab_IN"},
456 {0x0860, "ks_Deva_IN"}
457 };
458
459 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
460 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
461 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
462 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
463 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
464 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
465 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
466 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
467 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
468
ILCID_POSIX_SUBTABLE(mn)469 ILCID_POSIX_SUBTABLE(mn) {
470 {0x50, "mn"},
471 {0x0450, "mn_MN"},
472 {0x7c50, "mn_Mong"},
473 {0x0850, "mn_Mong_CN"},
474 {0x0850, "mn_CN"},
475 {0x7850, "mn_Cyrl"},
476 {0x0c50, "mn_Mong_MN"}
477 };
478
479 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
480 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
481 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
482
ILCID_POSIX_SUBTABLE(ms)483 ILCID_POSIX_SUBTABLE(ms) {
484 {0x3e, "ms"},
485 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
486 {0x043e, "ms_MY"} /* Malaysia*/
487 };
488
489 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
490 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
491
ILCID_POSIX_SUBTABLE(ne)492 ILCID_POSIX_SUBTABLE(ne) {
493 {0x61, "ne"},
494 {0x0861, "ne_IN"}, /* India*/
495 {0x0461, "ne_NP"} /* Nepal*/
496 };
497
ILCID_POSIX_SUBTABLE(nl)498 ILCID_POSIX_SUBTABLE(nl) {
499 {0x13, "nl"},
500 {0x0813, "nl_BE"},
501 {0x0413, "nl_NL"}
502 };
503
504 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
ILCID_POSIX_SUBTABLE(no)505 ILCID_POSIX_SUBTABLE(no) {
506 {0x14, "no"}, /* really nb_NO */
507 {0x7c14, "nb"}, /* really nb */
508 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
509 {0x0414, "no_NO"}, /* really nb_NO */
510 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
511 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
512 {0x0814, "no_NO_NY"}/* really nn_NO */
513 };
514
515 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
516 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
517
ILCID_POSIX_SUBTABLE(om)518 ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
519 {0x72, "om"},
520 {0x0472, "om_ET"},
521 {0x0472, "gaz_ET"}
522 };
523
524 /* Declared as or_IN to get around compiler errors*/
ILCID_POSIX_SUBTABLE(or_IN)525 ILCID_POSIX_SUBTABLE(or_IN) {
526 {0x48, "or"},
527 {0x0448, "or_IN"},
528 };
529
530
ILCID_POSIX_SUBTABLE(pa)531 ILCID_POSIX_SUBTABLE(pa) {
532 {0x46, "pa"},
533 {0x0446, "pa_IN"},
534 {0x0846, "pa_PK"},
535 {0x0846, "pa_Arab_PK"}
536 };
537
538 ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
539 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
540 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
541
ILCID_POSIX_SUBTABLE(pt)542 ILCID_POSIX_SUBTABLE(pt) {
543 {0x16, "pt"},
544 {0x0416, "pt_BR"},
545 {0x0816, "pt_PT"}
546 };
547
ILCID_POSIX_SUBTABLE(qu)548 ILCID_POSIX_SUBTABLE(qu) {
549 {0x6b, "qu"},
550 {0x046b, "qu_BO"},
551 {0x086b, "qu_EC"},
552 {0x0C6b, "qu_PE"},
553 {0x046b, "quz_BO"},
554 {0x086b, "quz_EC"},
555 {0x0C6b, "quz_PE"}
556 };
557
ILCID_POSIX_SUBTABLE(quc)558 ILCID_POSIX_SUBTABLE(quc) {
559 {0x93, "quc"},
560 {0x0493, "quc_CO"},
561 /*
562 "quc_Latn_GT" is an exceptional case. Language ID of "quc"
563 is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
564 under the group of "qut". "qut" is a retired ISO 639-3 language
565 code for West Central Quiche, and merged to "quc".
566 It looks Windows previously reserved "qut" for K'iche', but,
567 decided to use "quc" when adding a locale for K'iche' (Guatemala).
568
569 This data structure used here assumes language ID bits in
570 LCID is unique for alphabetic language code. But this is not true
571 for "quc_Latn_GT". If we don't have the data below, LCID look up
572 by alphabetic locale ID (POSIX) will fail. The same entry is found
573 under "qut" below, which is required for reverse look up.
574 */
575 {0x0486, "quc_Latn_GT"}
576 };
577
ILCID_POSIX_SUBTABLE(qut)578 ILCID_POSIX_SUBTABLE(qut) {
579 {0x86, "qut"},
580 {0x0486, "qut_GT"},
581 /*
582 See the note in "quc" above.
583 */
584 {0x0486, "quc_Latn_GT"}
585 };
586
587 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
588
ILCID_POSIX_SUBTABLE(ro)589 ILCID_POSIX_SUBTABLE(ro) {
590 {0x18, "ro"},
591 {0x0418, "ro_RO"},
592 {0x0818, "ro_MD"}
593 };
594
ILCID_POSIX_SUBTABLE(root)595 ILCID_POSIX_SUBTABLE(root) {
596 {0x00, "root"}
597 };
598
ILCID_POSIX_SUBTABLE(ru)599 ILCID_POSIX_SUBTABLE(ru) {
600 {0x19, "ru"},
601 {0x0419, "ru_RU"},
602 {0x0819, "ru_MD"}
603 };
604
605 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
606 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
607 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
608
ILCID_POSIX_SUBTABLE(sd)609 ILCID_POSIX_SUBTABLE(sd) {
610 {0x59, "sd"},
611 {0x0459, "sd_IN"},
612 {0x0459, "sd_Deva_IN"},
613 {0x0859, "sd_PK"}
614 };
615
ILCID_POSIX_SUBTABLE(se)616 ILCID_POSIX_SUBTABLE(se) {
617 {0x3b, "se"},
618 {0x0c3b, "se_FI"},
619 {0x043b, "se_NO"},
620 {0x083b, "se_SE"},
621 {0x783b, "sma"},
622 {0x183b, "sma_NO"},
623 {0x1c3b, "sma_SE"},
624 {0x7c3b, "smj"},
625 {0x703b, "smn"},
626 {0x743b, "sms"},
627 {0x103b, "smj_NO"},
628 {0x143b, "smj_SE"},
629 {0x243b, "smn_FI"},
630 {0x203b, "sms_FI"},
631 };
632
633 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
634 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
635 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
636
ILCID_POSIX_SUBTABLE(so)637 ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */
638 {0x77, "so"},
639 {0x0477, "so_ET"},
640 {0x0477, "so_SO"}
641 };
642
643 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
644 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
645
ILCID_POSIX_SUBTABLE(sv)646 ILCID_POSIX_SUBTABLE(sv) {
647 {0x1d, "sv"},
648 {0x081d, "sv_FI"},
649 {0x041d, "sv_SE"}
650 };
651
652 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
653 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
654
ILCID_POSIX_SUBTABLE(ta)655 ILCID_POSIX_SUBTABLE(ta) {
656 {0x49, "ta"},
657 {0x0449, "ta_IN"},
658 {0x0849, "ta_LK"}
659 };
660
661 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
662
663 /* Cyrillic based by default */
ILCID_POSIX_SUBTABLE(tg)664 ILCID_POSIX_SUBTABLE(tg) {
665 {0x28, "tg"},
666 {0x7c28, "tg_Cyrl"},
667 {0x0428, "tg_Cyrl_TJ"}
668 };
669
670 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
671
ILCID_POSIX_SUBTABLE(ti)672 ILCID_POSIX_SUBTABLE(ti) {
673 {0x73, "ti"},
674 {0x0873, "ti_ER"},
675 {0x0473, "ti_ET"}
676 };
677
678 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
679
ILCID_POSIX_SUBTABLE(tn)680 ILCID_POSIX_SUBTABLE(tn) {
681 {0x32, "tn"},
682 {0x0832, "tn_BW"},
683 {0x0432, "tn_ZA"}
684 };
685
686 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
687 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
688 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
689
ILCID_POSIX_SUBTABLE(tzm)690 ILCID_POSIX_SUBTABLE(tzm) {
691 {0x5f, "tzm"},
692 {0x7c5f, "tzm_Latn"},
693 {0x085f, "tzm_Latn_DZ"},
694 {0x105f, "tzm_Tfng_MA"},
695 {0x045f, "tzm_Arab_MA"},
696 {0x045f, "tmz"}
697 };
698
ILCID_POSIX_SUBTABLE(ug)699 ILCID_POSIX_SUBTABLE(ug) {
700 {0x80, "ug"},
701 {0x0480, "ug_CN"},
702 {0x0480, "ug_Arab_CN"}
703 };
704
705 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
706
ILCID_POSIX_SUBTABLE(ur)707 ILCID_POSIX_SUBTABLE(ur) {
708 {0x20, "ur"},
709 {0x0820, "ur_IN"},
710 {0x0420, "ur_PK"}
711 };
712
ILCID_POSIX_SUBTABLE(uz)713 ILCID_POSIX_SUBTABLE(uz) {
714 {0x43, "uz"},
715 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
716 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
717 {0x0843, "uz_UZ"}, /* Cyrillic based */
718 {0x0443, "uz_Latn_UZ"}, /* Latin based */
719 {0x7c43, "uz_Latn"} /* Latin based */
720 };
721
ILCID_POSIX_SUBTABLE(ve)722 ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
723 {0x33, "ve"},
724 {0x0433, "ve_ZA"},
725 {0x0433, "ven_ZA"}
726 };
727
728 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
729 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
730 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
731 ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
732 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
733
ILCID_POSIX_SUBTABLE(zh)734 ILCID_POSIX_SUBTABLE(zh) {
735 {0x0004, "zh_Hans"},
736 {0x7804, "zh"},
737 {0x0804, "zh_CN"},
738 {0x0804, "zh_Hans_CN"},
739 {0x0c04, "zh_Hant_HK"},
740 {0x0c04, "zh_HK"},
741 {0x1404, "zh_Hant_MO"},
742 {0x1404, "zh_MO"},
743 {0x1004, "zh_Hans_SG"},
744 {0x1004, "zh_SG"},
745 {0x0404, "zh_Hant_TW"},
746 {0x7c04, "zh_Hant"},
747 {0x0404, "zh_TW"},
748 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
749 {0x30404,"zh_TW"}, /* Bopomofo order */
750 {0x20004,"zh@collation=stroke"},
751 {0x20404,"zh_Hant@collation=stroke"},
752 {0x20404,"zh_Hant_TW@collation=stroke"},
753 {0x20404,"zh_TW@collation=stroke"},
754 {0x20804,"zh_Hans@collation=stroke"},
755 {0x20804,"zh_Hans_CN@collation=stroke"},
756 {0x20804,"zh_CN@collation=stroke"}
757 };
758
759 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
760
761 /* This must be static and grouped by LCID. */
762 static const ILcidPosixMap gPosixIDmap[] = {
763 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
764 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
765 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
766 ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
767 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
768 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
769 ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
770 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
771 /* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
772 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
773 ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
774 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
775 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
776 ILCID_POSIX_MAP(br), /* br Breton 0x7e */
777 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
778 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
779 ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
780 ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
781 ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
782 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
783 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
784 ILCID_POSIX_MAP(de), /* de German 0x07 */
785 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
786 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
787 ILCID_POSIX_MAP(en), /* en English 0x09 */
788 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
789 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
790 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
791 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
792 ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
793 ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
794 ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
795 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
796 ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
797 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
798 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
799 ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
800 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
801 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
802 ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
803 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
804 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
805 ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
806 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
807 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
808 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
809 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
810 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
811 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
812 ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
813 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
814 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
815 ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
816 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
817 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
818 ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
819 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
820 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
821 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
822 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
823 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
824 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
825 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
826 ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
827 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
828 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
829 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
830 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
831 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
832 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
833 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
834 ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
835 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
836 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
837 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
838 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
839 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
840 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
841 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
842 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
843 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
844 ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
845 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
846 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
847 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
848 ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
849 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
850 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
851 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
852 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
853 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
854 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
855 ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
856 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
857 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
858 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
859 ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
860 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
861 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
862 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
863 ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
864 ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
865 ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
866 ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
867 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
868 ILCID_POSIX_MAP(root), /* root 0x00 */
869 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
870 ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
871 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
872 ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
873 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
874 ILCID_POSIX_MAP(se), /* se Sami 0x3b */
875 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
876 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
877 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
878 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
879 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
880 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
881 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
882 ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
883 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
884 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
885 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
886 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
887 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
888 ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
889 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
890 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
891 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
892 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
893 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
894 ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
895 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
896 ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
897 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
898 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
899 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
900 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
901 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
902 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
903 ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
904 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
905 ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
906 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
907 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
908 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
909 };
910
911 static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
912
/**
 * Counts the leading characters that two NUL-terminated IDs have in common.
 * Internal helper for getHostID(); not meant to be called from anywhere else.
 *
 * @param id1 first ID
 * @param id2 second ID
 * @return the length of the common prefix. This equals the full string length
 *         when both IDs are identical, and 0 when they differ at the first character.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;

    /* Stop at the end of id1 or at the first mismatch, whichever comes first. */
    for (matched = 0; id1[matched] != 0 && id1[matched] == id2[matched]; matched++) {
        /* nothing to do: the loop header does all the work */
    }
    return matched;
}
929
930 /**
931 * Searches for a Windows LCID
932 *
933 * @param posixid the Posix style locale id.
934 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
935 * no equivalent Windows LCID.
936 * @return the LCID
937 */
938 static uint32_t
getHostID(const ILcidPosixMap * this_0,const char * posixID,UErrorCode * status)939 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
940 {
941 int32_t bestIdx = 0;
942 int32_t bestIdxDiff = 0;
943 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
944 uint32_t idx;
945
946 for (idx = 0; idx < this_0->numRegions; idx++ ) {
947 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
948 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
949 if (posixIDlen == sameChars) {
950 /* Exact match */
951 return this_0->regionMaps[idx].hostID;
952 }
953 bestIdxDiff = sameChars;
954 bestIdx = idx;
955 }
956 }
957 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
958 /* We also have to make sure that sid and si and similar string subsets don't match. */
959 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
960 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
961 {
962 *status = U_USING_FALLBACK_WARNING;
963 return this_0->regionMaps[bestIdx].hostID;
964 }
965
966 /*no match found */
967 *status = U_ILLEGAL_ARGUMENT_ERROR;
968 return this_0->regionMaps->hostID;
969 }
970
971 static const char*
getPosixID(const ILcidPosixMap * this_0,uint32_t hostID)972 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
973 {
974 uint32_t i;
975 for (i = 0; i <= this_0->numRegions; i++)
976 {
977 if (this_0->regionMaps[i].hostID == hostID)
978 {
979 return this_0->regionMaps[i].posixID;
980 }
981 }
982
983 /* If you get here, then no matching region was found,
984 so return the language id with the wild card region. */
985 return this_0->regionMaps[0].posixID;
986 }
987
988 /*
989 //////////////////////////////////////
990 //
991 // LCID --> POSIX
992 //
993 /////////////////////////////////////
994 */
995 #ifdef USE_WINDOWS_LOCALE_API
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * buffer is a writable, NUL-terminated char array that is edited in place;
 * nothing is done unless len is at least 3. The remainder of the string is
 * shifted left by one character with uprv_memmove because source and
 * destination overlap inside the same buffer (strcat on overlapping
 * objects is undefined behaviour).
 * Note that buffer is evaluated more than once.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)
1011
1012 #endif
1013 U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid,char * posixID,int32_t posixIDCapacity,UErrorCode * status)1014 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1015 {
1016 uint16_t langID;
1017 uint32_t localeIndex;
1018 UBool bLookup = TRUE;
1019 const char *pPosixID = NULL;
1020
1021 #ifdef USE_WINDOWS_LOCALE_API
1022 int32_t tmpLen = 0;
1023 char locName[157]; /* ULOC_FULLNAME_CAPACITY */
1024
1025 tmpLen = GetLocaleInfoA(hostid, LOCALE_SNAME, (LPSTR)locName, UPRV_LENGTHOF(locName));
1026 if (tmpLen > 1) {
1027 /* Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1028 In such case, we need special mapping data found in the hardcoded table
1029 in this source file. */
1030 char *p = uprv_strchr(locName, '_');
1031 if (p) {
1032 /* Keep the base locale, without variant */
1033 *p = 0;
1034 tmpLen = uprv_strlen(locName);
1035 } else {
1036 /* No hardcoded table lookup necessary */
1037 bLookup = FALSE;
1038 }
1039 /* Change the tag separator from '-' to '_' */
1040 p = locName;
1041 while (*p) {
1042 if (*p == '-') {
1043 *p = '_';
1044 }
1045 p++;
1046 }
1047 FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1048 pPosixID = locName;
1049 }
1050 #endif
1051 if (bLookup) {
1052 const char *pCandidate = NULL;
1053 langID = LANGUAGE_LCID(hostid);
1054
1055 for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1056 if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1057 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1058 break;
1059 }
1060 }
1061
1062 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1063 If a match in the hardcoded table is longer than the Windows locale name without
1064 variant, we use the one as the result */
1065 if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1066 pPosixID = pCandidate;
1067 }
1068 }
1069
1070 if (pPosixID) {
1071 int32_t resLen = uprv_strlen(pPosixID);
1072 int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1073 uprv_memcpy(posixID, pPosixID, copyLen);
1074 if (resLen < posixIDCapacity) {
1075 posixID[resLen] = 0;
1076 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1077 *status = U_ZERO_ERROR;
1078 }
1079 } else if (resLen == posixIDCapacity) {
1080 *status = U_STRING_NOT_TERMINATED_WARNING;
1081 } else {
1082 *status = U_BUFFER_OVERFLOW_ERROR;
1083 }
1084 return resLen;
1085 }
1086
1087 /* no match found */
1088 *status = U_ILLEGAL_ARGUMENT_ERROR;
1089 return -1;
1090 }
1091
1092 /*
1093 //////////////////////////////////////
1094 //
1095 // POSIX --> LCID
1096 // This should only be called from uloc_getLCID.
1097 // The locale ID must be in canonical form.
1098 // langID is separate so that this file doesn't depend on the uloc_* API.
1099 //
1100 /////////////////////////////////////
1101 */
1102
1103 U_CAPI uint32_t
uprv_convertToLCID(const char * langID,const char * posixID,UErrorCode * status)1104 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1105 {
1106
1107 uint32_t low = 0;
1108 uint32_t high = gLocaleCount;
1109 uint32_t mid;
1110 uint32_t oldmid = 0;
1111 int32_t compVal;
1112
1113 uint32_t value = 0;
1114 uint32_t fallbackValue = (uint32_t)-1;
1115 UErrorCode myStatus;
1116 uint32_t idx;
1117
1118 /* Check for incomplete id. */
1119 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1120 return 0;
1121 }
1122
1123 /*Binary search for the map entry for normal cases */
1124
1125 while (high > low) /*binary search*/{
1126
1127 mid = (high+low) >> 1; /*Finds median*/
1128
1129 if (mid == oldmid)
1130 break;
1131
1132 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1133 if (compVal < 0){
1134 high = mid;
1135 }
1136 else if (compVal > 0){
1137 low = mid;
1138 }
1139 else /*we found it*/{
1140 return getHostID(&gPosixIDmap[mid], posixID, status);
1141 }
1142 oldmid = mid;
1143 }
1144
1145 /*
1146 * Sometimes we can't do a binary search on posixID because some LCIDs
1147 * go to different locales. We hit one of those special cases.
1148 */
1149 for (idx = 0; idx < gLocaleCount; idx++ ) {
1150 myStatus = U_ZERO_ERROR;
1151 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1152 if (myStatus == U_ZERO_ERROR) {
1153 return value;
1154 }
1155 else if (myStatus == U_USING_FALLBACK_WARNING) {
1156 fallbackValue = value;
1157 }
1158 }
1159
1160 if (fallbackValue != (uint32_t)-1) {
1161 *status = U_USING_FALLBACK_WARNING;
1162 return fallbackValue;
1163 }
1164
1165 /* no match found */
1166 *status = U_ILLEGAL_ARGUMENT_ERROR;
1167 return 0; /* return international (root) */
1168 }
1169
1170