1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2014-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9 #include <algorithm>
10
11 #include "unicode/utypes.h"
12 #include "unicode/unistr.h"
13 #include "unicode/uobject.h"
14
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "uassert.h"
19 #include "ucln_cmn.h"
20 #include "uhash.h"
21 #include "umutex.h"
22 #include "uresimp.h"
23 #include "uvector.h"
24 #include "udataswp.h" /* for InvChar functions */
25
26 static UHashtable* gLocExtKeyMap = NULL;
27 static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
28
// Bit flags for special types: each flag marks a family of type values that a
// key accepts by syntax check instead of by lookup in its type map.
enum SpecialType {
    SPECIALTYPE_NONE = 0,
    SPECIALTYPE_CODEPOINTS = 1 << 0,
    SPECIALTYPE_REORDER_CODE = 1 << 1,
    SPECIALTYPE_RG_KEY_VALUE = 1 << 2
};
36
37 struct LocExtKeyData : public icu::UMemory {
38 const char* legacyId;
39 const char* bcpId;
40 icu::LocalUHashtablePointer typeMap;
41 uint32_t specialTypes;
42 };
43
44 struct LocExtType : public icu::UMemory {
45 const char* legacyId;
46 const char* bcpId;
47 };
48
49 static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = NULL;
50 static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = NULL;
51 static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = NULL;
52
53 U_CDECL_BEGIN
54
55 static UBool U_CALLCONV
uloc_key_type_cleanup(void)56 uloc_key_type_cleanup(void) {
57 if (gLocExtKeyMap != NULL) {
58 uhash_close(gLocExtKeyMap);
59 gLocExtKeyMap = NULL;
60 }
61
62 delete gLocExtKeyDataEntries;
63 gLocExtKeyDataEntries = NULL;
64
65 delete gLocExtTypeEntries;
66 gLocExtTypeEntries = NULL;
67
68 delete gKeyTypeStringPool;
69 gKeyTypeStringPool = NULL;
70
71 gLocExtKeyMapInitOnce.reset();
72 return TRUE;
73 }
74
75 U_CDECL_END
76
77
78 static void U_CALLCONV
initFromResourceBundle(UErrorCode & sts)79 initFromResourceBundle(UErrorCode& sts) {
80 U_NAMESPACE_USE
81 ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
82
83 gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
84
85 LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
86 LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
87 LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
88
89 if (U_FAILURE(sts)) {
90 return;
91 }
92
93 UErrorCode tmpSts = U_ZERO_ERROR;
94 LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
95 tmpSts = U_ZERO_ERROR;
96 LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
97
98 // initialize pools storing dynamically allocated objects
99 gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
100 if (gKeyTypeStringPool == NULL) {
101 sts = U_MEMORY_ALLOCATION_ERROR;
102 return;
103 }
104 gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>;
105 if (gLocExtKeyDataEntries == NULL) {
106 sts = U_MEMORY_ALLOCATION_ERROR;
107 return;
108 }
109 gLocExtTypeEntries = new icu::MemoryPool<LocExtType>;
110 if (gLocExtTypeEntries == NULL) {
111 sts = U_MEMORY_ALLOCATION_ERROR;
112 return;
113 }
114
115 // iterate through keyMap resource
116 LocalUResourceBundlePointer keyMapEntry;
117
118 while (ures_hasNext(keyMapRes.getAlias())) {
119 keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
120 if (U_FAILURE(sts)) {
121 break;
122 }
123 const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
124 UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts);
125 if (U_FAILURE(sts)) {
126 break;
127 }
128
129 // empty value indicates that BCP key is same with the legacy key.
130 const char* bcpKeyId = legacyKeyId;
131 if (!uBcpKeyId.isEmpty()) {
132 icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
133 if (bcpKeyIdBuf == NULL) {
134 sts = U_MEMORY_ALLOCATION_ERROR;
135 break;
136 }
137 bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
138 if (U_FAILURE(sts)) {
139 break;
140 }
141 bcpKeyId = bcpKeyIdBuf->data();
142 }
143
144 UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
145
146 UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
147 if (U_FAILURE(sts)) {
148 break;
149 }
150 uint32_t specialTypes = SPECIALTYPE_NONE;
151
152 LocalUResourceBundlePointer typeAliasResByKey;
153 LocalUResourceBundlePointer bcpTypeAliasResByKey;
154
155 if (typeAliasRes.isValid()) {
156 tmpSts = U_ZERO_ERROR;
157 typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
158 if (U_FAILURE(tmpSts)) {
159 typeAliasResByKey.orphan();
160 }
161 }
162 if (bcpTypeAliasRes.isValid()) {
163 tmpSts = U_ZERO_ERROR;
164 bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
165 if (U_FAILURE(tmpSts)) {
166 bcpTypeAliasResByKey.orphan();
167 }
168 }
169
170 // look up type map for the key, and walk through the mapping data
171 LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &sts));
172 if (U_FAILURE(sts)) {
173 // We fail here if typeMap does not have an entry corresponding to every entry in keyMap (should
174 // not happen for valid keyTypeData), or if ures_getByKeyfails fails for some other reason
175 // (e.g. data file cannot be loaded, using stubdata, over-aggressive data filtering has removed
176 // something like timezoneTypes.res, etc.). Error code is already set. See ICU-21669.
177 UPRV_UNREACHABLE_ASSERT;
178 } else {
179 LocalUResourceBundlePointer typeMapEntry;
180
181 while (ures_hasNext(typeMapResByKey.getAlias())) {
182 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
183 if (U_FAILURE(sts)) {
184 break;
185 }
186 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
187
188 // special types
189 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
190 specialTypes |= SPECIALTYPE_CODEPOINTS;
191 continue;
192 }
193 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
194 specialTypes |= SPECIALTYPE_REORDER_CODE;
195 continue;
196 }
197 if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
198 specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
199 continue;
200 }
201
202 if (isTZ) {
203 // a timezone key uses a colon instead of a slash in the resource.
204 // e.g. America:Los_Angeles
205 if (uprv_strchr(legacyTypeId, ':') != NULL) {
206 icu::CharString* legacyTypeIdBuf =
207 gKeyTypeStringPool->create(legacyTypeId, sts);
208 if (legacyTypeIdBuf == NULL) {
209 sts = U_MEMORY_ALLOCATION_ERROR;
210 break;
211 }
212 if (U_FAILURE(sts)) {
213 break;
214 }
215 std::replace(
216 legacyTypeIdBuf->data(),
217 legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
218 ':', '/');
219 legacyTypeId = legacyTypeIdBuf->data();
220 }
221 }
222
223 UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts);
224 if (U_FAILURE(sts)) {
225 break;
226 }
227
228 // empty value indicates that BCP type is same with the legacy type.
229 const char* bcpTypeId = legacyTypeId;
230 if (!uBcpTypeId.isEmpty()) {
231 icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
232 if (bcpTypeIdBuf == NULL) {
233 sts = U_MEMORY_ALLOCATION_ERROR;
234 break;
235 }
236 bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
237 if (U_FAILURE(sts)) {
238 break;
239 }
240 bcpTypeId = bcpTypeIdBuf->data();
241 }
242
243 // Note: legacy type value should never be
244 // equivalent to bcp type value of a different
245 // type under the same key. So we use a single
246 // map for lookup.
247 LocExtType* t = gLocExtTypeEntries->create();
248 if (t == NULL) {
249 sts = U_MEMORY_ALLOCATION_ERROR;
250 break;
251 }
252 t->bcpId = bcpTypeId;
253 t->legacyId = legacyTypeId;
254
255 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
256 if (bcpTypeId != legacyTypeId) {
257 // different type value
258 uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
259 }
260 if (U_FAILURE(sts)) {
261 break;
262 }
263
264 // also put aliases in the map
265 if (typeAliasResByKey.isValid()) {
266 LocalUResourceBundlePointer typeAliasDataEntry;
267
268 ures_resetIterator(typeAliasResByKey.getAlias());
269 while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
270 int32_t toLen;
271 typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
272 const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
273 if (U_FAILURE(sts)) {
274 break;
275 }
276 // check if this is an alias of canonical legacy type
277 if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
278 const char* from = ures_getKey(typeAliasDataEntry.getAlias());
279 if (isTZ) {
280 // replace colon with slash if necessary
281 if (uprv_strchr(from, ':') != NULL) {
282 icu::CharString* fromBuf =
283 gKeyTypeStringPool->create(from, sts);
284 if (fromBuf == NULL) {
285 sts = U_MEMORY_ALLOCATION_ERROR;
286 break;
287 }
288 if (U_FAILURE(sts)) {
289 break;
290 }
291 std::replace(
292 fromBuf->data(),
293 fromBuf->data() + fromBuf->length(),
294 ':', '/');
295 from = fromBuf->data();
296 }
297 }
298 uhash_put(typeDataMap, (void*)from, t, &sts);
299 }
300 }
301 if (U_FAILURE(sts)) {
302 break;
303 }
304 }
305
306 if (bcpTypeAliasResByKey.isValid()) {
307 LocalUResourceBundlePointer bcpTypeAliasDataEntry;
308
309 ures_resetIterator(bcpTypeAliasResByKey.getAlias());
310 while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
311 int32_t toLen;
312 bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
313 const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
314 if (U_FAILURE(sts)) {
315 break;
316 }
317 // check if this is an alias of bcp type
318 if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) {
319 const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
320 uhash_put(typeDataMap, (void*)from, t, &sts);
321 }
322 }
323 if (U_FAILURE(sts)) {
324 break;
325 }
326 }
327 }
328 }
329 if (U_FAILURE(sts)) {
330 break;
331 }
332
333 LocExtKeyData* keyData = gLocExtKeyDataEntries->create();
334 if (keyData == NULL) {
335 sts = U_MEMORY_ALLOCATION_ERROR;
336 break;
337 }
338 keyData->bcpId = bcpKeyId;
339 keyData->legacyId = legacyKeyId;
340 keyData->specialTypes = specialTypes;
341 keyData->typeMap.adoptInstead(typeDataMap);
342
343 uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
344 if (legacyKeyId != bcpKeyId) {
345 // different key value
346 uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
347 }
348 if (U_FAILURE(sts)) {
349 break;
350 }
351 }
352 }
353
354 static UBool
init()355 init() {
356 UErrorCode sts = U_ZERO_ERROR;
357 umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
358 if (U_FAILURE(sts)) {
359 return FALSE;
360 }
361 return TRUE;
362 }
363
364 static UBool
isSpecialTypeCodepoints(const char * val)365 isSpecialTypeCodepoints(const char* val) {
366 int32_t subtagLen = 0;
367 const char* p = val;
368 while (*p) {
369 if (*p == '-') {
370 if (subtagLen < 4 || subtagLen > 6) {
371 return FALSE;
372 }
373 subtagLen = 0;
374 } else if ((*p >= '0' && *p <= '9') ||
375 (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
376 (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
377 subtagLen++;
378 } else {
379 return FALSE;
380 }
381 p++;
382 }
383 return (subtagLen >= 4 && subtagLen <= 6);
384 }
385
386 static UBool
isSpecialTypeReorderCode(const char * val)387 isSpecialTypeReorderCode(const char* val) {
388 int32_t subtagLen = 0;
389 const char* p = val;
390 while (*p) {
391 if (*p == '-') {
392 if (subtagLen < 3 || subtagLen > 8) {
393 return FALSE;
394 }
395 subtagLen = 0;
396 } else if (uprv_isASCIILetter(*p)) {
397 subtagLen++;
398 } else {
399 return FALSE;
400 }
401 p++;
402 }
403 return (subtagLen >=3 && subtagLen <=8);
404 }
405
406 static UBool
isSpecialTypeRgKeyValue(const char * val)407 isSpecialTypeRgKeyValue(const char* val) {
408 int32_t subtagLen = 0;
409 const char* p = val;
410 while (*p) {
411 if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
412 (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
413 subtagLen++;
414 } else {
415 return FALSE;
416 }
417 p++;
418 }
419 return (subtagLen == 6);
420 }
421
422 U_CFUNC const char*
ulocimp_toBcpKey(const char * key)423 ulocimp_toBcpKey(const char* key) {
424 if (!init()) {
425 return NULL;
426 }
427
428 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
429 if (keyData != NULL) {
430 return keyData->bcpId;
431 }
432 return NULL;
433 }
434
435 U_CFUNC const char*
ulocimp_toLegacyKey(const char * key)436 ulocimp_toLegacyKey(const char* key) {
437 if (!init()) {
438 return NULL;
439 }
440
441 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
442 if (keyData != NULL) {
443 return keyData->legacyId;
444 }
445 return NULL;
446 }
447
448 U_CFUNC const char*
ulocimp_toBcpType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)449 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
450 if (isKnownKey != NULL) {
451 *isKnownKey = FALSE;
452 }
453 if (isSpecialType != NULL) {
454 *isSpecialType = FALSE;
455 }
456
457 if (!init()) {
458 return NULL;
459 }
460
461 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
462 if (keyData != NULL) {
463 if (isKnownKey != NULL) {
464 *isKnownKey = TRUE;
465 }
466 LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
467 if (t != NULL) {
468 return t->bcpId;
469 }
470 if (keyData->specialTypes != SPECIALTYPE_NONE) {
471 UBool matched = FALSE;
472 if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
473 matched = isSpecialTypeCodepoints(type);
474 }
475 if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
476 matched = isSpecialTypeReorderCode(type);
477 }
478 if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
479 matched = isSpecialTypeRgKeyValue(type);
480 }
481 if (matched) {
482 if (isSpecialType != NULL) {
483 *isSpecialType = TRUE;
484 }
485 return type;
486 }
487 }
488 }
489 return NULL;
490 }
491
492
493 U_CFUNC const char*
ulocimp_toLegacyType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)494 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
495 if (isKnownKey != NULL) {
496 *isKnownKey = FALSE;
497 }
498 if (isSpecialType != NULL) {
499 *isSpecialType = FALSE;
500 }
501
502 if (!init()) {
503 return NULL;
504 }
505
506 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
507 if (keyData != NULL) {
508 if (isKnownKey != NULL) {
509 *isKnownKey = TRUE;
510 }
511 LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
512 if (t != NULL) {
513 return t->legacyId;
514 }
515 if (keyData->specialTypes != SPECIALTYPE_NONE) {
516 UBool matched = FALSE;
517 if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
518 matched = isSpecialTypeCodepoints(type);
519 }
520 if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
521 matched = isSpecialTypeReorderCode(type);
522 }
523 if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
524 matched = isSpecialTypeRgKeyValue(type);
525 }
526 if (matched) {
527 if (isSpecialType != NULL) {
528 *isSpecialType = TRUE;
529 }
530 return type;
531 }
532 }
533 }
534 return NULL;
535 }
536
537