1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2014-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 #include <algorithm>
10 
11 #include "unicode/utypes.h"
12 #include "unicode/unistr.h"
13 #include "unicode/uobject.h"
14 
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "uassert.h"
19 #include "ucln_cmn.h"
20 #include "uhash.h"
21 #include "umutex.h"
22 #include "uresimp.h"
23 #include "uvector.h"
24 #include "udataswp.h" /* for InvChar functions */
25 
26 static UHashtable* gLocExtKeyMap = NULL;
27 static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
28 
// Bit flags recorded per key (LocExtKeyData::specialTypes) for the "special"
// type categories that are validated by pattern instead of by table lookup.
typedef enum {
    SPECIALTYPE_NONE         = 0,
    SPECIALTYPE_CODEPOINTS   = 1 << 0,
    SPECIALTYPE_REORDER_CODE = 1 << 1,
    SPECIALTYPE_RG_KEY_VALUE = 1 << 2
} SpecialType;
36 
37 struct LocExtKeyData : public icu::UMemory {
38     const char*     legacyId;
39     const char*     bcpId;
40     icu::LocalUHashtablePointer typeMap;
41     uint32_t        specialTypes;
42 };
43 
44 struct LocExtType : public icu::UMemory {
45     const char*     legacyId;
46     const char*     bcpId;
47 };
48 
49 static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = NULL;
50 static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = NULL;
51 static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = NULL;
52 
53 U_CDECL_BEGIN
54 
55 static UBool U_CALLCONV
uloc_key_type_cleanup(void)56 uloc_key_type_cleanup(void) {
57     if (gLocExtKeyMap != NULL) {
58         uhash_close(gLocExtKeyMap);
59         gLocExtKeyMap = NULL;
60     }
61 
62     delete gLocExtKeyDataEntries;
63     gLocExtKeyDataEntries = NULL;
64 
65     delete gLocExtTypeEntries;
66     gLocExtTypeEntries = NULL;
67 
68     delete gKeyTypeStringPool;
69     gKeyTypeStringPool = NULL;
70 
71     gLocExtKeyMapInitOnce.reset();
72     return TRUE;
73 }
74 
75 U_CDECL_END
76 
77 
78 static void U_CALLCONV
initFromResourceBundle(UErrorCode & sts)79 initFromResourceBundle(UErrorCode& sts) {
80     U_NAMESPACE_USE
81     ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
82 
83     gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
84 
85     LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
86     LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
87     LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
88 
89     if (U_FAILURE(sts)) {
90         return;
91     }
92 
93     UErrorCode tmpSts = U_ZERO_ERROR;
94     LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
95     tmpSts = U_ZERO_ERROR;
96     LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
97 
98     // initialize pools storing dynamically allocated objects
99     gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
100     if (gKeyTypeStringPool == NULL) {
101         sts = U_MEMORY_ALLOCATION_ERROR;
102         return;
103     }
104     gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>;
105     if (gLocExtKeyDataEntries == NULL) {
106         sts = U_MEMORY_ALLOCATION_ERROR;
107         return;
108     }
109     gLocExtTypeEntries = new icu::MemoryPool<LocExtType>;
110     if (gLocExtTypeEntries == NULL) {
111         sts = U_MEMORY_ALLOCATION_ERROR;
112         return;
113     }
114 
115     // iterate through keyMap resource
116     LocalUResourceBundlePointer keyMapEntry;
117 
118     while (ures_hasNext(keyMapRes.getAlias())) {
119         keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
120         if (U_FAILURE(sts)) {
121             break;
122         }
123         const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
124         UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts);
125         if (U_FAILURE(sts)) {
126             break;
127         }
128 
129         // empty value indicates that BCP key is same with the legacy key.
130         const char* bcpKeyId = legacyKeyId;
131         if (!uBcpKeyId.isEmpty()) {
132             icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
133             if (bcpKeyIdBuf == NULL) {
134                 sts = U_MEMORY_ALLOCATION_ERROR;
135                 break;
136             }
137             bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
138             if (U_FAILURE(sts)) {
139                 break;
140             }
141             bcpKeyId = bcpKeyIdBuf->data();
142         }
143 
144         UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
145 
146         UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
147         if (U_FAILURE(sts)) {
148             break;
149         }
150         uint32_t specialTypes = SPECIALTYPE_NONE;
151 
152         LocalUResourceBundlePointer typeAliasResByKey;
153         LocalUResourceBundlePointer bcpTypeAliasResByKey;
154 
155         if (typeAliasRes.isValid()) {
156             tmpSts = U_ZERO_ERROR;
157             typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
158             if (U_FAILURE(tmpSts)) {
159                 typeAliasResByKey.orphan();
160             }
161         }
162         if (bcpTypeAliasRes.isValid()) {
163             tmpSts = U_ZERO_ERROR;
164             bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
165             if (U_FAILURE(tmpSts)) {
166                 bcpTypeAliasResByKey.orphan();
167             }
168         }
169 
170         // look up type map for the key, and walk through the mapping data
171         LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &sts));
172         if (U_FAILURE(sts)) {
173             // We fail here if typeMap does not have an entry corresponding to every entry in keyMap (should
174             // not happen for valid keyTypeData), or if ures_getByKeyfails fails for some other reason
175             // (e.g. data file cannot be loaded, using stubdata, over-aggressive data filtering has removed
176             // something like timezoneTypes.res, etc.). Error code is already set. See ICU-21669.
177             UPRV_UNREACHABLE_ASSERT;
178         } else {
179             LocalUResourceBundlePointer typeMapEntry;
180 
181             while (ures_hasNext(typeMapResByKey.getAlias())) {
182                 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
183                 if (U_FAILURE(sts)) {
184                     break;
185                 }
186                 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
187 
188                 // special types
189                 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
190                     specialTypes |= SPECIALTYPE_CODEPOINTS;
191                     continue;
192                 }
193                 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
194                     specialTypes |= SPECIALTYPE_REORDER_CODE;
195                     continue;
196                 }
197                 if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
198                     specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
199                     continue;
200                 }
201 
202                 if (isTZ) {
203                     // a timezone key uses a colon instead of a slash in the resource.
204                     // e.g. America:Los_Angeles
205                     if (uprv_strchr(legacyTypeId, ':') != NULL) {
206                         icu::CharString* legacyTypeIdBuf =
207                                 gKeyTypeStringPool->create(legacyTypeId, sts);
208                         if (legacyTypeIdBuf == NULL) {
209                             sts = U_MEMORY_ALLOCATION_ERROR;
210                             break;
211                         }
212                         if (U_FAILURE(sts)) {
213                             break;
214                         }
215                         std::replace(
216                                 legacyTypeIdBuf->data(),
217                                 legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
218                                 ':', '/');
219                         legacyTypeId = legacyTypeIdBuf->data();
220                     }
221                 }
222 
223                 UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts);
224                 if (U_FAILURE(sts)) {
225                     break;
226                 }
227 
228                 // empty value indicates that BCP type is same with the legacy type.
229                 const char* bcpTypeId = legacyTypeId;
230                 if (!uBcpTypeId.isEmpty()) {
231                     icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
232                     if (bcpTypeIdBuf == NULL) {
233                         sts = U_MEMORY_ALLOCATION_ERROR;
234                         break;
235                     }
236                     bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
237                     if (U_FAILURE(sts)) {
238                         break;
239                     }
240                     bcpTypeId = bcpTypeIdBuf->data();
241                 }
242 
243                 // Note: legacy type value should never be
244                 // equivalent to bcp type value of a different
245                 // type under the same key. So we use a single
246                 // map for lookup.
247                 LocExtType* t = gLocExtTypeEntries->create();
248                 if (t == NULL) {
249                     sts = U_MEMORY_ALLOCATION_ERROR;
250                     break;
251                 }
252                 t->bcpId = bcpTypeId;
253                 t->legacyId = legacyTypeId;
254 
255                 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
256                 if (bcpTypeId != legacyTypeId) {
257                     // different type value
258                     uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
259                 }
260                 if (U_FAILURE(sts)) {
261                     break;
262                 }
263 
264                 // also put aliases in the map
265                 if (typeAliasResByKey.isValid()) {
266                     LocalUResourceBundlePointer typeAliasDataEntry;
267 
268                     ures_resetIterator(typeAliasResByKey.getAlias());
269                     while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
270                         int32_t toLen;
271                         typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
272                         const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
273                         if (U_FAILURE(sts)) {
274                             break;
275                         }
276                         // check if this is an alias of canonical legacy type
277                         if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
278                             const char* from = ures_getKey(typeAliasDataEntry.getAlias());
279                             if (isTZ) {
280                                 // replace colon with slash if necessary
281                                 if (uprv_strchr(from, ':') != NULL) {
282                                     icu::CharString* fromBuf =
283                                             gKeyTypeStringPool->create(from, sts);
284                                     if (fromBuf == NULL) {
285                                         sts = U_MEMORY_ALLOCATION_ERROR;
286                                         break;
287                                     }
288                                     if (U_FAILURE(sts)) {
289                                         break;
290                                     }
291                                     std::replace(
292                                             fromBuf->data(),
293                                             fromBuf->data() + fromBuf->length(),
294                                             ':', '/');
295                                     from = fromBuf->data();
296                                 }
297                             }
298                             uhash_put(typeDataMap, (void*)from, t, &sts);
299                         }
300                     }
301                     if (U_FAILURE(sts)) {
302                         break;
303                     }
304                 }
305 
306                 if (bcpTypeAliasResByKey.isValid()) {
307                     LocalUResourceBundlePointer bcpTypeAliasDataEntry;
308 
309                     ures_resetIterator(bcpTypeAliasResByKey.getAlias());
310                     while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
311                         int32_t toLen;
312                         bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
313                         const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
314                         if (U_FAILURE(sts)) {
315                             break;
316                         }
317                         // check if this is an alias of bcp type
318                         if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) {
319                             const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
320                             uhash_put(typeDataMap, (void*)from, t, &sts);
321                         }
322                     }
323                     if (U_FAILURE(sts)) {
324                         break;
325                     }
326                 }
327             }
328         }
329         if (U_FAILURE(sts)) {
330             break;
331         }
332 
333         LocExtKeyData* keyData = gLocExtKeyDataEntries->create();
334         if (keyData == NULL) {
335             sts = U_MEMORY_ALLOCATION_ERROR;
336             break;
337         }
338         keyData->bcpId = bcpKeyId;
339         keyData->legacyId = legacyKeyId;
340         keyData->specialTypes = specialTypes;
341         keyData->typeMap.adoptInstead(typeDataMap);
342 
343         uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
344         if (legacyKeyId != bcpKeyId) {
345             // different key value
346             uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
347         }
348         if (U_FAILURE(sts)) {
349             break;
350         }
351     }
352 }
353 
354 static UBool
init()355 init() {
356     UErrorCode sts = U_ZERO_ERROR;
357     umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
358     if (U_FAILURE(sts)) {
359         return FALSE;
360     }
361     return TRUE;
362 }
363 
364 static UBool
isSpecialTypeCodepoints(const char * val)365 isSpecialTypeCodepoints(const char* val) {
366     int32_t subtagLen = 0;
367     const char* p = val;
368     while (*p) {
369         if (*p == '-') {
370             if (subtagLen < 4 || subtagLen > 6) {
371                 return FALSE;
372             }
373             subtagLen = 0;
374         } else if ((*p >= '0' && *p <= '9') ||
375                     (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
376                     (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
377             subtagLen++;
378         } else {
379             return FALSE;
380         }
381         p++;
382     }
383     return (subtagLen >= 4 && subtagLen <= 6);
384 }
385 
386 static UBool
isSpecialTypeReorderCode(const char * val)387 isSpecialTypeReorderCode(const char* val) {
388     int32_t subtagLen = 0;
389     const char* p = val;
390     while (*p) {
391         if (*p == '-') {
392             if (subtagLen < 3 || subtagLen > 8) {
393                 return FALSE;
394             }
395             subtagLen = 0;
396         } else if (uprv_isASCIILetter(*p)) {
397             subtagLen++;
398         } else {
399             return FALSE;
400         }
401         p++;
402     }
403     return (subtagLen >=3 && subtagLen <=8);
404 }
405 
406 static UBool
isSpecialTypeRgKeyValue(const char * val)407 isSpecialTypeRgKeyValue(const char* val) {
408     int32_t subtagLen = 0;
409     const char* p = val;
410     while (*p) {
411         if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
412                     (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
413             subtagLen++;
414         } else {
415             return FALSE;
416         }
417         p++;
418     }
419     return (subtagLen == 6);
420 }
421 
422 U_CFUNC const char*
ulocimp_toBcpKey(const char * key)423 ulocimp_toBcpKey(const char* key) {
424     if (!init()) {
425         return NULL;
426     }
427 
428     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
429     if (keyData != NULL) {
430         return keyData->bcpId;
431     }
432     return NULL;
433 }
434 
435 U_CFUNC const char*
ulocimp_toLegacyKey(const char * key)436 ulocimp_toLegacyKey(const char* key) {
437     if (!init()) {
438         return NULL;
439     }
440 
441     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
442     if (keyData != NULL) {
443         return keyData->legacyId;
444     }
445     return NULL;
446 }
447 
448 U_CFUNC const char*
ulocimp_toBcpType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)449 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
450     if (isKnownKey != NULL) {
451         *isKnownKey = FALSE;
452     }
453     if (isSpecialType != NULL) {
454         *isSpecialType = FALSE;
455     }
456 
457     if (!init()) {
458         return NULL;
459     }
460 
461     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
462     if (keyData != NULL) {
463         if (isKnownKey != NULL) {
464             *isKnownKey = TRUE;
465         }
466         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
467         if (t != NULL) {
468             return t->bcpId;
469         }
470         if (keyData->specialTypes != SPECIALTYPE_NONE) {
471             UBool matched = FALSE;
472             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
473                 matched = isSpecialTypeCodepoints(type);
474             }
475             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
476                 matched = isSpecialTypeReorderCode(type);
477             }
478             if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
479                 matched = isSpecialTypeRgKeyValue(type);
480             }
481             if (matched) {
482                 if (isSpecialType != NULL) {
483                     *isSpecialType = TRUE;
484                 }
485                 return type;
486             }
487         }
488     }
489     return NULL;
490 }
491 
492 
493 U_CFUNC const char*
ulocimp_toLegacyType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)494 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
495     if (isKnownKey != NULL) {
496         *isKnownKey = FALSE;
497     }
498     if (isSpecialType != NULL) {
499         *isSpecialType = FALSE;
500     }
501 
502     if (!init()) {
503         return NULL;
504     }
505 
506     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
507     if (keyData != NULL) {
508         if (isKnownKey != NULL) {
509             *isKnownKey = TRUE;
510         }
511         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
512         if (t != NULL) {
513             return t->legacyId;
514         }
515         if (keyData->specialTypes != SPECIALTYPE_NONE) {
516             UBool matched = FALSE;
517             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
518                 matched = isSpecialTypeCodepoints(type);
519             }
520             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
521                 matched = isSpecialTypeReorderCode(type);
522             }
523             if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
524                 matched = isSpecialTypeRgKeyValue(type);
525             }
526             if (matched) {
527                 if (isSpecialType != NULL) {
528                     *isSpecialType = TRUE;
529                 }
530                 return type;
531             }
532         }
533     }
534     return NULL;
535 }
536 
537