1 /*
2 **********************************************************************
3 *   Copyright (C) 2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 #include "unicode/utypes.h"
8 
9 #include "cstring.h"
10 #include "uassert.h"
11 #include "ucln_cmn.h"
12 #include "uhash.h"
13 #include "umutex.h"
14 #include "uresimp.h"
15 #include "uvector.h"
16 
17 static UHashtable* gLocExtKeyMap = NULL;
18 static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
19 static icu::UVector* gKeyTypeStringPool = NULL;
20 static icu::UVector* gLocExtKeyDataEntries = NULL;
21 static icu::UVector* gLocExtTypeEntries = NULL;
22 
// Bit flags recording which "special" (pattern-matched rather than
// enumerated) type values a key accepts. Flags may be OR-ed together.
enum SpecialType {
    SPECIALTYPE_NONE         = 0,
    SPECIALTYPE_CODEPOINTS   = 1 << 0,
    SPECIALTYPE_REORDER_CODE = 1 << 1
};
29 
30 typedef struct LocExtKeyData {
31     const char*     legacyId;
32     const char*     bcpId;
33     UHashtable*     typeMap;
34     uint32_t        specialTypes;
35 } LocExtKeyData;
36 
// Per-type record stored in a key's typeMap.
struct LocExtType {
    const char*     legacyId;   // legacy type id
    const char*     bcpId;      // BCP 47 type id (may be the same pointer as legacyId)
};
41 
42 U_CDECL_BEGIN
43 
44 static UBool U_CALLCONV
uloc_key_type_cleanup(void)45 uloc_key_type_cleanup(void) {
46     if (gLocExtKeyMap != NULL) {
47         uhash_close(gLocExtKeyMap);
48         gLocExtKeyMap = NULL;
49     }
50 
51     delete gLocExtKeyDataEntries;
52     gLocExtKeyDataEntries = NULL;
53 
54     delete gLocExtTypeEntries;
55     gLocExtTypeEntries = NULL;
56 
57     delete gKeyTypeStringPool;
58     gKeyTypeStringPool = NULL;
59 
60     gLocExtKeyMapInitOnce.reset();
61     return TRUE;
62 }
63 
64 static void U_CALLCONV
uloc_deleteKeyTypeStringPoolEntry(void * obj)65 uloc_deleteKeyTypeStringPoolEntry(void* obj) {
66     uprv_free(obj);
67 }
68 
69 static void U_CALLCONV
uloc_deleteKeyDataEntry(void * obj)70 uloc_deleteKeyDataEntry(void* obj) {
71     LocExtKeyData* keyData = (LocExtKeyData*)obj;
72     if (keyData->typeMap != NULL) {
73         uhash_close(keyData->typeMap);
74     }
75     uprv_free(keyData);
76 }
77 
78 static void U_CALLCONV
uloc_deleteTypeEntry(void * obj)79 uloc_deleteTypeEntry(void* obj) {
80     uprv_free(obj);
81 }
82 
83 U_CDECL_END
84 
85 
86 static void U_CALLCONV
initFromResourceBundle(UErrorCode & sts)87 initFromResourceBundle(UErrorCode& sts) {
88     U_NAMESPACE_USE
89     ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
90 
91     gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
92 
93     LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
94     LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
95     LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
96 
97     if (U_FAILURE(sts)) {
98         return;
99     }
100 
101     UErrorCode tmpSts = U_ZERO_ERROR;
102     LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
103     tmpSts = U_ZERO_ERROR;
104     LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
105 
106     // initialize vectors storing dynamically allocated objects
107     gKeyTypeStringPool = new UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts);
108     if (gKeyTypeStringPool == NULL) {
109         if (U_SUCCESS(sts)) {
110             sts = U_MEMORY_ALLOCATION_ERROR;
111         }
112     }
113     if (U_FAILURE(sts)) {
114         return;
115     }
116     gLocExtKeyDataEntries = new UVector(uloc_deleteKeyDataEntry, NULL, sts);
117     if (gLocExtKeyDataEntries == NULL) {
118         if (U_SUCCESS(sts)) {
119             sts = U_MEMORY_ALLOCATION_ERROR;
120         }
121     }
122     if (U_FAILURE(sts)) {
123         return;
124     }
125     gLocExtTypeEntries = new UVector(uloc_deleteTypeEntry, NULL, sts);
126     if (gLocExtTypeEntries == NULL) {
127         if (U_SUCCESS(sts)) {
128             sts = U_MEMORY_ALLOCATION_ERROR;
129         }
130     }
131     if (U_FAILURE(sts)) {
132         return;
133     }
134 
135     // iterate through keyMap resource
136     LocalUResourceBundlePointer keyMapEntry;
137 
138     while (ures_hasNext(keyMapRes.getAlias())) {
139         keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
140         if (U_FAILURE(sts)) {
141             break;
142         }
143         const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
144         int32_t bcpKeyIdLen = 0;
145         const UChar* uBcpKeyId = ures_getString(keyMapEntry.getAlias(), &bcpKeyIdLen, &sts);
146         if (U_FAILURE(sts)) {
147             break;
148         }
149 
150         // empty value indicates that BCP key is same with the legacy key.
151         const char* bcpKeyId = legacyKeyId;
152         if (bcpKeyIdLen > 0) {
153             char* bcpKeyIdBuf = (char*)uprv_malloc(bcpKeyIdLen + 1);
154             if (bcpKeyIdBuf == NULL) {
155                 sts = U_MEMORY_ALLOCATION_ERROR;
156                 break;
157             }
158             u_UCharsToChars(uBcpKeyId, bcpKeyIdBuf, bcpKeyIdLen);
159             bcpKeyIdBuf[bcpKeyIdLen] = 0;
160             gKeyTypeStringPool->addElement(bcpKeyIdBuf, sts);
161             if (U_FAILURE(sts)) {
162                 break;
163             }
164             bcpKeyId = bcpKeyIdBuf;
165         }
166 
167         UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
168 
169         UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
170         if (U_FAILURE(sts)) {
171             break;
172         }
173         uint32_t specialTypes = SPECIALTYPE_NONE;
174 
175         LocalUResourceBundlePointer typeAliasResByKey;
176         LocalUResourceBundlePointer bcpTypeAliasResByKey;
177 
178         if (typeAliasRes.isValid()) {
179             tmpSts = U_ZERO_ERROR;
180             typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
181             if (U_FAILURE(tmpSts)) {
182                 typeAliasResByKey.orphan();
183             }
184         }
185         if (bcpTypeAliasRes.isValid()) {
186             tmpSts = U_ZERO_ERROR;
187             bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
188             if (U_FAILURE(tmpSts)) {
189                 bcpTypeAliasResByKey.orphan();
190             }
191         }
192 
193         // look up type map for the key, and walk through the mapping data
194         tmpSts = U_ZERO_ERROR;
195         LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
196         if (U_FAILURE(tmpSts)) {
197             // type map for each key must exist
198             U_ASSERT(FALSE);
199         } else {
200             LocalUResourceBundlePointer typeMapEntry;
201 
202             while (ures_hasNext(typeMapResByKey.getAlias())) {
203                 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
204                 if (U_FAILURE(sts)) {
205                     break;
206                 }
207                 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
208 
209                 // special types
210                 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
211                     specialTypes |= SPECIALTYPE_CODEPOINTS;
212                     continue;
213                 }
214                 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
215                     specialTypes |= SPECIALTYPE_REORDER_CODE;
216                     continue;
217                 }
218 
219                 if (isTZ) {
220                     // a timezone key uses a colon instead of a slash in the resource.
221                     // e.g. America:Los_Angeles
222                     if (uprv_strchr(legacyTypeId, ':') != NULL) {
223                         int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId);
224                         char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);
225                         if (legacyTypeIdBuf == NULL) {
226                             sts = U_MEMORY_ALLOCATION_ERROR;
227                             break;
228                         }
229                         const char* p = legacyTypeId;
230                         char* q = legacyTypeIdBuf;
231                         while (*p) {
232                             if (*p == ':') {
233                                 *q++ = '/';
234                             } else {
235                                 *q++ = *p;
236                             }
237                             p++;
238                         }
239                         *q = 0;
240 
241                         gKeyTypeStringPool->addElement(legacyTypeIdBuf, sts);
242                         if (U_FAILURE(sts)) {
243                             break;
244                         }
245                         legacyTypeId = legacyTypeIdBuf;
246                     }
247                 }
248 
249                 int32_t bcpTypeIdLen = 0;
250                 const UChar* uBcpTypeId = ures_getString(typeMapEntry.getAlias(), &bcpTypeIdLen, &sts);
251                 if (U_FAILURE(sts)) {
252                     break;
253                 }
254 
255                 // empty value indicates that BCP type is same with the legacy type.
256                 const char* bcpTypeId = legacyTypeId;
257                 if (bcpTypeIdLen > 0) {
258                     char* bcpTypeIdBuf = (char*)uprv_malloc(bcpTypeIdLen + 1);
259                     if (bcpTypeIdBuf == NULL) {
260                         sts = U_MEMORY_ALLOCATION_ERROR;
261                         break;
262                     }
263                     u_UCharsToChars(uBcpTypeId, bcpTypeIdBuf, bcpTypeIdLen);
264                     bcpTypeIdBuf[bcpTypeIdLen] = 0;
265                     gKeyTypeStringPool->addElement(bcpTypeIdBuf, sts);
266                     if (U_FAILURE(sts)) {
267                         break;
268                     }
269                     bcpTypeId = bcpTypeIdBuf;
270                 }
271 
272                 // Note: legacy type value should never be
273                 // equivalent to bcp type value of a different
274                 // type under the same key. So we use a single
275                 // map for lookup.
276                 LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType));
277                 if (t == NULL) {
278                     sts = U_MEMORY_ALLOCATION_ERROR;
279                     break;
280                 }
281                 t->bcpId = bcpTypeId;
282                 t->legacyId = legacyTypeId;
283                 gLocExtTypeEntries->addElement((void*)t, sts);
284                 if (U_FAILURE(sts)) {
285                     break;
286                 }
287 
288                 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
289                 if (bcpTypeId != legacyTypeId) {
290                     // different type value
291                     uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
292                 }
293                 if (U_FAILURE(sts)) {
294                     break;
295                 }
296 
297                 // also put aliases in the map
298                 if (typeAliasResByKey.isValid()) {
299                     LocalUResourceBundlePointer typeAliasDataEntry;
300 
301                     ures_resetIterator(typeAliasResByKey.getAlias());
302                     while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
303                         int32_t toLen;
304                         typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
305                         const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
306                         if (U_FAILURE(sts)) {
307                             break;
308                         }
309                         // check if this is an alias of canoncal legacy type
310                         if (uprv_compareInvAscii(NULL, legacyTypeId, -1, to, toLen) == 0) {
311                             const char* from = ures_getKey(typeAliasDataEntry.getAlias());
312                             if (isTZ) {
313                                 // replace colon with slash if necessary
314                                 if (uprv_strchr(from, ':') != NULL) {
315                                     int32_t fromLen = uprv_strlen(from);
316                                     char* fromBuf = (char*)uprv_malloc(fromLen + 1);
317                                     if (fromBuf == NULL) {
318                                         sts = U_MEMORY_ALLOCATION_ERROR;
319                                         break;
320                                     }
321                                     const char* p = from;
322                                     char* q = fromBuf;
323                                     while (*p) {
324                                         if (*p == ':') {
325                                             *q++ = '/';
326                                         } else {
327                                             *q++ = *p;
328                                         }
329                                         p++;
330                                     }
331                                     *q = 0;
332 
333                                     gKeyTypeStringPool->addElement(fromBuf, sts);
334                                     if (U_FAILURE(sts)) {
335                                         break;
336                                     }
337                                     from = fromBuf;
338                                 }
339                             }
340                             uhash_put(typeDataMap, (void*)from, t, &sts);
341                         }
342                     }
343                     if (U_FAILURE(sts)) {
344                         break;
345                     }
346                 }
347 
348                 if (bcpTypeAliasResByKey.isValid()) {
349                     LocalUResourceBundlePointer bcpTypeAliasDataEntry;
350 
351                     ures_resetIterator(bcpTypeAliasResByKey.getAlias());
352                     while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
353                         int32_t toLen;
354                         bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
355                         const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
356                         if (U_FAILURE(sts)) {
357                             break;
358                         }
359                         // check if this is an alias of bcp type
360                         if (uprv_compareInvAscii(NULL, bcpTypeId, -1, to, toLen) == 0) {
361                             const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
362                             uhash_put(typeDataMap, (void*)from, t, &sts);
363                         }
364                     }
365                     if (U_FAILURE(sts)) {
366                         break;
367                     }
368                 }
369             }
370         }
371         if (U_FAILURE(sts)) {
372             break;
373         }
374 
375         LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData));
376         if (keyData == NULL) {
377             sts = U_MEMORY_ALLOCATION_ERROR;
378             break;
379         }
380         keyData->bcpId = bcpKeyId;
381         keyData->legacyId = legacyKeyId;
382         keyData->specialTypes = specialTypes;
383         keyData->typeMap = typeDataMap;
384 
385         gLocExtKeyDataEntries->addElement((void*)keyData, sts);
386         if (U_FAILURE(sts)) {
387             break;
388         }
389 
390         uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
391         if (legacyKeyId != bcpKeyId) {
392             // different key value
393             uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
394         }
395         if (U_FAILURE(sts)) {
396             break;
397         }
398     }
399 }
400 
401 static UBool
init()402 init() {
403     UErrorCode sts = U_ZERO_ERROR;
404     umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
405     if (U_FAILURE(sts)) {
406         return FALSE;
407     }
408     return TRUE;
409 }
410 
411 static UBool
isSpecialTypeCodepoints(const char * val)412 isSpecialTypeCodepoints(const char* val) {
413     int32_t subtagLen = 0;
414     const char* p = val;
415     while (*p) {
416         if (*p == '-') {
417             if (subtagLen < 4 || subtagLen > 6) {
418                 return FALSE;
419             }
420             subtagLen = 0;
421         } else if ((*p >= '0' && *p <= '9') ||
422                     (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
423                     (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
424             subtagLen++;
425         } else {
426             return FALSE;
427         }
428         p++;
429     }
430     return (subtagLen >= 4 && subtagLen <= 6);
431 }
432 
433 static UBool
isSpecialTypeReorderCode(const char * val)434 isSpecialTypeReorderCode(const char* val) {
435     int32_t subtagLen = 0;
436     const char* p = val;
437     while (*p) {
438         if (*p == '-') {
439             if (subtagLen < 3 || subtagLen > 8) {
440                 return FALSE;
441             }
442             subtagLen = 0;
443         } else if (uprv_isASCIILetter(*p)) {
444             subtagLen++;
445         } else {
446             return FALSE;
447         }
448         p++;
449     }
450     return (subtagLen >=3 && subtagLen <=8);
451 }
452 
453 U_CFUNC const char*
ulocimp_toBcpKey(const char * key)454 ulocimp_toBcpKey(const char* key) {
455     if (!init()) {
456         return NULL;
457     }
458 
459     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
460     if (keyData != NULL) {
461         return keyData->bcpId;
462     }
463     return NULL;
464 }
465 
466 U_CFUNC const char*
ulocimp_toLegacyKey(const char * key)467 ulocimp_toLegacyKey(const char* key) {
468     if (!init()) {
469         return NULL;
470     }
471 
472     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
473     if (keyData != NULL) {
474         return keyData->legacyId;
475     }
476     return NULL;
477 }
478 
479 U_CFUNC const char*
ulocimp_toBcpType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)480 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
481     if (isKnownKey != NULL) {
482         *isKnownKey = FALSE;
483     }
484     if (isSpecialType != NULL) {
485         *isSpecialType = FALSE;
486     }
487 
488     if (!init()) {
489         return NULL;
490     }
491 
492     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
493     if (keyData != NULL) {
494         if (isKnownKey != NULL) {
495             *isKnownKey = TRUE;
496         }
497         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);
498         if (t != NULL) {
499             return t->bcpId;
500         }
501         if (keyData->specialTypes != SPECIALTYPE_NONE) {
502             UBool matched = FALSE;
503             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
504                 matched = isSpecialTypeCodepoints(type);
505             }
506             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
507                 matched = isSpecialTypeReorderCode(type);
508             }
509             if (matched) {
510                 if (isSpecialType != NULL) {
511                     *isSpecialType = TRUE;
512                 }
513                 return type;
514             }
515         }
516     }
517     return NULL;
518 }
519 
520 
521 U_CFUNC const char*
ulocimp_toLegacyType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)522 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
523     if (isKnownKey != NULL) {
524         *isKnownKey = FALSE;
525     }
526     if (isSpecialType != NULL) {
527         *isSpecialType = FALSE;
528     }
529 
530     if (!init()) {
531         return NULL;
532     }
533 
534     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
535     if (keyData != NULL) {
536         if (isKnownKey != NULL) {
537             *isKnownKey = TRUE;
538         }
539         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);
540         if (t != NULL) {
541             return t->legacyId;
542         }
543         if (keyData->specialTypes != SPECIALTYPE_NONE) {
544             UBool matched = FALSE;
545             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
546                 matched = isSpecialTypeCodepoints(type);
547             }
548             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
549                 matched = isSpecialTypeReorderCode(type);
550             }
551             if (matched) {
552                 if (isSpecialType != NULL) {
553                     *isSpecialType = TRUE;
554                 }
555                 return type;
556             }
557         }
558     }
559     return NULL;
560 }
561