1 /*
2 **********************************************************************
3 *   Copyright (C) 2014-2016, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 #include "unicode/utypes.h"
8 
9 #include "cstring.h"
10 #include "uassert.h"
11 #include "ucln_cmn.h"
12 #include "uhash.h"
13 #include "umutex.h"
14 #include "uresimp.h"
15 #include "uvector.h"
16 
17 static UHashtable* gLocExtKeyMap = NULL;
18 static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
19 static icu::UVector* gKeyTypeStringPool = NULL;
20 static icu::UVector* gLocExtKeyDataEntries = NULL;
21 static icu::UVector* gLocExtTypeEntries = NULL;
22 
// Bit flags for "special" types, i.e. type values that are recognized by
// syntax instead of being listed one by one. Flags are OR-ed together into
// LocExtKeyData::specialTypes.
typedef enum {
    SPECIALTYPE_NONE         = 0x0,
    SPECIALTYPE_CODEPOINTS   = 0x1,  // set by a "CODEPOINTS" type map entry
    SPECIALTYPE_REORDER_CODE = 0x2,  // set by a "REORDER_CODE" type map entry
    SPECIALTYPE_RG_KEY_VALUE = 0x4   // set by a "RG_KEY_VALUE" type map entry
} SpecialType;
30 
31 typedef struct LocExtKeyData {
32     const char*     legacyId;
33     const char*     bcpId;
34     UHashtable*     typeMap;
35     uint32_t        specialTypes;
36 } LocExtKeyData;
37 
// One type value of a key, in both spellings. The strings are not owned by
// this record; they point at resource keys or at gKeyTypeStringPool entries.
typedef struct LocExtType {
    const char*     legacyId;   // legacy type id
    const char*     bcpId;      // BCP type id (may be the same pointer as legacyId)
} LocExtType;
42 
43 U_CDECL_BEGIN
44 
45 static UBool U_CALLCONV
uloc_key_type_cleanup(void)46 uloc_key_type_cleanup(void) {
47     if (gLocExtKeyMap != NULL) {
48         uhash_close(gLocExtKeyMap);
49         gLocExtKeyMap = NULL;
50     }
51 
52     delete gLocExtKeyDataEntries;
53     gLocExtKeyDataEntries = NULL;
54 
55     delete gLocExtTypeEntries;
56     gLocExtTypeEntries = NULL;
57 
58     delete gKeyTypeStringPool;
59     gKeyTypeStringPool = NULL;
60 
61     gLocExtKeyMapInitOnce.reset();
62     return TRUE;
63 }
64 
65 static void U_CALLCONV
uloc_deleteKeyTypeStringPoolEntry(void * obj)66 uloc_deleteKeyTypeStringPoolEntry(void* obj) {
67     uprv_free(obj);
68 }
69 
70 static void U_CALLCONV
uloc_deleteKeyDataEntry(void * obj)71 uloc_deleteKeyDataEntry(void* obj) {
72     LocExtKeyData* keyData = (LocExtKeyData*)obj;
73     if (keyData->typeMap != NULL) {
74         uhash_close(keyData->typeMap);
75     }
76     uprv_free(keyData);
77 }
78 
79 static void U_CALLCONV
uloc_deleteTypeEntry(void * obj)80 uloc_deleteTypeEntry(void* obj) {
81     uprv_free(obj);
82 }
83 
84 U_CDECL_END
85 
86 
87 static void U_CALLCONV
initFromResourceBundle(UErrorCode & sts)88 initFromResourceBundle(UErrorCode& sts) {
89     U_NAMESPACE_USE
90     ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
91 
92     gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
93 
94     LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
95     LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
96     LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
97 
98     if (U_FAILURE(sts)) {
99         return;
100     }
101 
102     UErrorCode tmpSts = U_ZERO_ERROR;
103     LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
104     tmpSts = U_ZERO_ERROR;
105     LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
106 
107     // initialize vectors storing dynamically allocated objects
108     gKeyTypeStringPool = new UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts);
109     if (gKeyTypeStringPool == NULL) {
110         if (U_SUCCESS(sts)) {
111             sts = U_MEMORY_ALLOCATION_ERROR;
112         }
113     }
114     if (U_FAILURE(sts)) {
115         return;
116     }
117     gLocExtKeyDataEntries = new UVector(uloc_deleteKeyDataEntry, NULL, sts);
118     if (gLocExtKeyDataEntries == NULL) {
119         if (U_SUCCESS(sts)) {
120             sts = U_MEMORY_ALLOCATION_ERROR;
121         }
122     }
123     if (U_FAILURE(sts)) {
124         return;
125     }
126     gLocExtTypeEntries = new UVector(uloc_deleteTypeEntry, NULL, sts);
127     if (gLocExtTypeEntries == NULL) {
128         if (U_SUCCESS(sts)) {
129             sts = U_MEMORY_ALLOCATION_ERROR;
130         }
131     }
132     if (U_FAILURE(sts)) {
133         return;
134     }
135 
136     // iterate through keyMap resource
137     LocalUResourceBundlePointer keyMapEntry;
138 
139     while (ures_hasNext(keyMapRes.getAlias())) {
140         keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
141         if (U_FAILURE(sts)) {
142             break;
143         }
144         const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
145         int32_t bcpKeyIdLen = 0;
146         const UChar* uBcpKeyId = ures_getString(keyMapEntry.getAlias(), &bcpKeyIdLen, &sts);
147         if (U_FAILURE(sts)) {
148             break;
149         }
150 
151         // empty value indicates that BCP key is same with the legacy key.
152         const char* bcpKeyId = legacyKeyId;
153         if (bcpKeyIdLen > 0) {
154             char* bcpKeyIdBuf = (char*)uprv_malloc(bcpKeyIdLen + 1);
155             if (bcpKeyIdBuf == NULL) {
156                 sts = U_MEMORY_ALLOCATION_ERROR;
157                 break;
158             }
159             u_UCharsToChars(uBcpKeyId, bcpKeyIdBuf, bcpKeyIdLen);
160             bcpKeyIdBuf[bcpKeyIdLen] = 0;
161             gKeyTypeStringPool->addElement(bcpKeyIdBuf, sts);
162             if (U_FAILURE(sts)) {
163                 break;
164             }
165             bcpKeyId = bcpKeyIdBuf;
166         }
167 
168         UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
169 
170         UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
171         if (U_FAILURE(sts)) {
172             break;
173         }
174         uint32_t specialTypes = SPECIALTYPE_NONE;
175 
176         LocalUResourceBundlePointer typeAliasResByKey;
177         LocalUResourceBundlePointer bcpTypeAliasResByKey;
178 
179         if (typeAliasRes.isValid()) {
180             tmpSts = U_ZERO_ERROR;
181             typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
182             if (U_FAILURE(tmpSts)) {
183                 typeAliasResByKey.orphan();
184             }
185         }
186         if (bcpTypeAliasRes.isValid()) {
187             tmpSts = U_ZERO_ERROR;
188             bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
189             if (U_FAILURE(tmpSts)) {
190                 bcpTypeAliasResByKey.orphan();
191             }
192         }
193 
194         // look up type map for the key, and walk through the mapping data
195         tmpSts = U_ZERO_ERROR;
196         LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
197         if (U_FAILURE(tmpSts)) {
198             // type map for each key must exist
199             U_ASSERT(FALSE);
200         } else {
201             LocalUResourceBundlePointer typeMapEntry;
202 
203             while (ures_hasNext(typeMapResByKey.getAlias())) {
204                 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
205                 if (U_FAILURE(sts)) {
206                     break;
207                 }
208                 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
209 
210                 // special types
211                 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
212                     specialTypes |= SPECIALTYPE_CODEPOINTS;
213                     continue;
214                 }
215                 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
216                     specialTypes |= SPECIALTYPE_REORDER_CODE;
217                     continue;
218                 }
219                 if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
220                     specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
221                     continue;
222                 }
223 
224                 if (isTZ) {
225                     // a timezone key uses a colon instead of a slash in the resource.
226                     // e.g. America:Los_Angeles
227                     if (uprv_strchr(legacyTypeId, ':') != NULL) {
228                         int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId);
229                         char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);
230                         if (legacyTypeIdBuf == NULL) {
231                             sts = U_MEMORY_ALLOCATION_ERROR;
232                             break;
233                         }
234                         const char* p = legacyTypeId;
235                         char* q = legacyTypeIdBuf;
236                         while (*p) {
237                             if (*p == ':') {
238                                 *q++ = '/';
239                             } else {
240                                 *q++ = *p;
241                             }
242                             p++;
243                         }
244                         *q = 0;
245 
246                         gKeyTypeStringPool->addElement(legacyTypeIdBuf, sts);
247                         if (U_FAILURE(sts)) {
248                             break;
249                         }
250                         legacyTypeId = legacyTypeIdBuf;
251                     }
252                 }
253 
254                 int32_t bcpTypeIdLen = 0;
255                 const UChar* uBcpTypeId = ures_getString(typeMapEntry.getAlias(), &bcpTypeIdLen, &sts);
256                 if (U_FAILURE(sts)) {
257                     break;
258                 }
259 
260                 // empty value indicates that BCP type is same with the legacy type.
261                 const char* bcpTypeId = legacyTypeId;
262                 if (bcpTypeIdLen > 0) {
263                     char* bcpTypeIdBuf = (char*)uprv_malloc(bcpTypeIdLen + 1);
264                     if (bcpTypeIdBuf == NULL) {
265                         sts = U_MEMORY_ALLOCATION_ERROR;
266                         break;
267                     }
268                     u_UCharsToChars(uBcpTypeId, bcpTypeIdBuf, bcpTypeIdLen);
269                     bcpTypeIdBuf[bcpTypeIdLen] = 0;
270                     gKeyTypeStringPool->addElement(bcpTypeIdBuf, sts);
271                     if (U_FAILURE(sts)) {
272                         break;
273                     }
274                     bcpTypeId = bcpTypeIdBuf;
275                 }
276 
277                 // Note: legacy type value should never be
278                 // equivalent to bcp type value of a different
279                 // type under the same key. So we use a single
280                 // map for lookup.
281                 LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType));
282                 if (t == NULL) {
283                     sts = U_MEMORY_ALLOCATION_ERROR;
284                     break;
285                 }
286                 t->bcpId = bcpTypeId;
287                 t->legacyId = legacyTypeId;
288                 gLocExtTypeEntries->addElement((void*)t, sts);
289                 if (U_FAILURE(sts)) {
290                     break;
291                 }
292 
293                 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
294                 if (bcpTypeId != legacyTypeId) {
295                     // different type value
296                     uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
297                 }
298                 if (U_FAILURE(sts)) {
299                     break;
300                 }
301 
302                 // also put aliases in the map
303                 if (typeAliasResByKey.isValid()) {
304                     LocalUResourceBundlePointer typeAliasDataEntry;
305 
306                     ures_resetIterator(typeAliasResByKey.getAlias());
307                     while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
308                         int32_t toLen;
309                         typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
310                         const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
311                         if (U_FAILURE(sts)) {
312                             break;
313                         }
314                         // check if this is an alias of canoncal legacy type
315                         if (uprv_compareInvAscii(NULL, legacyTypeId, -1, to, toLen) == 0) {
316                             const char* from = ures_getKey(typeAliasDataEntry.getAlias());
317                             if (isTZ) {
318                                 // replace colon with slash if necessary
319                                 if (uprv_strchr(from, ':') != NULL) {
320                                     int32_t fromLen = uprv_strlen(from);
321                                     char* fromBuf = (char*)uprv_malloc(fromLen + 1);
322                                     if (fromBuf == NULL) {
323                                         sts = U_MEMORY_ALLOCATION_ERROR;
324                                         break;
325                                     }
326                                     const char* p = from;
327                                     char* q = fromBuf;
328                                     while (*p) {
329                                         if (*p == ':') {
330                                             *q++ = '/';
331                                         } else {
332                                             *q++ = *p;
333                                         }
334                                         p++;
335                                     }
336                                     *q = 0;
337 
338                                     gKeyTypeStringPool->addElement(fromBuf, sts);
339                                     if (U_FAILURE(sts)) {
340                                         break;
341                                     }
342                                     from = fromBuf;
343                                 }
344                             }
345                             uhash_put(typeDataMap, (void*)from, t, &sts);
346                         }
347                     }
348                     if (U_FAILURE(sts)) {
349                         break;
350                     }
351                 }
352 
353                 if (bcpTypeAliasResByKey.isValid()) {
354                     LocalUResourceBundlePointer bcpTypeAliasDataEntry;
355 
356                     ures_resetIterator(bcpTypeAliasResByKey.getAlias());
357                     while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
358                         int32_t toLen;
359                         bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
360                         const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
361                         if (U_FAILURE(sts)) {
362                             break;
363                         }
364                         // check if this is an alias of bcp type
365                         if (uprv_compareInvAscii(NULL, bcpTypeId, -1, to, toLen) == 0) {
366                             const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
367                             uhash_put(typeDataMap, (void*)from, t, &sts);
368                         }
369                     }
370                     if (U_FAILURE(sts)) {
371                         break;
372                     }
373                 }
374             }
375         }
376         if (U_FAILURE(sts)) {
377             break;
378         }
379 
380         LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData));
381         if (keyData == NULL) {
382             sts = U_MEMORY_ALLOCATION_ERROR;
383             break;
384         }
385         keyData->bcpId = bcpKeyId;
386         keyData->legacyId = legacyKeyId;
387         keyData->specialTypes = specialTypes;
388         keyData->typeMap = typeDataMap;
389 
390         gLocExtKeyDataEntries->addElement((void*)keyData, sts);
391         if (U_FAILURE(sts)) {
392             break;
393         }
394 
395         uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
396         if (legacyKeyId != bcpKeyId) {
397             // different key value
398             uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
399         }
400         if (U_FAILURE(sts)) {
401             break;
402         }
403     }
404 }
405 
406 static UBool
init()407 init() {
408     UErrorCode sts = U_ZERO_ERROR;
409     umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
410     if (U_FAILURE(sts)) {
411         return FALSE;
412     }
413     return TRUE;
414 }
415 
416 static UBool
isSpecialTypeCodepoints(const char * val)417 isSpecialTypeCodepoints(const char* val) {
418     int32_t subtagLen = 0;
419     const char* p = val;
420     while (*p) {
421         if (*p == '-') {
422             if (subtagLen < 4 || subtagLen > 6) {
423                 return FALSE;
424             }
425             subtagLen = 0;
426         } else if ((*p >= '0' && *p <= '9') ||
427                     (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
428                     (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
429             subtagLen++;
430         } else {
431             return FALSE;
432         }
433         p++;
434     }
435     return (subtagLen >= 4 && subtagLen <= 6);
436 }
437 
438 static UBool
isSpecialTypeReorderCode(const char * val)439 isSpecialTypeReorderCode(const char* val) {
440     int32_t subtagLen = 0;
441     const char* p = val;
442     while (*p) {
443         if (*p == '-') {
444             if (subtagLen < 3 || subtagLen > 8) {
445                 return FALSE;
446             }
447             subtagLen = 0;
448         } else if (uprv_isASCIILetter(*p)) {
449             subtagLen++;
450         } else {
451             return FALSE;
452         }
453         p++;
454     }
455     return (subtagLen >=3 && subtagLen <=8);
456 }
457 
458 static UBool
isSpecialTypeRgKeyValue(const char * val)459 isSpecialTypeRgKeyValue(const char* val) {
460     int32_t subtagLen = 0;
461     const char* p = val;
462     while (*p) {
463         if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
464                     (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
465             subtagLen++;
466         } else {
467             return FALSE;
468         }
469         p++;
470     }
471     return (subtagLen == 6);
472     return TRUE;
473 }
474 
475 U_CFUNC const char*
ulocimp_toBcpKey(const char * key)476 ulocimp_toBcpKey(const char* key) {
477     if (!init()) {
478         return NULL;
479     }
480 
481     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
482     if (keyData != NULL) {
483         return keyData->bcpId;
484     }
485     return NULL;
486 }
487 
488 U_CFUNC const char*
ulocimp_toLegacyKey(const char * key)489 ulocimp_toLegacyKey(const char* key) {
490     if (!init()) {
491         return NULL;
492     }
493 
494     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
495     if (keyData != NULL) {
496         return keyData->legacyId;
497     }
498     return NULL;
499 }
500 
501 U_CFUNC const char*
ulocimp_toBcpType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)502 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
503     if (isKnownKey != NULL) {
504         *isKnownKey = FALSE;
505     }
506     if (isSpecialType != NULL) {
507         *isSpecialType = FALSE;
508     }
509 
510     if (!init()) {
511         return NULL;
512     }
513 
514     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
515     if (keyData != NULL) {
516         if (isKnownKey != NULL) {
517             *isKnownKey = TRUE;
518         }
519         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);
520         if (t != NULL) {
521             return t->bcpId;
522         }
523         if (keyData->specialTypes != SPECIALTYPE_NONE) {
524             UBool matched = FALSE;
525             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
526                 matched = isSpecialTypeCodepoints(type);
527             }
528             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
529                 matched = isSpecialTypeReorderCode(type);
530             }
531             if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
532                 matched = isSpecialTypeRgKeyValue(type);
533             }
534             if (matched) {
535                 if (isSpecialType != NULL) {
536                     *isSpecialType = TRUE;
537                 }
538                 return type;
539             }
540         }
541     }
542     return NULL;
543 }
544 
545 
546 U_CFUNC const char*
ulocimp_toLegacyType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)547 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
548     if (isKnownKey != NULL) {
549         *isKnownKey = FALSE;
550     }
551     if (isSpecialType != NULL) {
552         *isSpecialType = FALSE;
553     }
554 
555     if (!init()) {
556         return NULL;
557     }
558 
559     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
560     if (keyData != NULL) {
561         if (isKnownKey != NULL) {
562             *isKnownKey = TRUE;
563         }
564         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);
565         if (t != NULL) {
566             return t->legacyId;
567         }
568         if (keyData->specialTypes != SPECIALTYPE_NONE) {
569             UBool matched = FALSE;
570             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
571                 matched = isSpecialTypeCodepoints(type);
572             }
573             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
574                 matched = isSpecialTypeReorderCode(type);
575             }
576             if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
577                 matched = isSpecialTypeRgKeyValue(type);
578             }
579             if (matched) {
580                 if (isSpecialType != NULL) {
581                     *isSpecialType = TRUE;
582                 }
583                 return type;
584             }
585         }
586     }
587     return NULL;
588 }
589 
590