1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File locid.cpp
10 *
11 * Created by: Richard Gillam
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17 * methods to get and set it.
18 * 04/02/97 aliu Made operator!= inline; fixed return value
19 * of getName().
20 * 04/15/97 aliu Cleanup for AIX/Win32.
21 * 04/24/97 aliu Numerous changes per code review.
22 * 08/18/98 stephen Changed getDisplayName()
23 * Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24 * Added getISOCountries(), getISOLanguages(),
25 * getLanguagesForCountry()
26 * 03/16/99 bertrand rehaul.
27 * 07/21/99 stephen Added U_CFUNC setDefault
28 * 11/09/99 weiv Added const char * getName() const;
29 * 04/12/00 srl removing unicodestring api's and cached hash code
30 * 08/10/01 grhoten Change the static Locales to accessor functions
31 ******************************************************************************
32 */
33
34 #include <utility>
35
36 #include "unicode/bytestream.h"
37 #include "unicode/locid.h"
38 #include "unicode/localebuilder.h"
39 #include "unicode/strenum.h"
40 #include "unicode/stringpiece.h"
41 #include "unicode/uloc.h"
42 #include "unicode/ures.h"
43
44 #include "bytesinkutil.h"
45 #include "charstr.h"
46 #include "charstrmap.h"
47 #include "cmemory.h"
48 #include "cstring.h"
49 #include "mutex.h"
50 #include "putilimp.h"
51 #include "uassert.h"
52 #include "ucln_cmn.h"
53 #include "uhash.h"
54 #include "ulocimp.h"
55 #include "umutex.h"
56 #include "uniquecharstr.h"
57 #include "ustr_imp.h"
58 #include "uvector.h"
59
60 U_CDECL_BEGIN
61 static UBool U_CALLCONV locale_cleanup(void);
62 U_CDECL_END
63
64 U_NAMESPACE_BEGIN
65
66 static Locale *gLocaleCache = NULL;
67 static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
68
69 // gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
70 static UMutex gDefaultLocaleMutex;
71 static UHashtable *gDefaultLocalesHashT = NULL;
72 static Locale *gDefaultLocale = NULL;
73
74 /**
75 * \def ULOC_STRING_LIMIT
76 * strings beyond this value crash in CharString
77 */
78 #define ULOC_STRING_LIMIT 357913941
79
80 U_NAMESPACE_END
81
/* Slot of each predefined locale inside the gLocaleCache array. */
typedef enum ELocalePos {
    /* language-only locales */
    eENGLISH = 0,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    /* language + country locales */
    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,         /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,

    /* the root locale */
    eROOT,

    /* number of slots; used as the array length, so it must remain last */
    eMAX_LOCALES
} ELocalePos;
108
109 U_CDECL_BEGIN
110 //
111 // Deleter function for Locales owned by the default Locale hash table/
112 //
113 static void U_CALLCONV
deleteLocale(void * obj)114 deleteLocale(void *obj) {
115 delete (icu::Locale *) obj;
116 }
117
locale_cleanup(void)118 static UBool U_CALLCONV locale_cleanup(void)
119 {
120 U_NAMESPACE_USE
121
122 delete [] gLocaleCache;
123 gLocaleCache = NULL;
124 gLocaleCacheInitOnce.reset();
125
126 if (gDefaultLocalesHashT) {
127 uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
128 gDefaultLocalesHashT = NULL;
129 }
130 gDefaultLocale = NULL;
131 return TRUE;
132 }
133
134
locale_init(UErrorCode & status)135 static void U_CALLCONV locale_init(UErrorCode &status) {
136 U_NAMESPACE_USE
137
138 U_ASSERT(gLocaleCache == NULL);
139 gLocaleCache = new Locale[(int)eMAX_LOCALES];
140 if (gLocaleCache == NULL) {
141 status = U_MEMORY_ALLOCATION_ERROR;
142 return;
143 }
144 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
145 gLocaleCache[eROOT] = Locale("");
146 gLocaleCache[eENGLISH] = Locale("en");
147 gLocaleCache[eFRENCH] = Locale("fr");
148 gLocaleCache[eGERMAN] = Locale("de");
149 gLocaleCache[eITALIAN] = Locale("it");
150 gLocaleCache[eJAPANESE] = Locale("ja");
151 gLocaleCache[eKOREAN] = Locale("ko");
152 gLocaleCache[eCHINESE] = Locale("zh");
153 gLocaleCache[eFRANCE] = Locale("fr", "FR");
154 gLocaleCache[eGERMANY] = Locale("de", "DE");
155 gLocaleCache[eITALY] = Locale("it", "IT");
156 gLocaleCache[eJAPAN] = Locale("ja", "JP");
157 gLocaleCache[eKOREA] = Locale("ko", "KR");
158 gLocaleCache[eCHINA] = Locale("zh", "CN");
159 gLocaleCache[eTAIWAN] = Locale("zh", "TW");
160 gLocaleCache[eUK] = Locale("en", "GB");
161 gLocaleCache[eUS] = Locale("en", "US");
162 gLocaleCache[eCANADA] = Locale("en", "CA");
163 gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
164 }
165
166 U_CDECL_END
167
168 U_NAMESPACE_BEGIN
169
locale_set_default_internal(const char * id,UErrorCode & status)170 Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
171 // Synchronize this entire function.
172 Mutex lock(&gDefaultLocaleMutex);
173
174 UBool canonicalize = FALSE;
175
176 // If given a NULL string for the locale id, grab the default
177 // name from the system.
178 // (Different from most other locale APIs, where a null name means use
179 // the current ICU default locale.)
180 if (id == NULL) {
181 id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
182 canonicalize = TRUE; // always canonicalize host ID
183 }
184
185 CharString localeNameBuf;
186 {
187 CharStringByteSink sink(&localeNameBuf);
188 if (canonicalize) {
189 ulocimp_canonicalize(id, sink, &status);
190 } else {
191 ulocimp_getName(id, sink, &status);
192 }
193 }
194
195 if (U_FAILURE(status)) {
196 return gDefaultLocale;
197 }
198
199 if (gDefaultLocalesHashT == NULL) {
200 gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
201 if (U_FAILURE(status)) {
202 return gDefaultLocale;
203 }
204 uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
205 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
206 }
207
208 Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data());
209 if (newDefault == NULL) {
210 newDefault = new Locale(Locale::eBOGUS);
211 if (newDefault == NULL) {
212 status = U_MEMORY_ALLOCATION_ERROR;
213 return gDefaultLocale;
214 }
215 newDefault->init(localeNameBuf.data(), FALSE);
216 uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
217 if (U_FAILURE(status)) {
218 return gDefaultLocale;
219 }
220 }
221 gDefaultLocale = newDefault;
222 return gDefaultLocale;
223 }
224
225 U_NAMESPACE_END
226
227 /* sfb 07/21/99 */
228 U_CFUNC void
locale_set_default(const char * id)229 locale_set_default(const char *id)
230 {
231 U_NAMESPACE_USE
232 UErrorCode status = U_ZERO_ERROR;
233 locale_set_default_internal(id, status);
234 }
235 /* end */
236
237 U_CFUNC const char *
locale_get_default(void)238 locale_get_default(void)
239 {
240 U_NAMESPACE_USE
241 return Locale::getDefault().getName();
242 }
243
244
245 U_NAMESPACE_BEGIN
246
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)247 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
248
249 /*Character separating the posix id fields*/
250 // '_'
251 // In the platform codepage.
252 #define SEP_CHAR '_'
253 #define NULL_CHAR '\0'
254
255 Locale::~Locale()
256 {
257 if ((baseName != fullName) && (baseName != fullNameBuffer)) {
258 uprv_free(baseName);
259 }
260 baseName = NULL;
261 /*if fullName is on the heap, we free it*/
262 if (fullName != fullNameBuffer)
263 {
264 uprv_free(fullName);
265 fullName = NULL;
266 }
267 }
268
Locale()269 Locale::Locale()
270 : UObject(), fullName(fullNameBuffer), baseName(NULL)
271 {
272 init(NULL, FALSE);
273 }
274
275 /*
276 * Internal constructor to allow construction of a locale object with
277 * NO side effects. (Default constructor tries to get
278 * the default locale.)
279 */
Locale(Locale::ELocaleType)280 Locale::Locale(Locale::ELocaleType)
281 : UObject(), fullName(fullNameBuffer), baseName(NULL)
282 {
283 setToBogus();
284 }
285
286
Locale(const char * newLanguage,const char * newCountry,const char * newVariant,const char * newKeywords)287 Locale::Locale( const char * newLanguage,
288 const char * newCountry,
289 const char * newVariant,
290 const char * newKeywords)
291 : UObject(), fullName(fullNameBuffer), baseName(NULL)
292 {
293 if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
294 {
295 init(NULL, FALSE); /* shortcut */
296 }
297 else
298 {
299 UErrorCode status = U_ZERO_ERROR;
300 int32_t size = 0;
301 int32_t lsize = 0;
302 int32_t csize = 0;
303 int32_t vsize = 0;
304 int32_t ksize = 0;
305
306 // Calculate the size of the resulting string.
307
308 // Language
309 if ( newLanguage != NULL )
310 {
311 lsize = (int32_t)uprv_strlen(newLanguage);
312 if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
313 setToBogus();
314 return;
315 }
316 size = lsize;
317 }
318
319 CharString togo(newLanguage, lsize, status); // start with newLanguage
320
321 // _Country
322 if ( newCountry != NULL )
323 {
324 csize = (int32_t)uprv_strlen(newCountry);
325 if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
326 setToBogus();
327 return;
328 }
329 size += csize;
330 }
331
332 // _Variant
333 if ( newVariant != NULL )
334 {
335 // remove leading _'s
336 while(newVariant[0] == SEP_CHAR)
337 {
338 newVariant++;
339 }
340
341 // remove trailing _'s
342 vsize = (int32_t)uprv_strlen(newVariant);
343 if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
344 setToBogus();
345 return;
346 }
347 while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
348 {
349 vsize--;
350 }
351 }
352
353 if( vsize > 0 )
354 {
355 size += vsize;
356 }
357
358 // Separator rules:
359 if ( vsize > 0 )
360 {
361 size += 2; // at least: __v
362 }
363 else if ( csize > 0 )
364 {
365 size += 1; // at least: _v
366 }
367
368 if ( newKeywords != NULL)
369 {
370 ksize = (int32_t)uprv_strlen(newKeywords);
371 if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
372 setToBogus();
373 return;
374 }
375 size += ksize + 1;
376 }
377
378 // NOW we have the full locale string..
379 // Now, copy it back.
380
381 // newLanguage is already copied
382
383 if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
384 { // ^
385 togo.append(SEP_CHAR, status);
386 }
387
388 if ( csize != 0 )
389 {
390 togo.append(newCountry, status);
391 }
392
393 if ( vsize != 0)
394 {
395 togo.append(SEP_CHAR, status)
396 .append(newVariant, vsize, status);
397 }
398
399 if ( ksize != 0)
400 {
401 if (uprv_strchr(newKeywords, '=')) {
402 togo.append('@', status); /* keyword parsing */
403 }
404 else {
405 togo.append('_', status); /* Variant parsing with a script */
406 if ( vsize == 0) {
407 togo.append('_', status); /* No country found */
408 }
409 }
410 togo.append(newKeywords, status);
411 }
412
413 if (U_FAILURE(status)) {
414 // Something went wrong with appending, etc.
415 setToBogus();
416 return;
417 }
418 // Parse it, because for example 'language' might really be a complete
419 // string.
420 init(togo.data(), FALSE);
421 }
422 }
423
// Copy constructor: starts on the inline buffer, then delegates the actual
// copying to the copy-assignment operator.
Locale::Locale(const Locale &other)
    : UObject(other),
      fullName(fullNameBuffer),
      baseName(nullptr)
{
    operator=(other);
}
429
Locale(Locale && other)430 Locale::Locale(Locale&& other) U_NOEXCEPT
431 : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
432 *this = std::move(other);
433 }
434
// Copy assignment. The object is first reset to bogus; if duplicating either
// name string fails, the function returns early and the remaining fields are
// not copied from other.
Locale& Locale::operator=(const Locale& other) {
    if (this == &other) {
        return *this;
    }

    setToBogus();

    // fullName: mirror other's storage choice (null, inline buffer, or heap).
    if (other.fullName == nullptr) {
        fullName = nullptr;
    } else if (other.fullName == other.fullNameBuffer) {
        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
    } else {
        char *fullCopy = uprv_strdup(other.fullName);
        fullName = fullCopy;
        if (fullCopy == nullptr) {
            return *this;
        }
    }

    // baseName: preserve aliasing with fullName, otherwise duplicate it.
    if (other.baseName == other.fullName) {
        baseName = fullName;
    } else if (other.baseName != nullptr) {
        char *baseCopy = uprv_strdup(other.baseName);
        baseName = baseCopy;
        if (baseCopy == nullptr) {
            return *this;
        }
    }

    // Fixed-size subtag buffers and the scalar fields.
    uprv_strcpy(language, other.language);
    uprv_strcpy(script, other.script);
    uprv_strcpy(country, other.country);
    variantBegin = other.variantBegin;
    fIsBogus = other.fIsBogus;

    return *this;
}
467
operator =(Locale && other)468 Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
469 if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
470 if (fullName != fullNameBuffer) uprv_free(fullName);
471
472 if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
473 uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
474 }
475 if (other.fullName == other.fullNameBuffer) {
476 fullName = fullNameBuffer;
477 } else {
478 fullName = other.fullName;
479 }
480
481 if (other.baseName == other.fullNameBuffer) {
482 baseName = fullNameBuffer;
483 } else if (other.baseName == other.fullName) {
484 baseName = fullName;
485 } else {
486 baseName = other.baseName;
487 }
488
489 uprv_strcpy(language, other.language);
490 uprv_strcpy(script, other.script);
491 uprv_strcpy(country, other.country);
492
493 variantBegin = other.variantBegin;
494 fIsBogus = other.fIsBogus;
495
496 other.baseName = other.fullName = other.fullNameBuffer;
497
498 return *this;
499 }
500
501 Locale *
clone() const502 Locale::clone() const {
503 return new Locale(*this);
504 }
505
506 UBool
operator ==(const Locale & other) const507 Locale::operator==( const Locale& other) const
508 {
509 return (uprv_strcmp(other.fullName, fullName) == 0);
510 }
511
512 namespace {
513
// Init-once state for gKnownCanonicalized; reset by cleanupKnownCanonicalized().
UInitOnce gKnownCanonicalizedInitOnce = U_INITONCE_INITIALIZER;
// Hash table keyed by the KNOWN_CANONICALIZED strings, each mapped to 1.
// Built by loadKnownCanonicalized() and closed by cleanupKnownCanonicalized().
UHashtable *gKnownCanonicalized = nullptr;
516
// Keys loaded into gKnownCanonicalized by loadKnownCanonicalized().
// Entries are grouped by initial letter, one group per line.
static const char* const KNOWN_CANONICALIZED[] = {
    "c",
    // Commonly used locales known are already canonicalized
    "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
    "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA",
    "ca", "ca_ES", "cs", "cs_CZ", "cy", "cy_GB",
    "da", "da_DK", "de", "de_DE",
    "el", "el_GR", "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu", "eu_ES",
    "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
    "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN",
    "he", "he_IL", "hi", "hi_IN", "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM",
    "id", "id_ID", "is", "is_IS", "it", "it_IT",
    "ja", "ja_JP", "jv", "jv_ID",
    "ka", "ka_GE", "kk", "kk_KZ", "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG",
    "lo", "lo_LA", "lt", "lt_LT", "lv", "lv_LV",
    "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN", "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM",
    "nb", "nb_NO", "ne", "ne_NP", "nl", "nl_NL", "no",
    "or", "or_IN",
    "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF", "pt", "pt_BR", "pt_PT",
    "ro", "ro_RO", "ru", "ru_RU",
    "sd", "sd_IN", "si", "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO",
    "sq", "sq_AL", "sr", "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ",
    "ta", "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR",
    "uk", "uk_UA", "ur", "ur_PK", "uz", "uz_UZ",
    "vi", "vi_VN",
    "yue", "yue_Hant", "yue_Hant_HK", "yue_HK",
    "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant", "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
};
540
cleanupKnownCanonicalized()541 static UBool U_CALLCONV cleanupKnownCanonicalized() {
542 gKnownCanonicalizedInitOnce.reset();
543 if (gKnownCanonicalized) { uhash_close(gKnownCanonicalized); }
544 return TRUE;
545 }
546
loadKnownCanonicalized(UErrorCode & status)547 static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
548 ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
549 cleanupKnownCanonicalized);
550 LocalUHashtablePointer newKnownCanonicalizedMap(
551 uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status));
552 for (int32_t i = 0;
553 U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED);
554 i++) {
555 uhash_puti(newKnownCanonicalizedMap.getAlias(),
556 (void*)KNOWN_CANONICALIZED[i],
557 1, &status);
558 }
559 if (U_FAILURE(status)) {
560 return;
561 }
562
563 gKnownCanonicalized = newKnownCanonicalizedMap.orphan();
564 }
565
566 class AliasData;
567
568 /**
569 * A Builder class to build the alias data.
570 */
571 class AliasDataBuilder {
572 public:
AliasDataBuilder()573 AliasDataBuilder() {
574 }
575
576 // Build the AliasData from resource.
577 AliasData* build(UErrorCode &status);
578
579 private:
580 void readAlias(UResourceBundle* alias,
581 UniqueCharStrings* strings,
582 LocalMemory<const char*>& types,
583 LocalMemory<int32_t>& replacementIndexes,
584 int32_t &length,
585 void (*checkType)(const char* type),
586 void (*checkReplacement)(const UnicodeString& replacement),
587 UErrorCode &status);
588
589 // Read the languageAlias data from alias to
590 // strings+types+replacementIndexes
591 // The number of record will be stored into length.
592 // Allocate length items for types, to store the type field.
593 // Allocate length items for replacementIndexes,
594 // to store the index in the strings for the replacement script.
595 void readLanguageAlias(UResourceBundle* alias,
596 UniqueCharStrings* strings,
597 LocalMemory<const char*>& types,
598 LocalMemory<int32_t>& replacementIndexes,
599 int32_t &length,
600 UErrorCode &status);
601
602 // Read the scriptAlias data from alias to
603 // strings+types+replacementIndexes
604 // Allocate length items for types, to store the type field.
605 // Allocate length items for replacementIndexes,
606 // to store the index in the strings for the replacement script.
607 void readScriptAlias(UResourceBundle* alias,
608 UniqueCharStrings* strings,
609 LocalMemory<const char*>& types,
610 LocalMemory<int32_t>& replacementIndexes,
611 int32_t &length, UErrorCode &status);
612
613 // Read the territoryAlias data from alias to
614 // strings+types+replacementIndexes
615 // Allocate length items for types, to store the type field.
616 // Allocate length items for replacementIndexes,
617 // to store the index in the strings for the replacement script.
618 void readTerritoryAlias(UResourceBundle* alias,
619 UniqueCharStrings* strings,
620 LocalMemory<const char*>& types,
621 LocalMemory<int32_t>& replacementIndexes,
622 int32_t &length, UErrorCode &status);
623
624 // Read the variantAlias data from alias to
625 // strings+types+replacementIndexes
626 // Allocate length items for types, to store the type field.
627 // Allocate length items for replacementIndexes,
628 // to store the index in the strings for the replacement variant.
629 void readVariantAlias(UResourceBundle* alias,
630 UniqueCharStrings* strings,
631 LocalMemory<const char*>& types,
632 LocalMemory<int32_t>& replacementIndexes,
633 int32_t &length, UErrorCode &status);
634
635 // Read the subdivisionAlias data from alias to
636 // strings+types+replacementIndexes
637 // Allocate length items for types, to store the type field.
638 // Allocate length items for replacementIndexes,
639 // to store the index in the strings for the replacement variant.
640 void readSubdivisionAlias(UResourceBundle* alias,
641 UniqueCharStrings* strings,
642 LocalMemory<const char*>& types,
643 LocalMemory<int32_t>& replacementIndexes,
644 int32_t &length, UErrorCode &status);
645 };
646
647 /**
648 * A class to hold the Alias Data.
649 */
650 class AliasData : public UMemory {
651 public:
singleton(UErrorCode & status)652 static const AliasData* singleton(UErrorCode& status) {
653 if (U_FAILURE(status)) {
654 // Do not get into loadData if the status already has error.
655 return nullptr;
656 }
657 umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
658 return gSingleton;
659 }
660
languageMap() const661 const CharStringMap& languageMap() const { return language; }
scriptMap() const662 const CharStringMap& scriptMap() const { return script; }
territoryMap() const663 const CharStringMap& territoryMap() const { return territory; }
variantMap() const664 const CharStringMap& variantMap() const { return variant; }
subdivisionMap() const665 const CharStringMap& subdivisionMap() const { return subdivision; }
666
667 static void U_CALLCONV loadData(UErrorCode &status);
668 static UBool U_CALLCONV cleanup();
669
670 static UInitOnce gInitOnce;
671
672 private:
AliasData(CharStringMap languageMap,CharStringMap scriptMap,CharStringMap territoryMap,CharStringMap variantMap,CharStringMap subdivisionMap,CharString * strings)673 AliasData(CharStringMap languageMap,
674 CharStringMap scriptMap,
675 CharStringMap territoryMap,
676 CharStringMap variantMap,
677 CharStringMap subdivisionMap,
678 CharString* strings)
679 : language(std::move(languageMap)),
680 script(std::move(scriptMap)),
681 territory(std::move(territoryMap)),
682 variant(std::move(variantMap)),
683 subdivision(std::move(subdivisionMap)),
684 strings(strings) {
685 }
686
~AliasData()687 ~AliasData() {
688 delete strings;
689 }
690
691 static const AliasData* gSingleton;
692
693 CharStringMap language;
694 CharStringMap script;
695 CharStringMap territory;
696 CharStringMap variant;
697 CharStringMap subdivision;
698 CharString* strings;
699
700 friend class AliasDataBuilder;
701 };
702
703
704 const AliasData* AliasData::gSingleton = nullptr;
705 UInitOnce AliasData::gInitOnce = U_INITONCE_INITIALIZER;
706
707 UBool U_CALLCONV
cleanup()708 AliasData::cleanup()
709 {
710 gInitOnce.reset();
711 delete gSingleton;
712 return TRUE;
713 }
714
715 void
readAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,void (* checkType)(const char * type),void (* checkReplacement)(const UnicodeString & replacement),UErrorCode & status)716 AliasDataBuilder::readAlias(
717 UResourceBundle* alias,
718 UniqueCharStrings* strings,
719 LocalMemory<const char*>& types,
720 LocalMemory<int32_t>& replacementIndexes,
721 int32_t &length,
722 void (*checkType)(const char* type),
723 void (*checkReplacement)(const UnicodeString& replacement),
724 UErrorCode &status) {
725 if (U_FAILURE(status)) {
726 return;
727 }
728 length = ures_getSize(alias);
729 const char** rawTypes = types.allocateInsteadAndCopy(length);
730 if (rawTypes == nullptr) {
731 status = U_MEMORY_ALLOCATION_ERROR;
732 return;
733 }
734 int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
735 if (rawIndexes == nullptr) {
736 status = U_MEMORY_ALLOCATION_ERROR;
737 return;
738 }
739 int i = 0;
740 while (ures_hasNext(alias)) {
741 LocalUResourceBundlePointer res(
742 ures_getNextResource(alias, nullptr, &status));
743 const char* aliasFrom = ures_getKey(res.getAlias());
744 UnicodeString aliasTo =
745 ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
746
747 checkType(aliasFrom);
748 checkReplacement(aliasTo);
749
750 rawTypes[i] = aliasFrom;
751 rawIndexes[i] = strings->add(aliasTo, status);
752 i++;
753 }
754 }
755
756 /**
757 * Read the languageAlias data from alias to strings+types+replacementIndexes.
758 * Allocate length items for types, to store the type field. Allocate length
759 * items for replacementIndexes, to store the index in the strings for the
760 * replacement language.
761 */
762 void
readLanguageAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)763 AliasDataBuilder::readLanguageAlias(
764 UResourceBundle* alias,
765 UniqueCharStrings* strings,
766 LocalMemory<const char*>& types,
767 LocalMemory<int32_t>& replacementIndexes,
768 int32_t &length,
769 UErrorCode &status)
770 {
771 return readAlias(
772 alias, strings, types, replacementIndexes, length,
773 #if U_DEBUG
774 [](const char* type) {
775 // Assert the aliasFrom only contains the following possibilties
776 // language_REGION_variant
777 // language_REGION
778 // language_variant
779 // language
780 // und_variant
781 Locale test(type);
782 // Assert no script in aliasFrom
783 U_ASSERT(test.getScript()[0] == '\0');
784 // Assert when language is und, no REGION in aliasFrom.
785 U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0');
786 },
787 #else
788 [](const char*) {},
789 #endif
790 [](const UnicodeString&) {}, status);
791 }
792
793 /**
794 * Read the scriptAlias data from alias to strings+types+replacementIndexes.
795 * Allocate length items for types, to store the type field. Allocate length
796 * items for replacementIndexes, to store the index in the strings for the
797 * replacement script.
798 */
799 void
readScriptAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)800 AliasDataBuilder::readScriptAlias(
801 UResourceBundle* alias,
802 UniqueCharStrings* strings,
803 LocalMemory<const char*>& types,
804 LocalMemory<int32_t>& replacementIndexes,
805 int32_t &length,
806 UErrorCode &status)
807 {
808 return readAlias(
809 alias, strings, types, replacementIndexes, length,
810 #if U_DEBUG
811 [](const char* type) {
812 U_ASSERT(uprv_strlen(type) == 4);
813 },
814 [](const UnicodeString& replacement) {
815 U_ASSERT(replacement.length() == 4);
816 },
817 #else
818 [](const char*) {},
819 [](const UnicodeString&) { },
820 #endif
821 status);
822 }
823
824 /**
825 * Read the territoryAlias data from alias to strings+types+replacementIndexes.
826 * Allocate length items for types, to store the type field. Allocate length
827 * items for replacementIndexes, to store the index in the strings for the
828 * replacement regions.
829 */
830 void
readTerritoryAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)831 AliasDataBuilder::readTerritoryAlias(
832 UResourceBundle* alias,
833 UniqueCharStrings* strings,
834 LocalMemory<const char*>& types,
835 LocalMemory<int32_t>& replacementIndexes,
836 int32_t &length,
837 UErrorCode &status)
838 {
839 return readAlias(
840 alias, strings, types, replacementIndexes, length,
841 #if U_DEBUG
842 [](const char* type) {
843 U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3);
844 },
845 #else
846 [](const char*) {},
847 #endif
848 [](const UnicodeString&) { },
849 status);
850 }
851
852 /**
853 * Read the variantAlias data from alias to strings+types+replacementIndexes.
854 * Allocate length items for types, to store the type field. Allocate length
855 * items for replacementIndexes, to store the index in the strings for the
856 * replacement variant.
857 */
858 void
readVariantAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)859 AliasDataBuilder::readVariantAlias(
860 UResourceBundle* alias,
861 UniqueCharStrings* strings,
862 LocalMemory<const char*>& types,
863 LocalMemory<int32_t>& replacementIndexes,
864 int32_t &length,
865 UErrorCode &status)
866 {
867 return readAlias(
868 alias, strings, types, replacementIndexes, length,
869 #if U_DEBUG
870 [](const char* type) {
871 U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8);
872 U_ASSERT(uprv_strlen(type) != 4 ||
873 (type[0] >= '0' && type[0] <= '9'));
874 },
875 [](const UnicodeString& replacement) {
876 U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
877 U_ASSERT(replacement.length() != 4 ||
878 (replacement.charAt(0) >= u'0' &&
879 replacement.charAt(0) <= u'9'));
880 },
881 #else
882 [](const char*) {},
883 [](const UnicodeString&) { },
884 #endif
885 status);
886 }
887
888 /**
889 * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
890 * Allocate length items for types, to store the type field. Allocate length
891 * items for replacementIndexes, to store the index in the strings for the
892 * replacement regions.
893 */
894 void
readSubdivisionAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)895 AliasDataBuilder::readSubdivisionAlias(
896 UResourceBundle* alias,
897 UniqueCharStrings* strings,
898 LocalMemory<const char*>& types,
899 LocalMemory<int32_t>& replacementIndexes,
900 int32_t &length,
901 UErrorCode &status)
902 {
903 return readAlias(
904 alias, strings, types, replacementIndexes, length,
905 #if U_DEBUG
906 [](const char* type) {
907 U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
908 },
909 #else
910 [](const char*) {},
911 #endif
912 [](const UnicodeString&) { },
913 status);
914 }
915
916 /**
917 * Initializes the alias data from the ICU resource bundles. The alias data
918 * contains alias of language, country, script and variants.
919 *
920 * If the alias data has already loaded, then this method simply returns without
921 * doing anything meaningful.
922 */
923 void U_CALLCONV
loadData(UErrorCode & status)924 AliasData::loadData(UErrorCode &status)
925 {
926 #ifdef LOCALE_CANONICALIZATION_DEBUG
927 UDate start = uprv_getRawUTCtime();
928 #endif // LOCALE_CANONICALIZATION_DEBUG
929 ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup);
930 AliasDataBuilder builder;
931 gSingleton = builder.build(status);
932 #ifdef LOCALE_CANONICALIZATION_DEBUG
933 UDate end = uprv_getRawUTCtime();
934 printf("AliasData::loadData took total %f ms\n", end - start);
935 #endif // LOCALE_CANONICALIZATION_DEBUG
936 }
937
938 /**
939 * Build the alias data from resources.
940 */
941 AliasData*
build(UErrorCode & status)942 AliasDataBuilder::build(UErrorCode &status) {
943 LocalUResourceBundlePointer metadata(
944 ures_openDirect(nullptr, "metadata", &status));
945 LocalUResourceBundlePointer metadataAlias(
946 ures_getByKey(metadata.getAlias(), "alias", nullptr, &status));
947 LocalUResourceBundlePointer languageAlias(
948 ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status));
949 LocalUResourceBundlePointer scriptAlias(
950 ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status));
951 LocalUResourceBundlePointer territoryAlias(
952 ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
953 LocalUResourceBundlePointer variantAlias(
954 ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
955 LocalUResourceBundlePointer subdivisionAlias(
956 ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
957
958 if (U_FAILURE(status)) {
959 return nullptr;
960 }
961 int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
962 variantLength = 0, subdivisionLength = 0;
963
964 // Read the languageAlias into languageTypes, languageReplacementIndexes
965 // and strings
966 UniqueCharStrings strings(status);
967 LocalMemory<const char*> languageTypes;
968 LocalMemory<int32_t> languageReplacementIndexes;
969 readLanguageAlias(languageAlias.getAlias(),
970 &strings,
971 languageTypes,
972 languageReplacementIndexes,
973 languagesLength,
974 status);
975
976 // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
977 // and strings
978 LocalMemory<const char*> scriptTypes;
979 LocalMemory<int32_t> scriptReplacementIndexes;
980 readScriptAlias(scriptAlias.getAlias(),
981 &strings,
982 scriptTypes,
983 scriptReplacementIndexes,
984 scriptLength,
985 status);
986
987 // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
988 // and strings
989 LocalMemory<const char*> territoryTypes;
990 LocalMemory<int32_t> territoryReplacementIndexes;
991 readTerritoryAlias(territoryAlias.getAlias(),
992 &strings,
993 territoryTypes,
994 territoryReplacementIndexes,
995 territoryLength, status);
996
997 // Read the variantAlias into variantTypes, variantReplacementIndexes
998 // and strings
999 LocalMemory<const char*> variantTypes;
1000 LocalMemory<int32_t> variantReplacementIndexes;
1001 readVariantAlias(variantAlias.getAlias(),
1002 &strings,
1003 variantTypes,
1004 variantReplacementIndexes,
1005 variantLength, status);
1006
1007 // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
1008 // and strings
1009 LocalMemory<const char*> subdivisionTypes;
1010 LocalMemory<int32_t> subdivisionReplacementIndexes;
1011 readSubdivisionAlias(subdivisionAlias.getAlias(),
1012 &strings,
1013 subdivisionTypes,
1014 subdivisionReplacementIndexes,
1015 subdivisionLength, status);
1016
1017 if (U_FAILURE(status)) {
1018 return nullptr;
1019 }
1020
1021 // We can only use strings after freeze it.
1022 strings.freeze();
1023
1024 // Build the languageMap from languageTypes & languageReplacementIndexes
1025 CharStringMap languageMap(490, status);
1026 for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
1027 languageMap.put(languageTypes[i],
1028 strings.get(languageReplacementIndexes[i]),
1029 status);
1030 }
1031
1032 // Build the scriptMap from scriptTypes & scriptReplacementIndexes
1033 CharStringMap scriptMap(1, status);
1034 for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
1035 scriptMap.put(scriptTypes[i],
1036 strings.get(scriptReplacementIndexes[i]),
1037 status);
1038 }
1039
1040 // Build the territoryMap from territoryTypes & territoryReplacementIndexes
1041 CharStringMap territoryMap(650, status);
1042 for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
1043 territoryMap.put(territoryTypes[i],
1044 strings.get(territoryReplacementIndexes[i]),
1045 status);
1046 }
1047
1048 // Build the variantMap from variantTypes & variantReplacementIndexes.
1049 CharStringMap variantMap(2, status);
1050 for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
1051 variantMap.put(variantTypes[i],
1052 strings.get(variantReplacementIndexes[i]),
1053 status);
1054 }
1055
1056 // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
1057 CharStringMap subdivisionMap(2, status);
1058 for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
1059 subdivisionMap.put(subdivisionTypes[i],
1060 strings.get(subdivisionReplacementIndexes[i]),
1061 status);
1062 }
1063
1064 if (U_FAILURE(status)) {
1065 return nullptr;
1066 }
1067
1068 // copy hashtables
1069 auto *data = new AliasData(
1070 std::move(languageMap),
1071 std::move(scriptMap),
1072 std::move(territoryMap),
1073 std::move(variantMap),
1074 std::move(subdivisionMap),
1075 strings.orphanCharStrings());
1076
1077 if (data == nullptr) {
1078 status = U_MEMORY_ALLOCATION_ERROR;
1079 }
1080 return data;
1081 }
1082
1083 /**
1084 * A class that find the replacement values of locale fields by using AliasData.
1085 */
1086 class AliasReplacer {
1087 public:
AliasReplacer(UErrorCode status)1088 AliasReplacer(UErrorCode status) :
1089 language(nullptr), script(nullptr), region(nullptr),
1090 extensions(nullptr), variants(status),
1091 data(nullptr) {
1092 }
~AliasReplacer()1093 ~AliasReplacer() {
1094 }
1095
1096 // Check the fields inside locale, if need to replace fields,
1097 // place the the replaced locale ID in out and return true.
1098 // Otherwise return false for no replacement or error.
1099 bool replace(
1100 const Locale& locale, CharString& out, UErrorCode& status);
1101
1102 private:
1103 const char* language;
1104 const char* script;
1105 const char* region;
1106 const char* extensions;
1107 UVector variants;
1108
1109 const AliasData* data;
1110
notEmpty(const char * str)1111 inline bool notEmpty(const char* str) {
1112 return str && str[0] != NULL_CHAR;
1113 }
1114
1115 /**
1116 * If replacement is neither null nor empty and input is either null or empty,
1117 * return replacement.
1118 * If replacement is neither null nor empty but input is not empty, return input.
1119 * If replacement is either null or empty and type is either null or empty,
1120 * return input.
1121 * Otherwise return null.
1122 * replacement input type return
1123 * AAA nullptr * AAA
1124 * AAA BBB * BBB
1125 * nullptr || "" CCC nullptr CCC
1126 * nullptr || "" * DDD nullptr
1127 */
deleteOrReplace(const char * input,const char * type,const char * replacement)1128 inline const char* deleteOrReplace(
1129 const char* input, const char* type, const char* replacement) {
1130 return notEmpty(replacement) ?
1131 ((input == nullptr) ? replacement : input) :
1132 ((type == nullptr) ? input : nullptr);
1133 }
1134
same(const char * a,const char * b)1135 inline bool same(const char* a, const char* b) {
1136 if (a == nullptr && b == nullptr) {
1137 return true;
1138 }
1139 if ((a == nullptr && b != nullptr) ||
1140 (a != nullptr && b == nullptr)) {
1141 return false;
1142 }
1143 return uprv_strcmp(a, b) == 0;
1144 }
1145
1146 // Gather fields and generate locale ID into out.
1147 CharString& outputToString(CharString& out, UErrorCode status);
1148
1149 // Generate the lookup key.
1150 CharString& generateKey(const char* language, const char* region,
1151 const char* variant, CharString& out,
1152 UErrorCode status);
1153
1154 void parseLanguageReplacement(const char* replacement,
1155 const char*& replaceLanguage,
1156 const char*& replaceScript,
1157 const char*& replaceRegion,
1158 const char*& replaceVariant,
1159 const char*& replaceExtensions,
1160 UVector& toBeFreed,
1161 UErrorCode& status);
1162
1163 // Replace by using languageAlias.
1164 bool replaceLanguage(bool checkLanguage, bool checkRegion,
1165 bool checkVariants, UVector& toBeFreed,
1166 UErrorCode& status);
1167
1168 // Replace by using territoryAlias.
1169 bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
1170
1171 // Replace by using scriptAlias.
1172 bool replaceScript(UErrorCode& status);
1173
1174 // Replace by using variantAlias.
1175 bool replaceVariant(UErrorCode& status);
1176
1177 // Replace by using subdivisionAlias.
1178 bool replaceSubdivision(StringPiece subdivision,
1179 CharString& output, UErrorCode& status);
1180
1181 // Replace transformed extensions.
1182 bool replaceTransformedExtensions(
1183 CharString& transformedExtensions, CharString& output, UErrorCode& status);
1184 };
1185
1186 CharString&
generateKey(const char * language,const char * region,const char * variant,CharString & out,UErrorCode status)1187 AliasReplacer::generateKey(
1188 const char* language, const char* region, const char* variant,
1189 CharString& out, UErrorCode status)
1190 {
1191 out.append(language, status);
1192 if (notEmpty(region)) {
1193 out.append(SEP_CHAR, status)
1194 .append(region, status);
1195 }
1196 if (notEmpty(variant)) {
1197 out.append(SEP_CHAR, status)
1198 .append(variant, status);
1199 }
1200 return out;
1201 }
1202
1203 void
parseLanguageReplacement(const char * replacement,const char * & replacedLanguage,const char * & replacedScript,const char * & replacedRegion,const char * & replacedVariant,const char * & replacedExtensions,UVector & toBeFreed,UErrorCode & status)1204 AliasReplacer::parseLanguageReplacement(
1205 const char* replacement,
1206 const char*& replacedLanguage,
1207 const char*& replacedScript,
1208 const char*& replacedRegion,
1209 const char*& replacedVariant,
1210 const char*& replacedExtensions,
1211 UVector& toBeFreed,
1212 UErrorCode& status)
1213 {
1214 if (U_FAILURE(status)) {
1215 return;
1216 }
1217 replacedScript = replacedRegion = replacedVariant
1218 = replacedExtensions = nullptr;
1219 if (uprv_strchr(replacement, '_') == nullptr) {
1220 replacedLanguage = replacement;
1221 // reach the end, just return it.
1222 return;
1223 }
1224 // We have multiple field so we have to allocate and parse
1225 CharString* str = new CharString(
1226 replacement, (int32_t)uprv_strlen(replacement), status);
1227 if (U_FAILURE(status)) {
1228 return;
1229 }
1230 if (str == nullptr) {
1231 status = U_MEMORY_ALLOCATION_ERROR;
1232 return;
1233 }
1234 toBeFreed.addElement(str, status);
1235 char* data = str->data();
1236 replacedLanguage = (const char*) data;
1237 char* endOfField = uprv_strchr(data, '_');
1238 *endOfField = '\0'; // null terminiate it.
1239 endOfField++;
1240 const char* start = endOfField;
1241 endOfField = (char*) uprv_strchr(start, '_');
1242 size_t len = 0;
1243 if (endOfField == nullptr) {
1244 len = uprv_strlen(start);
1245 } else {
1246 len = endOfField - start;
1247 *endOfField = '\0'; // null terminiate it.
1248 }
1249 if (len == 4 && uprv_isASCIILetter(*start)) {
1250 // Got a script
1251 replacedScript = start;
1252 if (endOfField == nullptr) {
1253 return;
1254 }
1255 start = endOfField++;
1256 endOfField = (char*)uprv_strchr(start, '_');
1257 if (endOfField == nullptr) {
1258 len = uprv_strlen(start);
1259 } else {
1260 len = endOfField - start;
1261 *endOfField = '\0'; // null terminiate it.
1262 }
1263 }
1264 if (len >= 2 && len <= 3) {
1265 // Got a region
1266 replacedRegion = start;
1267 if (endOfField == nullptr) {
1268 return;
1269 }
1270 start = endOfField++;
1271 endOfField = (char*)uprv_strchr(start, '_');
1272 if (endOfField == nullptr) {
1273 len = uprv_strlen(start);
1274 } else {
1275 len = endOfField - start;
1276 *endOfField = '\0'; // null terminiate it.
1277 }
1278 }
1279 if (len >= 4) {
1280 // Got a variant
1281 replacedVariant = start;
1282 if (endOfField == nullptr) {
1283 return;
1284 }
1285 start = endOfField++;
1286 }
1287 replacedExtensions = start;
1288 }
1289
1290 bool
replaceLanguage(bool checkLanguage,bool checkRegion,bool checkVariants,UVector & toBeFreed,UErrorCode & status)1291 AliasReplacer::replaceLanguage(
1292 bool checkLanguage, bool checkRegion,
1293 bool checkVariants, UVector& toBeFreed, UErrorCode& status)
1294 {
1295 if (U_FAILURE(status)) {
1296 return false;
1297 }
1298 if ( (checkRegion && region == nullptr) ||
1299 (checkVariants && variants.size() == 0)) {
1300 // Nothing to search.
1301 return false;
1302 }
1303 int32_t variant_size = checkVariants ? variants.size() : 1;
1304 // Since we may have more than one variant, we need to loop through them.
1305 const char* searchLanguage = checkLanguage ? language : "und";
1306 const char* searchRegion = checkRegion ? region : nullptr;
1307 const char* searchVariant = nullptr;
1308 for (int32_t variant_index = 0;
1309 variant_index < variant_size;
1310 variant_index++) {
1311 if (checkVariants) {
1312 U_ASSERT(variant_index < variant_size);
1313 searchVariant = (const char*)(variants.elementAt(variant_index));
1314 }
1315
1316 if (searchVariant != nullptr && uprv_strlen(searchVariant) < 4) {
1317 // Do not consider ill-formed variant subtag.
1318 searchVariant = nullptr;
1319 }
1320 CharString typeKey;
1321 generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
1322 status);
1323 if (U_FAILURE(status)) {
1324 return false;
1325 }
1326 const char *replacement = data->languageMap().get(typeKey.data());
1327 if (replacement == nullptr) {
1328 // Found no replacement data.
1329 continue;
1330 }
1331
1332 const char* replacedLanguage = nullptr;
1333 const char* replacedScript = nullptr;
1334 const char* replacedRegion = nullptr;
1335 const char* replacedVariant = nullptr;
1336 const char* replacedExtensions = nullptr;
1337 parseLanguageReplacement(replacement,
1338 replacedLanguage,
1339 replacedScript,
1340 replacedRegion,
1341 replacedVariant,
1342 replacedExtensions,
1343 toBeFreed,
1344 status);
1345 replacedLanguage =
1346 (replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und") == 0) ?
1347 language : replacedLanguage;
1348 replacedScript = deleteOrReplace(script, nullptr, replacedScript);
1349 replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
1350 replacedVariant = deleteOrReplace(
1351 searchVariant, searchVariant, replacedVariant);
1352
1353 if ( same(language, replacedLanguage) &&
1354 same(script, replacedScript) &&
1355 same(region, replacedRegion) &&
1356 same(searchVariant, replacedVariant) &&
1357 replacedExtensions == nullptr) {
1358 // Replacement produce no changes.
1359 continue;
1360 }
1361
1362 language = replacedLanguage;
1363 region = replacedRegion;
1364 script = replacedScript;
1365 if (searchVariant != nullptr) {
1366 if (notEmpty(replacedVariant)) {
1367 variants.setElementAt((void*)replacedVariant, variant_index);
1368 } else {
1369 variants.removeElementAt(variant_index);
1370 }
1371 }
1372 if (replacedExtensions != nullptr) {
1373 // DO NOTHING
1374 // UTS35 does not specifiy what should we do if we have extensions in the
1375 // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
1376 // extensions in them languageAlias:
1377 // i_default => en_x_i_default
1378 // i_enochian => und_x_i_enochian
1379 // i_mingo => see_x_i_mingo
1380 // zh_min => nan_x_zh_min
1381 // But all of them are already changed by code inside ultag_parse() before
1382 // hitting this code.
1383 }
1384
1385 // Something changed by language alias data.
1386 return true;
1387 }
1388 // Nothing changed by language alias data.
1389 return false;
1390 }
1391
1392 bool
replaceTerritory(UVector & toBeFreed,UErrorCode & status)1393 AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
1394 {
1395 if (U_FAILURE(status)) {
1396 return false;
1397 }
1398 if (region == nullptr) {
1399 // No region to search.
1400 return false;
1401 }
1402 const char *replacement = data->territoryMap().get(region);
1403 if (replacement == nullptr) {
1404 // Found no replacement data for this region.
1405 return false;
1406 }
1407 const char* replacedRegion = replacement;
1408 const char* firstSpace = uprv_strchr(replacement, ' ');
1409 if (firstSpace != nullptr) {
1410 // If there are are more than one region in the replacement.
1411 // We need to check which one match based on the language.
1412 // Cannot use nullptr for language because that will construct
1413 // the default locale, in that case, use "und" to get the correct
1414 // locale.
1415 Locale l = LocaleBuilder()
1416 .setLanguage(language == nullptr ? "und" : language)
1417 .setScript(script)
1418 .build(status);
1419 l.addLikelySubtags(status);
1420 const char* likelyRegion = l.getCountry();
1421 LocalPointer<CharString> item;
1422 if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) {
1423 size_t len = uprv_strlen(likelyRegion);
1424 const char* foundInReplacement = uprv_strstr(replacement,
1425 likelyRegion);
1426 if (foundInReplacement != nullptr) {
1427 // Assuming the case there are no three letter region code in
1428 // the replacement of territoryAlias
1429 U_ASSERT(foundInReplacement == replacement ||
1430 *(foundInReplacement-1) == ' ');
1431 U_ASSERT(foundInReplacement[len] == ' ' ||
1432 foundInReplacement[len] == '\0');
1433 item.adoptInsteadAndCheckErrorCode(
1434 new CharString(foundInReplacement, (int32_t)len, status), status);
1435 }
1436 }
1437 if (item.isNull() && U_SUCCESS(status)) {
1438 item.adoptInsteadAndCheckErrorCode(
1439 new CharString(replacement,
1440 (int32_t)(firstSpace - replacement), status), status);
1441 }
1442 if (U_FAILURE(status)) { return false; }
1443 if (item.isNull()) {
1444 status = U_MEMORY_ALLOCATION_ERROR;
1445 return false;
1446 }
1447 replacedRegion = item->data();
1448 toBeFreed.addElement(item.orphan(), status);
1449 }
1450 U_ASSERT(!same(region, replacedRegion));
1451 region = replacedRegion;
1452 // The region is changed by data in territory alias.
1453 return true;
1454 }
1455
1456 bool
replaceScript(UErrorCode & status)1457 AliasReplacer::replaceScript(UErrorCode& status)
1458 {
1459 if (U_FAILURE(status)) {
1460 return false;
1461 }
1462 if (script == nullptr) {
1463 // No script to search.
1464 return false;
1465 }
1466 const char *replacement = data->scriptMap().get(script);
1467 if (replacement == nullptr) {
1468 // Found no replacement data for this script.
1469 return false;
1470 }
1471 U_ASSERT(!same(script, replacement));
1472 script = replacement;
1473 // The script is changed by data in script alias.
1474 return true;
1475 }
1476
1477 bool
replaceVariant(UErrorCode & status)1478 AliasReplacer::replaceVariant(UErrorCode& status)
1479 {
1480 if (U_FAILURE(status)) {
1481 return false;
1482 }
1483 // Since we may have more than one variant, we need to loop through them.
1484 for (int32_t i = 0; i < variants.size(); i++) {
1485 const char *variant = (const char*)(variants.elementAt(i));
1486 const char *replacement = data->variantMap().get(variant);
1487 if (replacement == nullptr) {
1488 // Found no replacement data for this variant.
1489 continue;
1490 }
1491 U_ASSERT((uprv_strlen(replacement) >= 5 &&
1492 uprv_strlen(replacement) <= 8) ||
1493 (uprv_strlen(replacement) == 4 &&
1494 replacement[0] >= '0' &&
1495 replacement[0] <= '9'));
1496 if (!same(variant, replacement)) {
1497 variants.setElementAt((void*)replacement, i);
1498 // Special hack to handle hepburn-heploc => alalc97
1499 if (uprv_strcmp(variant, "heploc") == 0) {
1500 for (int32_t j = 0; j < variants.size(); j++) {
1501 if (uprv_strcmp((const char*)(variants.elementAt(j)),
1502 "hepburn") == 0) {
1503 variants.removeElementAt(j);
1504 }
1505 }
1506 }
1507 return true;
1508 }
1509 }
1510 return false;
1511 }
1512
1513 bool
replaceSubdivision(StringPiece subdivision,CharString & output,UErrorCode & status)1514 AliasReplacer::replaceSubdivision(
1515 StringPiece subdivision, CharString& output, UErrorCode& status)
1516 {
1517 if (U_FAILURE(status)) {
1518 return false;
1519 }
1520 const char *replacement = data->subdivisionMap().get(subdivision.data());
1521 if (replacement != nullptr) {
1522 const char* firstSpace = uprv_strchr(replacement, ' ');
1523 // Found replacement data for this subdivision.
1524 size_t len = (firstSpace != nullptr) ?
1525 (firstSpace - replacement) : uprv_strlen(replacement);
1526 if (2 <= len && len <= 8) {
1527 output.append(replacement, (int32_t)len, status);
1528 if (2 == len) {
1529 // Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
1530 output.append("zzzz", 4, status);
1531 }
1532 }
1533 return true;
1534 }
1535 return false;
1536 }
1537
/**
 * Rewrites the value of a "t" (transformed content) extension into output.
 *
 * The part before the first tkey (the tlang), if any, is parsed as a language
 * tag, canonicalized, written back as a language tag and lower-cased. The
 * "tkey-tvalue" fields that follow are sorted by string comparison and
 * appended, each tvalue being passed through ulocimp_toBcpType().
 *
 * The buffer of transformedExtensions is modified in place: separators are
 * overwritten with NUL while splitting.
 *
 * @param transformedExtensions The extension value; its contents are clobbered.
 * @param output                Receives the rewritten value.
 * @param status                Error code; set to U_ILLEGAL_ARGUMENT_ERROR when
 *                              a field contains no '-'.
 * @return true on success, false on error.
 */
bool
AliasReplacer::replaceTransformedExtensions(
    CharString& transformedExtensions, CharString& output, UErrorCode& status)
{
    // The content of the transformedExtensions will be modified in this
    // function to NULL-terminating (tkey-tvalue) pairs.
    if (U_FAILURE(status)) {
        return false;
    }
    int32_t len = transformedExtensions.length();
    const char* str = transformedExtensions.data();
    // NOTE(review): ultag_getTKeyStart() is defined elsewhere; from its use
    // here it presumably returns the start of the first tkey or nullptr.
    const char* tkey = ultag_getTKeyStart(str);
    // Length of the tlang prefix: nothing when the value starts with a tkey,
    // everything when there is no tkey, otherwise up to the separator that
    // precedes the first tkey.
    int32_t tlangLen = (tkey == str) ? 0 :
        ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
    CharStringByteSink sink(&output);
    if (tlangLen > 0) {
        Locale tlang = LocaleBuilder()
            .setLanguageTag(StringPiece(str, tlangLen))
            .build(status);
        tlang.canonicalize(status);
        tlang.toLanguageTag(sink, status);
        if (U_FAILURE(status)) {
            return false;
        }
        T_CString_toLowerCase(output.data());
    }
    if (tkey != nullptr) {
        // We need to sort the tfields by tkey
        UVector tfields(status);
        if (U_FAILURE(status)) {
            return false;
        }
        // First pass: cut the buffer into NUL-terminated "tkey-tvalue"
        // strings and collect a pointer to each of them.
        do {
            const char* tvalue = uprv_strchr(tkey, '-');
            if (tvalue == nullptr) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return false;
            }
            const char* nextTKey = ultag_getTKeyStart(tvalue);
            if (nextTKey != nullptr) {
                *((char*)(nextTKey-1)) = '\0';  // NULL terminate tvalue
            }
            tfields.insertElementAt((void*)tkey, tfields.size(), status);
            if (U_FAILURE(status)) {
                return false;
            }
            tkey = nextTKey;
        } while (tkey != nullptr);
        tfields.sort([](UElement e1, UElement e2) -> int8_t {
            // uprv_strcmp return int and in some platform, such as arm64-v8a,
            // it may return positive values > 127 which cause the casted value
            // of int8_t negative.
            int res = uprv_strcmp(
                (const char*)e1.pointer, (const char*)e2.pointer);
            return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
        }, status);
        // Second pass: emit the sorted fields, separated by '-'.
        for (int32_t i = 0; i < tfields.size(); i++) {
             if (output.length() > 0) {
                 output.append('-', status);
             }
             const char* tfield = (const char*) tfields.elementAt(i);
             const char* tvalue = uprv_strchr(tfield, '-');
             if (tvalue == nullptr) {
                 status = U_ILLEGAL_ARGUMENT_ERROR;
                 return false;
             }
             // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
             *((char*)tvalue++) = '\0'; // NULL terminate tkey
             output.append(tfield, status).append('-', status);
             // Use the value returned by ulocimp_toBcpType() when there is
             // one, otherwise keep the original tvalue.
             const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
             output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
        }
    }
    if (U_FAILURE(status)) {
        return false;
    }
    return true;
}
1616
1617 CharString&
outputToString(CharString & out,UErrorCode status)1618 AliasReplacer::outputToString(
1619 CharString& out, UErrorCode status)
1620 {
1621 out.append(language, status);
1622 if (notEmpty(script)) {
1623 out.append(SEP_CHAR, status)
1624 .append(script, status);
1625 }
1626 if (notEmpty(region)) {
1627 out.append(SEP_CHAR, status)
1628 .append(region, status);
1629 }
1630 if (variants.size() > 0) {
1631 if (!notEmpty(script) && !notEmpty(region)) {
1632 out.append(SEP_CHAR, status);
1633 }
1634 variants.sort([](UElement e1, UElement e2) -> int8_t {
1635 // uprv_strcmp return int and in some platform, such as arm64-v8a,
1636 // it may return positive values > 127 which cause the casted value
1637 // of int8_t negative.
1638 int res = uprv_strcmp(
1639 (const char*)e1.pointer, (const char*)e2.pointer);
1640 return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
1641 }, status);
1642 int32_t variantsStart = out.length();
1643 for (int32_t i = 0; i < variants.size(); i++) {
1644 out.append(SEP_CHAR, status)
1645 .append((const char*)(variants.elementAt(i)),
1646 status);
1647 }
1648 T_CString_toUpperCase(out.data() + variantsStart);
1649 }
1650 if (notEmpty(extensions)) {
1651 CharString tmp("und_", status);
1652 tmp.append(extensions, status);
1653 Locale tmpLocale(tmp.data());
1654 // only support x extension inside CLDR for now.
1655 U_ASSERT(extensions[0] == 'x');
1656 out.append(tmpLocale.getName() + 1, status);
1657 }
1658 return out;
1659 }
1660
/**
 * Canonicalizes locale by applying the alias data.
 *
 * The language, script, region and variants of locale are loaded into the
 * members, the replace*() helpers are applied repeatedly until none of them
 * changes anything, and the result is serialized into out. The "sd", "rg" and
 * "t" keywords are then processed separately.
 *
 * @param locale The locale to canonicalize.
 * @param out    Cleared on entry; receives the replaced locale ID when the
 *               function returns true.
 * @param status Error code.
 * @return true if the resulting ID differs from locale.getName(); false if
 *         nothing changed or an error occurred.
 */
bool
AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
{
    data = AliasData::singleton(status);
    if (U_FAILURE(status)) {
        return false;
    }
    U_ASSERT(data != nullptr);
    out.clear();
    // Load the fields; an empty field is represented as nullptr from here on.
    language = locale.getLanguage();
    if (!notEmpty(language)) {
        language = nullptr;
    }
    script = locale.getScript();
    if (!notEmpty(script)) {
        script = nullptr;
    }
    region = locale.getCountry();
    if (!notEmpty(region)) {
        region = nullptr;
    }
    // Split the lower-cased variant string at each separator. The elements
    // added to variants point into variantsBuff, which is local to this call.
    const char* variantsStr = locale.getVariant();
    CharString variantsBuff(variantsStr, -1, status);
    if (!variantsBuff.isEmpty()) {
        if (U_FAILURE(status)) { return false; }
        char* start = variantsBuff.data();
        T_CString_toLowerCase(start);
        char* end;
        while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
               U_SUCCESS(status)) {
            *end = NULL_CHAR;  // null terminate inside variantsBuff
            variants.addElement(start, status);
            start = end + 1;
        }
        variants.addElement(start, status);
    }
    if (U_FAILURE(status)) { return false; }

    // Sort the variants
    variants.sort([](UElement e1, UElement e2) -> int8_t {
        // uprv_strcmp return int and in some platform, such as arm64-v8a,
        // it may return positive values > 127 which cause the casted value
        // of int8_t negative.
        int res = uprv_strcmp(
            (const char*)e1.pointer, (const char*)e2.pointer);
        return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
    }, status);

    // Counts the replacements made, to assert when we loop too many times.
    int changed = 0;
    // A UVector to hold CharStrings allocated by the replace* methods;
    // they are freed when it goes out of scope at the end of this function.
    UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); },
                             nullptr, 10, status);
    while (U_SUCCESS(status)) {
        // Something is wrong with the data if we have already made five
        // replacements here.
        U_ASSERT(changed < 5);
        // From observation of key in data/misc/metadata.txt
        // we know currently we only need to search in the following combination
        // of fields for type in languageAlias:
        // * lang_region_variant
        // * lang_region
        // * lang_variant
        // * lang
        // * und_variant
        // This assumption is ensured by the U_ASSERT in readLanguageAlias
        //
        //                      lang  REGION variant
        if (    replaceLanguage(true, true,  true,  stringsToBeFreed, status) ||
                replaceLanguage(true, true,  false, stringsToBeFreed, status) ||
                replaceLanguage(true, false, true,  stringsToBeFreed, status) ||
                replaceLanguage(true, false, false, stringsToBeFreed, status) ||
                replaceLanguage(false,false, true,  stringsToBeFreed, status) ||
                replaceTerritory(stringsToBeFreed, status) ||
                replaceScript(status) ||
                replaceVariant(status)) {
            // Some values in data is changed, try to match from the beginning
            // again.
            changed++;
            continue;
        }
        // Nothing changed. Break out.
        break;
    }  // while(1)

    if (U_FAILURE(status)) { return false; }
    // Nothing changed and we know the order of the variants did not change
    // because we have no variant or only one.
    const char* extensionsStr = locale_getKeywordsStart(locale.getName());
    if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
        return false;
    }
    outputToString(out, status);
    if (U_FAILURE(status)) {
        return false;
    }
    if (extensionsStr != nullptr) {
        // From here on, changed counts keyword values that were replaced.
        changed = 0;
        // Keywords are read from locale and the replacements written to temp.
        Locale temp(locale);
        LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
        if (U_SUCCESS(status) && !iter.isNull()) {
            const char* key;
            while ((key = iter->next(nullptr, status)) != nullptr) {
                if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
                        uprv_strcmp("t", key) == 0) {
                    CharString value;
                    CharStringByteSink valueSink(&value);
                    locale.getKeywordValue(key, valueSink, status);
                    if (U_FAILURE(status)) {
                        // A failure to read this value is ignored on purpose:
                        // reset the error and move on to the next keyword.
                        status = U_ZERO_ERROR;
                        continue;
                    }
                    CharString replacement;
                    // Of the three keys accepted above, "sd" and "rg" have
                    // length 2 and hold subdivision codes.
                    if (uprv_strlen(key) == 2) {
                        if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
                            changed++;
                            temp.setKeywordValue(key, replacement.data(), status);
                        }
                    } else {
                        U_ASSERT(uprv_strcmp(key, "t") == 0);
                        if (replaceTransformedExtensions(value, replacement, status)) {
                            changed++;
                            temp.setKeywordValue(key, replacement.data(), status);
                        }
                    }
                    if (U_FAILURE(status)) {
                        return false;
                    }
                }
            }
        }
        if (changed != 0) {
            // Take the keywords from temp, which holds the replaced values.
            extensionsStr = locale_getKeywordsStart(temp.getName());
        }
        out.append(extensionsStr, status);
    }
    if (U_FAILURE(status)) {
        return false;
    }
    // If the tag is not changed, return.
    if (uprv_strcmp(out.data(), locale.getName()) == 0) {
        out.clear();
        return false;
    }
    return true;
}
1808
1809 // Return true if the locale is changed during canonicalization.
1810 // The replaced value then will be put into out.
1811 bool
canonicalizeLocale(const Locale & locale,CharString & out,UErrorCode & status)1812 canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
1813 {
1814 AliasReplacer replacer(status);
1815 return replacer.replace(locale, out, status);
1816 }
1817
1818 // Function to optimize for known cases without so we can skip the loading
1819 // of resources in the startup time until we really need it.
1820 bool
isKnownCanonicalizedLocale(const char * locale,UErrorCode & status)1821 isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
1822 {
1823 if ( uprv_strcmp(locale, "c") == 0 ||
1824 uprv_strcmp(locale, "en") == 0 ||
1825 uprv_strcmp(locale, "en_US") == 0) {
1826 return true;
1827 }
1828
1829 // common well-known Canonicalized.
1830 umtx_initOnce(gKnownCanonicalizedInitOnce,
1831 &loadKnownCanonicalized, status);
1832 if (U_FAILURE(status)) {
1833 return false;
1834 }
1835 U_ASSERT(gKnownCanonicalized != nullptr);
1836 return uhash_geti(gKnownCanonicalized, locale) != 0;
1837 }
1838
1839 } // namespace
1840
1841 // Function for testing.
1842 U_CAPI const char* const*
ulocimp_getKnownCanonicalizedLocaleForTest(int32_t * length)1843 ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
1844 {
1845 *length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
1846 return KNOWN_CANONICALIZED;
1847 }
1848
1849 // Function for testing.
1850 U_CAPI bool
ulocimp_isCanonicalizedLocaleForTest(const char * localeName)1851 ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
1852 {
1853 Locale l(localeName);
1854 UErrorCode status = U_ZERO_ERROR;
1855 CharString temp;
1856 return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
1857 }
1858
1859 /*This function initializes a Locale from a C locale ID*/
init(const char * localeID,UBool canonicalize)1860 Locale& Locale::init(const char* localeID, UBool canonicalize)
1861 {
1862 fIsBogus = FALSE;
1863 /* Free our current storage */
1864 if ((baseName != fullName) && (baseName != fullNameBuffer)) {
1865 uprv_free(baseName);
1866 }
1867 baseName = NULL;
1868 if(fullName != fullNameBuffer) {
1869 uprv_free(fullName);
1870 fullName = fullNameBuffer;
1871 }
1872
1873 // not a loop:
1874 // just an easy way to have a common error-exit
1875 // without goto and without another function
1876 do {
1877 char *separator;
1878 char *field[5] = {0};
1879 int32_t fieldLen[5] = {0};
1880 int32_t fieldIdx;
1881 int32_t variantField;
1882 int32_t length;
1883 UErrorCode err;
1884
1885 if(localeID == NULL) {
1886 // not an error, just set the default locale
1887 return *this = getDefault();
1888 }
1889
1890 /* preset all fields to empty */
1891 language[0] = script[0] = country[0] = 0;
1892
1893 // "canonicalize" the locale ID to ICU/Java format
1894 err = U_ZERO_ERROR;
1895 length = canonicalize ?
1896 uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
1897 uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
1898
1899 if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
1900 U_ASSERT(baseName == nullptr);
1901 /*Go to heap for the fullName if necessary*/
1902 fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
1903 if(fullName == 0) {
1904 fullName = fullNameBuffer;
1905 break; // error: out of memory
1906 }
1907 err = U_ZERO_ERROR;
1908 length = canonicalize ?
1909 uloc_canonicalize(localeID, fullName, length+1, &err) :
1910 uloc_getName(localeID, fullName, length+1, &err);
1911 }
1912 if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
1913 /* should never occur */
1914 break;
1915 }
1916
1917 variantBegin = length;
1918
1919 /* after uloc_getName/canonicalize() we know that only '_' are separators */
1920 /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
1921 separator = field[0] = fullName;
1922 fieldIdx = 1;
1923 char* at = uprv_strchr(fullName, '@');
1924 while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
1925 fieldIdx < UPRV_LENGTHOF(field)-1 &&
1926 (at == nullptr || separator < at)) {
1927 field[fieldIdx] = separator + 1;
1928 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
1929 fieldIdx++;
1930 }
1931 // variant may contain @foo or .foo POSIX cruft; remove it
1932 separator = uprv_strchr(field[fieldIdx-1], '@');
1933 char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
1934 if (separator!=NULL || sep2!=NULL) {
1935 if (separator==NULL || (sep2!=NULL && separator > sep2)) {
1936 separator = sep2;
1937 }
1938 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
1939 } else {
1940 fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
1941 }
1942
1943 if (fieldLen[0] >= (int32_t)(sizeof(language)))
1944 {
1945 break; // error: the language field is too long
1946 }
1947
1948 variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
1949 if (fieldLen[0] > 0) {
1950 /* We have a language */
1951 uprv_memcpy(language, fullName, fieldLen[0]);
1952 language[fieldLen[0]] = 0;
1953 }
1954 if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) &&
1955 uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) &&
1956 uprv_isASCIILetter(field[1][3])) {
1957 /* We have at least a script */
1958 uprv_memcpy(script, field[1], fieldLen[1]);
1959 script[fieldLen[1]] = 0;
1960 variantField++;
1961 }
1962
1963 if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
1964 /* We have a country */
1965 uprv_memcpy(country, field[variantField], fieldLen[variantField]);
1966 country[fieldLen[variantField]] = 0;
1967 variantField++;
1968 } else if (fieldLen[variantField] == 0) {
1969 variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
1970 }
1971
1972 if (fieldLen[variantField] > 0) {
1973 /* We have a variant */
1974 variantBegin = (int32_t)(field[variantField] - fullName);
1975 }
1976
1977 err = U_ZERO_ERROR;
1978 initBaseName(err);
1979 if (U_FAILURE(err)) {
1980 break;
1981 }
1982
1983 if (canonicalize) {
1984 if (!isKnownCanonicalizedLocale(fullName, err)) {
1985 CharString replaced;
1986 // Not sure it is already canonicalized
1987 if (canonicalizeLocale(*this, replaced, err)) {
1988 U_ASSERT(U_SUCCESS(err));
1989 // If need replacement, call init again.
1990 init(replaced.data(), false);
1991 }
1992 if (U_FAILURE(err)) {
1993 break;
1994 }
1995 }
1996 } // if (canonicalize) {
1997
1998 // successful end of init()
1999 return *this;
2000 } while(0); /*loop doesn't iterate*/
2001
2002 // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
2003 setToBogus();
2004
2005 return *this;
2006 }
2007
2008 /*
2009 * Set up the base name.
2010 * If there are no key words, it's exactly the full name.
2011 * If key words exist, it's the full name truncated at the '@' character.
2012 * Need to set up both at init() and after setting a keyword.
2013 */
2014 void
initBaseName(UErrorCode & status)2015 Locale::initBaseName(UErrorCode &status) {
2016 if (U_FAILURE(status)) {
2017 return;
2018 }
2019 U_ASSERT(baseName==NULL || baseName==fullName);
2020 const char *atPtr = uprv_strchr(fullName, '@');
2021 const char *eqPtr = uprv_strchr(fullName, '=');
2022 if (atPtr && eqPtr && atPtr < eqPtr) {
2023 // Key words exist.
2024 int32_t baseNameLength = (int32_t)(atPtr - fullName);
2025 baseName = (char *)uprv_malloc(baseNameLength + 1);
2026 if (baseName == NULL) {
2027 status = U_MEMORY_ALLOCATION_ERROR;
2028 return;
2029 }
2030 uprv_strncpy(baseName, fullName, baseNameLength);
2031 baseName[baseNameLength] = 0;
2032
2033 // The original computation of variantBegin leaves it equal to the length
2034 // of fullName if there is no variant. It should instead be
2035 // the length of the baseName.
2036 if (variantBegin > baseNameLength) {
2037 variantBegin = baseNameLength;
2038 }
2039 } else {
2040 baseName = fullName;
2041 }
2042 }
2043
2044
2045 int32_t
hashCode() const2046 Locale::hashCode() const
2047 {
2048 return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
2049 }
2050
2051 void
setToBogus()2052 Locale::setToBogus() {
2053 /* Free our current storage */
2054 if((baseName != fullName) && (baseName != fullNameBuffer)) {
2055 uprv_free(baseName);
2056 }
2057 baseName = NULL;
2058 if(fullName != fullNameBuffer) {
2059 uprv_free(fullName);
2060 fullName = fullNameBuffer;
2061 }
2062 *fullNameBuffer = 0;
2063 *language = 0;
2064 *script = 0;
2065 *country = 0;
2066 fIsBogus = TRUE;
2067 variantBegin = 0;
2068 }
2069
2070 const Locale& U_EXPORT2
getDefault()2071 Locale::getDefault()
2072 {
2073 {
2074 Mutex lock(&gDefaultLocaleMutex);
2075 if (gDefaultLocale != NULL) {
2076 return *gDefaultLocale;
2077 }
2078 }
2079 UErrorCode status = U_ZERO_ERROR;
2080 return *locale_set_default_internal(NULL, status);
2081 }
2082
2083
2084
2085 void U_EXPORT2
setDefault(const Locale & newLocale,UErrorCode & status)2086 Locale::setDefault( const Locale& newLocale,
2087 UErrorCode& status)
2088 {
2089 if (U_FAILURE(status)) {
2090 return;
2091 }
2092
2093 /* Set the default from the full name string of the supplied locale.
2094 * This is a convenient way to access the default locale caching mechanisms.
2095 */
2096 const char *localeID = newLocale.getName();
2097 locale_set_default_internal(localeID, status);
2098 }
2099
2100 void
addLikelySubtags(UErrorCode & status)2101 Locale::addLikelySubtags(UErrorCode& status) {
2102 if (U_FAILURE(status)) {
2103 return;
2104 }
2105
2106 CharString maximizedLocaleID;
2107 {
2108 CharStringByteSink sink(&maximizedLocaleID);
2109 ulocimp_addLikelySubtags(fullName, sink, &status);
2110 }
2111
2112 if (U_FAILURE(status)) {
2113 return;
2114 }
2115
2116 init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
2117 if (isBogus()) {
2118 status = U_ILLEGAL_ARGUMENT_ERROR;
2119 }
2120 }
2121
2122 void
minimizeSubtags(UErrorCode & status)2123 Locale::minimizeSubtags(UErrorCode& status) {
2124 if (U_FAILURE(status)) {
2125 return;
2126 }
2127
2128 CharString minimizedLocaleID;
2129 {
2130 CharStringByteSink sink(&minimizedLocaleID);
2131 ulocimp_minimizeSubtags(fullName, sink, &status);
2132 }
2133
2134 if (U_FAILURE(status)) {
2135 return;
2136 }
2137
2138 init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
2139 if (isBogus()) {
2140 status = U_ILLEGAL_ARGUMENT_ERROR;
2141 }
2142 }
2143
2144 void
canonicalize(UErrorCode & status)2145 Locale::canonicalize(UErrorCode& status) {
2146 if (U_FAILURE(status)) {
2147 return;
2148 }
2149 if (isBogus()) {
2150 status = U_ILLEGAL_ARGUMENT_ERROR;
2151 return;
2152 }
2153 CharString uncanonicalized(fullName, status);
2154 if (U_FAILURE(status)) {
2155 return;
2156 }
2157 init(uncanonicalized.data(), /*canonicalize=*/TRUE);
2158 if (isBogus()) {
2159 status = U_ILLEGAL_ARGUMENT_ERROR;
2160 }
2161 }
2162
2163 Locale U_EXPORT2
forLanguageTag(StringPiece tag,UErrorCode & status)2164 Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
2165 {
2166 Locale result(Locale::eBOGUS);
2167
2168 if (U_FAILURE(status)) {
2169 return result;
2170 }
2171
2172 // If a BCP 47 language tag is passed as the language parameter to the
2173 // normal Locale constructor, it will actually fall back to invoking
2174 // uloc_forLanguageTag() to parse it if it somehow is able to detect that
2175 // the string actually is BCP 47. This works well for things like strings
2176 // using BCP 47 extensions, but it does not at all work for things like
2177 // legacy language tags (marked as “Type: grandfathered” in BCP 47,
2178 // e.g., "en-GB-oed") which are possible to also
2179 // interpret as ICU locale IDs and because of that won't trigger the BCP 47
2180 // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
2181 // and then Locale::init(), instead of just calling the normal constructor.
2182
2183 CharString localeID;
2184 int32_t parsedLength;
2185 {
2186 CharStringByteSink sink(&localeID);
2187 ulocimp_forLanguageTag(
2188 tag.data(),
2189 tag.length(),
2190 sink,
2191 &parsedLength,
2192 &status);
2193 }
2194
2195 if (U_FAILURE(status)) {
2196 return result;
2197 }
2198
2199 if (parsedLength != tag.size()) {
2200 status = U_ILLEGAL_ARGUMENT_ERROR;
2201 return result;
2202 }
2203
2204 result.init(localeID.data(), /*canonicalize=*/FALSE);
2205 if (result.isBogus()) {
2206 status = U_ILLEGAL_ARGUMENT_ERROR;
2207 }
2208 return result;
2209 }
2210
2211 void
toLanguageTag(ByteSink & sink,UErrorCode & status) const2212 Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
2213 {
2214 if (U_FAILURE(status)) {
2215 return;
2216 }
2217
2218 if (fIsBogus) {
2219 status = U_ILLEGAL_ARGUMENT_ERROR;
2220 return;
2221 }
2222
2223 ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status);
2224 }
2225
2226 Locale U_EXPORT2
createFromName(const char * name)2227 Locale::createFromName (const char *name)
2228 {
2229 if (name) {
2230 Locale l("");
2231 l.init(name, FALSE);
2232 return l;
2233 }
2234 else {
2235 return getDefault();
2236 }
2237 }
2238
2239 Locale U_EXPORT2
createCanonical(const char * name)2240 Locale::createCanonical(const char* name) {
2241 Locale loc("");
2242 loc.init(name, TRUE);
2243 return loc;
2244 }
2245
2246 const char *
getISO3Language() const2247 Locale::getISO3Language() const
2248 {
2249 return uloc_getISO3Language(fullName);
2250 }
2251
2252
2253 const char *
getISO3Country() const2254 Locale::getISO3Country() const
2255 {
2256 return uloc_getISO3Country(fullName);
2257 }
2258
2259 /**
2260 * Return the LCID value as specified in the "LocaleID" resource for this
2261 * locale. The LocaleID must be expressed as a hexadecimal number, from
2262 * one to four digits. If the LocaleID resource is not present, or is
2263 * in an incorrect format, 0 is returned. The LocaleID is for use in
2264 * Windows (it is an LCID), but is available on all platforms.
2265 */
2266 uint32_t
getLCID() const2267 Locale::getLCID() const
2268 {
2269 return uloc_getLCID(fullName);
2270 }
2271
getISOCountries()2272 const char* const* U_EXPORT2 Locale::getISOCountries()
2273 {
2274 return uloc_getISOCountries();
2275 }
2276
getISOLanguages()2277 const char* const* U_EXPORT2 Locale::getISOLanguages()
2278 {
2279 return uloc_getISOLanguages();
2280 }
2281
2282 // Set the locale's data based on a posix id.
setFromPOSIXID(const char * posixID)2283 void Locale::setFromPOSIXID(const char *posixID)
2284 {
2285 init(posixID, TRUE);
2286 }
2287
2288 const Locale & U_EXPORT2
getRoot(void)2289 Locale::getRoot(void)
2290 {
2291 return getLocale(eROOT);
2292 }
2293
2294 const Locale & U_EXPORT2
getEnglish(void)2295 Locale::getEnglish(void)
2296 {
2297 return getLocale(eENGLISH);
2298 }
2299
2300 const Locale & U_EXPORT2
getFrench(void)2301 Locale::getFrench(void)
2302 {
2303 return getLocale(eFRENCH);
2304 }
2305
2306 const Locale & U_EXPORT2
getGerman(void)2307 Locale::getGerman(void)
2308 {
2309 return getLocale(eGERMAN);
2310 }
2311
2312 const Locale & U_EXPORT2
getItalian(void)2313 Locale::getItalian(void)
2314 {
2315 return getLocale(eITALIAN);
2316 }
2317
2318 const Locale & U_EXPORT2
getJapanese(void)2319 Locale::getJapanese(void)
2320 {
2321 return getLocale(eJAPANESE);
2322 }
2323
2324 const Locale & U_EXPORT2
getKorean(void)2325 Locale::getKorean(void)
2326 {
2327 return getLocale(eKOREAN);
2328 }
2329
2330 const Locale & U_EXPORT2
getChinese(void)2331 Locale::getChinese(void)
2332 {
2333 return getLocale(eCHINESE);
2334 }
2335
2336 const Locale & U_EXPORT2
getSimplifiedChinese(void)2337 Locale::getSimplifiedChinese(void)
2338 {
2339 return getLocale(eCHINA);
2340 }
2341
2342 const Locale & U_EXPORT2
getTraditionalChinese(void)2343 Locale::getTraditionalChinese(void)
2344 {
2345 return getLocale(eTAIWAN);
2346 }
2347
2348
2349 const Locale & U_EXPORT2
getFrance(void)2350 Locale::getFrance(void)
2351 {
2352 return getLocale(eFRANCE);
2353 }
2354
2355 const Locale & U_EXPORT2
getGermany(void)2356 Locale::getGermany(void)
2357 {
2358 return getLocale(eGERMANY);
2359 }
2360
2361 const Locale & U_EXPORT2
getItaly(void)2362 Locale::getItaly(void)
2363 {
2364 return getLocale(eITALY);
2365 }
2366
2367 const Locale & U_EXPORT2
getJapan(void)2368 Locale::getJapan(void)
2369 {
2370 return getLocale(eJAPAN);
2371 }
2372
2373 const Locale & U_EXPORT2
getKorea(void)2374 Locale::getKorea(void)
2375 {
2376 return getLocale(eKOREA);
2377 }
2378
2379 const Locale & U_EXPORT2
getChina(void)2380 Locale::getChina(void)
2381 {
2382 return getLocale(eCHINA);
2383 }
2384
2385 const Locale & U_EXPORT2
getPRC(void)2386 Locale::getPRC(void)
2387 {
2388 return getLocale(eCHINA);
2389 }
2390
2391 const Locale & U_EXPORT2
getTaiwan(void)2392 Locale::getTaiwan(void)
2393 {
2394 return getLocale(eTAIWAN);
2395 }
2396
2397 const Locale & U_EXPORT2
getUK(void)2398 Locale::getUK(void)
2399 {
2400 return getLocale(eUK);
2401 }
2402
2403 const Locale & U_EXPORT2
getUS(void)2404 Locale::getUS(void)
2405 {
2406 return getLocale(eUS);
2407 }
2408
2409 const Locale & U_EXPORT2
getCanada(void)2410 Locale::getCanada(void)
2411 {
2412 return getLocale(eCANADA);
2413 }
2414
2415 const Locale & U_EXPORT2
getCanadaFrench(void)2416 Locale::getCanadaFrench(void)
2417 {
2418 return getLocale(eCANADA_FRENCH);
2419 }
2420
2421 const Locale &
getLocale(int locid)2422 Locale::getLocale(int locid)
2423 {
2424 Locale *localeCache = getLocaleCache();
2425 U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));
2426 if (localeCache == NULL) {
2427 // Failure allocating the locale cache.
2428 // The best we can do is return a NULL reference.
2429 locid = 0;
2430 }
2431 return localeCache[locid]; /*operating on NULL*/
2432 }
2433
2434 /*
2435 This function is defined this way in order to get around static
2436 initialization and static destruction.
2437 */
2438 Locale *
getLocaleCache(void)2439 Locale::getLocaleCache(void)
2440 {
2441 UErrorCode status = U_ZERO_ERROR;
2442 umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
2443 return gLocaleCache;
2444 }
2445
2446 class KeywordEnumeration : public StringEnumeration {
2447 private:
2448 char *keywords;
2449 char *current;
2450 int32_t length;
2451 UnicodeString currUSKey;
2452 static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
2453
2454 public:
getStaticClassID(void)2455 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
getDynamicClassID(void) const2456 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
2457 public:
KeywordEnumeration(const char * keys,int32_t keywordLen,int32_t currentIndex,UErrorCode & status)2458 KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
2459 : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
2460 if(U_SUCCESS(status) && keywordLen != 0) {
2461 if(keys == NULL || keywordLen < 0) {
2462 status = U_ILLEGAL_ARGUMENT_ERROR;
2463 } else {
2464 keywords = (char *)uprv_malloc(keywordLen+1);
2465 if (keywords == NULL) {
2466 status = U_MEMORY_ALLOCATION_ERROR;
2467 }
2468 else {
2469 uprv_memcpy(keywords, keys, keywordLen);
2470 keywords[keywordLen] = 0;
2471 current = keywords + currentIndex;
2472 length = keywordLen;
2473 }
2474 }
2475 }
2476 }
2477
2478 virtual ~KeywordEnumeration();
2479
clone() const2480 virtual StringEnumeration * clone() const
2481 {
2482 UErrorCode status = U_ZERO_ERROR;
2483 return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
2484 }
2485
count(UErrorCode &) const2486 virtual int32_t count(UErrorCode &/*status*/) const {
2487 char *kw = keywords;
2488 int32_t result = 0;
2489 while(*kw) {
2490 result++;
2491 kw += uprv_strlen(kw)+1;
2492 }
2493 return result;
2494 }
2495
next(int32_t * resultLength,UErrorCode & status)2496 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
2497 const char* result;
2498 int32_t len;
2499 if(U_SUCCESS(status) && *current != 0) {
2500 result = current;
2501 len = (int32_t)uprv_strlen(current);
2502 current += len+1;
2503 if(resultLength != NULL) {
2504 *resultLength = len;
2505 }
2506 } else {
2507 if(resultLength != NULL) {
2508 *resultLength = 0;
2509 }
2510 result = NULL;
2511 }
2512 return result;
2513 }
2514
snext(UErrorCode & status)2515 virtual const UnicodeString* snext(UErrorCode& status) {
2516 int32_t resultLength = 0;
2517 const char *s = next(&resultLength, status);
2518 return setChars(s, resultLength, status);
2519 }
2520
reset(UErrorCode &)2521 virtual void reset(UErrorCode& /*status*/) {
2522 current = keywords;
2523 }
2524 };
2525
2526 const char KeywordEnumeration::fgClassID = '\0';
2527
~KeywordEnumeration()2528 KeywordEnumeration::~KeywordEnumeration() {
2529 uprv_free(keywords);
2530 }
2531
2532 // A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
2533 // the next() method for each keyword before returning it.
2534 class UnicodeKeywordEnumeration : public KeywordEnumeration {
2535 public:
2536 using KeywordEnumeration::KeywordEnumeration;
2537 virtual ~UnicodeKeywordEnumeration();
2538
next(int32_t * resultLength,UErrorCode & status)2539 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
2540 const char* legacy_key = KeywordEnumeration::next(nullptr, status);
2541 while (U_SUCCESS(status) && legacy_key != nullptr) {
2542 const char* key = uloc_toUnicodeLocaleKey(legacy_key);
2543 if (key != nullptr) {
2544 if (resultLength != nullptr) {
2545 *resultLength = static_cast<int32_t>(uprv_strlen(key));
2546 }
2547 return key;
2548 }
2549 // Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
2550 legacy_key = KeywordEnumeration::next(nullptr, status);
2551 }
2552 if (resultLength != nullptr) *resultLength = 0;
2553 return nullptr;
2554 }
2555 };
2556
2557 // Out-of-line virtual destructor to serve as the "key function".
2558 UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
2559
2560 StringEnumeration *
createKeywords(UErrorCode & status) const2561 Locale::createKeywords(UErrorCode &status) const
2562 {
2563 StringEnumeration *result = NULL;
2564
2565 if (U_FAILURE(status)) {
2566 return result;
2567 }
2568
2569 const char* variantStart = uprv_strchr(fullName, '@');
2570 const char* assignment = uprv_strchr(fullName, '=');
2571 if(variantStart) {
2572 if(assignment > variantStart) {
2573 CharString keywords;
2574 CharStringByteSink sink(&keywords);
2575 ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status);
2576 if (U_SUCCESS(status) && !keywords.isEmpty()) {
2577 result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2578 if (!result) {
2579 status = U_MEMORY_ALLOCATION_ERROR;
2580 }
2581 }
2582 } else {
2583 status = U_INVALID_FORMAT_ERROR;
2584 }
2585 }
2586 return result;
2587 }
2588
2589 StringEnumeration *
createUnicodeKeywords(UErrorCode & status) const2590 Locale::createUnicodeKeywords(UErrorCode &status) const
2591 {
2592 StringEnumeration *result = NULL;
2593
2594 if (U_FAILURE(status)) {
2595 return result;
2596 }
2597
2598 const char* variantStart = uprv_strchr(fullName, '@');
2599 const char* assignment = uprv_strchr(fullName, '=');
2600 if(variantStart) {
2601 if(assignment > variantStart) {
2602 CharString keywords;
2603 CharStringByteSink sink(&keywords);
2604 ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status);
2605 if (U_SUCCESS(status) && !keywords.isEmpty()) {
2606 result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2607 if (!result) {
2608 status = U_MEMORY_ALLOCATION_ERROR;
2609 }
2610 }
2611 } else {
2612 status = U_INVALID_FORMAT_ERROR;
2613 }
2614 }
2615 return result;
2616 }
2617
2618 int32_t
getKeywordValue(const char * keywordName,char * buffer,int32_t bufLen,UErrorCode & status) const2619 Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
2620 {
2621 return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
2622 }
2623
2624 void
getKeywordValue(StringPiece keywordName,ByteSink & sink,UErrorCode & status) const2625 Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
2626 if (U_FAILURE(status)) {
2627 return;
2628 }
2629
2630 if (fIsBogus) {
2631 status = U_ILLEGAL_ARGUMENT_ERROR;
2632 return;
2633 }
2634
2635 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2636 const CharString keywordName_nul(keywordName, status);
2637 if (U_FAILURE(status)) {
2638 return;
2639 }
2640
2641 ulocimp_getKeywordValue(fullName, keywordName_nul.data(), sink, &status);
2642 }
2643
2644 void
getUnicodeKeywordValue(StringPiece keywordName,ByteSink & sink,UErrorCode & status) const2645 Locale::getUnicodeKeywordValue(StringPiece keywordName,
2646 ByteSink& sink,
2647 UErrorCode& status) const {
2648 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2649 const CharString keywordName_nul(keywordName, status);
2650 if (U_FAILURE(status)) {
2651 return;
2652 }
2653
2654 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
2655
2656 if (legacy_key == nullptr) {
2657 status = U_ILLEGAL_ARGUMENT_ERROR;
2658 return;
2659 }
2660
2661 CharString legacy_value;
2662 {
2663 CharStringByteSink sink(&legacy_value);
2664 getKeywordValue(legacy_key, sink, status);
2665 }
2666
2667 if (U_FAILURE(status)) {
2668 return;
2669 }
2670
2671 const char* unicode_value = uloc_toUnicodeLocaleType(
2672 keywordName_nul.data(), legacy_value.data());
2673
2674 if (unicode_value == nullptr) {
2675 status = U_ILLEGAL_ARGUMENT_ERROR;
2676 return;
2677 }
2678
2679 sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
2680 }
2681
2682 void
setKeywordValue(const char * keywordName,const char * keywordValue,UErrorCode & status)2683 Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
2684 {
2685 if (U_FAILURE(status)) {
2686 return;
2687 }
2688 if (status == U_STRING_NOT_TERMINATED_WARNING) {
2689 status = U_ZERO_ERROR;
2690 }
2691 int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
2692 int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
2693 bufferLength, &status) + 1;
2694 U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
2695 /* Handle the case the current buffer is not enough to hold the new id */
2696 if (status == U_BUFFER_OVERFLOW_ERROR) {
2697 U_ASSERT(newLength > bufferLength);
2698 char* newFullName = (char *)uprv_malloc(newLength);
2699 if (newFullName == nullptr) {
2700 status = U_MEMORY_ALLOCATION_ERROR;
2701 return;
2702 }
2703 uprv_strcpy(newFullName, fullName);
2704 if (fullName != fullNameBuffer) {
2705 // if full Name is already on the heap, need to free it.
2706 uprv_free(fullName);
2707 if (baseName == fullName) {
2708 baseName = newFullName; // baseName should not point to freed memory.
2709 }
2710 }
2711 fullName = newFullName;
2712 status = U_ZERO_ERROR;
2713 uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
2714 U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
2715 } else {
2716 U_ASSERT(newLength <= bufferLength);
2717 }
2718 if (U_SUCCESS(status) && baseName == fullName) {
2719 // May have added the first keyword, meaning that the fullName is no longer also the baseName.
2720 initBaseName(status);
2721 }
2722 }
2723
2724 void
setKeywordValue(StringPiece keywordName,StringPiece keywordValue,UErrorCode & status)2725 Locale::setKeywordValue(StringPiece keywordName,
2726 StringPiece keywordValue,
2727 UErrorCode& status) {
2728 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2729 const CharString keywordName_nul(keywordName, status);
2730 const CharString keywordValue_nul(keywordValue, status);
2731 setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
2732 }
2733
2734 void
setUnicodeKeywordValue(StringPiece keywordName,StringPiece keywordValue,UErrorCode & status)2735 Locale::setUnicodeKeywordValue(StringPiece keywordName,
2736 StringPiece keywordValue,
2737 UErrorCode& status) {
2738 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2739 const CharString keywordName_nul(keywordName, status);
2740 const CharString keywordValue_nul(keywordValue, status);
2741
2742 if (U_FAILURE(status)) {
2743 return;
2744 }
2745
2746 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
2747
2748 if (legacy_key == nullptr) {
2749 status = U_ILLEGAL_ARGUMENT_ERROR;
2750 return;
2751 }
2752
2753 const char* legacy_value = nullptr;
2754
2755 if (!keywordValue_nul.isEmpty()) {
2756 legacy_value =
2757 uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
2758
2759 if (legacy_value == nullptr) {
2760 status = U_ILLEGAL_ARGUMENT_ERROR;
2761 return;
2762 }
2763 }
2764
2765 setKeywordValue(legacy_key, legacy_value, status);
2766 }
2767
2768 const char *
getBaseName() const2769 Locale::getBaseName() const {
2770 return baseName;
2771 }
2772
// Out-of-line, compiler-generated destructor for Locale::Iterator.
Locale::Iterator::~Iterator() = default;
2774
2775 //eof
2776 U_NAMESPACE_END
2777