1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File locid.cpp
10 *
11 * Created by: Richard Gillam
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17 * methods to get and set it.
18 * 04/02/97 aliu Made operator!= inline; fixed return value
19 * of getName().
20 * 04/15/97 aliu Cleanup for AIX/Win32.
21 * 04/24/97 aliu Numerous changes per code review.
22 * 08/18/98 stephen Changed getDisplayName()
23 * Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24 * Added getISOCountries(), getISOLanguages(),
25 * getLanguagesForCountry()
26 * 03/16/99 bertrand rehaul.
27 * 07/21/99 stephen Added U_CFUNC setDefault
28 * 11/09/99 weiv Added const char * getName() const;
29 * 04/12/00 srl removing unicodestring api's and cached hash code
30 * 08/10/01 grhoten Change the static Locales to accessor functions
31 ******************************************************************************
32 */
33
34 #include <utility>
35
36 #include "unicode/bytestream.h"
37 #include "unicode/locid.h"
38 #include "unicode/localebuilder.h"
39 #include "unicode/strenum.h"
40 #include "unicode/stringpiece.h"
41 #include "unicode/uloc.h"
42 #include "unicode/ures.h"
43
44 #include "bytesinkutil.h"
45 #include "charstr.h"
46 #include "charstrmap.h"
47 #include "cmemory.h"
48 #include "cstring.h"
49 #include "mutex.h"
50 #include "putilimp.h"
51 #include "uassert.h"
52 #include "ucln_cmn.h"
53 #include "uhash.h"
54 #include "ulocimp.h"
55 #include "umutex.h"
56 #include "uniquecharstr.h"
57 #include "ustr_imp.h"
58 #include "uvector.h"
59
60 U_CDECL_BEGIN
61 static UBool U_CALLCONV locale_cleanup(void);
62 U_CDECL_END
63
64 U_NAMESPACE_BEGIN
65
66 static Locale *gLocaleCache = NULL;
67 static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
68
69 // gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
70 static UMutex gDefaultLocaleMutex;
71 static UHashtable *gDefaultLocalesHashT = NULL;
72 static Locale *gDefaultLocale = NULL;
73
/**
 * \def ULOC_STRING_LIMIT
 * Upper bound on the length of a single locale component string.
 * Strings beyond this value crash in CharString, so the Locale constructor
 * rejects longer inputs and produces a bogus locale instead.
 */
#define ULOC_STRING_LIMIT 357913941
79
80 U_NAMESPACE_END
81
/*
 * Index of each predefined locale inside gLocaleCache.
 * eMAX_LOCALES is the number of slots and must remain the last enumerator.
 */
typedef enum ELocalePos {
    /* language-only locales */
    eENGLISH,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    /* language + country locales */
    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,         /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,

    /* the root locale */
    eROOT,

    //eDEFAULT,
    eMAX_LOCALES
} ELocalePos;
108
109 U_CDECL_BEGIN
110 //
111 // Deleter function for Locales owned by the default Locale hash table/
112 //
113 static void U_CALLCONV
deleteLocale(void * obj)114 deleteLocale(void *obj) {
115 delete (icu::Locale *) obj;
116 }
117
locale_cleanup(void)118 static UBool U_CALLCONV locale_cleanup(void)
119 {
120 U_NAMESPACE_USE
121
122 delete [] gLocaleCache;
123 gLocaleCache = NULL;
124 gLocaleCacheInitOnce.reset();
125
126 if (gDefaultLocalesHashT) {
127 uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
128 gDefaultLocalesHashT = NULL;
129 }
130 gDefaultLocale = NULL;
131 return TRUE;
132 }
133
134
locale_init(UErrorCode & status)135 static void U_CALLCONV locale_init(UErrorCode &status) {
136 U_NAMESPACE_USE
137
138 U_ASSERT(gLocaleCache == NULL);
139 gLocaleCache = new Locale[(int)eMAX_LOCALES];
140 if (gLocaleCache == NULL) {
141 status = U_MEMORY_ALLOCATION_ERROR;
142 return;
143 }
144 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
145 gLocaleCache[eROOT] = Locale("");
146 gLocaleCache[eENGLISH] = Locale("en");
147 gLocaleCache[eFRENCH] = Locale("fr");
148 gLocaleCache[eGERMAN] = Locale("de");
149 gLocaleCache[eITALIAN] = Locale("it");
150 gLocaleCache[eJAPANESE] = Locale("ja");
151 gLocaleCache[eKOREAN] = Locale("ko");
152 gLocaleCache[eCHINESE] = Locale("zh");
153 gLocaleCache[eFRANCE] = Locale("fr", "FR");
154 gLocaleCache[eGERMANY] = Locale("de", "DE");
155 gLocaleCache[eITALY] = Locale("it", "IT");
156 gLocaleCache[eJAPAN] = Locale("ja", "JP");
157 gLocaleCache[eKOREA] = Locale("ko", "KR");
158 gLocaleCache[eCHINA] = Locale("zh", "CN");
159 gLocaleCache[eTAIWAN] = Locale("zh", "TW");
160 gLocaleCache[eUK] = Locale("en", "GB");
161 gLocaleCache[eUS] = Locale("en", "US");
162 gLocaleCache[eCANADA] = Locale("en", "CA");
163 gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
164 }
165
166 U_CDECL_END
167
168 U_NAMESPACE_BEGIN
169
locale_set_default_internal(const char * id,UErrorCode & status)170 Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
171 // Synchronize this entire function.
172 Mutex lock(&gDefaultLocaleMutex);
173
174 UBool canonicalize = FALSE;
175
176 // If given a NULL string for the locale id, grab the default
177 // name from the system.
178 // (Different from most other locale APIs, where a null name means use
179 // the current ICU default locale.)
180 if (id == NULL) {
181 id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
182 canonicalize = TRUE; // always canonicalize host ID
183 }
184
185 CharString localeNameBuf;
186 {
187 CharStringByteSink sink(&localeNameBuf);
188 if (canonicalize) {
189 ulocimp_canonicalize(id, sink, &status);
190 } else {
191 ulocimp_getName(id, sink, &status);
192 }
193 }
194
195 if (U_FAILURE(status)) {
196 return gDefaultLocale;
197 }
198
199 if (gDefaultLocalesHashT == NULL) {
200 gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
201 if (U_FAILURE(status)) {
202 return gDefaultLocale;
203 }
204 uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
205 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
206 }
207
208 Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data());
209 if (newDefault == NULL) {
210 newDefault = new Locale(Locale::eBOGUS);
211 if (newDefault == NULL) {
212 status = U_MEMORY_ALLOCATION_ERROR;
213 return gDefaultLocale;
214 }
215 newDefault->init(localeNameBuf.data(), FALSE);
216 uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
217 if (U_FAILURE(status)) {
218 return gDefaultLocale;
219 }
220 }
221 gDefaultLocale = newDefault;
222 return gDefaultLocale;
223 }
224
225 U_NAMESPACE_END
226
227 /* sfb 07/21/99 */
228 U_CFUNC void
locale_set_default(const char * id)229 locale_set_default(const char *id)
230 {
231 U_NAMESPACE_USE
232 UErrorCode status = U_ZERO_ERROR;
233 locale_set_default_internal(id, status);
234 }
235 /* end */
236
237 U_CFUNC const char *
locale_get_default(void)238 locale_get_default(void)
239 {
240 U_NAMESPACE_USE
241 return Locale::getDefault().getName();
242 }
243
244
245 U_NAMESPACE_BEGIN
246
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)247 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
248
/* Character separating the POSIX ID fields: '_', in the platform codepage. */
#define SEP_CHAR '_'
/* The string terminator character. */
#define NULL_CHAR '\0'
254
255 Locale::~Locale()
256 {
257 if ((baseName != fullName) && (baseName != fullNameBuffer)) {
258 uprv_free(baseName);
259 }
260 baseName = NULL;
261 /*if fullName is on the heap, we free it*/
262 if (fullName != fullNameBuffer)
263 {
264 uprv_free(fullName);
265 fullName = NULL;
266 }
267 }
268
Locale()269 Locale::Locale()
270 : UObject(), fullName(fullNameBuffer), baseName(NULL)
271 {
272 init(NULL, FALSE);
273 }
274
275 /*
276 * Internal constructor to allow construction of a locale object with
277 * NO side effects. (Default constructor tries to get
278 * the default locale.)
279 */
Locale(Locale::ELocaleType)280 Locale::Locale(Locale::ELocaleType)
281 : UObject(), fullName(fullNameBuffer), baseName(NULL)
282 {
283 setToBogus();
284 }
285
286
Locale(const char * newLanguage,const char * newCountry,const char * newVariant,const char * newKeywords)287 Locale::Locale( const char * newLanguage,
288 const char * newCountry,
289 const char * newVariant,
290 const char * newKeywords)
291 : UObject(), fullName(fullNameBuffer), baseName(NULL)
292 {
293 if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
294 {
295 init(NULL, FALSE); /* shortcut */
296 }
297 else
298 {
299 UErrorCode status = U_ZERO_ERROR;
300 int32_t lsize = 0;
301 int32_t csize = 0;
302 int32_t vsize = 0;
303 int32_t ksize = 0;
304
305 // Calculate the size of the resulting string.
306
307 // Language
308 if ( newLanguage != NULL )
309 {
310 lsize = (int32_t)uprv_strlen(newLanguage);
311 if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
312 setToBogus();
313 return;
314 }
315 }
316
317 CharString togo(newLanguage, lsize, status); // start with newLanguage
318
319 // _Country
320 if ( newCountry != NULL )
321 {
322 csize = (int32_t)uprv_strlen(newCountry);
323 if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
324 setToBogus();
325 return;
326 }
327 }
328
329 // _Variant
330 if ( newVariant != NULL )
331 {
332 // remove leading _'s
333 while(newVariant[0] == SEP_CHAR)
334 {
335 newVariant++;
336 }
337
338 // remove trailing _'s
339 vsize = (int32_t)uprv_strlen(newVariant);
340 if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
341 setToBogus();
342 return;
343 }
344 while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
345 {
346 vsize--;
347 }
348 }
349
350 if ( newKeywords != NULL)
351 {
352 ksize = (int32_t)uprv_strlen(newKeywords);
353 if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
354 setToBogus();
355 return;
356 }
357 }
358
359 // NOW we have the full locale string..
360 // Now, copy it back.
361
362 // newLanguage is already copied
363
364 if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
365 { // ^
366 togo.append(SEP_CHAR, status);
367 }
368
369 if ( csize != 0 )
370 {
371 togo.append(newCountry, status);
372 }
373
374 if ( vsize != 0)
375 {
376 togo.append(SEP_CHAR, status)
377 .append(newVariant, vsize, status);
378 }
379
380 if ( ksize != 0)
381 {
382 if (uprv_strchr(newKeywords, '=')) {
383 togo.append('@', status); /* keyword parsing */
384 }
385 else {
386 togo.append('_', status); /* Variant parsing with a script */
387 if ( vsize == 0) {
388 togo.append('_', status); /* No country found */
389 }
390 }
391 togo.append(newKeywords, status);
392 }
393
394 if (U_FAILURE(status)) {
395 // Something went wrong with appending, etc.
396 setToBogus();
397 return;
398 }
399 // Parse it, because for example 'language' might really be a complete
400 // string.
401 init(togo.data(), FALSE);
402 }
403 }
404
/*
 * Copy constructor: sets up an empty object (inline buffer, no baseName) and
 * delegates the actual copying to the copy assignment operator.
 */
Locale::Locale(const Locale &other)
    : UObject(other),
      fullName(fullNameBuffer),
      baseName(NULL)
{
    operator=(other);
}
410
Locale(Locale && other)411 Locale::Locale(Locale&& other) U_NOEXCEPT
412 : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
413 *this = std::move(other);
414 }
415
/*
 * Copy assignment. The object is first reset with setToBogus(); if a string
 * duplication then fails, the function returns early without copying the
 * remaining fields.
 */
Locale& Locale::operator=(const Locale& other) {
    if (this == &other) {
        return *this;
    }

    setToBogus();

    // fullName: stays null, is copied into the inline buffer, or is
    // duplicated on the heap, mirroring where other keeps its copy.
    if (other.fullName == nullptr) {
        fullName = nullptr;
    } else if (other.fullName == other.fullNameBuffer) {
        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
    } else {
        fullName = uprv_strdup(other.fullName);
        if (fullName == nullptr) {
            return *this;
        }
    }

    // baseName: keep the aliasing relationship when other's baseName is just
    // its fullName; otherwise duplicate it if present.
    if (other.baseName == other.fullName) {
        baseName = fullName;
    } else if (other.baseName != nullptr) {
        baseName = uprv_strdup(other.baseName);
        if (baseName == nullptr) {
            return *this;
        }
    }

    // Fixed-size subtag buffers and scalar state.
    uprv_strcpy(language, other.language);
    uprv_strcpy(script, other.script);
    uprv_strcpy(country, other.country);
    variantBegin = other.variantBegin;
    fIsBogus = other.fIsBogus;

    return *this;
}
448
operator =(Locale && other)449 Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
450 if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
451 if (fullName != fullNameBuffer) uprv_free(fullName);
452
453 if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
454 uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
455 }
456 if (other.fullName == other.fullNameBuffer) {
457 fullName = fullNameBuffer;
458 } else {
459 fullName = other.fullName;
460 }
461
462 if (other.baseName == other.fullNameBuffer) {
463 baseName = fullNameBuffer;
464 } else if (other.baseName == other.fullName) {
465 baseName = fullName;
466 } else {
467 baseName = other.baseName;
468 }
469
470 uprv_strcpy(language, other.language);
471 uprv_strcpy(script, other.script);
472 uprv_strcpy(country, other.country);
473
474 variantBegin = other.variantBegin;
475 fIsBogus = other.fIsBogus;
476
477 other.baseName = other.fullName = other.fullNameBuffer;
478
479 return *this;
480 }
481
482 Locale *
clone() const483 Locale::clone() const {
484 return new Locale(*this);
485 }
486
487 UBool
operator ==(const Locale & other) const488 Locale::operator==( const Locale& other) const
489 {
490 return (uprv_strcmp(other.fullName, fullName) == 0);
491 }
492
493 namespace {
494
495 UInitOnce gKnownCanonicalizedInitOnce = U_INITONCE_INITIALIZER;
496 UHashtable *gKnownCanonicalized = nullptr;
497
// Locale IDs loaded as keys into the gKnownCanonicalized table.
// Laid out one language per line.
static const char* const KNOWN_CANONICALIZED[] = {
    "c",
    // Commonly used locales known are already canonicalized
    "af", "af_ZA",
    "am", "am_ET",
    "ar", "ar_001",
    "as", "as_IN",
    "az", "az_AZ",
    "be", "be_BY",
    "bg", "bg_BG",
    "bn", "bn_IN",
    "bs", "bs_BA",
    "ca", "ca_ES",
    "cs", "cs_CZ",
    "cy", "cy_GB",
    "da", "da_DK",
    "de", "de_DE",
    "el", "el_GR",
    "en", "en_GB", "en_US",
    "es", "es_419", "es_ES",
    "et", "et_EE",
    "eu", "eu_ES",
    "fa", "fa_IR",
    "fi", "fi_FI",
    "fil", "fil_PH",
    "fr", "fr_FR",
    "ga", "ga_IE",
    "gl", "gl_ES",
    "gu", "gu_IN",
    "he", "he_IL",
    "hi", "hi_IN",
    "hr", "hr_HR",
    "hu", "hu_HU",
    "hy", "hy_AM",
    "id", "id_ID",
    "is", "is_IS",
    "it", "it_IT",
    "ja", "ja_JP",
    "jv", "jv_ID",
    "ka", "ka_GE",
    "kk", "kk_KZ",
    "km", "km_KH",
    "kn", "kn_IN",
    "ko", "ko_KR",
    "ky", "ky_KG",
    "lo", "lo_LA",
    "lt", "lt_LT",
    "lv", "lv_LV",
    "mk", "mk_MK",
    "ml", "ml_IN",
    "mn", "mn_MN",
    "mr", "mr_IN",
    "ms", "ms_MY",
    "my", "my_MM",
    "nb", "nb_NO",
    "ne", "ne_NP",
    "nl", "nl_NL",
    "no",
    "or", "or_IN",
    "pa", "pa_IN",
    "pl", "pl_PL",
    "ps", "ps_AF",
    "pt", "pt_BR", "pt_PT",
    "ro", "ro_RO",
    "ru", "ru_RU",
    "sd", "sd_IN",
    "si", "si_LK",
    "sk", "sk_SK",
    "sl", "sl_SI",
    "so", "so_SO",
    "sq", "sq_AL",
    "sr", "sr_Cyrl_RS", "sr_Latn", "sr_RS",
    "sv", "sv_SE",
    "sw", "sw_TZ",
    "ta", "ta_IN",
    "te", "te_IN",
    "th", "th_TH",
    "tk", "tk_TM",
    "tr", "tr_TR",
    "uk", "uk_UA",
    "ur", "ur_PK",
    "uz", "uz_UZ",
    "vi", "vi_VN",
    "yue", "yue_Hant", "yue_Hant_HK", "yue_HK",
    "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant", "zh_Hant_TW", "zh_TW",
    "zu", "zu_ZA"
};
521
cleanupKnownCanonicalized()522 static UBool U_CALLCONV cleanupKnownCanonicalized() {
523 gKnownCanonicalizedInitOnce.reset();
524 if (gKnownCanonicalized) { uhash_close(gKnownCanonicalized); }
525 return TRUE;
526 }
527
loadKnownCanonicalized(UErrorCode & status)528 static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
529 ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
530 cleanupKnownCanonicalized);
531 LocalUHashtablePointer newKnownCanonicalizedMap(
532 uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status));
533 for (int32_t i = 0;
534 U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED);
535 i++) {
536 uhash_puti(newKnownCanonicalizedMap.getAlias(),
537 (void*)KNOWN_CANONICALIZED[i],
538 1, &status);
539 }
540 if (U_FAILURE(status)) {
541 return;
542 }
543
544 gKnownCanonicalized = newKnownCanonicalizedMap.orphan();
545 }
546
547 class AliasData;
548
549 /**
550 * A Builder class to build the alias data.
551 */
552 class AliasDataBuilder {
553 public:
AliasDataBuilder()554 AliasDataBuilder() {
555 }
556
557 // Build the AliasData from resource.
558 AliasData* build(UErrorCode &status);
559
560 private:
561 void readAlias(UResourceBundle* alias,
562 UniqueCharStrings* strings,
563 LocalMemory<const char*>& types,
564 LocalMemory<int32_t>& replacementIndexes,
565 int32_t &length,
566 void (*checkType)(const char* type),
567 void (*checkReplacement)(const UnicodeString& replacement),
568 UErrorCode &status);
569
570 // Read the languageAlias data from alias to
571 // strings+types+replacementIndexes
572 // The number of record will be stored into length.
573 // Allocate length items for types, to store the type field.
574 // Allocate length items for replacementIndexes,
575 // to store the index in the strings for the replacement script.
576 void readLanguageAlias(UResourceBundle* alias,
577 UniqueCharStrings* strings,
578 LocalMemory<const char*>& types,
579 LocalMemory<int32_t>& replacementIndexes,
580 int32_t &length,
581 UErrorCode &status);
582
583 // Read the scriptAlias data from alias to
584 // strings+types+replacementIndexes
585 // Allocate length items for types, to store the type field.
586 // Allocate length items for replacementIndexes,
587 // to store the index in the strings for the replacement script.
588 void readScriptAlias(UResourceBundle* alias,
589 UniqueCharStrings* strings,
590 LocalMemory<const char*>& types,
591 LocalMemory<int32_t>& replacementIndexes,
592 int32_t &length, UErrorCode &status);
593
594 // Read the territoryAlias data from alias to
595 // strings+types+replacementIndexes
596 // Allocate length items for types, to store the type field.
597 // Allocate length items for replacementIndexes,
598 // to store the index in the strings for the replacement script.
599 void readTerritoryAlias(UResourceBundle* alias,
600 UniqueCharStrings* strings,
601 LocalMemory<const char*>& types,
602 LocalMemory<int32_t>& replacementIndexes,
603 int32_t &length, UErrorCode &status);
604
605 // Read the variantAlias data from alias to
606 // strings+types+replacementIndexes
607 // Allocate length items for types, to store the type field.
608 // Allocate length items for replacementIndexes,
609 // to store the index in the strings for the replacement variant.
610 void readVariantAlias(UResourceBundle* alias,
611 UniqueCharStrings* strings,
612 LocalMemory<const char*>& types,
613 LocalMemory<int32_t>& replacementIndexes,
614 int32_t &length, UErrorCode &status);
615
616 // Read the subdivisionAlias data from alias to
617 // strings+types+replacementIndexes
618 // Allocate length items for types, to store the type field.
619 // Allocate length items for replacementIndexes,
620 // to store the index in the strings for the replacement variant.
621 void readSubdivisionAlias(UResourceBundle* alias,
622 UniqueCharStrings* strings,
623 LocalMemory<const char*>& types,
624 LocalMemory<int32_t>& replacementIndexes,
625 int32_t &length, UErrorCode &status);
626 };
627
628 /**
629 * A class to hold the Alias Data.
630 */
631 class AliasData : public UMemory {
632 public:
singleton(UErrorCode & status)633 static const AliasData* singleton(UErrorCode& status) {
634 if (U_FAILURE(status)) {
635 // Do not get into loadData if the status already has error.
636 return nullptr;
637 }
638 umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
639 return gSingleton;
640 }
641
languageMap() const642 const CharStringMap& languageMap() const { return language; }
scriptMap() const643 const CharStringMap& scriptMap() const { return script; }
territoryMap() const644 const CharStringMap& territoryMap() const { return territory; }
variantMap() const645 const CharStringMap& variantMap() const { return variant; }
subdivisionMap() const646 const CharStringMap& subdivisionMap() const { return subdivision; }
647
648 static void U_CALLCONV loadData(UErrorCode &status);
649 static UBool U_CALLCONV cleanup();
650
651 static UInitOnce gInitOnce;
652
653 private:
AliasData(CharStringMap languageMap,CharStringMap scriptMap,CharStringMap territoryMap,CharStringMap variantMap,CharStringMap subdivisionMap,CharString * strings)654 AliasData(CharStringMap languageMap,
655 CharStringMap scriptMap,
656 CharStringMap territoryMap,
657 CharStringMap variantMap,
658 CharStringMap subdivisionMap,
659 CharString* strings)
660 : language(std::move(languageMap)),
661 script(std::move(scriptMap)),
662 territory(std::move(territoryMap)),
663 variant(std::move(variantMap)),
664 subdivision(std::move(subdivisionMap)),
665 strings(strings) {
666 }
667
~AliasData()668 ~AliasData() {
669 delete strings;
670 }
671
672 static const AliasData* gSingleton;
673
674 CharStringMap language;
675 CharStringMap script;
676 CharStringMap territory;
677 CharStringMap variant;
678 CharStringMap subdivision;
679 CharString* strings;
680
681 friend class AliasDataBuilder;
682 };
683
684
685 const AliasData* AliasData::gSingleton = nullptr;
686 UInitOnce AliasData::gInitOnce = U_INITONCE_INITIALIZER;
687
688 UBool U_CALLCONV
cleanup()689 AliasData::cleanup()
690 {
691 gInitOnce.reset();
692 delete gSingleton;
693 return TRUE;
694 }
695
696 void
readAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,void (* checkType)(const char * type),void (* checkReplacement)(const UnicodeString & replacement),UErrorCode & status)697 AliasDataBuilder::readAlias(
698 UResourceBundle* alias,
699 UniqueCharStrings* strings,
700 LocalMemory<const char*>& types,
701 LocalMemory<int32_t>& replacementIndexes,
702 int32_t &length,
703 void (*checkType)(const char* type),
704 void (*checkReplacement)(const UnicodeString& replacement),
705 UErrorCode &status) {
706 if (U_FAILURE(status)) {
707 return;
708 }
709 length = ures_getSize(alias);
710 const char** rawTypes = types.allocateInsteadAndCopy(length);
711 if (rawTypes == nullptr) {
712 status = U_MEMORY_ALLOCATION_ERROR;
713 return;
714 }
715 int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
716 if (rawIndexes == nullptr) {
717 status = U_MEMORY_ALLOCATION_ERROR;
718 return;
719 }
720 int i = 0;
721 while (ures_hasNext(alias)) {
722 LocalUResourceBundlePointer res(
723 ures_getNextResource(alias, nullptr, &status));
724 const char* aliasFrom = ures_getKey(res.getAlias());
725 UnicodeString aliasTo =
726 ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
727
728 checkType(aliasFrom);
729 checkReplacement(aliasTo);
730
731 rawTypes[i] = aliasFrom;
732 rawIndexes[i] = strings->add(aliasTo, status);
733 i++;
734 }
735 }
736
737 /**
738 * Read the languageAlias data from alias to strings+types+replacementIndexes.
739 * Allocate length items for types, to store the type field. Allocate length
740 * items for replacementIndexes, to store the index in the strings for the
741 * replacement language.
742 */
743 void
readLanguageAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)744 AliasDataBuilder::readLanguageAlias(
745 UResourceBundle* alias,
746 UniqueCharStrings* strings,
747 LocalMemory<const char*>& types,
748 LocalMemory<int32_t>& replacementIndexes,
749 int32_t &length,
750 UErrorCode &status)
751 {
752 return readAlias(
753 alias, strings, types, replacementIndexes, length,
754 #if U_DEBUG
755 [](const char* type) {
756 // Assert the aliasFrom only contains the following possibilties
757 // language_REGION_variant
758 // language_REGION
759 // language_variant
760 // language
761 // und_variant
762 Locale test(type);
763 // Assert no script in aliasFrom
764 U_ASSERT(test.getScript()[0] == '\0');
765 // Assert when language is und, no REGION in aliasFrom.
766 U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0');
767 },
768 #else
769 [](const char*) {},
770 #endif
771 [](const UnicodeString&) {}, status);
772 }
773
774 /**
775 * Read the scriptAlias data from alias to strings+types+replacementIndexes.
776 * Allocate length items for types, to store the type field. Allocate length
777 * items for replacementIndexes, to store the index in the strings for the
778 * replacement script.
779 */
780 void
readScriptAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)781 AliasDataBuilder::readScriptAlias(
782 UResourceBundle* alias,
783 UniqueCharStrings* strings,
784 LocalMemory<const char*>& types,
785 LocalMemory<int32_t>& replacementIndexes,
786 int32_t &length,
787 UErrorCode &status)
788 {
789 return readAlias(
790 alias, strings, types, replacementIndexes, length,
791 #if U_DEBUG
792 [](const char* type) {
793 U_ASSERT(uprv_strlen(type) == 4);
794 },
795 [](const UnicodeString& replacement) {
796 U_ASSERT(replacement.length() == 4);
797 },
798 #else
799 [](const char*) {},
800 [](const UnicodeString&) { },
801 #endif
802 status);
803 }
804
805 /**
806 * Read the territoryAlias data from alias to strings+types+replacementIndexes.
807 * Allocate length items for types, to store the type field. Allocate length
808 * items for replacementIndexes, to store the index in the strings for the
809 * replacement regions.
810 */
811 void
readTerritoryAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)812 AliasDataBuilder::readTerritoryAlias(
813 UResourceBundle* alias,
814 UniqueCharStrings* strings,
815 LocalMemory<const char*>& types,
816 LocalMemory<int32_t>& replacementIndexes,
817 int32_t &length,
818 UErrorCode &status)
819 {
820 return readAlias(
821 alias, strings, types, replacementIndexes, length,
822 #if U_DEBUG
823 [](const char* type) {
824 U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3);
825 },
826 #else
827 [](const char*) {},
828 #endif
829 [](const UnicodeString&) { },
830 status);
831 }
832
833 /**
834 * Read the variantAlias data from alias to strings+types+replacementIndexes.
835 * Allocate length items for types, to store the type field. Allocate length
836 * items for replacementIndexes, to store the index in the strings for the
837 * replacement variant.
838 */
839 void
readVariantAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)840 AliasDataBuilder::readVariantAlias(
841 UResourceBundle* alias,
842 UniqueCharStrings* strings,
843 LocalMemory<const char*>& types,
844 LocalMemory<int32_t>& replacementIndexes,
845 int32_t &length,
846 UErrorCode &status)
847 {
848 return readAlias(
849 alias, strings, types, replacementIndexes, length,
850 #if U_DEBUG
851 [](const char* type) {
852 U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8);
853 U_ASSERT(uprv_strlen(type) != 4 ||
854 (type[0] >= '0' && type[0] <= '9'));
855 },
856 [](const UnicodeString& replacement) {
857 U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
858 U_ASSERT(replacement.length() != 4 ||
859 (replacement.charAt(0) >= u'0' &&
860 replacement.charAt(0) <= u'9'));
861 },
862 #else
863 [](const char*) {},
864 [](const UnicodeString&) { },
865 #endif
866 status);
867 }
868
869 /**
870 * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
871 * Allocate length items for types, to store the type field. Allocate length
872 * items for replacementIndexes, to store the index in the strings for the
873 * replacement regions.
874 */
875 void
readSubdivisionAlias(UResourceBundle * alias,UniqueCharStrings * strings,LocalMemory<const char * > & types,LocalMemory<int32_t> & replacementIndexes,int32_t & length,UErrorCode & status)876 AliasDataBuilder::readSubdivisionAlias(
877 UResourceBundle* alias,
878 UniqueCharStrings* strings,
879 LocalMemory<const char*>& types,
880 LocalMemory<int32_t>& replacementIndexes,
881 int32_t &length,
882 UErrorCode &status)
883 {
884 return readAlias(
885 alias, strings, types, replacementIndexes, length,
886 #if U_DEBUG
887 [](const char* type) {
888 U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
889 },
890 #else
891 [](const char*) {},
892 #endif
893 [](const UnicodeString&) { },
894 status);
895 }
896
897 /**
898 * Initializes the alias data from the ICU resource bundles. The alias data
899 * contains alias of language, country, script and variants.
900 *
901 * If the alias data has already loaded, then this method simply returns without
902 * doing anything meaningful.
903 */
904 void U_CALLCONV
loadData(UErrorCode & status)905 AliasData::loadData(UErrorCode &status)
906 {
907 #ifdef LOCALE_CANONICALIZATION_DEBUG
908 UDate start = uprv_getRawUTCtime();
909 #endif // LOCALE_CANONICALIZATION_DEBUG
910 ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup);
911 AliasDataBuilder builder;
912 gSingleton = builder.build(status);
913 #ifdef LOCALE_CANONICALIZATION_DEBUG
914 UDate end = uprv_getRawUTCtime();
915 printf("AliasData::loadData took total %f ms\n", end - start);
916 #endif // LOCALE_CANONICALIZATION_DEBUG
917 }
918
919 /**
920 * Build the alias data from resources.
921 */
922 AliasData*
build(UErrorCode & status)923 AliasDataBuilder::build(UErrorCode &status) {
924 LocalUResourceBundlePointer metadata(
925 ures_openDirect(nullptr, "metadata", &status));
926 LocalUResourceBundlePointer metadataAlias(
927 ures_getByKey(metadata.getAlias(), "alias", nullptr, &status));
928 LocalUResourceBundlePointer languageAlias(
929 ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status));
930 LocalUResourceBundlePointer scriptAlias(
931 ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status));
932 LocalUResourceBundlePointer territoryAlias(
933 ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
934 LocalUResourceBundlePointer variantAlias(
935 ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
936 LocalUResourceBundlePointer subdivisionAlias(
937 ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
938
939 if (U_FAILURE(status)) {
940 return nullptr;
941 }
942 int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
943 variantLength = 0, subdivisionLength = 0;
944
945 // Read the languageAlias into languageTypes, languageReplacementIndexes
946 // and strings
947 UniqueCharStrings strings(status);
948 LocalMemory<const char*> languageTypes;
949 LocalMemory<int32_t> languageReplacementIndexes;
950 readLanguageAlias(languageAlias.getAlias(),
951 &strings,
952 languageTypes,
953 languageReplacementIndexes,
954 languagesLength,
955 status);
956
957 // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
958 // and strings
959 LocalMemory<const char*> scriptTypes;
960 LocalMemory<int32_t> scriptReplacementIndexes;
961 readScriptAlias(scriptAlias.getAlias(),
962 &strings,
963 scriptTypes,
964 scriptReplacementIndexes,
965 scriptLength,
966 status);
967
968 // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
969 // and strings
970 LocalMemory<const char*> territoryTypes;
971 LocalMemory<int32_t> territoryReplacementIndexes;
972 readTerritoryAlias(territoryAlias.getAlias(),
973 &strings,
974 territoryTypes,
975 territoryReplacementIndexes,
976 territoryLength, status);
977
978 // Read the variantAlias into variantTypes, variantReplacementIndexes
979 // and strings
980 LocalMemory<const char*> variantTypes;
981 LocalMemory<int32_t> variantReplacementIndexes;
982 readVariantAlias(variantAlias.getAlias(),
983 &strings,
984 variantTypes,
985 variantReplacementIndexes,
986 variantLength, status);
987
988 // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
989 // and strings
990 LocalMemory<const char*> subdivisionTypes;
991 LocalMemory<int32_t> subdivisionReplacementIndexes;
992 readSubdivisionAlias(subdivisionAlias.getAlias(),
993 &strings,
994 subdivisionTypes,
995 subdivisionReplacementIndexes,
996 subdivisionLength, status);
997
998 if (U_FAILURE(status)) {
999 return nullptr;
1000 }
1001
1002 // We can only use strings after freeze it.
1003 strings.freeze();
1004
1005 // Build the languageMap from languageTypes & languageReplacementIndexes
1006 CharStringMap languageMap(490, status);
1007 for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
1008 languageMap.put(languageTypes[i],
1009 strings.get(languageReplacementIndexes[i]),
1010 status);
1011 }
1012
1013 // Build the scriptMap from scriptTypes & scriptReplacementIndexes
1014 CharStringMap scriptMap(1, status);
1015 for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
1016 scriptMap.put(scriptTypes[i],
1017 strings.get(scriptReplacementIndexes[i]),
1018 status);
1019 }
1020
1021 // Build the territoryMap from territoryTypes & territoryReplacementIndexes
1022 CharStringMap territoryMap(650, status);
1023 for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
1024 territoryMap.put(territoryTypes[i],
1025 strings.get(territoryReplacementIndexes[i]),
1026 status);
1027 }
1028
1029 // Build the variantMap from variantTypes & variantReplacementIndexes.
1030 CharStringMap variantMap(2, status);
1031 for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
1032 variantMap.put(variantTypes[i],
1033 strings.get(variantReplacementIndexes[i]),
1034 status);
1035 }
1036
1037 // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
1038 CharStringMap subdivisionMap(2, status);
1039 for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
1040 subdivisionMap.put(subdivisionTypes[i],
1041 strings.get(subdivisionReplacementIndexes[i]),
1042 status);
1043 }
1044
1045 if (U_FAILURE(status)) {
1046 return nullptr;
1047 }
1048
1049 // copy hashtables
1050 auto *data = new AliasData(
1051 std::move(languageMap),
1052 std::move(scriptMap),
1053 std::move(territoryMap),
1054 std::move(variantMap),
1055 std::move(subdivisionMap),
1056 strings.orphanCharStrings());
1057
1058 if (data == nullptr) {
1059 status = U_MEMORY_ALLOCATION_ERROR;
1060 }
1061 return data;
1062 }
1063
1064 /**
1065 * A class that find the replacement values of locale fields by using AliasData.
1066 */
1067 class AliasReplacer {
1068 public:
AliasReplacer(UErrorCode status)1069 AliasReplacer(UErrorCode status) :
1070 language(nullptr), script(nullptr), region(nullptr),
1071 extensions(nullptr), variants(status),
1072 data(nullptr) {
1073 }
~AliasReplacer()1074 ~AliasReplacer() {
1075 }
1076
1077 // Check the fields inside locale, if need to replace fields,
1078 // place the the replaced locale ID in out and return true.
1079 // Otherwise return false for no replacement or error.
1080 bool replace(
1081 const Locale& locale, CharString& out, UErrorCode& status);
1082
1083 private:
1084 const char* language;
1085 const char* script;
1086 const char* region;
1087 const char* extensions;
1088 UVector variants;
1089
1090 const AliasData* data;
1091
notEmpty(const char * str)1092 inline bool notEmpty(const char* str) {
1093 return str && str[0] != NULL_CHAR;
1094 }
1095
1096 /**
1097 * If replacement is neither null nor empty and input is either null or empty,
1098 * return replacement.
1099 * If replacement is neither null nor empty but input is not empty, return input.
1100 * If replacement is either null or empty and type is either null or empty,
1101 * return input.
1102 * Otherwise return null.
1103 * replacement input type return
1104 * AAA nullptr * AAA
1105 * AAA BBB * BBB
1106 * nullptr || "" CCC nullptr CCC
1107 * nullptr || "" * DDD nullptr
1108 */
deleteOrReplace(const char * input,const char * type,const char * replacement)1109 inline const char* deleteOrReplace(
1110 const char* input, const char* type, const char* replacement) {
1111 return notEmpty(replacement) ?
1112 ((input == nullptr) ? replacement : input) :
1113 ((type == nullptr) ? input : nullptr);
1114 }
1115
same(const char * a,const char * b)1116 inline bool same(const char* a, const char* b) {
1117 if (a == nullptr && b == nullptr) {
1118 return true;
1119 }
1120 if ((a == nullptr && b != nullptr) ||
1121 (a != nullptr && b == nullptr)) {
1122 return false;
1123 }
1124 return uprv_strcmp(a, b) == 0;
1125 }
1126
1127 // Gather fields and generate locale ID into out.
1128 CharString& outputToString(CharString& out, UErrorCode status);
1129
1130 // Generate the lookup key.
1131 CharString& generateKey(const char* language, const char* region,
1132 const char* variant, CharString& out,
1133 UErrorCode status);
1134
1135 void parseLanguageReplacement(const char* replacement,
1136 const char*& replaceLanguage,
1137 const char*& replaceScript,
1138 const char*& replaceRegion,
1139 const char*& replaceVariant,
1140 const char*& replaceExtensions,
1141 UVector& toBeFreed,
1142 UErrorCode& status);
1143
1144 // Replace by using languageAlias.
1145 bool replaceLanguage(bool checkLanguage, bool checkRegion,
1146 bool checkVariants, UVector& toBeFreed,
1147 UErrorCode& status);
1148
1149 // Replace by using territoryAlias.
1150 bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
1151
1152 // Replace by using scriptAlias.
1153 bool replaceScript(UErrorCode& status);
1154
1155 // Replace by using variantAlias.
1156 bool replaceVariant(UErrorCode& status);
1157
1158 // Replace by using subdivisionAlias.
1159 bool replaceSubdivision(StringPiece subdivision,
1160 CharString& output, UErrorCode& status);
1161
1162 // Replace transformed extensions.
1163 bool replaceTransformedExtensions(
1164 CharString& transformedExtensions, CharString& output, UErrorCode& status);
1165 };
1166
1167 CharString&
generateKey(const char * language,const char * region,const char * variant,CharString & out,UErrorCode status)1168 AliasReplacer::generateKey(
1169 const char* language, const char* region, const char* variant,
1170 CharString& out, UErrorCode status)
1171 {
1172 out.append(language, status);
1173 if (notEmpty(region)) {
1174 out.append(SEP_CHAR, status)
1175 .append(region, status);
1176 }
1177 if (notEmpty(variant)) {
1178 out.append(SEP_CHAR, status)
1179 .append(variant, status);
1180 }
1181 return out;
1182 }
1183
1184 void
parseLanguageReplacement(const char * replacement,const char * & replacedLanguage,const char * & replacedScript,const char * & replacedRegion,const char * & replacedVariant,const char * & replacedExtensions,UVector & toBeFreed,UErrorCode & status)1185 AliasReplacer::parseLanguageReplacement(
1186 const char* replacement,
1187 const char*& replacedLanguage,
1188 const char*& replacedScript,
1189 const char*& replacedRegion,
1190 const char*& replacedVariant,
1191 const char*& replacedExtensions,
1192 UVector& toBeFreed,
1193 UErrorCode& status)
1194 {
1195 if (U_FAILURE(status)) {
1196 return;
1197 }
1198 replacedScript = replacedRegion = replacedVariant
1199 = replacedExtensions = nullptr;
1200 if (uprv_strchr(replacement, '_') == nullptr) {
1201 replacedLanguage = replacement;
1202 // reach the end, just return it.
1203 return;
1204 }
1205 // We have multiple field so we have to allocate and parse
1206 CharString* str = new CharString(
1207 replacement, (int32_t)uprv_strlen(replacement), status);
1208 if (U_FAILURE(status)) {
1209 return;
1210 }
1211 if (str == nullptr) {
1212 status = U_MEMORY_ALLOCATION_ERROR;
1213 return;
1214 }
1215 toBeFreed.addElement(str, status);
1216 char* data = str->data();
1217 replacedLanguage = (const char*) data;
1218 char* endOfField = uprv_strchr(data, '_');
1219 *endOfField = '\0'; // null terminiate it.
1220 endOfField++;
1221 const char* start = endOfField;
1222 endOfField = (char*) uprv_strchr(start, '_');
1223 size_t len = 0;
1224 if (endOfField == nullptr) {
1225 len = uprv_strlen(start);
1226 } else {
1227 len = endOfField - start;
1228 *endOfField = '\0'; // null terminiate it.
1229 }
1230 if (len == 4 && uprv_isASCIILetter(*start)) {
1231 // Got a script
1232 replacedScript = start;
1233 if (endOfField == nullptr) {
1234 return;
1235 }
1236 start = endOfField++;
1237 endOfField = (char*)uprv_strchr(start, '_');
1238 if (endOfField == nullptr) {
1239 len = uprv_strlen(start);
1240 } else {
1241 len = endOfField - start;
1242 *endOfField = '\0'; // null terminiate it.
1243 }
1244 }
1245 if (len >= 2 && len <= 3) {
1246 // Got a region
1247 replacedRegion = start;
1248 if (endOfField == nullptr) {
1249 return;
1250 }
1251 start = endOfField++;
1252 endOfField = (char*)uprv_strchr(start, '_');
1253 if (endOfField == nullptr) {
1254 len = uprv_strlen(start);
1255 } else {
1256 len = endOfField - start;
1257 *endOfField = '\0'; // null terminiate it.
1258 }
1259 }
1260 if (len >= 4) {
1261 // Got a variant
1262 replacedVariant = start;
1263 if (endOfField == nullptr) {
1264 return;
1265 }
1266 start = endOfField++;
1267 }
1268 replacedExtensions = start;
1269 }
1270
1271 bool
replaceLanguage(bool checkLanguage,bool checkRegion,bool checkVariants,UVector & toBeFreed,UErrorCode & status)1272 AliasReplacer::replaceLanguage(
1273 bool checkLanguage, bool checkRegion,
1274 bool checkVariants, UVector& toBeFreed, UErrorCode& status)
1275 {
1276 if (U_FAILURE(status)) {
1277 return false;
1278 }
1279 if ( (checkRegion && region == nullptr) ||
1280 (checkVariants && variants.size() == 0)) {
1281 // Nothing to search.
1282 return false;
1283 }
1284 int32_t variant_size = checkVariants ? variants.size() : 1;
1285 // Since we may have more than one variant, we need to loop through them.
1286 const char* searchLanguage = checkLanguage ? language : "und";
1287 const char* searchRegion = checkRegion ? region : nullptr;
1288 const char* searchVariant = nullptr;
1289 for (int32_t variant_index = 0;
1290 variant_index < variant_size;
1291 variant_index++) {
1292 if (checkVariants) {
1293 U_ASSERT(variant_index < variant_size);
1294 searchVariant = (const char*)(variants.elementAt(variant_index));
1295 }
1296
1297 if (searchVariant != nullptr && uprv_strlen(searchVariant) < 4) {
1298 // Do not consider ill-formed variant subtag.
1299 searchVariant = nullptr;
1300 }
1301 CharString typeKey;
1302 generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
1303 status);
1304 if (U_FAILURE(status)) {
1305 return false;
1306 }
1307 const char *replacement = data->languageMap().get(typeKey.data());
1308 if (replacement == nullptr) {
1309 // Found no replacement data.
1310 continue;
1311 }
1312
1313 const char* replacedLanguage = nullptr;
1314 const char* replacedScript = nullptr;
1315 const char* replacedRegion = nullptr;
1316 const char* replacedVariant = nullptr;
1317 const char* replacedExtensions = nullptr;
1318 parseLanguageReplacement(replacement,
1319 replacedLanguage,
1320 replacedScript,
1321 replacedRegion,
1322 replacedVariant,
1323 replacedExtensions,
1324 toBeFreed,
1325 status);
1326 replacedLanguage =
1327 (replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und") == 0) ?
1328 language : replacedLanguage;
1329 replacedScript = deleteOrReplace(script, nullptr, replacedScript);
1330 replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
1331 replacedVariant = deleteOrReplace(
1332 searchVariant, searchVariant, replacedVariant);
1333
1334 if ( same(language, replacedLanguage) &&
1335 same(script, replacedScript) &&
1336 same(region, replacedRegion) &&
1337 same(searchVariant, replacedVariant) &&
1338 replacedExtensions == nullptr) {
1339 // Replacement produce no changes.
1340 continue;
1341 }
1342
1343 language = replacedLanguage;
1344 region = replacedRegion;
1345 script = replacedScript;
1346 if (searchVariant != nullptr) {
1347 if (notEmpty(replacedVariant)) {
1348 variants.setElementAt((void*)replacedVariant, variant_index);
1349 } else {
1350 variants.removeElementAt(variant_index);
1351 }
1352 }
1353 if (replacedExtensions != nullptr) {
1354 // DO NOTHING
1355 // UTS35 does not specifiy what should we do if we have extensions in the
1356 // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
1357 // extensions in them languageAlias:
1358 // i_default => en_x_i_default
1359 // i_enochian => und_x_i_enochian
1360 // i_mingo => see_x_i_mingo
1361 // zh_min => nan_x_zh_min
1362 // But all of them are already changed by code inside ultag_parse() before
1363 // hitting this code.
1364 }
1365
1366 // Something changed by language alias data.
1367 return true;
1368 }
1369 // Nothing changed by language alias data.
1370 return false;
1371 }
1372
1373 bool
replaceTerritory(UVector & toBeFreed,UErrorCode & status)1374 AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
1375 {
1376 if (U_FAILURE(status)) {
1377 return false;
1378 }
1379 if (region == nullptr) {
1380 // No region to search.
1381 return false;
1382 }
1383 const char *replacement = data->territoryMap().get(region);
1384 if (replacement == nullptr) {
1385 // Found no replacement data for this region.
1386 return false;
1387 }
1388 const char* replacedRegion = replacement;
1389 const char* firstSpace = uprv_strchr(replacement, ' ');
1390 if (firstSpace != nullptr) {
1391 // If there are are more than one region in the replacement.
1392 // We need to check which one match based on the language.
1393 // Cannot use nullptr for language because that will construct
1394 // the default locale, in that case, use "und" to get the correct
1395 // locale.
1396 Locale l = LocaleBuilder()
1397 .setLanguage(language == nullptr ? "und" : language)
1398 .setScript(script)
1399 .build(status);
1400 l.addLikelySubtags(status);
1401 const char* likelyRegion = l.getCountry();
1402 LocalPointer<CharString> item;
1403 if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) {
1404 size_t len = uprv_strlen(likelyRegion);
1405 const char* foundInReplacement = uprv_strstr(replacement,
1406 likelyRegion);
1407 if (foundInReplacement != nullptr) {
1408 // Assuming the case there are no three letter region code in
1409 // the replacement of territoryAlias
1410 U_ASSERT(foundInReplacement == replacement ||
1411 *(foundInReplacement-1) == ' ');
1412 U_ASSERT(foundInReplacement[len] == ' ' ||
1413 foundInReplacement[len] == '\0');
1414 item.adoptInsteadAndCheckErrorCode(
1415 new CharString(foundInReplacement, (int32_t)len, status), status);
1416 }
1417 }
1418 if (item.isNull() && U_SUCCESS(status)) {
1419 item.adoptInsteadAndCheckErrorCode(
1420 new CharString(replacement,
1421 (int32_t)(firstSpace - replacement), status), status);
1422 }
1423 if (U_FAILURE(status)) { return false; }
1424 if (item.isNull()) {
1425 status = U_MEMORY_ALLOCATION_ERROR;
1426 return false;
1427 }
1428 replacedRegion = item->data();
1429 toBeFreed.addElement(item.orphan(), status);
1430 }
1431 U_ASSERT(!same(region, replacedRegion));
1432 region = replacedRegion;
1433 // The region is changed by data in territory alias.
1434 return true;
1435 }
1436
1437 bool
replaceScript(UErrorCode & status)1438 AliasReplacer::replaceScript(UErrorCode& status)
1439 {
1440 if (U_FAILURE(status)) {
1441 return false;
1442 }
1443 if (script == nullptr) {
1444 // No script to search.
1445 return false;
1446 }
1447 const char *replacement = data->scriptMap().get(script);
1448 if (replacement == nullptr) {
1449 // Found no replacement data for this script.
1450 return false;
1451 }
1452 U_ASSERT(!same(script, replacement));
1453 script = replacement;
1454 // The script is changed by data in script alias.
1455 return true;
1456 }
1457
1458 bool
replaceVariant(UErrorCode & status)1459 AliasReplacer::replaceVariant(UErrorCode& status)
1460 {
1461 if (U_FAILURE(status)) {
1462 return false;
1463 }
1464 // Since we may have more than one variant, we need to loop through them.
1465 for (int32_t i = 0; i < variants.size(); i++) {
1466 const char *variant = (const char*)(variants.elementAt(i));
1467 const char *replacement = data->variantMap().get(variant);
1468 if (replacement == nullptr) {
1469 // Found no replacement data for this variant.
1470 continue;
1471 }
1472 U_ASSERT((uprv_strlen(replacement) >= 5 &&
1473 uprv_strlen(replacement) <= 8) ||
1474 (uprv_strlen(replacement) == 4 &&
1475 replacement[0] >= '0' &&
1476 replacement[0] <= '9'));
1477 if (!same(variant, replacement)) {
1478 variants.setElementAt((void*)replacement, i);
1479 // Special hack to handle hepburn-heploc => alalc97
1480 if (uprv_strcmp(variant, "heploc") == 0) {
1481 for (int32_t j = 0; j < variants.size(); j++) {
1482 if (uprv_strcmp((const char*)(variants.elementAt(j)),
1483 "hepburn") == 0) {
1484 variants.removeElementAt(j);
1485 }
1486 }
1487 }
1488 return true;
1489 }
1490 }
1491 return false;
1492 }
1493
1494 bool
replaceSubdivision(StringPiece subdivision,CharString & output,UErrorCode & status)1495 AliasReplacer::replaceSubdivision(
1496 StringPiece subdivision, CharString& output, UErrorCode& status)
1497 {
1498 if (U_FAILURE(status)) {
1499 return false;
1500 }
1501 const char *replacement = data->subdivisionMap().get(subdivision.data());
1502 if (replacement != nullptr) {
1503 const char* firstSpace = uprv_strchr(replacement, ' ');
1504 // Found replacement data for this subdivision.
1505 size_t len = (firstSpace != nullptr) ?
1506 (firstSpace - replacement) : uprv_strlen(replacement);
1507 if (2 <= len && len <= 8) {
1508 output.append(replacement, (int32_t)len, status);
1509 if (2 == len) {
1510 // Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
1511 output.append("zzzz", 4, status);
1512 }
1513 }
1514 return true;
1515 }
1516 return false;
1517 }
1518
1519 bool
replaceTransformedExtensions(CharString & transformedExtensions,CharString & output,UErrorCode & status)1520 AliasReplacer::replaceTransformedExtensions(
1521 CharString& transformedExtensions, CharString& output, UErrorCode& status)
1522 {
1523 // The content of the transformedExtensions will be modified in this
1524 // function to NULL-terminating (tkey-tvalue) pairs.
1525 if (U_FAILURE(status)) {
1526 return false;
1527 }
1528 int32_t len = transformedExtensions.length();
1529 const char* str = transformedExtensions.data();
1530 const char* tkey = ultag_getTKeyStart(str);
1531 int32_t tlangLen = (tkey == str) ? 0 :
1532 ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
1533 CharStringByteSink sink(&output);
1534 if (tlangLen > 0) {
1535 Locale tlang = LocaleBuilder()
1536 .setLanguageTag(StringPiece(str, tlangLen))
1537 .build(status);
1538 tlang.canonicalize(status);
1539 tlang.toLanguageTag(sink, status);
1540 if (U_FAILURE(status)) {
1541 return false;
1542 }
1543 T_CString_toLowerCase(output.data());
1544 }
1545 if (tkey != nullptr) {
1546 // We need to sort the tfields by tkey
1547 UVector tfields(status);
1548 if (U_FAILURE(status)) {
1549 return false;
1550 }
1551 do {
1552 const char* tvalue = uprv_strchr(tkey, '-');
1553 if (tvalue == nullptr) {
1554 status = U_ILLEGAL_ARGUMENT_ERROR;
1555 }
1556 const char* nextTKey = ultag_getTKeyStart(tvalue);
1557 if (nextTKey != nullptr) {
1558 *((char*)(nextTKey-1)) = '\0'; // NULL terminate tvalue
1559 }
1560 tfields.insertElementAt((void*)tkey, tfields.size(), status);
1561 if (U_FAILURE(status)) {
1562 return false;
1563 }
1564 tkey = nextTKey;
1565 } while (tkey != nullptr);
1566 tfields.sort([](UElement e1, UElement e2) -> int8_t {
1567 // uprv_strcmp return int and in some platform, such as arm64-v8a,
1568 // it may return positive values > 127 which cause the casted value
1569 // of int8_t negative.
1570 int res = uprv_strcmp(
1571 (const char*)e1.pointer, (const char*)e2.pointer);
1572 return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
1573 }, status);
1574 for (int32_t i = 0; i < tfields.size(); i++) {
1575 if (output.length() > 0) {
1576 output.append('-', status);
1577 }
1578 const char* tfield = (const char*) tfields.elementAt(i);
1579 const char* tvalue = uprv_strchr(tfield, '-');
1580 // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
1581 U_ASSERT(tvalue != nullptr);
1582 *((char*)tvalue++) = '\0'; // NULL terminate tkey
1583 output.append(tfield, status).append('-', status);
1584 const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
1585 output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
1586 }
1587 }
1588 if (U_FAILURE(status)) {
1589 return false;
1590 }
1591 return true;
1592 }
1593
1594 CharString&
outputToString(CharString & out,UErrorCode status)1595 AliasReplacer::outputToString(
1596 CharString& out, UErrorCode status)
1597 {
1598 out.append(language, status);
1599 if (notEmpty(script)) {
1600 out.append(SEP_CHAR, status)
1601 .append(script, status);
1602 }
1603 if (notEmpty(region)) {
1604 out.append(SEP_CHAR, status)
1605 .append(region, status);
1606 }
1607 if (variants.size() > 0) {
1608 if (!notEmpty(script) && !notEmpty(region)) {
1609 out.append(SEP_CHAR, status);
1610 }
1611 variants.sort([](UElement e1, UElement e2) -> int8_t {
1612 // uprv_strcmp return int and in some platform, such as arm64-v8a,
1613 // it may return positive values > 127 which cause the casted value
1614 // of int8_t negative.
1615 int res = uprv_strcmp(
1616 (const char*)e1.pointer, (const char*)e2.pointer);
1617 return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
1618 }, status);
1619 int32_t variantsStart = out.length();
1620 for (int32_t i = 0; i < variants.size(); i++) {
1621 out.append(SEP_CHAR, status)
1622 .append((const char*)(variants.elementAt(i)),
1623 status);
1624 }
1625 T_CString_toUpperCase(out.data() + variantsStart);
1626 }
1627 if (notEmpty(extensions)) {
1628 CharString tmp("und_", status);
1629 tmp.append(extensions, status);
1630 Locale tmpLocale(tmp.data());
1631 // only support x extension inside CLDR for now.
1632 U_ASSERT(extensions[0] == 'x');
1633 out.append(tmpLocale.getName() + 1, status);
1634 }
1635 return out;
1636 }
1637
/**
 * Canonicalizes the fields of locale by applying alias data.
 *
 * Steps, in order:
 *  1. Load the AliasData singleton and copy language, script, region and
 *     the (lower-cased, sorted) variants of locale into the members; empty
 *     fields are stored as nullptr.
 *  2. Repeatedly try the languageAlias, territoryAlias, scriptAlias and
 *     variantAlias replacements until a full round changes nothing.
 *  3. Write the resulting ID to out and append the keywords of locale,
 *     after replacing the values of the "sd", "rg" and "t" keywords.
 *
 * The variants vector holds pointers into a buffer local to this function,
 * and replaced fields may point into strings owned by a vector local to
 * this function, so the members must not be read after this call returns.
 *
 * @param locale the locale to canonicalize
 * @param out    cleared on entry; receives the replaced locale ID when the
 *               function returns true
 * @param status in/out error code
 * @return true if out differs from locale.getName(); false if nothing was
 *         replaced or an error occurred
 */
bool
AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
{
    data = AliasData::singleton(status);
    if (U_FAILURE(status)) {
        return false;
    }
    U_ASSERT(data != nullptr);
    out.clear();
    // Normalize every empty field to nullptr.
    language = locale.getLanguage();
    if (!notEmpty(language)) {
        language = nullptr;
    }
    script = locale.getScript();
    if (!notEmpty(script)) {
        script = nullptr;
    }
    region = locale.getCountry();
    if (!notEmpty(region)) {
        region = nullptr;
    }
    // Split a lower-cased copy of the variant string at SEP_CHAR; the
    // variants vector stores pointers into variantsBuff.
    const char* variantsStr = locale.getVariant();
    CharString variantsBuff(variantsStr, -1, status);
    if (!variantsBuff.isEmpty()) {
        if (U_FAILURE(status)) { return false; }
        char* start = variantsBuff.data();
        T_CString_toLowerCase(start);
        char* end;
        while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
               U_SUCCESS(status)) {
            *end = NULL_CHAR;  // null terminate inside variantsBuff
            variants.addElement(start, status);
            start = end + 1;
        }
        // The text after the last separator is the last variant.
        variants.addElement(start, status);
    }
    if (U_FAILURE(status)) { return false; }

    // Sort the variants
    variants.sort([](UElement e1, UElement e2) -> int8_t {
        // uprv_strcmp return int and in some platform, such as arm64-v8a,
        // it may return positive values > 127 which cause the casted value
        // of int8_t negative.
        int res = uprv_strcmp(
            (const char*)e1.pointer, (const char*)e2.pointer);
        return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
    }, status);

    // Counts the rounds that changed something; used to assert when the
    // loop runs too many times.
    int changed = 0;
    // A UVector to hold the CharStrings allocated by the replace* methods;
    // they are freed when it goes out of scope at the end of this function.
    UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); },
                             nullptr, 10, status);
    while (U_SUCCESS(status)) {
        // Something is wrong with the data if five rounds have already
        // changed a field and we are still looping.
        U_ASSERT(changed < 5);
        // From observation of key in data/misc/metadata.txt
        // we know currently we only need to search in the following combination
        // of fields for type in languageAlias:
        // * lang_region_variant
        // * lang_region
        // * lang_variant
        // * lang
        // * und_variant
        // This assumption is ensured by the U_ASSERT in readLanguageAlias
        //
        //                      lang  REGION variant
        if (    replaceLanguage(true, true,  true,  stringsToBeFreed, status) ||
                replaceLanguage(true, true,  false, stringsToBeFreed, status) ||
                replaceLanguage(true, false, true,  stringsToBeFreed, status) ||
                replaceLanguage(true, false, false, stringsToBeFreed, status) ||
                replaceLanguage(false,false, true,  stringsToBeFreed, status) ||
                replaceTerritory(stringsToBeFreed, status) ||
                replaceScript(status) ||
                replaceVariant(status)) {
            // Some values in data is changed, try to match from the beginning
            // again.
            changed++;
            continue;
        }
        // Nothing changed. Break out.
        break;
    }  // while(1)

    if (U_FAILURE(status)) { return false; }
    // Nothing changed and we know the order of the variants has not changed
    // because we have no variant or only one.
    const char* extensionsStr = locale_getKeywordsStart(locale.getName());
    if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
        return false;
    }
    outputToString(out, status);
    if (U_FAILURE(status)) {
        return false;
    }
    if (extensionsStr != nullptr) {
        // From here on, changed counts replaced keyword values.
        changed = 0;
        // Replaced keyword values are written into this copy of locale.
        Locale temp(locale);
        LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
        if (U_SUCCESS(status) && !iter.isNull()) {
            const char* key;
            while ((key = iter->next(nullptr, status)) != nullptr) {
                if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
                        uprv_strcmp("t", key) == 0) {
                    CharString value;
                    CharStringByteSink valueSink(&value);
                    locale.getKeywordValue(key, valueSink, status);
                    if (U_FAILURE(status)) {
                        // Ignore a keyword whose value cannot be read.
                        status = U_ZERO_ERROR;
                        continue;
                    }
                    CharString replacement;
                    // Of the three keys accepted above, "sd" and "rg" have
                    // length 2 and "t" has length 1.
                    if (uprv_strlen(key) == 2) {
                        if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
                            changed++;
                            temp.setKeywordValue(key, replacement.data(), status);
                        }
                    } else {
                        U_ASSERT(uprv_strcmp(key, "t") == 0);
                        if (replaceTransformedExtensions(value, replacement, status)) {
                            changed++;
                            temp.setKeywordValue(key, replacement.data(), status);
                        }
                    }
                    if (U_FAILURE(status)) {
                        return false;
                    }
                }
            }
        }
        if (changed != 0) {
            // Use the keywords of the modified copy instead of the original.
            extensionsStr = locale_getKeywordsStart(temp.getName());
        }
        out.append(extensionsStr, status);
    }
    if (U_FAILURE(status)) {
        return false;
    }
    // If the tag is not changed, return.
    if (uprv_strcmp(out.data(), locale.getName()) == 0) {
        out.clear();
        return false;
    }
    return true;
}
1785
1786 // Return true if the locale is changed during canonicalization.
1787 // The replaced value then will be put into out.
1788 bool
canonicalizeLocale(const Locale & locale,CharString & out,UErrorCode & status)1789 canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
1790 {
1791 AliasReplacer replacer(status);
1792 return replacer.replace(locale, out, status);
1793 }
1794
1795 // Function to optimize for known cases without so we can skip the loading
1796 // of resources in the startup time until we really need it.
1797 bool
isKnownCanonicalizedLocale(const char * locale,UErrorCode & status)1798 isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
1799 {
1800 if ( uprv_strcmp(locale, "c") == 0 ||
1801 uprv_strcmp(locale, "en") == 0 ||
1802 uprv_strcmp(locale, "en_US") == 0) {
1803 return true;
1804 }
1805
1806 // common well-known Canonicalized.
1807 umtx_initOnce(gKnownCanonicalizedInitOnce,
1808 &loadKnownCanonicalized, status);
1809 if (U_FAILURE(status)) {
1810 return false;
1811 }
1812 U_ASSERT(gKnownCanonicalized != nullptr);
1813 return uhash_geti(gKnownCanonicalized, locale) != 0;
1814 }
1815
1816 } // namespace
1817
1818 // Function for testing.
1819 U_CAPI const char* const*
ulocimp_getKnownCanonicalizedLocaleForTest(int32_t * length)1820 ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
1821 {
1822 *length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
1823 return KNOWN_CANONICALIZED;
1824 }
1825
1826 // Function for testing.
1827 U_CAPI bool
ulocimp_isCanonicalizedLocaleForTest(const char * localeName)1828 ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
1829 {
1830 Locale l(localeName);
1831 UErrorCode status = U_ZERO_ERROR;
1832 CharString temp;
1833 return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
1834 }
1835
/*
 * This function initializes a Locale from a C locale ID.
 *
 * It releases the current name storage, normalizes localeID into fullName
 * with uloc_canonicalize() (when canonicalize is true) or uloc_getName(),
 * splits the result into language, script, country and variant, and sets up
 * the base name. When canonicalize is true and the name is not already known
 * to be canonical, canonicalizeLocale() is applied and, if it produced a
 * replacement, init() is run again on the replacement without
 * canonicalization.
 *
 * A NULL localeID assigns the default locale. On any error the object is
 * set to bogus (there is no UErrorCode here).
 *
 * Returns *this.
 */
Locale& Locale::init(const char* localeID, UBool canonicalize)
{
    fIsBogus = FALSE;
    /* Free our current storage */
    // baseName is only freed when it is neither an alias of fullName nor of
    // the internal buffer.
    if ((baseName != fullName) && (baseName != fullNameBuffer)) {
        uprv_free(baseName);
    }
    baseName = NULL;
    if(fullName != fullNameBuffer) {
        uprv_free(fullName);
        fullName = fullNameBuffer;
    }

    // not a loop:
    // just an easy way to have a common error-exit
    // without goto and without another function
    do {
        char *separator;
        char *field[5] = {0};
        int32_t fieldLen[5] = {0};
        int32_t fieldIdx;
        int32_t variantField;
        int32_t length;
        UErrorCode err;

        if(localeID == NULL) {
            // not an error, just set the default locale
            return *this = getDefault();
        }

        /* preset all fields to empty */
        language[0] = script[0] = country[0] = 0;

        // "canonicalize" the locale ID to ICU/Java format
        err = U_ZERO_ERROR;
        length = canonicalize ?
            uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
            uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);

        if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
            U_ASSERT(baseName == nullptr);
            /*Go to heap for the fullName if necessary*/
            fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
            if(fullName == 0) {
                fullName = fullNameBuffer;
                break; // error: out of memory
            }
            // Run the same conversion again into the heap buffer.
            err = U_ZERO_ERROR;
            length = canonicalize ?
                uloc_canonicalize(localeID, fullName, length+1, &err) :
                uloc_getName(localeID, fullName, length+1, &err);
        }
        if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
            /* should never occur */
            break;
        }

        // Default: no variant; overwritten below if a variant field is found.
        variantBegin = length;

        /* after uloc_getName/canonicalize() we know that only '_' are separators */
        /* But '_' can also appear in a keyword value such as a timezone: "en@timezone=America/Los_Angeles" */
        separator = field[0] = fullName;
        fieldIdx = 1;
        char* at = uprv_strchr(fullName, '@');
        // Record the start and length of each field, stopping at the last
        // slot of field[] or at the first separator located after '@'.
        while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
               fieldIdx < UPRV_LENGTHOF(field)-1 &&
               (at == nullptr || separator < at)) {
            field[fieldIdx] = separator + 1;
            fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
            fieldIdx++;
        }
        // variant may contain @foo or .foo POSIX cruft; remove it
        separator = uprv_strchr(field[fieldIdx-1], '@');
        char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
        if (separator!=NULL || sep2!=NULL) {
            // End the last field at whichever of '@' and '.' comes first.
            if (separator==NULL || (sep2!=NULL && separator > sep2)) {
                separator = sep2;
            }
            fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
        } else {
            fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
        }

        if (fieldLen[0] >= (int32_t)(sizeof(language)))
        {
            break; // error: the language field is too long
        }

        variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
        if (fieldLen[0] > 0) {
            /* We have a language */
            uprv_memcpy(language, fullName, fieldLen[0]);
            language[fieldLen[0]] = 0;
        }
        // A script is exactly four ASCII letters.
        if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) &&
                uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) &&
                uprv_isASCIILetter(field[1][3])) {
            /* We have at least a script */
            uprv_memcpy(script, field[1], fieldLen[1]);
            script[fieldLen[1]] = 0;
            variantField++;
        }

        if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
            /* We have a country */
            uprv_memcpy(country, field[variantField], fieldLen[variantField]);
            country[fieldLen[variantField]] = 0;
            variantField++;
        } else if (fieldLen[variantField] == 0) {
            variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
        }

        if (fieldLen[variantField] > 0) {
            /* We have a variant */
            variantBegin = (int32_t)(field[variantField] - fullName);
        }

        err = U_ZERO_ERROR;
        initBaseName(err);
        if (U_FAILURE(err)) {
            break;
        }

        if (canonicalize) {
            if (!isKnownCanonicalizedLocale(fullName, err)) {
                CharString replaced;
                // Not sure it is already canonicalized
                if (canonicalizeLocale(*this, replaced, err)) {
                    U_ASSERT(U_SUCCESS(err));
                    // If need replacement, call init again.
                    init(replaced.data(), false);
                }
                if (U_FAILURE(err)) {
                    break;
                }
            }
        }   // if (canonicalize) {

        // successful end of init()
        return *this;
    } while(0); /*loop doesn't iterate*/

    // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
    setToBogus();

    return *this;
}
1984
1985 /*
1986 * Set up the base name.
1987 * If there are no key words, it's exactly the full name.
1988 * If key words exist, it's the full name truncated at the '@' character.
1989 * Need to set up both at init() and after setting a keyword.
1990 */
1991 void
initBaseName(UErrorCode & status)1992 Locale::initBaseName(UErrorCode &status) {
1993 if (U_FAILURE(status)) {
1994 return;
1995 }
1996 U_ASSERT(baseName==NULL || baseName==fullName);
1997 const char *atPtr = uprv_strchr(fullName, '@');
1998 const char *eqPtr = uprv_strchr(fullName, '=');
1999 if (atPtr && eqPtr && atPtr < eqPtr) {
2000 // Key words exist.
2001 int32_t baseNameLength = (int32_t)(atPtr - fullName);
2002 baseName = (char *)uprv_malloc(baseNameLength + 1);
2003 if (baseName == NULL) {
2004 status = U_MEMORY_ALLOCATION_ERROR;
2005 return;
2006 }
2007 uprv_strncpy(baseName, fullName, baseNameLength);
2008 baseName[baseNameLength] = 0;
2009
2010 // The original computation of variantBegin leaves it equal to the length
2011 // of fullName if there is no variant. It should instead be
2012 // the length of the baseName.
2013 if (variantBegin > baseNameLength) {
2014 variantBegin = baseNameLength;
2015 }
2016 } else {
2017 baseName = fullName;
2018 }
2019 }
2020
2021
2022 int32_t
hashCode() const2023 Locale::hashCode() const
2024 {
2025 return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
2026 }
2027
2028 void
setToBogus()2029 Locale::setToBogus() {
2030 /* Free our current storage */
2031 if((baseName != fullName) && (baseName != fullNameBuffer)) {
2032 uprv_free(baseName);
2033 }
2034 baseName = NULL;
2035 if(fullName != fullNameBuffer) {
2036 uprv_free(fullName);
2037 fullName = fullNameBuffer;
2038 }
2039 *fullNameBuffer = 0;
2040 *language = 0;
2041 *script = 0;
2042 *country = 0;
2043 fIsBogus = TRUE;
2044 variantBegin = 0;
2045 }
2046
2047 const Locale& U_EXPORT2
getDefault()2048 Locale::getDefault()
2049 {
2050 {
2051 Mutex lock(&gDefaultLocaleMutex);
2052 if (gDefaultLocale != NULL) {
2053 return *gDefaultLocale;
2054 }
2055 }
2056 UErrorCode status = U_ZERO_ERROR;
2057 return *locale_set_default_internal(NULL, status);
2058 }
2059
2060
2061
2062 void U_EXPORT2
setDefault(const Locale & newLocale,UErrorCode & status)2063 Locale::setDefault( const Locale& newLocale,
2064 UErrorCode& status)
2065 {
2066 if (U_FAILURE(status)) {
2067 return;
2068 }
2069
2070 /* Set the default from the full name string of the supplied locale.
2071 * This is a convenient way to access the default locale caching mechanisms.
2072 */
2073 const char *localeID = newLocale.getName();
2074 locale_set_default_internal(localeID, status);
2075 }
2076
2077 void
addLikelySubtags(UErrorCode & status)2078 Locale::addLikelySubtags(UErrorCode& status) {
2079 if (U_FAILURE(status)) {
2080 return;
2081 }
2082
2083 CharString maximizedLocaleID;
2084 {
2085 CharStringByteSink sink(&maximizedLocaleID);
2086 ulocimp_addLikelySubtags(fullName, sink, &status);
2087 }
2088
2089 if (U_FAILURE(status)) {
2090 return;
2091 }
2092
2093 init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
2094 if (isBogus()) {
2095 status = U_ILLEGAL_ARGUMENT_ERROR;
2096 }
2097 }
2098
2099 void
minimizeSubtags(UErrorCode & status)2100 Locale::minimizeSubtags(UErrorCode& status) {
2101 if (U_FAILURE(status)) {
2102 return;
2103 }
2104
2105 CharString minimizedLocaleID;
2106 {
2107 CharStringByteSink sink(&minimizedLocaleID);
2108 ulocimp_minimizeSubtags(fullName, sink, &status);
2109 }
2110
2111 if (U_FAILURE(status)) {
2112 return;
2113 }
2114
2115 init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
2116 if (isBogus()) {
2117 status = U_ILLEGAL_ARGUMENT_ERROR;
2118 }
2119 }
2120
2121 void
canonicalize(UErrorCode & status)2122 Locale::canonicalize(UErrorCode& status) {
2123 if (U_FAILURE(status)) {
2124 return;
2125 }
2126 if (isBogus()) {
2127 status = U_ILLEGAL_ARGUMENT_ERROR;
2128 return;
2129 }
2130 CharString uncanonicalized(fullName, status);
2131 if (U_FAILURE(status)) {
2132 return;
2133 }
2134 init(uncanonicalized.data(), /*canonicalize=*/TRUE);
2135 if (isBogus()) {
2136 status = U_ILLEGAL_ARGUMENT_ERROR;
2137 }
2138 }
2139
2140 Locale U_EXPORT2
forLanguageTag(StringPiece tag,UErrorCode & status)2141 Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
2142 {
2143 Locale result(Locale::eBOGUS);
2144
2145 if (U_FAILURE(status)) {
2146 return result;
2147 }
2148
2149 // If a BCP 47 language tag is passed as the language parameter to the
2150 // normal Locale constructor, it will actually fall back to invoking
2151 // uloc_forLanguageTag() to parse it if it somehow is able to detect that
2152 // the string actually is BCP 47. This works well for things like strings
2153 // using BCP 47 extensions, but it does not at all work for things like
2154 // legacy language tags (marked as “Type: grandfathered” in BCP 47,
2155 // e.g., "en-GB-oed") which are possible to also
2156 // interpret as ICU locale IDs and because of that won't trigger the BCP 47
2157 // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
2158 // and then Locale::init(), instead of just calling the normal constructor.
2159
2160 CharString localeID;
2161 int32_t parsedLength;
2162 {
2163 CharStringByteSink sink(&localeID);
2164 ulocimp_forLanguageTag(
2165 tag.data(),
2166 tag.length(),
2167 sink,
2168 &parsedLength,
2169 &status);
2170 }
2171
2172 if (U_FAILURE(status)) {
2173 return result;
2174 }
2175
2176 if (parsedLength != tag.size()) {
2177 status = U_ILLEGAL_ARGUMENT_ERROR;
2178 return result;
2179 }
2180
2181 result.init(localeID.data(), /*canonicalize=*/FALSE);
2182 if (result.isBogus()) {
2183 status = U_ILLEGAL_ARGUMENT_ERROR;
2184 }
2185 return result;
2186 }
2187
2188 void
toLanguageTag(ByteSink & sink,UErrorCode & status) const2189 Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
2190 {
2191 if (U_FAILURE(status)) {
2192 return;
2193 }
2194
2195 if (fIsBogus) {
2196 status = U_ILLEGAL_ARGUMENT_ERROR;
2197 return;
2198 }
2199
2200 ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status);
2201 }
2202
2203 Locale U_EXPORT2
createFromName(const char * name)2204 Locale::createFromName (const char *name)
2205 {
2206 if (name) {
2207 Locale l("");
2208 l.init(name, FALSE);
2209 return l;
2210 }
2211 else {
2212 return getDefault();
2213 }
2214 }
2215
2216 Locale U_EXPORT2
createCanonical(const char * name)2217 Locale::createCanonical(const char* name) {
2218 Locale loc("");
2219 loc.init(name, TRUE);
2220 return loc;
2221 }
2222
2223 const char *
getISO3Language() const2224 Locale::getISO3Language() const
2225 {
2226 return uloc_getISO3Language(fullName);
2227 }
2228
2229
2230 const char *
getISO3Country() const2231 Locale::getISO3Country() const
2232 {
2233 return uloc_getISO3Country(fullName);
2234 }
2235
2236 /**
2237 * Return the LCID value as specified in the "LocaleID" resource for this
2238 * locale. The LocaleID must be expressed as a hexadecimal number, from
2239 * one to four digits. If the LocaleID resource is not present, or is
2240 * in an incorrect format, 0 is returned. The LocaleID is for use in
2241 * Windows (it is an LCID), but is available on all platforms.
2242 */
2243 uint32_t
getLCID() const2244 Locale::getLCID() const
2245 {
2246 return uloc_getLCID(fullName);
2247 }
2248
getISOCountries()2249 const char* const* U_EXPORT2 Locale::getISOCountries()
2250 {
2251 return uloc_getISOCountries();
2252 }
2253
getISOLanguages()2254 const char* const* U_EXPORT2 Locale::getISOLanguages()
2255 {
2256 return uloc_getISOLanguages();
2257 }
2258
2259 // Set the locale's data based on a posix id.
setFromPOSIXID(const char * posixID)2260 void Locale::setFromPOSIXID(const char *posixID)
2261 {
2262 init(posixID, TRUE);
2263 }
2264
2265 const Locale & U_EXPORT2
getRoot(void)2266 Locale::getRoot(void)
2267 {
2268 return getLocale(eROOT);
2269 }
2270
2271 const Locale & U_EXPORT2
getEnglish(void)2272 Locale::getEnglish(void)
2273 {
2274 return getLocale(eENGLISH);
2275 }
2276
2277 const Locale & U_EXPORT2
getFrench(void)2278 Locale::getFrench(void)
2279 {
2280 return getLocale(eFRENCH);
2281 }
2282
2283 const Locale & U_EXPORT2
getGerman(void)2284 Locale::getGerman(void)
2285 {
2286 return getLocale(eGERMAN);
2287 }
2288
2289 const Locale & U_EXPORT2
getItalian(void)2290 Locale::getItalian(void)
2291 {
2292 return getLocale(eITALIAN);
2293 }
2294
2295 const Locale & U_EXPORT2
getJapanese(void)2296 Locale::getJapanese(void)
2297 {
2298 return getLocale(eJAPANESE);
2299 }
2300
2301 const Locale & U_EXPORT2
getKorean(void)2302 Locale::getKorean(void)
2303 {
2304 return getLocale(eKOREAN);
2305 }
2306
2307 const Locale & U_EXPORT2
getChinese(void)2308 Locale::getChinese(void)
2309 {
2310 return getLocale(eCHINESE);
2311 }
2312
2313 const Locale & U_EXPORT2
getSimplifiedChinese(void)2314 Locale::getSimplifiedChinese(void)
2315 {
2316 return getLocale(eCHINA);
2317 }
2318
2319 const Locale & U_EXPORT2
getTraditionalChinese(void)2320 Locale::getTraditionalChinese(void)
2321 {
2322 return getLocale(eTAIWAN);
2323 }
2324
2325
2326 const Locale & U_EXPORT2
getFrance(void)2327 Locale::getFrance(void)
2328 {
2329 return getLocale(eFRANCE);
2330 }
2331
2332 const Locale & U_EXPORT2
getGermany(void)2333 Locale::getGermany(void)
2334 {
2335 return getLocale(eGERMANY);
2336 }
2337
2338 const Locale & U_EXPORT2
getItaly(void)2339 Locale::getItaly(void)
2340 {
2341 return getLocale(eITALY);
2342 }
2343
2344 const Locale & U_EXPORT2
getJapan(void)2345 Locale::getJapan(void)
2346 {
2347 return getLocale(eJAPAN);
2348 }
2349
2350 const Locale & U_EXPORT2
getKorea(void)2351 Locale::getKorea(void)
2352 {
2353 return getLocale(eKOREA);
2354 }
2355
2356 const Locale & U_EXPORT2
getChina(void)2357 Locale::getChina(void)
2358 {
2359 return getLocale(eCHINA);
2360 }
2361
2362 const Locale & U_EXPORT2
getPRC(void)2363 Locale::getPRC(void)
2364 {
2365 return getLocale(eCHINA);
2366 }
2367
2368 const Locale & U_EXPORT2
getTaiwan(void)2369 Locale::getTaiwan(void)
2370 {
2371 return getLocale(eTAIWAN);
2372 }
2373
2374 const Locale & U_EXPORT2
getUK(void)2375 Locale::getUK(void)
2376 {
2377 return getLocale(eUK);
2378 }
2379
2380 const Locale & U_EXPORT2
getUS(void)2381 Locale::getUS(void)
2382 {
2383 return getLocale(eUS);
2384 }
2385
2386 const Locale & U_EXPORT2
getCanada(void)2387 Locale::getCanada(void)
2388 {
2389 return getLocale(eCANADA);
2390 }
2391
2392 const Locale & U_EXPORT2
getCanadaFrench(void)2393 Locale::getCanadaFrench(void)
2394 {
2395 return getLocale(eCANADA_FRENCH);
2396 }
2397
2398 const Locale &
getLocale(int locid)2399 Locale::getLocale(int locid)
2400 {
2401 Locale *localeCache = getLocaleCache();
2402 U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));
2403 if (localeCache == NULL) {
2404 // Failure allocating the locale cache.
2405 // The best we can do is return a NULL reference.
2406 locid = 0;
2407 }
2408 return localeCache[locid]; /*operating on NULL*/
2409 }
2410
2411 /*
2412 This function is defined this way in order to get around static
2413 initialization and static destruction.
2414 */
2415 Locale *
getLocaleCache(void)2416 Locale::getLocaleCache(void)
2417 {
2418 UErrorCode status = U_ZERO_ERROR;
2419 umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
2420 return gLocaleCache;
2421 }
2422
2423 class KeywordEnumeration : public StringEnumeration {
2424 private:
2425 char *keywords;
2426 char *current;
2427 int32_t length;
2428 UnicodeString currUSKey;
2429 static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
2430
2431 public:
getStaticClassID(void)2432 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
getDynamicClassID(void) const2433 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
2434 public:
KeywordEnumeration(const char * keys,int32_t keywordLen,int32_t currentIndex,UErrorCode & status)2435 KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
2436 : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
2437 if(U_SUCCESS(status) && keywordLen != 0) {
2438 if(keys == NULL || keywordLen < 0) {
2439 status = U_ILLEGAL_ARGUMENT_ERROR;
2440 } else {
2441 keywords = (char *)uprv_malloc(keywordLen+1);
2442 if (keywords == NULL) {
2443 status = U_MEMORY_ALLOCATION_ERROR;
2444 }
2445 else {
2446 uprv_memcpy(keywords, keys, keywordLen);
2447 keywords[keywordLen] = 0;
2448 current = keywords + currentIndex;
2449 length = keywordLen;
2450 }
2451 }
2452 }
2453 }
2454
2455 virtual ~KeywordEnumeration();
2456
clone() const2457 virtual StringEnumeration * clone() const
2458 {
2459 UErrorCode status = U_ZERO_ERROR;
2460 return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
2461 }
2462
count(UErrorCode &) const2463 virtual int32_t count(UErrorCode &/*status*/) const {
2464 char *kw = keywords;
2465 int32_t result = 0;
2466 while(*kw) {
2467 result++;
2468 kw += uprv_strlen(kw)+1;
2469 }
2470 return result;
2471 }
2472
next(int32_t * resultLength,UErrorCode & status)2473 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
2474 const char* result;
2475 int32_t len;
2476 if(U_SUCCESS(status) && *current != 0) {
2477 result = current;
2478 len = (int32_t)uprv_strlen(current);
2479 current += len+1;
2480 if(resultLength != NULL) {
2481 *resultLength = len;
2482 }
2483 } else {
2484 if(resultLength != NULL) {
2485 *resultLength = 0;
2486 }
2487 result = NULL;
2488 }
2489 return result;
2490 }
2491
snext(UErrorCode & status)2492 virtual const UnicodeString* snext(UErrorCode& status) {
2493 int32_t resultLength = 0;
2494 const char *s = next(&resultLength, status);
2495 return setChars(s, resultLength, status);
2496 }
2497
reset(UErrorCode &)2498 virtual void reset(UErrorCode& /*status*/) {
2499 current = keywords;
2500 }
2501 };
2502
2503 const char KeywordEnumeration::fgClassID = '\0';
2504
~KeywordEnumeration()2505 KeywordEnumeration::~KeywordEnumeration() {
2506 uprv_free(keywords);
2507 }
2508
2509 // A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
2510 // the next() method for each keyword before returning it.
2511 class UnicodeKeywordEnumeration : public KeywordEnumeration {
2512 public:
2513 using KeywordEnumeration::KeywordEnumeration;
2514 virtual ~UnicodeKeywordEnumeration();
2515
next(int32_t * resultLength,UErrorCode & status)2516 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
2517 const char* legacy_key = KeywordEnumeration::next(nullptr, status);
2518 if (U_SUCCESS(status) && legacy_key != nullptr) {
2519 const char* key = uloc_toUnicodeLocaleKey(legacy_key);
2520 if (key == nullptr) {
2521 status = U_ILLEGAL_ARGUMENT_ERROR;
2522 } else {
2523 if (resultLength != nullptr) {
2524 *resultLength = static_cast<int32_t>(uprv_strlen(key));
2525 }
2526 return key;
2527 }
2528 }
2529 if (resultLength != nullptr) *resultLength = 0;
2530 return nullptr;
2531 }
2532 };
2533
2534 // Out-of-line virtual destructor to serve as the "key function".
2535 UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
2536
2537 StringEnumeration *
createKeywords(UErrorCode & status) const2538 Locale::createKeywords(UErrorCode &status) const
2539 {
2540 StringEnumeration *result = NULL;
2541
2542 if (U_FAILURE(status)) {
2543 return result;
2544 }
2545
2546 const char* variantStart = uprv_strchr(fullName, '@');
2547 const char* assignment = uprv_strchr(fullName, '=');
2548 if(variantStart) {
2549 if(assignment > variantStart) {
2550 CharString keywords;
2551 CharStringByteSink sink(&keywords);
2552 ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status);
2553 if (U_SUCCESS(status) && !keywords.isEmpty()) {
2554 result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2555 if (!result) {
2556 status = U_MEMORY_ALLOCATION_ERROR;
2557 }
2558 }
2559 } else {
2560 status = U_INVALID_FORMAT_ERROR;
2561 }
2562 }
2563 return result;
2564 }
2565
2566 StringEnumeration *
createUnicodeKeywords(UErrorCode & status) const2567 Locale::createUnicodeKeywords(UErrorCode &status) const
2568 {
2569 StringEnumeration *result = NULL;
2570
2571 if (U_FAILURE(status)) {
2572 return result;
2573 }
2574
2575 const char* variantStart = uprv_strchr(fullName, '@');
2576 const char* assignment = uprv_strchr(fullName, '=');
2577 if(variantStart) {
2578 if(assignment > variantStart) {
2579 CharString keywords;
2580 CharStringByteSink sink(&keywords);
2581 ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status);
2582 if (U_SUCCESS(status) && !keywords.isEmpty()) {
2583 result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2584 if (!result) {
2585 status = U_MEMORY_ALLOCATION_ERROR;
2586 }
2587 }
2588 } else {
2589 status = U_INVALID_FORMAT_ERROR;
2590 }
2591 }
2592 return result;
2593 }
2594
2595 int32_t
getKeywordValue(const char * keywordName,char * buffer,int32_t bufLen,UErrorCode & status) const2596 Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
2597 {
2598 return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
2599 }
2600
2601 void
getKeywordValue(StringPiece keywordName,ByteSink & sink,UErrorCode & status) const2602 Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
2603 if (U_FAILURE(status)) {
2604 return;
2605 }
2606
2607 if (fIsBogus) {
2608 status = U_ILLEGAL_ARGUMENT_ERROR;
2609 return;
2610 }
2611
2612 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2613 const CharString keywordName_nul(keywordName, status);
2614 if (U_FAILURE(status)) {
2615 return;
2616 }
2617
2618 ulocimp_getKeywordValue(fullName, keywordName_nul.data(), sink, &status);
2619 }
2620
2621 void
getUnicodeKeywordValue(StringPiece keywordName,ByteSink & sink,UErrorCode & status) const2622 Locale::getUnicodeKeywordValue(StringPiece keywordName,
2623 ByteSink& sink,
2624 UErrorCode& status) const {
2625 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2626 const CharString keywordName_nul(keywordName, status);
2627 if (U_FAILURE(status)) {
2628 return;
2629 }
2630
2631 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
2632
2633 if (legacy_key == nullptr) {
2634 status = U_ILLEGAL_ARGUMENT_ERROR;
2635 return;
2636 }
2637
2638 CharString legacy_value;
2639 {
2640 CharStringByteSink sink(&legacy_value);
2641 getKeywordValue(legacy_key, sink, status);
2642 }
2643
2644 if (U_FAILURE(status)) {
2645 return;
2646 }
2647
2648 const char* unicode_value = uloc_toUnicodeLocaleType(
2649 keywordName_nul.data(), legacy_value.data());
2650
2651 if (unicode_value == nullptr) {
2652 status = U_ILLEGAL_ARGUMENT_ERROR;
2653 return;
2654 }
2655
2656 sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
2657 }
2658
2659 void
setKeywordValue(const char * keywordName,const char * keywordValue,UErrorCode & status)2660 Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
2661 {
2662 if (U_FAILURE(status)) {
2663 return;
2664 }
2665 if (status == U_STRING_NOT_TERMINATED_WARNING) {
2666 status = U_ZERO_ERROR;
2667 }
2668 int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
2669 int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
2670 bufferLength, &status) + 1;
2671 U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
2672 /* Handle the case the current buffer is not enough to hold the new id */
2673 if (status == U_BUFFER_OVERFLOW_ERROR) {
2674 U_ASSERT(newLength > bufferLength);
2675 char* newFullName = (char *)uprv_malloc(newLength);
2676 if (newFullName == nullptr) {
2677 status = U_MEMORY_ALLOCATION_ERROR;
2678 return;
2679 }
2680 uprv_strcpy(newFullName, fullName);
2681 if (fullName != fullNameBuffer) {
2682 // if full Name is already on the heap, need to free it.
2683 uprv_free(fullName);
2684 if (baseName == fullName) {
2685 baseName = newFullName; // baseName should not point to freed memory.
2686 }
2687 }
2688 fullName = newFullName;
2689 status = U_ZERO_ERROR;
2690 uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
2691 U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
2692 } else {
2693 U_ASSERT(newLength <= bufferLength);
2694 }
2695 if (U_SUCCESS(status) && baseName == fullName) {
2696 // May have added the first keyword, meaning that the fullName is no longer also the baseName.
2697 initBaseName(status);
2698 }
2699 }
2700
2701 void
setKeywordValue(StringPiece keywordName,StringPiece keywordValue,UErrorCode & status)2702 Locale::setKeywordValue(StringPiece keywordName,
2703 StringPiece keywordValue,
2704 UErrorCode& status) {
2705 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2706 const CharString keywordName_nul(keywordName, status);
2707 const CharString keywordValue_nul(keywordValue, status);
2708 setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
2709 }
2710
2711 void
setUnicodeKeywordValue(StringPiece keywordName,StringPiece keywordValue,UErrorCode & status)2712 Locale::setUnicodeKeywordValue(StringPiece keywordName,
2713 StringPiece keywordValue,
2714 UErrorCode& status) {
2715 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2716 const CharString keywordName_nul(keywordName, status);
2717 const CharString keywordValue_nul(keywordValue, status);
2718
2719 if (U_FAILURE(status)) {
2720 return;
2721 }
2722
2723 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
2724
2725 if (legacy_key == nullptr) {
2726 status = U_ILLEGAL_ARGUMENT_ERROR;
2727 return;
2728 }
2729
2730 const char* legacy_value = nullptr;
2731
2732 if (!keywordValue_nul.isEmpty()) {
2733 legacy_value =
2734 uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
2735
2736 if (legacy_value == nullptr) {
2737 status = U_ILLEGAL_ARGUMENT_ERROR;
2738 return;
2739 }
2740 }
2741
2742 setKeywordValue(legacy_key, legacy_value, status);
2743 }
2744
2745 const char *
getBaseName() const2746 Locale::getBaseName() const {
2747 return baseName;
2748 }
2749
// Out-of-line definition of the defaulted Locale::Iterator destructor.
Locale::Iterator::~Iterator() = default;
2751
2752 //eof
2753 U_NAMESPACE_END
2754