1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include <utility>
5 
6 #include "bytesinkutil.h"  // CharStringByteSink
7 #include "charstr.h"
8 #include "cstring.h"
9 #include "ulocimp.h"
10 #include "unicode/localebuilder.h"
11 #include "unicode/locid.h"
12 
13 U_NAMESPACE_BEGIN
14 
// ASCII-only character classification helpers used by the validators below.
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )

// Keyword name under which Unicode locale attributes are stored in the
// extensions Locale. Both the characters and the pointer are const, and the
// symbol has internal linkage, so it can neither be reseated at run time nor
// collide with a same-named symbol in another translation unit.
static const char* const kAttributeKey = "attribute";
19 
_isExtensionSubtags(char key,const char * s,int32_t len)20 static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
21     switch (uprv_tolower(key)) {
22         case 'u':
23             return ultag_isUnicodeExtensionSubtags(s, len);
24         case 't':
25             return ultag_isTransformedExtensionSubtags(s, len);
26         case 'x':
27             return ultag_isPrivateuseValueSubtags(s, len);
28         default:
29             return ultag_isExtensionSubtags(s, len);
30     }
31 }
32 
LocaleBuilder()33 LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
34     script_(), region_(), variant_(nullptr), extensions_(nullptr)
35 {
36     language_[0] = 0;
37     script_[0] = 0;
38     region_[0] = 0;
39 }
40 
~LocaleBuilder()41 LocaleBuilder::~LocaleBuilder()
42 {
43     delete variant_;
44     delete extensions_;
45 }
46 
setLocale(const Locale & locale)47 LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
48 {
49     clear();
50     setLanguage(locale.getLanguage());
51     setScript(locale.getScript());
52     setRegion(locale.getCountry());
53     setVariant(locale.getVariant());
54     extensions_ = locale.clone();
55     if (extensions_ == nullptr) {
56         status_ = U_MEMORY_ALLOCATION_ERROR;
57     }
58     return *this;
59 }
60 
setLanguageTag(StringPiece tag)61 LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
62 {
63     Locale l = Locale::forLanguageTag(tag, status_);
64     if (U_FAILURE(status_)) { return *this; }
65     // Because setLocale will reset status_ we need to return
66     // first if we have error in forLanguageTag.
67     setLocale(l);
68     return *this;
69 }
70 
setField(StringPiece input,char * dest,UErrorCode & errorCode,UBool (* test)(const char *,int32_t))71 static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
72                      UBool (*test)(const char*, int32_t)) {
73     if (U_FAILURE(errorCode)) { return; }
74     if (input.empty()) {
75         dest[0] = '\0';
76     } else if (test(input.data(), input.length())) {
77         uprv_memcpy(dest, input.data(), input.length());
78         dest[input.length()] = '\0';
79     } else {
80         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
81     }
82 }
83 
setLanguage(StringPiece language)84 LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
85 {
86     setField(language, language_, status_, &ultag_isLanguageSubtag);
87     return *this;
88 }
89 
setScript(StringPiece script)90 LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
91 {
92     setField(script, script_, status_, &ultag_isScriptSubtag);
93     return *this;
94 }
95 
setRegion(StringPiece region)96 LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
97 {
98     setField(region, region_, status_, &ultag_isRegionSubtag);
99     return *this;
100 }
101 
transform(char * data,int32_t len)102 static void transform(char* data, int32_t len) {
103     for (int32_t i = 0; i < len; i++, data++) {
104         if (*data == '_') {
105             *data = '-';
106         } else {
107             *data = uprv_tolower(*data);
108         }
109     }
110 }
111 
setVariant(StringPiece variant)112 LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
113 {
114     if (U_FAILURE(status_)) { return *this; }
115     if (variant.empty()) {
116         delete variant_;
117         variant_ = nullptr;
118         return *this;
119     }
120     CharString* new_variant = new CharString(variant, status_);
121     if (U_FAILURE(status_)) { return *this; }
122     if (new_variant == nullptr) {
123         status_ = U_MEMORY_ALLOCATION_ERROR;
124         return *this;
125     }
126     transform(new_variant->data(), new_variant->length());
127     if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
128         delete new_variant;
129         status_ = U_ILLEGAL_ARGUMENT_ERROR;
130         return *this;
131     }
132     delete variant_;
133     variant_ = new_variant;
134     return *this;
135 }
136 
137 static bool
_isKeywordValue(const char * key,const char * value,int32_t value_len)138 _isKeywordValue(const char* key, const char* value, int32_t value_len)
139 {
140     if (key[1] == '\0') {
141         // one char key
142         return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
143                 _isExtensionSubtags(key[0], value, value_len));
144     } else if (uprv_strcmp(key, kAttributeKey) == 0) {
145         // unicode attributes
146         return ultag_isUnicodeLocaleAttributes(value, value_len);
147     }
148     // otherwise: unicode extension value
149     // We need to convert from legacy key/value to unicode
150     // key/value
151     const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
152     const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);
153 
154     return unicode_locale_key && unicode_locale_type &&
155            ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
156            ultag_isUnicodeLocaleType(unicode_locale_type, -1);
157 }
158 
159 static void
_copyExtensions(const Locale & from,icu::StringEnumeration * keywords,Locale & to,bool validate,UErrorCode & errorCode)160 _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
161                 Locale& to, bool validate, UErrorCode& errorCode)
162 {
163     if (U_FAILURE(errorCode)) { return; }
164     LocalPointer<icu::StringEnumeration> ownedKeywords;
165     if (keywords == nullptr) {
166         ownedKeywords.adoptInstead(from.createKeywords(errorCode));
167         if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
168         keywords = ownedKeywords.getAlias();
169     }
170     const char* key;
171     while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
172         CharString value;
173         CharStringByteSink sink(&value);
174         from.getKeywordValue(key, sink, errorCode);
175         if (U_FAILURE(errorCode)) { return; }
176         if (uprv_strcmp(key, kAttributeKey) == 0) {
177             transform(value.data(), value.length());
178         }
179         if (validate &&
180             !_isKeywordValue(key, value.data(), value.length())) {
181             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
182             return;
183         }
184         to.setKeywordValue(key, value.data(), errorCode);
185         if (U_FAILURE(errorCode)) { return; }
186     }
187 }
188 
189 void static
_clearUAttributesAndKeyType(Locale & locale,UErrorCode & errorCode)190 _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
191 {
192     // Clear Unicode attributes
193     locale.setKeywordValue(kAttributeKey, "", errorCode);
194 
195     // Clear all Unicode keyword values
196     LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
197     if (U_FAILURE(errorCode) || iter.isNull()) { return; }
198     const char* key;
199     while ((key = iter->next(nullptr, errorCode)) != nullptr) {
200         locale.setUnicodeKeywordValue(key, nullptr, errorCode);
201     }
202 }
203 
204 static void
_setUnicodeExtensions(Locale & locale,const CharString & value,UErrorCode & errorCode)205 _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
206 {
207     // Add the unicode extensions to extensions_
208     CharString locale_str("und-u-", errorCode);
209     locale_str.append(value, errorCode);
210     _copyExtensions(
211         Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
212         locale, false, errorCode);
213 }
214 
setExtension(char key,StringPiece value)215 LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
216 {
217     if (U_FAILURE(status_)) { return *this; }
218     if (!UPRV_ISALPHANUM(key)) {
219         status_ = U_ILLEGAL_ARGUMENT_ERROR;
220         return *this;
221     }
222     CharString value_str(value, status_);
223     if (U_FAILURE(status_)) { return *this; }
224     transform(value_str.data(), value_str.length());
225     if (!value_str.isEmpty() &&
226             !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
227         status_ = U_ILLEGAL_ARGUMENT_ERROR;
228         return *this;
229     }
230     if (extensions_ == nullptr) {
231         extensions_ = new Locale();
232         if (extensions_ == nullptr) {
233             status_ = U_MEMORY_ALLOCATION_ERROR;
234             return *this;
235         }
236     }
237     if (uprv_tolower(key) != 'u') {
238         // for t, x and others extension.
239         extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
240                                      status_);
241         return *this;
242     }
243     _clearUAttributesAndKeyType(*extensions_, status_);
244     if (U_FAILURE(status_)) { return *this; }
245     if (!value.empty()) {
246         _setUnicodeExtensions(*extensions_, value_str, status_);
247     }
248     return *this;
249 }
250 
setUnicodeLocaleKeyword(StringPiece key,StringPiece type)251 LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
252       StringPiece key, StringPiece type)
253 {
254     if (U_FAILURE(status_)) { return *this; }
255     if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
256             (!type.empty() &&
257                  !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
258       status_ = U_ILLEGAL_ARGUMENT_ERROR;
259       return *this;
260     }
261     if (extensions_ == nullptr) {
262         extensions_ = new Locale();
263     }
264     if (extensions_ == nullptr) {
265         status_ = U_MEMORY_ALLOCATION_ERROR;
266         return *this;
267     }
268     extensions_->setUnicodeKeywordValue(key, type, status_);
269     return *this;
270 }
271 
addUnicodeLocaleAttribute(StringPiece value)272 LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
273     StringPiece value)
274 {
275     CharString value_str(value, status_);
276     if (U_FAILURE(status_)) { return *this; }
277     transform(value_str.data(), value_str.length());
278     if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
279         status_ = U_ILLEGAL_ARGUMENT_ERROR;
280         return *this;
281     }
282     if (extensions_ == nullptr) {
283         extensions_ = new Locale();
284         if (extensions_ == nullptr) {
285             status_ = U_MEMORY_ALLOCATION_ERROR;
286             return *this;
287         }
288         extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
289         return *this;
290     }
291 
292     CharString attributes;
293     CharStringByteSink sink(&attributes);
294     UErrorCode localErrorCode = U_ZERO_ERROR;
295     extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
296     if (U_FAILURE(localErrorCode)) {
297         CharString new_attributes(value_str.data(), status_);
298         // No attributes, set the attribute.
299         extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
300         return *this;
301     }
302 
303     transform(attributes.data(),attributes.length());
304     const char* start = attributes.data();
305     const char* limit = attributes.data() + attributes.length();
306     CharString new_attributes;
307     bool inserted = false;
308     while (start < limit) {
309         if (!inserted) {
310             int cmp = uprv_strcmp(start, value_str.data());
311             if (cmp == 0) { return *this; }  // Found it in attributes: Just return
312             if (cmp > 0) {
313                 if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
314                 new_attributes.append(value_str.data(), status_);
315                 inserted = true;
316             }
317         }
318         if (!new_attributes.isEmpty()) {
319             new_attributes.append('_', status_);
320         }
321         new_attributes.append(start, status_);
322         start += uprv_strlen(start) + 1;
323     }
324     if (!inserted) {
325         if (!new_attributes.isEmpty()) {
326             new_attributes.append('_', status_);
327         }
328         new_attributes.append(value_str.data(), status_);
329     }
330     // Not yet in the attributes, set the attribute.
331     extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
332     return *this;
333 }
334 
removeUnicodeLocaleAttribute(StringPiece value)335 LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
336     StringPiece value)
337 {
338     CharString value_str(value, status_);
339     if (U_FAILURE(status_)) { return *this; }
340     transform(value_str.data(), value_str.length());
341     if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
342         status_ = U_ILLEGAL_ARGUMENT_ERROR;
343         return *this;
344     }
345     if (extensions_ == nullptr) { return *this; }
346     UErrorCode localErrorCode = U_ZERO_ERROR;
347     CharString attributes;
348     CharStringByteSink sink(&attributes);
349     extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
350     // get failure, just return
351     if (U_FAILURE(localErrorCode)) { return *this; }
352     // Do not have any attributes, just return.
353     if (attributes.isEmpty()) { return *this; }
354 
355     char* p = attributes.data();
356     // Replace null terminiator in place for _ and - so later
357     // we can use uprv_strcmp to compare.
358     for (int32_t i = 0; i < attributes.length(); i++, p++) {
359         *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
360     }
361 
362     const char* start = attributes.data();
363     const char* limit = attributes.data() + attributes.length();
364     CharString new_attributes;
365     bool found = false;
366     while (start < limit) {
367         if (uprv_strcmp(start, value_str.data()) == 0) {
368             found = true;
369         } else {
370             if (!new_attributes.isEmpty()) {
371                 new_attributes.append('_', status_);
372             }
373             new_attributes.append(start, status_);
374         }
375         start += uprv_strlen(start) + 1;
376     }
377     // Found the value in attributes, set the attribute.
378     if (found) {
379         extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
380     }
381     return *this;
382 }
383 
clear()384 LocaleBuilder& LocaleBuilder::clear()
385 {
386     status_ = U_ZERO_ERROR;
387     language_[0] = 0;
388     script_[0] = 0;
389     region_[0] = 0;
390     delete variant_;
391     variant_ = nullptr;
392     clearExtensions();
393     return *this;
394 }
395 
clearExtensions()396 LocaleBuilder& LocaleBuilder::clearExtensions()
397 {
398     delete extensions_;
399     extensions_ = nullptr;
400     return *this;
401 }
402 
makeBogusLocale()403 Locale makeBogusLocale() {
404   Locale bogus;
405   bogus.setToBogus();
406   return bogus;
407 }
408 
copyExtensionsFrom(const Locale & src,UErrorCode & errorCode)409 void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
410 {
411     if (U_FAILURE(errorCode)) { return; }
412     LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
413     if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
414         // Error, or no extensions to copy.
415         return;
416     }
417     if (extensions_ == nullptr) {
418         extensions_ = new Locale();
419         if (extensions_ == nullptr) {
420             status_ = U_MEMORY_ALLOCATION_ERROR;
421             return;
422         }
423     }
424     _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
425 }
426 
build(UErrorCode & errorCode)427 Locale LocaleBuilder::build(UErrorCode& errorCode)
428 {
429     if (U_FAILURE(errorCode)) {
430         return makeBogusLocale();
431     }
432     if (U_FAILURE(status_)) {
433         errorCode = status_;
434         return makeBogusLocale();
435     }
436     CharString locale_str(language_, errorCode);
437     if (uprv_strlen(script_) > 0) {
438         locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
439     }
440     if (uprv_strlen(region_) > 0) {
441         locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
442     }
443     if (variant_ != nullptr) {
444         locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
445     }
446     if (U_FAILURE(errorCode)) {
447         return makeBogusLocale();
448     }
449     Locale product(locale_str.data());
450     if (extensions_ != nullptr) {
451         _copyExtensions(*extensions_, nullptr, product, true, errorCode);
452     }
453     if (U_FAILURE(errorCode)) {
454         return makeBogusLocale();
455     }
456     return product;
457 }
458 
copyErrorTo(UErrorCode & outErrorCode) const459 UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
460     if (U_FAILURE(outErrorCode)) {
461         // Do not overwrite the older error code
462         return TRUE;
463     }
464     outErrorCode = status_;
465     return U_FAILURE(outErrorCode);
466 }
467 
468 U_NAMESPACE_END
469