1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // loclikelysubtags.cpp
5 // created: 2019may08 Markus W. Scherer
6 
7 #include <utility>
8 #include "unicode/utypes.h"
9 #include "unicode/bytestrie.h"
10 #include "unicode/localpointer.h"
11 #include "unicode/locid.h"
12 #include "unicode/uobject.h"
13 #include "unicode/ures.h"
14 #include "charstr.h"
15 #include "cstring.h"
16 #include "loclikelysubtags.h"
17 #include "lsr.h"
18 #include "uassert.h"
19 #include "ucln_cmn.h"
20 #include "uhash.h"
21 #include "uinvchar.h"
22 #include "umutex.h"
23 #include "uniquecharstr.h"
24 #include "uresdata.h"
25 #include "uresimp.h"
26 
27 U_NAMESPACE_BEGIN
28 
namespace {

// Prefix characters for pseudolocales.
// makeMaximizedLsr() passes one of these as the first LSR constructor argument
// when the region is XA/XB/XC or the variant is PSACCENT/PSBIDI/PSCRACK,
// so that a pseudolocale matches only itself.
constexpr char PSEUDO_ACCENTS_PREFIX = '\'';  // -XA, -PSACCENT
constexpr char PSEUDO_BIDI_PREFIX = '+';  // -XB, -PSBIDI
constexpr char PSEUDO_CRACKED_PREFIX = ',';  // -XC, -PSCRACK

}  // namespace
36 
LocaleDistanceData(LocaleDistanceData && data)37 LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
38         distanceTrieBytes(data.distanceTrieBytes),
39         regionToPartitions(data.regionToPartitions),
40         partitions(data.partitions),
41         paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
42         distances(data.distances) {
43     data.partitions = nullptr;
44     data.paradigms = nullptr;
45 }
46 
~LocaleDistanceData()47 LocaleDistanceData::~LocaleDistanceData() {
48     uprv_free(partitions);
49     delete[] paradigms;
50 }
51 
52 // TODO(ICU-20777): Rename to just LikelySubtagsData.
53 struct XLikelySubtagsData {
54     UResourceBundle *langInfoBundle = nullptr;
55     UniqueCharStrings strings;
56     CharStringMap languageAliases;
57     CharStringMap regionAliases;
58     const uint8_t *trieBytes = nullptr;
59     LSR *lsrs = nullptr;
60     int32_t lsrsLength = 0;
61 
62     LocaleDistanceData distanceData;
63 
XLikelySubtagsDataXLikelySubtagsData64     XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
65 
~XLikelySubtagsDataXLikelySubtagsData66     ~XLikelySubtagsData() {
67         ures_close(langInfoBundle);
68         delete[] lsrs;
69     }
70 
loadXLikelySubtagsData71     void load(UErrorCode &errorCode) {
72         langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
73         if (U_FAILURE(errorCode)) { return; }
74         StackUResourceBundle stackTempBundle;
75         ResourceDataValue value;
76         ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
77                                   value, errorCode);
78         ResourceTable likelyTable = value.getTable(errorCode);
79         if (U_FAILURE(errorCode)) { return; }
80 
81         // Read all strings in the resource bundle and convert them to invariant char *.
82         LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
83         int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
84         if (!readStrings(likelyTable, "languageAliases", value,
85                          languageIndexes, languagesLength, errorCode) ||
86                 !readStrings(likelyTable, "regionAliases", value,
87                              regionIndexes, regionsLength, errorCode) ||
88                 !readStrings(likelyTable, "lsrs", value,
89                              lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
90             return;
91         }
92         if ((languagesLength & 1) != 0 ||
93                 (regionsLength & 1) != 0 ||
94                 (lsrSubtagsLength % 3) != 0) {
95             errorCode = U_INVALID_FORMAT_ERROR;
96             return;
97         }
98         if (lsrSubtagsLength == 0) {
99             errorCode = U_MISSING_RESOURCE_ERROR;
100             return;
101         }
102 
103         if (!likelyTable.findValue("trie", value)) {
104             errorCode = U_MISSING_RESOURCE_ERROR;
105             return;
106         }
107         int32_t length;
108         trieBytes = value.getBinary(length, errorCode);
109         if (U_FAILURE(errorCode)) { return; }
110 
111         // Also read distance/matcher data if available,
112         // to open & keep only one resource bundle pointer
113         // and to use one single UniqueCharStrings.
114         UErrorCode matchErrorCode = U_ZERO_ERROR;
115         ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
116                                   value, matchErrorCode);
117         LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
118         int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
119         if (U_SUCCESS(matchErrorCode)) {
120             ResourceTable matchTable = value.getTable(errorCode);
121             if (U_FAILURE(errorCode)) { return; }
122 
123             if (matchTable.findValue("trie", value)) {
124                 distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
125                 if (U_FAILURE(errorCode)) { return; }
126             }
127 
128             if (matchTable.findValue("regionToPartitions", value)) {
129                 distanceData.regionToPartitions = value.getBinary(length, errorCode);
130                 if (U_FAILURE(errorCode)) { return; }
131                 if (length < LSR::REGION_INDEX_LIMIT) {
132                     errorCode = U_INVALID_FORMAT_ERROR;
133                     return;
134                 }
135             }
136 
137             if (!readStrings(matchTable, "partitions", value,
138                              partitionIndexes, partitionsLength, errorCode) ||
139                     !readStrings(matchTable, "paradigms", value,
140                                  paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
141                 return;
142             }
143             if ((paradigmSubtagsLength % 3) != 0) {
144                 errorCode = U_INVALID_FORMAT_ERROR;
145                 return;
146             }
147 
148             if (matchTable.findValue("distances", value)) {
149                 distanceData.distances = value.getIntVector(length, errorCode);
150                 if (U_FAILURE(errorCode)) { return; }
151                 if (length < 4) {  // LocaleDistance IX_LIMIT
152                     errorCode = U_INVALID_FORMAT_ERROR;
153                     return;
154                 }
155             }
156         } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
157             // ok for likely subtags
158         } else {  // error other than missing resource
159             errorCode = matchErrorCode;
160             return;
161         }
162 
163         // Fetch & store invariant-character versions of strings
164         // only after we have collected and de-duplicated all of them.
165         strings.freeze();
166 
167         languageAliases = CharStringMap(languagesLength / 2, errorCode);
168         for (int32_t i = 0; i < languagesLength; i += 2) {
169             languageAliases.put(strings.get(languageIndexes[i]),
170                                 strings.get(languageIndexes[i + 1]), errorCode);
171         }
172 
173         regionAliases = CharStringMap(regionsLength / 2, errorCode);
174         for (int32_t i = 0; i < regionsLength; i += 2) {
175             regionAliases.put(strings.get(regionIndexes[i]),
176                               strings.get(regionIndexes[i + 1]), errorCode);
177         }
178         if (U_FAILURE(errorCode)) { return; }
179 
180         lsrsLength = lsrSubtagsLength / 3;
181         lsrs = new LSR[lsrsLength];
182         if (lsrs == nullptr) {
183             errorCode = U_MEMORY_ALLOCATION_ERROR;
184             return;
185         }
186         for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
187             lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
188                           strings.get(lsrSubtagIndexes[i + 1]),
189                           strings.get(lsrSubtagIndexes[i + 2]),
190                           LSR::IMPLICIT_LSR);
191         }
192 
193         if (partitionsLength > 0) {
194             distanceData.partitions = static_cast<const char **>(
195                 uprv_malloc(partitionsLength * sizeof(const char *)));
196             if (distanceData.partitions == nullptr) {
197                 errorCode = U_MEMORY_ALLOCATION_ERROR;
198                 return;
199             }
200             for (int32_t i = 0; i < partitionsLength; ++i) {
201                 distanceData.partitions[i] = strings.get(partitionIndexes[i]);
202             }
203         }
204 
205         if (paradigmSubtagsLength > 0) {
206             distanceData.paradigmsLength = paradigmSubtagsLength / 3;
207             LSR *paradigms = new LSR[distanceData.paradigmsLength];
208             if (paradigms == nullptr) {
209                 errorCode = U_MEMORY_ALLOCATION_ERROR;
210                 return;
211             }
212             for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
213                 paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
214                                    strings.get(paradigmSubtagIndexes[i + 1]),
215                                    strings.get(paradigmSubtagIndexes[i + 2]),
216                                    LSR::DONT_CARE_FLAGS);
217             }
218             distanceData.paradigms = paradigms;
219         }
220     }
221 
222 private:
readStringsXLikelySubtagsData223     bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
224                      LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
225         if (table.findValue(key, value)) {
226             ResourceArray stringArray = value.getArray(errorCode);
227             if (U_FAILURE(errorCode)) { return false; }
228             length = stringArray.getSize();
229             if (length == 0) { return true; }
230             int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
231             if (rawIndexes == nullptr) {
232                 errorCode = U_MEMORY_ALLOCATION_ERROR;
233                 return false;
234             }
235             for (int i = 0; i < length; ++i) {
236                 stringArray.getValue(i, value);  // returns TRUE because i < length
237                 rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
238                 if (U_FAILURE(errorCode)) { return false; }
239             }
240         }
241         return true;
242     }
243 };
244 
245 namespace {
246 
247 XLikelySubtags *gLikelySubtags = nullptr;
248 UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
249 
cleanup()250 UBool U_CALLCONV cleanup() {
251     delete gLikelySubtags;
252     gLikelySubtags = nullptr;
253     gInitOnce.reset();
254     return TRUE;
255 }
256 
257 }  // namespace
258 
initLikelySubtags(UErrorCode & errorCode)259 void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
260     // This function is invoked only via umtx_initOnce().
261     U_ASSERT(gLikelySubtags == nullptr);
262     XLikelySubtagsData data(errorCode);
263     data.load(errorCode);
264     if (U_FAILURE(errorCode)) { return; }
265     gLikelySubtags = new XLikelySubtags(data);
266     if (gLikelySubtags == nullptr) {
267         errorCode = U_MEMORY_ALLOCATION_ERROR;
268         return;
269     }
270     ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
271 }
272 
getSingleton(UErrorCode & errorCode)273 const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
274     if (U_FAILURE(errorCode)) { return nullptr; }
275     umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
276     return gLikelySubtags;
277 }
278 
XLikelySubtags(XLikelySubtagsData & data)279 XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
280         langInfoBundle(data.langInfoBundle),
281         strings(data.strings.orphanCharStrings()),
282         languageAliases(std::move(data.languageAliases)),
283         regionAliases(std::move(data.regionAliases)),
284         trie(data.trieBytes),
285         lsrs(data.lsrs),
286 #if U_DEBUG
287         lsrsLength(data.lsrsLength),
288 #endif
289         distanceData(std::move(data.distanceData)) {
290     data.langInfoBundle = nullptr;
291     data.lsrs = nullptr;
292 
293     // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
294     UStringTrieResult result = trie.next(u'*');
295     U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
296     trieUndState = trie.getState64();
297     result = trie.next(u'*');
298     U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
299     trieUndZzzzState = trie.getState64();
300     result = trie.next(u'*');
301     U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
302     defaultLsrIndex = trie.getValue();
303     trie.reset();
304 
305     for (char16_t c = u'a'; c <= u'z'; ++c) {
306         result = trie.next(c);
307         if (result == USTRINGTRIE_NO_VALUE) {
308             trieFirstLetterStates[c - u'a'] = trie.getState64();
309         }
310         trie.reset();
311     }
312 }
313 
~XLikelySubtags()314 XLikelySubtags::~XLikelySubtags() {
315     ures_close(langInfoBundle);
316     delete strings;
317     delete[] lsrs;
318 }
319 
makeMaximizedLsrFrom(const Locale & locale,UErrorCode & errorCode) const320 LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
321     const char *name = locale.getName();
322     if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') {  // name.startsWith("@x=")
323         // Private use language tag x-subtag-subtag... which CLDR changes to
324         // und-x-subtag-subtag...
325         return LSR(name, "", "", LSR::EXPLICIT_LSR);
326     }
327     return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
328                             locale.getVariant(), errorCode);
329 }
330 
331 namespace {
332 
getCanonical(const CharStringMap & aliases,const char * alias)333 const char *getCanonical(const CharStringMap &aliases, const char *alias) {
334     const char *canonical = aliases.get(alias);
335     return canonical == nullptr ? alias : canonical;
336 }
337 
338 }  // namespace
339 
makeMaximizedLsr(const char * language,const char * script,const char * region,const char * variant,UErrorCode & errorCode) const340 LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
341                                      const char *variant, UErrorCode &errorCode) const {
342     // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
343     // They should match only themselves,
344     // not other locales with what looks like the same language and script subtags.
345     char c1;
346     if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) {
347         switch (c1) {
348         case 'A':
349             return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
350                        LSR::EXPLICIT_LSR, errorCode);
351         case 'B':
352             return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
353                        LSR::EXPLICIT_LSR, errorCode);
354         case 'C':
355             return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
356                        LSR::EXPLICIT_LSR, errorCode);
357         default:  // normal locale
358             break;
359         }
360     }
361 
362     if (variant[0] == 'P' && variant[1] == 'S') {
363         int32_t lsrFlags = *region == 0 ?
364             LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR;
365         if (uprv_strcmp(variant, "PSACCENT") == 0) {
366             return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
367                        *region == 0 ? "XA" : region, lsrFlags, errorCode);
368         } else if (uprv_strcmp(variant, "PSBIDI") == 0) {
369             return LSR(PSEUDO_BIDI_PREFIX, language, script,
370                        *region == 0 ? "XB" : region, lsrFlags, errorCode);
371         } else if (uprv_strcmp(variant, "PSCRACK") == 0) {
372             return LSR(PSEUDO_CRACKED_PREFIX, language, script,
373                        *region == 0 ? "XC" : region, lsrFlags, errorCode);
374         }
375         // else normal locale
376     }
377 
378     language = getCanonical(languageAliases, language);
379     // (We have no script mappings.)
380     region = getCanonical(regionAliases, region);
381     return maximize(language, script, region);
382 }
383 
maximize(const char * language,const char * script,const char * region) const384 LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
385     if (uprv_strcmp(language, "und") == 0) {
386         language = "";
387     }
388     if (uprv_strcmp(script, "Zzzz") == 0) {
389         script = "";
390     }
391     if (uprv_strcmp(region, "ZZ") == 0) {
392         region = "";
393     }
394     if (*script != 0 && *region != 0 && *language != 0) {
395         return LSR(language, script, region, LSR::EXPLICIT_LSR);  // already maximized
396     }
397 
398     uint32_t retainOldMask = 0;
399     BytesTrie iter(trie);
400     uint64_t state;
401     int32_t value;
402     // Small optimization: Array lookup for first language letter.
403     int32_t c0;
404     if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
405             language[1] != 0 &&  // language.length() >= 2
406             (state = trieFirstLetterStates[c0]) != 0) {
407         value = trieNext(iter.resetToState64(state), language, 1);
408     } else {
409         value = trieNext(iter, language, 0);
410     }
411     if (value >= 0) {
412         if (*language != 0) {
413             retainOldMask |= 4;
414         }
415         state = iter.getState64();
416     } else {
417         retainOldMask |= 4;
418         iter.resetToState64(trieUndState);  // "und" ("*")
419         state = 0;
420     }
421 
422     if (value > 0) {
423         // Intermediate or final value from just language.
424         if (value == SKIP_SCRIPT) {
425             value = 0;
426         }
427         if (*script != 0) {
428             retainOldMask |= 2;
429         }
430     } else {
431         value = trieNext(iter, script, 0);
432         if (value >= 0) {
433             if (*script != 0) {
434                 retainOldMask |= 2;
435             }
436             state = iter.getState64();
437         } else {
438             retainOldMask |= 2;
439             if (state == 0) {
440                 iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
441             } else {
442                 iter.resetToState64(state);
443                 value = trieNext(iter, "", 0);
444                 U_ASSERT(value >= 0);
445                 state = iter.getState64();
446             }
447         }
448     }
449 
450     if (value > 0) {
451         // Final value from just language or language+script.
452         if (*region != 0) {
453             retainOldMask |= 1;
454         }
455     } else {
456         value = trieNext(iter, region, 0);
457         if (value >= 0) {
458             if (*region != 0) {
459                 retainOldMask |= 1;
460             }
461         } else {
462             retainOldMask |= 1;
463             if (state == 0) {
464                 value = defaultLsrIndex;
465             } else {
466                 iter.resetToState64(state);
467                 value = trieNext(iter, "", 0);
468                 U_ASSERT(value > 0);
469             }
470         }
471     }
472     U_ASSERT(value < lsrsLength);
473     const LSR &result = lsrs[value];
474 
475     if (*language == 0) {
476         language = "und";
477     }
478 
479     if (retainOldMask == 0) {
480         // Quickly return a copy of the lookup-result LSR
481         // without new allocation of the subtags.
482         return LSR(result.language, result.script, result.region, result.flags);
483     }
484     if ((retainOldMask & 4) == 0) {
485         language = result.language;
486     }
487     if ((retainOldMask & 2) == 0) {
488         script = result.script;
489     }
490     if ((retainOldMask & 1) == 0) {
491         region = result.region;
492     }
493     // retainOldMask flags = LSR explicit-subtag flags
494     return LSR(language, script, region, retainOldMask);
495 }
496 
compareLikely(const LSR & lsr,const LSR & other,int32_t likelyInfo) const497 int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
498     // If likelyInfo >= 0:
499     // likelyInfo bit 1 is set if the previous comparison with lsr
500     // was for equal language and script.
501     // Otherwise the scripts differed.
502     if (uprv_strcmp(lsr.language, other.language) != 0) {
503         return 0xfffffffc;  // negative, lsr not better than other
504     }
505     if (uprv_strcmp(lsr.script, other.script) != 0) {
506         int32_t index;
507         if (likelyInfo >= 0 && (likelyInfo & 2) == 0) {
508             index = likelyInfo >> 2;
509         } else {
510             index = getLikelyIndex(lsr.language, "");
511             likelyInfo = index << 2;
512         }
513         const LSR &likely = lsrs[index];
514         if (uprv_strcmp(lsr.script, likely.script) == 0) {
515             return likelyInfo | 1;
516         } else {
517             return likelyInfo & ~1;
518         }
519     }
520     if (uprv_strcmp(lsr.region, other.region) != 0) {
521         int32_t index;
522         if (likelyInfo >= 0 && (likelyInfo & 2) != 0) {
523             index = likelyInfo >> 2;
524         } else {
525             index = getLikelyIndex(lsr.language, lsr.region);
526             likelyInfo = (index << 2) | 2;
527         }
528         const LSR &likely = lsrs[index];
529         if (uprv_strcmp(lsr.region, likely.region) == 0) {
530             return likelyInfo | 1;
531         } else {
532             return likelyInfo & ~1;
533         }
534     }
535     return likelyInfo & ~1;  // lsr not better than other
536 }
537 
538 // Subset of maximize().
getLikelyIndex(const char * language,const char * script) const539 int32_t XLikelySubtags::getLikelyIndex(const char *language, const char *script) const {
540     if (uprv_strcmp(language, "und") == 0) {
541         language = "";
542     }
543     if (uprv_strcmp(script, "Zzzz") == 0) {
544         script = "";
545     }
546 
547     BytesTrie iter(trie);
548     uint64_t state;
549     int32_t value;
550     // Small optimization: Array lookup for first language letter.
551     int32_t c0;
552     if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
553             language[1] != 0 &&  // language.length() >= 2
554             (state = trieFirstLetterStates[c0]) != 0) {
555         value = trieNext(iter.resetToState64(state), language, 1);
556     } else {
557         value = trieNext(iter, language, 0);
558     }
559     if (value >= 0) {
560         state = iter.getState64();
561     } else {
562         iter.resetToState64(trieUndState);  // "und" ("*")
563         state = 0;
564     }
565 
566     if (value > 0) {
567         // Intermediate or final value from just language.
568         if (value == SKIP_SCRIPT) {
569             value = 0;
570         }
571     } else {
572         value = trieNext(iter, script, 0);
573         if (value >= 0) {
574             state = iter.getState64();
575         } else {
576             if (state == 0) {
577                 iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
578             } else {
579                 iter.resetToState64(state);
580                 value = trieNext(iter, "", 0);
581                 U_ASSERT(value >= 0);
582                 state = iter.getState64();
583             }
584         }
585     }
586 
587     if (value > 0) {
588         // Final value from just language or language+script.
589     } else {
590         value = trieNext(iter, "", 0);
591         U_ASSERT(value > 0);
592     }
593     U_ASSERT(value < lsrsLength);
594     return value;
595 }
596 
trieNext(BytesTrie & iter,const char * s,int32_t i)597 int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
598     UStringTrieResult result;
599     uint8_t c;
600     if ((c = s[i]) == 0) {
601         result = iter.next(u'*');
602     } else {
603         for (;;) {
604             c = uprv_invCharToAscii(c);
605             // EBCDIC: If s[i] is not an invariant character,
606             // then c is now 0 and will simply not match anything, which is harmless.
607             uint8_t next = s[++i];
608             if (next != 0) {
609                 if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
610                     return -1;
611                 }
612             } else {
613                 // last character of this subtag
614                 result = iter.next(c | 0x80);
615                 break;
616             }
617             c = next;
618         }
619     }
620     switch (result) {
621     case USTRINGTRIE_NO_MATCH: return -1;
622     case USTRINGTRIE_NO_VALUE: return 0;
623     case USTRINGTRIE_INTERMEDIATE_VALUE:
624         U_ASSERT(iter.getValue() == SKIP_SCRIPT);
625         return SKIP_SCRIPT;
626     case USTRINGTRIE_FINAL_VALUE: return iter.getValue();
627     default: return -1;
628     }
629 }
630 
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
// in loclikely.cpp to this new code, including activating this
// minimizeSubtags() function. The LocaleMatcher does not minimize.
//
// NOTE: The code below is disabled with #if 0 and is never compiled.
// It still uses Java-style constructs (new, boolean, .equals(), ULocale.Minimize)
// and has to be ported to C++ before it can be enabled.
#if 0
// Intended behavior, as far as the disabled code shows:
// maximize the input, then return the shortest of
// (language), (language, region), (language, script) that still
// corresponds to the same maximized result; otherwise return the maximized result.
LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
                                    const char *regionIn, ULocale.Minimize fieldToFavor,
                                    UErrorCode &errorCode) const {
    LSR result = maximize(languageIn, scriptIn, regionIn);

    // We could try just a series of checks, like:
    // LSR result2 = addLikelySubtags(languageIn, "", "");
    // if result.equals(result2) return result2;
    // However, we can optimize 2 of the cases:
    //   (languageIn, "", "")
    //   (languageIn, "", regionIn)

    // value00 = lookup(result.language, "", "")
    BytesTrie iter = new BytesTrie(trie);
    int value = trieNext(iter, result.language, 0);
    U_ASSERT(value >= 0);
    if (value == 0) {
        // No value from the language alone: continue with empty script, then empty region.
        value = trieNext(iter, "", 0);
        U_ASSERT(value >= 0);
        if (value == 0) {
            value = trieNext(iter, "", 0);
        }
    }
    U_ASSERT(value > 0);
    LSR value00 = lsrs[value];
    boolean favorRegionOk = false;
    if (result.script.equals(value00.script)) {  // script is the default for this language
        if (result.region.equals(value00.region)) {
            // Script and region are both the defaults: the language alone suffices.
            return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
        } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
            return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
        } else {
            favorRegionOk = true;
        }
    }

    // The last case is not as easy to optimize.
    // Maybe do later, but for now use the straightforward code.
    LSR result2 = maximize(languageIn, scriptIn, "");
    if (result2.equals(result)) {
        return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
    } else if (favorRegionOk) {
        return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
    }
    return result;
}
#endif
682 
683 U_NAMESPACE_END
684