// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// loclikelysubtags.h
// created: 2019may08 Markus W. Scherer

#ifndef __LOCLIKELYSUBTAGS_H__
#define __LOCLIKELYSUBTAGS_H__

#include <utility>
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/locid.h"
#include "unicode/uobject.h"
#include "unicode/ures.h"
#include "charstrmap.h"
#include "lsr.h"

U_NAMESPACE_BEGIN

// Forward declaration; the definition is not in this header.
// An XLikelySubtags is constructed from one of these (see the private constructor).
struct XLikelySubtagsData;

/**
 * Plain bundle of pointers to locale distance/matcher data.
 * An instance is held by XLikelySubtags and exposed via getDistanceData().
 *
 * All pointer members default to nullptr and paradigmsLength to 0.
 * A move constructor and a destructor are declared (defined elsewhere);
 * copy assignment is explicitly deleted, and the user-declared move constructor
 * suppresses the implicit copy constructor.
 *
 * NOTE(review): which of these pointers (if any) the destructor releases is not
 * visible from this header -- confirm ownership in the implementation file.
 */
struct LocaleDistanceData {
    LocaleDistanceData() = default;
    LocaleDistanceData(LocaleDistanceData &&data);
    ~LocaleDistanceData();

    // NOTE(review): presumably the serialized bytes of a distance BytesTrie -- confirm.
    const uint8_t *distanceTrieBytes = nullptr;
    // NOTE(review): presumably a lookup table from region to partition data -- confirm.
    const uint8_t *regionToPartitions = nullptr;
    // Array of C-string pointers; its length is not stored in this struct.
    const char **partitions = nullptr;
    // Array of LSR values; paradigmsLength is its companion length field.
    const LSR *paradigms = nullptr;
    int32_t paradigmsLength = 0;
    // Array of int32_t values; its length is not stored in this struct.
    const int32_t *distances = nullptr;

private:
    // Copy assignment is disabled.
    LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
};

// TODO(ICU-20777): Rename to just LikelySubtags.
/**
 * Likely-subtags data and operations on LSR (language/script/region) values.
 *
 * Instances cannot be copied (copy constructor and copy assignment are deleted)
 * and the only constructor is private; callers obtain the shared instance
 * through getSingleton().
 */
class XLikelySubtags final : public UMemory {
public:
    ~XLikelySubtags();

    // NOTE(review): the meaning of this flag value is defined by its users,
    // which are not visible in this header.
    static constexpr int32_t SKIP_SCRIPT = 1;

    // VisibleForTesting
    // Returns a pointer to the shared instance; errorCode is the ICU in/out error code.
    // NOTE(review): presumably set up through initLikelySubtags() -- confirm in the .cpp.
    static const XLikelySubtags *getSingleton(UErrorCode &errorCode);

    // VisibleForTesting
    // Produces a maximized LSR for the given locale; errorCode is the ICU in/out error code.
    LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;

    /**
     * Tests whether lsr is "more likely" than other.
     * For example, fr-Latn-FR is more likely than fr-Latn-CH because
     * FR is the default region for fr-Latn.
     *
     * The likelyInfo caches lookup information between calls.
     * The return value is an updated likelyInfo value,
     * with bit 0 set if lsr is "more likely".
     * The initial value of likelyInfo must be negative.
     */
    int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;

    // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
    // in loclikely.cpp to this new code, including activating this
    // minimizeSubtags() function. The LocaleMatcher does not minimize.
#if 0
    LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
                        ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
#endif

    // Visible for LocaleDistance.
    // Returns a reference to this object's distanceData member.
    const LocaleDistanceData &getDistanceData() const { return distanceData; }

private:
    // Private: instances are not created directly by outside code.
    XLikelySubtags(XLikelySubtagsData &data);
    // Not copyable.
    XLikelySubtags(const XLikelySubtags &other) = delete;
    XLikelySubtags &operator=(const XLikelySubtags &other) = delete;

    // NOTE(review): presumably the one-time initializer behind getSingleton() -- confirm.
    static void initLikelySubtags(UErrorCode &errorCode);

    // Maximizes from individual subtag strings; errorCode is the ICU in/out error code.
    LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
                         const char *variant, UErrorCode &errorCode) const;

    /**
     * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
     */
    LSR maximize(const char *language, const char *script, const char *region) const;

    // NOTE(review): presumably returns an index into lsrs for the language+script
    // (see the trie comment below) -- confirm in the .cpp.
    int32_t getLikelyIndex(const char *language, const char *script) const;

    // Trie-stepping helper taking the iterator, a string s, and an int32_t i.
    // NOTE(review): the meaning of i and of the return value is defined in the .cpp.
    static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);

    UResourceBundle *langInfoBundle;
    // We could store the strings by value, except that if there were few enough strings,
    // moving the contents could copy it to a different array,
    // invalidating the pointers stored in the maps.
    CharString *strings;
    CharStringMap languageAliases;
    CharStringMap regionAliases;

    // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
    // There is also a trie value for each intermediate lang and lang+script.
    // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
    BytesTrie trie;
    uint64_t trieUndState;
    uint64_t trieUndZzzzState;
    int32_t defaultLsrIndex;
    // One saved trie state per slot, 26 slots.
    // NOTE(review): presumably indexed by the first letter a-z of the language -- confirm.
    uint64_t trieFirstLetterStates[26];
    const LSR *lsrs;
#if U_DEBUG
    // Length of lsrs; only present in debug builds.
    int32_t lsrsLength;
#endif

    // distance/matcher data: see comment in XLikelySubtagsData::load()
    LocaleDistanceData distanceData;
};

U_NAMESPACE_END

#endif  // __LOCLIKELYSUBTAGS_H__