1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 4 // loclikelysubtags.h 5 // created: 2019may08 Markus W. Scherer 6 7 #ifndef __LOCLIKELYSUBTAGS_H__ 8 #define __LOCLIKELYSUBTAGS_H__ 9 10 #include <utility> 11 #include "unicode/utypes.h" 12 #include "unicode/bytestrie.h" 13 #include "unicode/locid.h" 14 #include "unicode/uobject.h" 15 #include "unicode/ures.h" 16 #include "lsr.h" 17 #include "uhash.h" 18 19 U_NAMESPACE_BEGIN 20 21 struct XLikelySubtagsData; 22 23 /** 24 * Map of const char * keys & values. 25 * Stores pointers as is: Does not own/copy/adopt/release strings. 26 */ 27 class CharStringMap final : public UMemory { 28 public: 29 /** Constructs an unusable non-map. */ CharStringMap()30 CharStringMap() : map(nullptr) {} CharStringMap(int32_t size,UErrorCode & errorCode)31 CharStringMap(int32_t size, UErrorCode &errorCode) { 32 map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars, 33 size, &errorCode); 34 } CharStringMap(CharStringMap && other)35 CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) { 36 other.map = nullptr; 37 } 38 CharStringMap(const CharStringMap &other) = delete; ~CharStringMap()39 ~CharStringMap() { 40 uhash_close(map); 41 } 42 43 CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT { 44 map = other.map; 45 other.map = nullptr; 46 return *this; 47 } 48 CharStringMap &operator=(const CharStringMap &other) = delete; 49 get(const char * key)50 const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); } put(const char * key,const char * value,UErrorCode & errorCode)51 void put(const char *key, const char *value, UErrorCode &errorCode) { 52 uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode); 53 } 54 55 private: 56 UHashtable *map; 57 }; 58 59 struct LocaleDistanceData { 60 LocaleDistanceData() = default; 61 LocaleDistanceData(LocaleDistanceData &&data); 62 ~LocaleDistanceData(); 63 64 const uint8_t *distanceTrieBytes = nullptr; 65 const uint8_t *regionToPartitions = nullptr; 66 const char **partitions = nullptr; 67 const LSR *paradigms = nullptr; 68 int32_t paradigmsLength = 0; 69 const int32_t *distances = nullptr; 70 71 private: 72 LocaleDistanceData &operator=(const LocaleDistanceData &) = delete; 73 }; 74 75 // TODO(ICU-20777): Rename to just LikelySubtags. 76 class XLikelySubtags final : public UMemory { 77 public: 78 ~XLikelySubtags(); 79 80 static constexpr int32_t SKIP_SCRIPT = 1; 81 82 // VisibleForTesting 83 static const XLikelySubtags *getSingleton(UErrorCode &errorCode); 84 85 // VisibleForTesting 86 LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const; 87 88 // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code 89 // in loclikely.cpp to this new code, including activating this 90 // minimizeSubtags() function. The LocaleMatcher does not minimize. 91 #if 0 92 LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn, 93 ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const; 94 #endif 95 96 // visible for LocaleDistance getDistanceData()97 const LocaleDistanceData &getDistanceData() const { return distanceData; } 98 99 private: 100 XLikelySubtags(XLikelySubtagsData &data); 101 XLikelySubtags(const XLikelySubtags &other) = delete; 102 XLikelySubtags &operator=(const XLikelySubtags &other) = delete; 103 104 static void initLikelySubtags(UErrorCode &errorCode); 105 106 LSR makeMaximizedLsr(const char *language, const char *script, const char *region, 107 const char *variant, UErrorCode &errorCode) const; 108 109 /** 110 * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN". 111 */ 112 LSR maximize(const char *language, const char *script, const char *region) const; 113 114 static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i); 115 116 UResourceBundle *langInfoBundle; 117 // We could store the strings by value, except that if there were few enough strings, 118 // moving the contents could copy it to a different array, 119 // invalidating the pointers stored in the maps. 120 CharString *strings; 121 CharStringMap languageAliases; 122 CharStringMap regionAliases; 123 124 // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs. 125 // There is also a trie value for each intermediate lang and lang+script. 126 // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"". 127 BytesTrie trie; 128 uint64_t trieUndState; 129 uint64_t trieUndZzzzState; 130 int32_t defaultLsrIndex; 131 uint64_t trieFirstLetterStates[26]; 132 const LSR *lsrs; 133 #if U_DEBUG 134 int32_t lsrsLength; 135 #endif 136 137 // distance/matcher data: see comment in XLikelySubtagsData::load() 138 LocaleDistanceData distanceData; 139 }; 140 141 U_NAMESPACE_END 142 143 #endif // __LOCLIKELYSUBTAGS_H__ 144