1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 
4 // loclikelysubtags.h
5 // created: 2019may08 Markus W. Scherer
6 
7 #ifndef __LOCLIKELYSUBTAGS_H__
8 #define __LOCLIKELYSUBTAGS_H__
9 
10 #include <utility>
11 #include "unicode/utypes.h"
12 #include "unicode/bytestrie.h"
13 #include "unicode/locid.h"
14 #include "unicode/uobject.h"
15 #include "unicode/ures.h"
16 #include "lsr.h"
17 #include "uhash.h"
18 
19 U_NAMESPACE_BEGIN
20 
21 struct XLikelySubtagsData;
22 
23 /**
24  * Map of const char * keys & values.
25  * Stores pointers as is: Does not own/copy/adopt/release strings.
26  */
27 class CharStringMap final : public UMemory {
28 public:
29     /** Constructs an unusable non-map. */
CharStringMap()30     CharStringMap() : map(nullptr) {}
CharStringMap(int32_t size,UErrorCode & errorCode)31     CharStringMap(int32_t size, UErrorCode &errorCode) {
32         map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
33                              size, &errorCode);
34     }
CharStringMap(CharStringMap && other)35     CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
36         other.map = nullptr;
37     }
38     CharStringMap(const CharStringMap &other) = delete;
~CharStringMap()39     ~CharStringMap() {
40         uhash_close(map);
41     }
42 
43     CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
44         map = other.map;
45         other.map = nullptr;
46         return *this;
47     }
48     CharStringMap &operator=(const CharStringMap &other) = delete;
49 
get(const char * key)50     const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
put(const char * key,const char * value,UErrorCode & errorCode)51     void put(const char *key, const char *value, UErrorCode &errorCode) {
52         uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
53     }
54 
55 private:
56     UHashtable *map;
57 };
58 
59 struct LocaleDistanceData {
60     LocaleDistanceData() = default;
61     LocaleDistanceData(LocaleDistanceData &&data);
62     ~LocaleDistanceData();
63 
64     const uint8_t *distanceTrieBytes = nullptr;
65     const uint8_t *regionToPartitions = nullptr;
66     const char **partitions = nullptr;
67     const LSR *paradigms = nullptr;
68     int32_t paradigmsLength = 0;
69     const int32_t *distances = nullptr;
70 
71 private:
72     LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
73 };
74 
75 // TODO(ICU-20777): Rename to just LikelySubtags.
76 class XLikelySubtags final : public UMemory {
77 public:
78     ~XLikelySubtags();
79 
80     static constexpr int32_t SKIP_SCRIPT = 1;
81 
82     // VisibleForTesting
83     static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
84 
85     // VisibleForTesting
86     LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
87 
88     // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
89     // in loclikely.cpp to this new code, including activating this
90     // minimizeSubtags() function. The LocaleMatcher does not minimize.
91 #if 0
92     LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
93                         ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
94 #endif
95 
96     // visible for LocaleDistance
getDistanceData()97     const LocaleDistanceData &getDistanceData() const { return distanceData; }
98 
99 private:
100     XLikelySubtags(XLikelySubtagsData &data);
101     XLikelySubtags(const XLikelySubtags &other) = delete;
102     XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
103 
104     static void initLikelySubtags(UErrorCode &errorCode);
105 
106     LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
107                          const char *variant, UErrorCode &errorCode) const;
108 
109     /**
110      * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
111      */
112     LSR maximize(const char *language, const char *script, const char *region) const;
113 
114     static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
115 
116     UResourceBundle *langInfoBundle;
117     // We could store the strings by value, except that if there were few enough strings,
118     // moving the contents could copy it to a different array,
119     // invalidating the pointers stored in the maps.
120     CharString *strings;
121     CharStringMap languageAliases;
122     CharStringMap regionAliases;
123 
124     // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
125     // There is also a trie value for each intermediate lang and lang+script.
126     // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
127     BytesTrie trie;
128     uint64_t trieUndState;
129     uint64_t trieUndZzzzState;
130     int32_t defaultLsrIndex;
131     uint64_t trieFirstLetterStates[26];
132     const LSR *lsrs;
133 #if U_DEBUG
134     int32_t lsrsLength;
135 #endif
136 
137     // distance/matcher data: see comment in XLikelySubtagsData::load()
138     LocaleDistanceData distanceData;
139 };
140 
141 U_NAMESPACE_END
142 
143 #endif  // __LOCLIKELYSUBTAGS_H__
144