1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // loclikelysubtags.h
5 // created: 2019may08 Markus W. Scherer
6 
7 #ifndef __LOCLIKELYSUBTAGS_H__
8 #define __LOCLIKELYSUBTAGS_H__
9 
10 #include <utility>
11 #include "unicode/utypes.h"
12 #include "unicode/bytestrie.h"
13 #include "unicode/locid.h"
14 #include "unicode/uobject.h"
15 #include "unicode/ures.h"
16 #include "charstrmap.h"
17 #include "lsr.h"
18 
19 U_NAMESPACE_BEGIN
20 
21 struct XLikelySubtagsData;
22 
23 struct LocaleDistanceData {
24     LocaleDistanceData() = default;
25     LocaleDistanceData(LocaleDistanceData &&data);
26     ~LocaleDistanceData();
27 
28     const uint8_t *distanceTrieBytes = nullptr;
29     const uint8_t *regionToPartitions = nullptr;
30     const char **partitions = nullptr;
31     const LSR *paradigms = nullptr;
32     int32_t paradigmsLength = 0;
33     const int32_t *distances = nullptr;
34 
35 private:
36     LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
37 };
38 
39 // TODO(ICU-20777): Rename to just LikelySubtags.
40 class XLikelySubtags final : public UMemory {
41 public:
42     ~XLikelySubtags();
43 
44     static constexpr int32_t SKIP_SCRIPT = 1;
45 
46     // VisibleForTesting
47     static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
48 
49     // VisibleForTesting
50     LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
51 
52     /**
53      * Tests whether lsr is "more likely" than other.
54      * For example, fr-Latn-FR is more likely than fr-Latn-CH because
55      * FR is the default region for fr-Latn.
56      *
57      * The likelyInfo caches lookup information between calls.
58      * The return value is an updated likelyInfo value,
59      * with bit 0 set if lsr is "more likely".
60      * The initial value of likelyInfo must be negative.
61      */
62     int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;
63 
64     // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
65     // in loclikely.cpp to this new code, including activating this
66     // minimizeSubtags() function. The LocaleMatcher does not minimize.
67 #if 0
68     LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
69                         ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
70 #endif
71 
72     // visible for LocaleDistance
getDistanceData()73     const LocaleDistanceData &getDistanceData() const { return distanceData; }
74 
75 private:
76     XLikelySubtags(XLikelySubtagsData &data);
77     XLikelySubtags(const XLikelySubtags &other) = delete;
78     XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
79 
80     static void initLikelySubtags(UErrorCode &errorCode);
81 
82     LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
83                          const char *variant, UErrorCode &errorCode) const;
84 
85     /**
86      * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
87      */
88     LSR maximize(const char *language, const char *script, const char *region) const;
89 
90     int32_t getLikelyIndex(const char *language, const char *script) const;
91 
92     static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
93 
94     UResourceBundle *langInfoBundle;
95     // We could store the strings by value, except that if there were few enough strings,
96     // moving the contents could copy it to a different array,
97     // invalidating the pointers stored in the maps.
98     CharString *strings;
99     CharStringMap languageAliases;
100     CharStringMap regionAliases;
101 
102     // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
103     // There is also a trie value for each intermediate lang and lang+script.
104     // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
105     BytesTrie trie;
106     uint64_t trieUndState;
107     uint64_t trieUndZzzzState;
108     int32_t defaultLsrIndex;
109     uint64_t trieFirstLetterStates[26];
110     const LSR *lsrs;
111 #if U_DEBUG
112     int32_t lsrsLength;
113 #endif
114 
115     // distance/matcher data: see comment in XLikelySubtagsData::load()
116     LocaleDistanceData distanceData;
117 };
118 
119 U_NAMESPACE_END
120 
121 #endif  // __LOCLIKELYSUBTAGS_H__
122