1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 #ifndef _FCITX_LIBIME_PINYIN_PINYINENCODER_H_
7 #define _FCITX_LIBIME_PINYIN_PINYINENCODER_H_
8 
9 #include "libimepinyin_export.h"
10 #include <cassert>
11 #include <fcitx-utils/flags.h>
12 #include <fcitx-utils/log.h>
13 #include <functional>
14 #include <libime/core/segmentgraph.h>
15 #include <string>
16 #include <string_view>
17 #include <unordered_map>
18 #include <vector>
19 
20 namespace libime {
21 
22 class ShuangpinProfile;
23 
/**
 * @brief Bit flags naming the individual fuzzy-pinyin options.
 *
 * Apart from None, every enumerator owns exactly one bit (bit 0 through
 * bit 16), so several of them can be OR-ed into a single mask.
 *
 * NOTE(review): the previous revision annotated AN_ANG..UAN_UANG with the
 * numbers 0-5 and C_CH..Z_ZH with 0-4. Presumably these are positions inside
 * a "finals" group and an "initials" group of fuzzy rules; confirm against
 * the implementation before relying on it.
 */
enum class PinyinFuzzyFlag {
    None = 0x00000,
    NG_GN = 0x00001,        // bit 0
    V_U = 0x00002,          // bit 1
    AN_ANG = 0x00004,       // bit 2  (annotated 0)
    EN_ENG = 0x00008,       // bit 3  (annotated 1)
    IAN_IANG = 0x00010,     // bit 4  (annotated 2)
    IN_ING = 0x00020,       // bit 5  (annotated 3)
    U_OU = 0x00040,         // bit 6  (annotated 4)
    UAN_UANG = 0x00080,     // bit 7  (annotated 5)
    C_CH = 0x00100,         // bit 8  (annotated 0)
    F_H = 0x00200,          // bit 9  (annotated 1)
    L_N = 0x00400,          // bit 10 (annotated 2)
    S_SH = 0x00800,         // bit 11 (annotated 3)
    Z_ZH = 0x01000,         // bit 12 (annotated 4)
    VE_UE = 0x02000,        // bit 13
    Inner = 0x04000,        // bit 14
    InnerShort = 0x08000,   // bit 15
    PartialFinal = 0x10000, // bit 16
};
44 
45 using PinyinFuzzyFlags = fcitx::Flags<PinyinFuzzyFlag>;
46 
47 LIBIMEPINYIN_EXPORT
48 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
49                                      PinyinFuzzyFlags final);
50 
/**
 * @brief Initial part of a pinyin syllable, stored in a single char.
 *
 * Invalid is 0. The remaining enumerators are numbered consecutively starting
 * at 'A' (B == 'A', P == 'B', ...), ending with Zero, so every non-Invalid
 * value lies in the contiguous range [B, Zero].
 *
 * NOTE(review): Zero presumably stands for "syllable without an initial";
 * confirm against the encoder implementation.
 */
enum class PinyinInitial : char {
    Invalid = 0,
    B = 'A',
    P,
    M,
    F,
    D,
    T,
    N,
    L,
    G,
    K,
    H,
    J,
    Q,
    X,
    ZH,
    CH,
    SH,
    R,
    Z,
    C,
    S,
    Y,
    W,
    Zero
};

// Relational operators order initials by their underlying char value.
// They are constexpr and noexcept so they can be used in constant
// expressions (static_assert, constexpr tables) as well as at run time.

/// @return true when @p l has a smaller underlying value than @p r.
constexpr bool operator<(PinyinInitial l, PinyinInitial r) noexcept {
    return static_cast<char>(l) < static_cast<char>(r);
}

/// @return true when @p l has an underlying value not greater than @p r.
constexpr bool operator<=(PinyinInitial l, PinyinInitial r) noexcept {
    return static_cast<char>(l) <= static_cast<char>(r);
}

/// @return true when @p l has a larger underlying value than @p r.
constexpr bool operator>(PinyinInitial l, PinyinInitial r) noexcept {
    return static_cast<char>(l) > static_cast<char>(r);
}

/// @return true when @p l has an underlying value not smaller than @p r.
constexpr bool operator>=(PinyinInitial l, PinyinInitial r) noexcept {
    return static_cast<char>(l) >= static_cast<char>(r);
}
90 
91 LIBIMEPINYIN_EXPORT
92 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
93                                      PinyinInitial initial);
94 
/**
 * @brief Final part of a pinyin syllable, stored in a single char.
 *
 * Invalid is 0. The remaining enumerators are numbered consecutively starting
 * at 'A' (A == 'A', AI == 'B', ...), ending with Zero, so every non-Invalid
 * value lies in the contiguous range [A, Zero].
 *
 * NOTE(review): Zero presumably stands for "syllable without a final";
 * confirm against the encoder implementation.
 */
enum class PinyinFinal : char {
    Invalid = 0,
    A = 'A',
    AI,
    AN,
    ANG,
    AO,
    E,
    EI,
    EN,
    ENG,
    ER,
    O,
    ONG,
    OU,
    I,
    IA,
    IE,
    IAO,
    IU,
    IAN,
    IN,
    IANG,
    ING,
    IONG,
    U,
    UA,
    UO,
    UAI,
    UI,
    UAN,
    UN,
    UANG,
    V,
    VE,
    UE,
    NG,
    Zero
};

// Relational operators order finals by their underlying char value.
// They are constexpr and noexcept so they can be used in constant
// expressions (static_assert, constexpr tables) as well as at run time.

/// @return true when @p l has a smaller underlying value than @p r.
constexpr bool operator<(PinyinFinal l, PinyinFinal r) noexcept {
    return static_cast<char>(l) < static_cast<char>(r);
}

/// @return true when @p l has an underlying value not greater than @p r.
constexpr bool operator<=(PinyinFinal l, PinyinFinal r) noexcept {
    return static_cast<char>(l) <= static_cast<char>(r);
}

/// @return true when @p l has a larger underlying value than @p r.
constexpr bool operator>(PinyinFinal l, PinyinFinal r) noexcept {
    return static_cast<char>(l) > static_cast<char>(r);
}

/// @return true when @p l has an underlying value not smaller than @p r.
constexpr bool operator>=(PinyinFinal l, PinyinFinal r) noexcept {
    return static_cast<char>(l) >= static_cast<char>(r);
}
144 
145 LIBIMEPINYIN_EXPORT
146 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
147                                      PinyinFinal final);
148 
149 struct LIBIMEPINYIN_EXPORT PinyinSyllable {
150 public:
PinyinSyllablePinyinSyllable151     PinyinSyllable(PinyinInitial initial, PinyinFinal final)
152         : initial_(initial), final_(final) {}
FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_COPYPinyinSyllable153     FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_COPY(PinyinSyllable)
154 
155     PinyinInitial initial() const { return initial_; }
finalPinyinSyllable156     PinyinFinal final() const { return final_; }
157 
158     std::string toString() const;
159 
160     bool operator==(const PinyinSyllable &other) const {
161         return initial_ == other.initial_ && final_ == other.final_;
162     }
163 
164     bool operator!=(const PinyinSyllable &other) const {
165         return !(*this == other);
166     }
167     bool operator<(const PinyinSyllable &other) const {
168         return std::make_pair(initial_, final_) <
169                std::make_pair(other.initial_, other.final_);
170     }
171     bool operator<=(const PinyinSyllable &other) const {
172         return *this < other || *this == other;
173     }
174     bool operator>(const PinyinSyllable &other) const {
175         return !(*this <= other);
176     }
177     bool operator>=(const PinyinSyllable &other) const {
178         return !(*this < other);
179     }
180 
181 private:
182     PinyinInitial initial_;
183     PinyinFinal final_;
184 };
185 
186 LIBIMEPINYIN_EXPORT
187 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
188                                      PinyinSyllable syl);
189 
190 using MatchedPinyinSyllables = std::vector<
191     std::pair<PinyinInitial, std::vector<std::pair<PinyinFinal, bool>>>>;
192 
193 class LIBIMEPINYIN_EXPORT PinyinEncoder {
194 public:
195     static SegmentGraph parseUserPinyin(std::string pinyin,
196                                         PinyinFuzzyFlags flags);
197     static SegmentGraph parseUserShuangpin(std::string pinyin,
198                                            const ShuangpinProfile &sp,
199                                            PinyinFuzzyFlags flags);
200 
201     /**
202      * @brief Encode a quote separated pinyin string.
203      *
204      * @param pinyin pinyin string, like ni'hao
205      * @return encoded pinyin.
206      */
207     static std::vector<char> encodeFullPinyin(std::string_view pinyin);
208     static std::vector<char> encodeOneUserPinyin(std::string pinyin);
209 
210     static std::string shuangpinToPinyin(std::string_view pinyin,
211                                          const ShuangpinProfile &sp);
212 
213     static bool isValidUserPinyin(const char *data, size_t size);
214 
isValidUserPinyin(const std::vector<char> & v)215     static bool isValidUserPinyin(const std::vector<char> &v) {
216         return isValidUserPinyin(v.data(), v.size());
217     }
218 
decodeFullPinyin(const std::vector<char> & v)219     static std::string decodeFullPinyin(const std::vector<char> &v) {
220         return decodeFullPinyin(v.data(), v.size());
221     }
decodeFullPinyin(std::string_view s)222     static std::string decodeFullPinyin(std::string_view s) {
223         return decodeFullPinyin(s.data(), s.size());
224     }
225     static std::string decodeFullPinyin(const char *data, size_t size);
226 
227     static const std::string &initialToString(PinyinInitial initial);
228     static PinyinInitial stringToInitial(const std::string &str);
isValidInitial(char c)229     static bool isValidInitial(char c) {
230         return c >= firstInitial && c <= lastInitial;
231     }
232 
233     static const std::string &finalToString(PinyinFinal final);
234     static PinyinFinal stringToFinal(const std::string &str);
isValidFinal(char c)235     static bool isValidFinal(char c) {
236         return c >= firstFinal && c <= lastFinal;
237     }
238 
239     static bool isValidInitialFinal(PinyinInitial initial, PinyinFinal final);
240     // This will use "ü" when possible.
241     static std::string initialFinalToPinyinString(PinyinInitial initial,
242                                                   PinyinFinal final);
243 
244     static MatchedPinyinSyllables stringToSyllables(std::string_view pinyin,
245                                                     PinyinFuzzyFlags flags);
246     static MatchedPinyinSyllables
247     shuangpinToSyllables(std::string_view pinyin, const ShuangpinProfile &sp,
248                          PinyinFuzzyFlags flags);
249 
250     static const char firstInitial = static_cast<char>(PinyinInitial::B);
251     static const char lastInitial = static_cast<char>(PinyinInitial::Zero);
252     static const char firstFinal = static_cast<char>(PinyinFinal::A);
253     static const char lastFinal = static_cast<char>(PinyinFinal::Zero);
254 };
255 } // namespace libime
256 
257 #endif // _FCITX_LIBIME_PINYIN_PINYINENCODER_H_
258