1 /* 2 * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com> 3 * 4 * SPDX-License-Identifier: LGPL-2.1-or-later 5 */ 6 #ifndef _FCITX_LIBIME_PINYIN_PINYINENCODER_H_ 7 #define _FCITX_LIBIME_PINYIN_PINYINENCODER_H_ 8 9 #include "libimepinyin_export.h" 10 #include <cassert> 11 #include <fcitx-utils/flags.h> 12 #include <fcitx-utils/log.h> 13 #include <functional> 14 #include <libime/core/segmentgraph.h> 15 #include <string> 16 #include <string_view> 17 #include <unordered_map> 18 #include <vector> 19 20 namespace libime { 21 22 class ShuangpinProfile; 23 24 enum class PinyinFuzzyFlag { 25 None = 0, 26 NG_GN = 1 << 0, 27 V_U = 1 << 1, 28 AN_ANG = 1 << 2, // 0 29 EN_ENG = 1 << 3, // 1 30 IAN_IANG = 1 << 4, // 2 31 IN_ING = 1 << 5, // 3 32 U_OU = 1 << 6, // 4 33 UAN_UANG = 1 << 7, // 5 34 C_CH = 1 << 8, // 0 35 F_H = 1 << 9, // 1 36 L_N = 1 << 10, // 2 37 S_SH = 1 << 11, // 3 38 Z_ZH = 1 << 12, // 4 39 VE_UE = 1 << 13, 40 Inner = 1 << 14, 41 InnerShort = 1 << 15, 42 PartialFinal = 1 << 16, 43 }; 44 45 using PinyinFuzzyFlags = fcitx::Flags<PinyinFuzzyFlag>; 46 47 LIBIMEPINYIN_EXPORT 48 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log, 49 PinyinFuzzyFlags final); 50 51 enum class PinyinInitial : char { 52 Invalid = 0, 53 B = 'A', 54 P, 55 M, 56 F, 57 D, 58 T, 59 N, 60 L, 61 G, 62 K, 63 H, 64 J, 65 Q, 66 X, 67 ZH, 68 CH, 69 SH, 70 R, 71 Z, 72 C, 73 S, 74 Y, 75 W, 76 Zero 77 }; 78 79 inline bool operator<(PinyinInitial l, PinyinInitial r) { 80 return static_cast<char>(l) < static_cast<char>(r); 81 } 82 83 inline bool operator<=(PinyinInitial l, PinyinInitial r) { 84 return l < r || l == r; 85 } 86 87 inline bool operator>(PinyinInitial l, PinyinInitial r) { return !(l <= r); } 88 89 inline bool operator>=(PinyinInitial l, PinyinInitial r) { return !(l < r); } 90 91 LIBIMEPINYIN_EXPORT 92 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log, 93 PinyinInitial initial); 94 95 enum class PinyinFinal : char { 96 Invalid = 0, 97 A = 'A', 98 AI, 99 AN, 100 ANG, 101 AO, 102 E, 103 EI, 104 EN, 105 ENG, 106 ER, 107 O, 108 ONG, 109 OU, 110 I, 111 IA, 112 IE, 113 IAO, 114 IU, 115 IAN, 116 IN, 117 IANG, 118 ING, 119 IONG, 120 U, 121 UA, 122 UO, 123 UAI, 124 UI, 125 UAN, 126 UN, 127 UANG, 128 V, 129 VE, 130 UE, 131 NG, 132 Zero 133 }; 134 135 inline bool operator<(PinyinFinal l, PinyinFinal r) { 136 return static_cast<char>(l) < static_cast<char>(r); 137 } 138 139 inline bool operator<=(PinyinFinal l, PinyinFinal r) { return l < r || l == r; } 140 141 inline bool operator>(PinyinFinal l, PinyinFinal r) { return !(l <= r); } 142 143 inline bool operator>=(PinyinFinal l, PinyinFinal r) { return !(l < r); } 144 145 LIBIMEPINYIN_EXPORT 146 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log, 147 PinyinFinal final); 148 149 struct LIBIMEPINYIN_EXPORT PinyinSyllable { 150 public: PinyinSyllablePinyinSyllable151 PinyinSyllable(PinyinInitial initial, PinyinFinal final) 152 : initial_(initial), final_(final) {} FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_COPYPinyinSyllable153 FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_COPY(PinyinSyllable) 154 155 PinyinInitial initial() const { return initial_; } finalPinyinSyllable156 PinyinFinal final() const { return final_; } 157 158 std::string toString() const; 159 160 bool operator==(const PinyinSyllable &other) const { 161 return initial_ == other.initial_ && final_ == other.final_; 162 } 163 164 bool operator!=(const PinyinSyllable &other) const { 165 return !(*this == other); 166 } 167 bool operator<(const PinyinSyllable &other) const { 168 return std::make_pair(initial_, final_) < 169 std::make_pair(other.initial_, other.final_); 170 } 171 bool operator<=(const PinyinSyllable &other) const { 172 return *this < other || *this == other; 173 } 174 bool operator>(const PinyinSyllable &other) const { 175 return !(*this <= other); 176 } 177 bool operator>=(const PinyinSyllable &other) const { 178 return !(*this < other); 179 } 180 181 private: 182 PinyinInitial initial_; 183 PinyinFinal final_; 184 }; 185 186 LIBIMEPINYIN_EXPORT 187 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log, 188 PinyinSyllable syl); 189 190 using MatchedPinyinSyllables = std::vector< 191 std::pair<PinyinInitial, std::vector<std::pair<PinyinFinal, bool>>>>; 192 193 class LIBIMEPINYIN_EXPORT PinyinEncoder { 194 public: 195 static SegmentGraph parseUserPinyin(std::string pinyin, 196 PinyinFuzzyFlags flags); 197 static SegmentGraph parseUserShuangpin(std::string pinyin, 198 const ShuangpinProfile &sp, 199 PinyinFuzzyFlags flags); 200 201 /** 202 * @brief Encode a quote separated pinyin string. 203 * 204 * @param pinyin pinyin string, like ni'hao 205 * @return encoded pinyin. 206 */ 207 static std::vector<char> encodeFullPinyin(std::string_view pinyin); 208 static std::vector<char> encodeOneUserPinyin(std::string pinyin); 209 210 static std::string shuangpinToPinyin(std::string_view pinyin, 211 const ShuangpinProfile &sp); 212 213 static bool isValidUserPinyin(const char *data, size_t size); 214 isValidUserPinyin(const std::vector<char> & v)215 static bool isValidUserPinyin(const std::vector<char> &v) { 216 return isValidUserPinyin(v.data(), v.size()); 217 } 218 decodeFullPinyin(const std::vector<char> & v)219 static std::string decodeFullPinyin(const std::vector<char> &v) { 220 return decodeFullPinyin(v.data(), v.size()); 221 } decodeFullPinyin(std::string_view s)222 static std::string decodeFullPinyin(std::string_view s) { 223 return decodeFullPinyin(s.data(), s.size()); 224 } 225 static std::string decodeFullPinyin(const char *data, size_t size); 226 227 static const std::string &initialToString(PinyinInitial initial); 228 static PinyinInitial stringToInitial(const std::string &str); isValidInitial(char c)229 static bool isValidInitial(char c) { 230 return c >= firstInitial && c <= lastInitial; 231 } 232 233 static const std::string &finalToString(PinyinFinal final); 234 static PinyinFinal stringToFinal(const std::string &str); isValidFinal(char c)235 static bool isValidFinal(char c) { 236 return c >= firstFinal && c <= lastFinal; 237 } 238 239 static bool isValidInitialFinal(PinyinInitial initial, PinyinFinal final); 240 // This will use "ü" when possible. 241 static std::string initialFinalToPinyinString(PinyinInitial initial, 242 PinyinFinal final); 243 244 static MatchedPinyinSyllables stringToSyllables(std::string_view pinyin, 245 PinyinFuzzyFlags flags); 246 static MatchedPinyinSyllables 247 shuangpinToSyllables(std::string_view pinyin, const ShuangpinProfile &sp, 248 PinyinFuzzyFlags flags); 249 250 static const char firstInitial = static_cast<char>(PinyinInitial::B); 251 static const char lastInitial = static_cast<char>(PinyinInitial::Zero); 252 static const char firstFinal = static_cast<char>(PinyinFinal::A); 253 static const char lastFinal = static_cast<char>(PinyinFinal::Zero); 254 }; 255 } // namespace libime 256 257 #endif // _FCITX_LIBIME_PINYIN_PINYINENCODER_H_ 258