1 /*
2 * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3 *
4 * SPDX-License-Identifier: LGPL-2.1-or-later
5 */
6 #include "pinyinencoder.h"
7 #include "pinyindata.h"
8 #include "shuangpinprofile.h"
9 #include <boost/algorithm/string.hpp>
10 #include <boost/bimap.hpp>
11 #include <boost/bimap/unordered_set_of.hpp>
12 #include <boost/container/static_vector.hpp>
13 #include <fcitx-utils/charutils.h>
14 #include <queue>
15 #include <sstream>
16 #include <string_view>
17 #include <tuple>
18 #include <unordered_map>
19
20 namespace libime {
21
// Shared empty string returned by reference from initialToString() and
// finalToString() when the requested value is outside the known range.
static const std::string emptyString;
23
operator <<(fcitx::LogMessageBuilder & log,PinyinFuzzyFlags fuzzy)24 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
25 PinyinFuzzyFlags fuzzy) {
26 log << fuzzy.toInteger();
27 return log;
28 }
29
operator <<(fcitx::LogMessageBuilder & log,PinyinInitial initial)30 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
31 PinyinInitial initial) {
32 log << PinyinEncoder::initialToString(initial);
33 return log;
34 }
35
operator <<(fcitx::LogMessageBuilder & log,PinyinFinal final)36 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
37 PinyinFinal final) {
38 log << PinyinEncoder::finalToString(final);
39 return log;
40 }
41
operator <<(fcitx::LogMessageBuilder & log,PinyinSyllable syl)42 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
43 PinyinSyllable syl) {
44 log << syl.toString();
45 return log;
46 }
47
48 template <typename L, typename R>
49 boost::bimap<L, R>
makeBimap(std::initializer_list<typename boost::bimap<L,R>::value_type> list)50 makeBimap(std::initializer_list<typename boost::bimap<L, R>::value_type> list) {
51 return boost::bimap<L, R>(list.begin(), list.end());
52 }
53
54 static const auto initialMap = makeBimap<PinyinInitial, std::string>({
55 {PinyinInitial::B, "b"}, {PinyinInitial::P, "p"},
56 {PinyinInitial::M, "m"}, {PinyinInitial::F, "f"},
57 {PinyinInitial::D, "d"}, {PinyinInitial::T, "t"},
58 {PinyinInitial::N, "n"}, {PinyinInitial::L, "l"},
59 {PinyinInitial::G, "g"}, {PinyinInitial::K, "k"},
60 {PinyinInitial::H, "h"}, {PinyinInitial::J, "j"},
61 {PinyinInitial::Q, "q"}, {PinyinInitial::X, "x"},
62 {PinyinInitial::ZH, "zh"}, {PinyinInitial::CH, "ch"},
63 {PinyinInitial::SH, "sh"}, {PinyinInitial::R, "r"},
64 {PinyinInitial::Z, "z"}, {PinyinInitial::C, "c"},
65 {PinyinInitial::S, "s"}, {PinyinInitial::Y, "y"},
66 {PinyinInitial::W, "w"}, {PinyinInitial::Zero, ""},
67 });
68
69 static const auto finalMap = makeBimap<PinyinFinal, std::string>({
70 {PinyinFinal::A, "a"}, {PinyinFinal::AI, "ai"},
71 {PinyinFinal::AN, "an"}, {PinyinFinal::ANG, "ang"},
72 {PinyinFinal::AO, "ao"}, {PinyinFinal::E, "e"},
73 {PinyinFinal::EI, "ei"}, {PinyinFinal::EN, "en"},
74 {PinyinFinal::ENG, "eng"}, {PinyinFinal::ER, "er"},
75 {PinyinFinal::O, "o"}, {PinyinFinal::ONG, "ong"},
76 {PinyinFinal::OU, "ou"}, {PinyinFinal::I, "i"},
77 {PinyinFinal::IA, "ia"}, {PinyinFinal::IE, "ie"},
78 {PinyinFinal::IAO, "iao"}, {PinyinFinal::IU, "iu"},
79 {PinyinFinal::IAN, "ian"}, {PinyinFinal::IN, "in"},
80 {PinyinFinal::IANG, "iang"}, {PinyinFinal::ING, "ing"},
81 {PinyinFinal::IONG, "iong"}, {PinyinFinal::U, "u"},
82 {PinyinFinal::UA, "ua"}, {PinyinFinal::UO, "uo"},
83 {PinyinFinal::UAI, "uai"}, {PinyinFinal::UI, "ui"},
84 {PinyinFinal::UAN, "uan"}, {PinyinFinal::UN, "un"},
85 {PinyinFinal::UANG, "uang"}, {PinyinFinal::V, "v"},
86 {PinyinFinal::UE, "ue"}, {PinyinFinal::VE, "ve"},
87 {PinyinFinal::NG, "ng"}, {PinyinFinal::Zero, ""},
88 });
89
// Upper bound on the number of characters longestMatch() examines at once.
static constexpr int maxPinyinLength = 6;
91
92 template <typename Iter>
longestMatch(Iter iter,Iter end,PinyinFuzzyFlags flags)93 std::pair<std::string_view, bool> longestMatch(Iter iter, Iter end,
94 PinyinFuzzyFlags flags) {
95 if (std::distance(iter, end) > maxPinyinLength) {
96 end = iter + maxPinyinLength;
97 }
98 auto range = std::string_view(&*iter, std::distance(iter, end));
99 const auto &map = getPinyinMap();
100 for (; !range.empty(); range.remove_suffix(1)) {
101 auto iterPair = map.equal_range(range);
102 if (iterPair.first != iterPair.second) {
103 for (const auto &item :
104 boost::make_iterator_range(iterPair.first, iterPair.second)) {
105 if (flags.test(item.flags())) {
106 // do not consider m/n/r as complete pinyin
107 return std::make_pair(
108 range, (range != "m" && range != "n" && range != "r"));
109 }
110 }
111 }
112 if (range.size() <= 2) {
113 auto iter = initialMap.right.find(std::string{range});
114 if (iter != initialMap.right.end()) {
115 return std::make_pair(range, false);
116 }
117 }
118 }
119
120 if (range.empty()) {
121 range = std::string_view(&*iter, 1);
122 }
123
124 return std::make_pair(range, false);
125 }
126
toString() const127 std::string PinyinSyllable::toString() const {
128 return PinyinEncoder::initialToString(initial_) +
129 PinyinEncoder::finalToString(final_);
130 }
131
parseUserPinyin(std::string userPinyin,PinyinFuzzyFlags flags)132 SegmentGraph PinyinEncoder::parseUserPinyin(std::string userPinyin,
133 PinyinFuzzyFlags flags) {
134 SegmentGraph result{std::move(userPinyin)};
135 auto pinyin = result.data();
136 std::transform(pinyin.begin(), pinyin.end(), pinyin.begin(),
137 fcitx::charutils::tolower);
138 auto end = pinyin.end();
139 std::priority_queue<size_t, std::vector<size_t>, std::greater<size_t>> q;
140 q.push(0);
141 while (!q.empty()) {
142 size_t top;
143 do {
144 top = q.top();
145 q.pop();
146 } while (!q.empty() && q.top() == top);
147 if (top >= pinyin.size()) {
148 continue;
149 }
150 auto iter = std::next(pinyin.begin(), top);
151 if (*iter == '\'') {
152 while (*iter == '\'' && iter != pinyin.end()) {
153 iter++;
154 }
155 auto next = std::distance(pinyin.begin(), iter);
156 result.addNext(top, next);
157 if (static_cast<size_t>(next) < pinyin.size()) {
158 q.push(next);
159 }
160 continue;
161 }
162 std::string_view str;
163 bool isCompletePinyin;
164 std::tie(str, isCompletePinyin) = longestMatch(iter, end, flags);
165
166 // it's not complete a pinyin, no need to try
167 if (!isCompletePinyin) {
168 result.addNext(top, top + str.size());
169 q.push(top + str.size());
170 } else {
171 // check fuzzy seg
172 // pinyin may end with aegimnoruv
173 // and may start with abcdefghjklmnopqrstwxyz.
174 // the intersection is aegmnor, while for m, it only 'm', so don't
175 // consider it
176 // also, make sure current pinyin does not end with a separator,
177 // other wise, jin'an may be parsed into ji'n because, nextMatch is
178 // starts with "'".
179 const auto &map = getPinyinMap();
180 std::array<size_t, 2> nextSize;
181 size_t nNextSize = 0;
182 if (str.size() > 1 && top + str.size() < pinyin.size() &&
183 pinyin[top + str.size()] != '\'' &&
184 (str.back() == 'a' || str.back() == 'e' || str.back() == 'g' ||
185 str.back() == 'n' || str.back() == 'o' || str.back() == 'r') &&
186 map.find(str.substr(0, str.size() - 1)) != map.end()) {
187 // str[0:-1] is also a full pinyin, check next pinyin
188 auto nextMatch = longestMatch(iter + str.size(), end, flags);
189 auto nextMatchAlt =
190 longestMatch(iter + str.size() - 1, end, flags);
191 auto matchSize = str.size() + nextMatch.first.size();
192 auto matchSizeAlt = str.size() - 1 + nextMatchAlt.first.size();
193 if (std::make_pair(matchSize, nextMatch.second) >=
194 std::make_pair(matchSizeAlt, nextMatchAlt.second)) {
195 result.addNext(top, top + str.size());
196 q.push(top + str.size());
197 nextSize[nNextSize++] = str.size();
198 }
199 if (std::make_pair(matchSize, nextMatch.second) <=
200 std::make_pair(matchSizeAlt, nextMatchAlt.second)) {
201 result.addNext(top, top + str.size() - 1);
202 q.push(top + str.size() - 1);
203 nextSize[nNextSize++] = str.size() - 1;
204 }
205 } else {
206 result.addNext(top, top + str.size());
207 q.push(top + str.size());
208 nextSize[nNextSize++] = str.size();
209 }
210
211 for (size_t i = 0; i < nNextSize; i++) {
212 if ((nextSize[i] >= 4 && flags.test(PinyinFuzzyFlag::Inner)) ||
213 (nextSize[i] == 3 &&
214 flags.test(PinyinFuzzyFlag::InnerShort))) {
215 const auto &innerSegments = getInnerSegment();
216 auto iter = innerSegments.find(
217 std::string{str.substr(0, nextSize[i])});
218 if (iter != innerSegments.end()) {
219 result.addNext(top, top + iter->second.first.size());
220 result.addNext(top + iter->second.first.size(),
221 top + nextSize[i]);
222 }
223 } else if (nextSize[i] == 2 &&
224 flags.test(PinyinFuzzyFlag::InnerShort) &&
225 str.substr(0, 2) == "ng") {
226 // Handle ng -> n'g, the condition is so simple so we don't
227 // make it go through the inner segment lookup.
228 result.addNext(top, top + 1);
229 result.addNext(top + 1, top + 2);
230 }
231 }
232 }
233 }
234 return result;
235 }
236
parseUserShuangpin(std::string userPinyin,const ShuangpinProfile & sp,PinyinFuzzyFlags flags)237 SegmentGraph PinyinEncoder::parseUserShuangpin(std::string userPinyin,
238 const ShuangpinProfile &sp,
239 PinyinFuzzyFlags flags) {
240 SegmentGraph result{std::move(userPinyin)};
241 auto pinyin = result.data();
242 std::transform(pinyin.begin(), pinyin.end(), pinyin.begin(),
243 fcitx::charutils::tolower);
244
245 // assume user always type valid shuangpin first, if not keep one.
246 size_t i = 0;
247
248 const auto &table = sp.table();
249 while (i < pinyin.size()) {
250 auto start = i;
251 while (pinyin[i] == '\'' && i < pinyin.size()) {
252 i++;
253 }
254 if (start != i) {
255 result.addNext(start, i);
256 continue;
257 }
258 auto initial = pinyin[i];
259 char final = '\0';
260 if (i + 1 < pinyin.size() && pinyin[i + 1] != '\'') {
261 final = pinyin[i + 1];
262 }
263
264 std::string match{initial};
265 if (final) {
266 match.push_back(final);
267 }
268
269 auto longestMatchInTable = [flags](decltype(table) t,
270 const std::string &v) {
271 auto py = v;
272 while (!py.empty()) {
273 auto iter = t.find(py);
274 if (iter != t.end()) {
275 for (const auto &p : iter->second) {
276 if (flags.test(p.second)) {
277 return iter;
278 }
279 }
280 }
281 py.pop_back();
282 }
283 return t.end();
284 };
285
286 auto iter = longestMatchInTable(table, match);
287 if (iter != table.end()) {
288 result.addNext(i, i + iter->first.size());
289 i = i + iter->first.size();
290 } else {
291 result.addNext(i, i + 1);
292 i = i + 1;
293 }
294 }
295
296 return result;
297 }
298
encodeFullPinyin(std::string_view pinyin)299 std::vector<char> PinyinEncoder::encodeFullPinyin(std::string_view pinyin) {
300 std::vector<std::string> pinyins;
301 boost::split(pinyins, pinyin, boost::is_any_of("'"));
302 std::vector<char> result;
303 result.resize(pinyins.size() * 2);
304 int idx = 0;
305 for (const auto &singlePinyin : pinyins) {
306 const auto &map = getPinyinMap();
307 auto iter = map.find(singlePinyin);
308 if (iter == map.end() || iter->flags() != PinyinFuzzyFlag::None) {
309 throw std::invalid_argument("invalid full pinyin: " +
310 std::string{pinyin});
311 }
312 result[idx++] = static_cast<char>(iter->initial());
313 result[idx++] = static_cast<char>(iter->final());
314 }
315
316 return result;
317 }
318
encodeOneUserPinyin(std::string pinyin)319 std::vector<char> PinyinEncoder::encodeOneUserPinyin(std::string pinyin) {
320 if (pinyin.empty()) {
321 return {};
322 }
323 auto graph = parseUserPinyin(std::move(pinyin), PinyinFuzzyFlag::None);
324 std::vector<char> result;
325 const SegmentGraphNode *node = &graph.start(), *prev = nullptr;
326 while (node->nextSize()) {
327 prev = node;
328 node = &node->nexts().front();
329 auto seg = graph.segment(*prev, *node);
330 if (seg.empty() || seg[0] == '\'') {
331 continue;
332 }
333 auto syls = stringToSyllables(seg, PinyinFuzzyFlag::None);
334 if (syls.empty()) {
335 return {};
336 }
337 result.push_back(static_cast<char>(syls[0].first));
338 result.push_back(static_cast<char>(syls[0].second[0].first));
339 }
340 return result;
341 }
342
isValidUserPinyin(const char * data,size_t size)343 bool PinyinEncoder::isValidUserPinyin(const char *data, size_t size) {
344 if (size % 2 != 0) {
345 return false;
346 }
347
348 for (size_t i = 0; i < size / 2; i++) {
349 if (!PinyinEncoder::isValidInitial(data[i * 2])) {
350 return false;
351 }
352 }
353 return true;
354 }
355
decodeFullPinyin(const char * data,size_t size)356 std::string PinyinEncoder::decodeFullPinyin(const char *data, size_t size) {
357 if (size % 2 != 0) {
358 throw std::invalid_argument("invalid pinyin key");
359 }
360 std::string result;
361 for (size_t i = 0, e = size / 2; i < e; i++) {
362 if (i) {
363 result += '\'';
364 }
365 result += initialToString(static_cast<PinyinInitial>(data[i * 2]));
366 result += finalToString(static_cast<PinyinFinal>(data[i * 2 + 1]));
367 }
368 return result;
369 }
370
initialToString(PinyinInitial initial)371 const std::string &PinyinEncoder::initialToString(PinyinInitial initial) {
372 const static std::vector<std::string> s = []() {
373 std::vector<std::string> s;
374 s.resize(lastInitial - firstInitial + 1);
375 for (char c = firstInitial; c <= lastInitial; c++) {
376 auto iter = initialMap.left.find(static_cast<PinyinInitial>(c));
377 s[c - firstInitial] = iter->second;
378 }
379 return s;
380 }();
381 auto c = static_cast<char>(initial);
382 if (c >= firstInitial && c <= lastInitial) {
383 return s[c - firstInitial];
384 }
385 return emptyString;
386 }
387
stringToInitial(const std::string & str)388 PinyinInitial PinyinEncoder::stringToInitial(const std::string &str) {
389 auto iter = initialMap.right.find(str);
390 if (iter != initialMap.right.end()) {
391 return iter->second;
392 }
393 return PinyinInitial::Invalid;
394 }
395
finalToString(PinyinFinal final)396 const std::string &PinyinEncoder::finalToString(PinyinFinal final) {
397 const static std::vector<std::string> s = []() {
398 std::vector<std::string> s;
399 s.resize(lastFinal - firstFinal + 1);
400 for (char c = firstFinal; c <= lastFinal; c++) {
401 auto iter = finalMap.left.find(static_cast<PinyinFinal>(c));
402 s[c - firstFinal] = iter->second;
403 }
404 return s;
405 }();
406 auto c = static_cast<char>(final);
407 if (c >= firstFinal && c <= lastFinal) {
408 return s[c - firstFinal];
409 }
410 return emptyString;
411 }
412
stringToFinal(const std::string & str)413 PinyinFinal PinyinEncoder::stringToFinal(const std::string &str) {
414 auto iter = finalMap.right.find(str);
415 if (iter != finalMap.right.end()) {
416 return iter->second;
417 }
418 return PinyinFinal::Invalid;
419 }
420
isValidInitialFinal(PinyinInitial initial,PinyinFinal final)421 bool PinyinEncoder::isValidInitialFinal(PinyinInitial initial,
422 PinyinFinal final) {
423 if (initial != PinyinInitial::Invalid && final != PinyinFinal::Invalid) {
424 int16_t encode =
425 ((static_cast<int16_t>(initial) - PinyinEncoder::firstInitial) *
426 (PinyinEncoder::lastFinal - PinyinEncoder::firstFinal + 1)) +
427 (static_cast<int16_t>(final) - PinyinEncoder::firstFinal);
428 const auto &a = getEncodedInitialFinal();
429 return encode < static_cast<int>(a.size()) && a[encode];
430 }
431 return false;
432 }
433
initialFinalToPinyinString(PinyinInitial initial,PinyinFinal final)434 std::string PinyinEncoder::initialFinalToPinyinString(PinyinInitial initial,
435 PinyinFinal final) {
436 std::string result = initialToString(initial);
437 std::string finalString;
438 switch (final) {
439 case PinyinFinal::VE:
440 case PinyinFinal::V:
441 if (initial == PinyinInitial::N || initial == PinyinInitial::L) {
442 if (final == PinyinFinal::VE) {
443 finalString = "üe";
444 } else {
445 finalString = "ü";
446 }
447 break;
448 }
449 // FALLTHROUGH
450 default:
451 finalString = finalToString(final);
452 break;
453 }
454 result.append(finalString);
455 return result;
456 }
457
// Add `syl` and the fuzzy variants of it that `flags` allows to `syls`.
//
// `syls` is a list of (initial, list of (final, isFuzzy)) entries. The exact
// syllable is recorded with isFuzzy == false, every variant with
// isFuzzy == true. Initials and finals already present in `syls` are reused
// rather than duplicated.
static void getFuzzy(
    std::vector<std::pair<PinyinInitial,
                          std::vector<std::pair<PinyinFinal, bool>>>> &syls,
    PinyinSyllable syl, PinyinFuzzyFlags flags) {
    // ng/gn is already handled by table
    // Candidate lists start with the exact initial/final at index 0.
    boost::container::static_vector<PinyinInitial, 2> initials{syl.initial()};
    boost::container::static_vector<PinyinFinal, 10> finals{syl.final()};

    // for {s,z,c} we also want them to match {sh,zh,ch}
    // (only when there is no final; the flag is forced on locally regardless
    // of what the caller passed)
    if (syl.final() == PinyinFinal::Invalid) {
        if (syl.initial() == PinyinInitial::C) {
            flags |= PinyinFuzzyFlag::C_CH;
        }
        if (syl.initial() == PinyinInitial::Z) {
            flags |= PinyinFuzzyFlag::Z_ZH;
        }
        if (syl.initial() == PinyinInitial::S) {
            flags |= PinyinFuzzyFlag::S_SH;
        }
    }

    // Pairs of interchangeable initials and the flag enabling each pair.
    const static std::vector<
        std::tuple<PinyinInitial, PinyinInitial, PinyinFuzzyFlag>>
        initialFuzzies = {
            {PinyinInitial::C, PinyinInitial::CH, PinyinFuzzyFlag::C_CH},
            {PinyinInitial::S, PinyinInitial::SH, PinyinFuzzyFlag::S_SH},
            {PinyinInitial::Z, PinyinInitial::ZH, PinyinFuzzyFlag::Z_ZH},
            {PinyinInitial::F, PinyinInitial::H, PinyinFuzzyFlag::F_H},
            {PinyinInitial::L, PinyinInitial::N, PinyinFuzzyFlag::L_N},
        };

    // Add the counterpart of the first enabled pair containing the initial;
    // the break keeps `initials` within its capacity of two.
    for (const auto &initialFuzzy : initialFuzzies) {
        if ((syl.initial() == std::get<0>(initialFuzzy) ||
             syl.initial() == std::get<1>(initialFuzzy)) &&
            flags & std::get<2>(initialFuzzy)) {
            initials.push_back(syl.initial() == std::get<0>(initialFuzzy)
                                   ? std::get<1>(initialFuzzy)
                                   : std::get<0>(initialFuzzy));
            break;
        }
    }

    // Pairs of interchangeable finals and the flag enabling each pair.
    const static std::vector<
        std::tuple<PinyinFinal, PinyinFinal, PinyinFuzzyFlag>>
        finalFuzzies = {
            {PinyinFinal::V, PinyinFinal::U, PinyinFuzzyFlag::V_U},
            {PinyinFinal::AN, PinyinFinal::ANG, PinyinFuzzyFlag::AN_ANG},
            {PinyinFinal::EN, PinyinFinal::ENG, PinyinFuzzyFlag::EN_ENG},
            {PinyinFinal::IAN, PinyinFinal::IANG, PinyinFuzzyFlag::IAN_IANG},
            {PinyinFinal::IN, PinyinFinal::ING, PinyinFuzzyFlag::IN_ING},
            {PinyinFinal::U, PinyinFinal::OU, PinyinFuzzyFlag::U_OU},
            {PinyinFinal::UAN, PinyinFinal::UANG, PinyinFuzzyFlag::UAN_UANG},
            {PinyinFinal::VE, PinyinFinal::UE, PinyinFuzzyFlag::VE_UE},
        };

    // Same as above for finals: only the first enabled matching pair is used.
    for (const auto &finalFuzzy : finalFuzzies) {
        if ((syl.final() == std::get<0>(finalFuzzy) ||
             syl.final() == std::get<1>(finalFuzzy)) &&
            flags & std::get<2>(finalFuzzy)) {
            finals.push_back(syl.final() == std::get<0>(finalFuzzy)
                                 ? std::get<1>(finalFuzzy)
                                 : std::get<0>(finalFuzzy));
            break;
        }
    }

    // "aeo"
    // A lone a/e/o final may stand for any of the longer finals listed here.

    const static std::vector<std::tuple<PinyinFinal, PinyinFinal>>
        partialFinals = {
            {PinyinFinal::A, PinyinFinal::AN},
            {PinyinFinal::A, PinyinFinal::ANG},
            {PinyinFinal::A, PinyinFinal::AI},
            {PinyinFinal::A, PinyinFinal::AO},
            {PinyinFinal::E, PinyinFinal::EI},
            {PinyinFinal::E, PinyinFinal::EN},
            {PinyinFinal::E, PinyinFinal::ENG},
            {PinyinFinal::E, PinyinFinal::ER},
            {PinyinFinal::O, PinyinFinal::OU},
            {PinyinFinal::O, PinyinFinal::ONG},
        };
    // Applied only when the sole initial is Zero and PartialFinal is set.
    if (initials.size() == 1 && initials[0] == PinyinInitial::Zero &&
        flags.test(PinyinFuzzyFlag::PartialFinal)) {
        for (const auto &partialFinal : partialFinals) {
            if (syl.final() == std::get<0>(partialFinal)) {
                finals.push_back(std::get<1>(partialFinal));
            }
        }
    }

    // Emit every initial x final combination. The exact pair (i == 0,
    // j == 0) and pairs with an Invalid final are always accepted; other
    // pairs must pass isValidInitialFinal().
    for (size_t i = 0; i < initials.size(); i++) {
        for (size_t j = 0; j < finals.size(); j++) {
            auto initial = initials[i];
            auto final = finals[j];
            if ((i == 0 && j == 0) || final == PinyinFinal::Invalid ||
                PinyinEncoder::isValidInitialFinal(initial, final)) {
                // Find the output entry for this initial, creating it if
                // it does not exist yet.
                auto iter = std::find_if(
                    syls.begin(), syls.end(),
                    [initial](const auto &p) { return p.first == initial; });
                if (iter == syls.end()) {
                    syls.emplace_back(std::piecewise_construct,
                                      std::forward_as_tuple(initial),
                                      std::forward_as_tuple());
                    iter = std::prev(syls.end());
                }
                // NOTE: this `finals` is the output list of the entry and
                // shadows the candidate list above inside this scope only.
                auto &finals = iter->second;
                if (std::find_if(finals.begin(), finals.end(),
                                 [final](auto &p) {
                                     return p.first == final;
                                 }) == finals.end()) {
                    // Anything but the exact pair is marked as fuzzy.
                    finals.emplace_back(final, i > 0 || j > 0);
                }
            }
        }
    }
}
574
575 MatchedPinyinSyllables
stringToSyllables(std::string_view pinyinView,PinyinFuzzyFlags flags)576 PinyinEncoder::stringToSyllables(std::string_view pinyinView,
577 PinyinFuzzyFlags flags) {
578 std::vector<
579 std::pair<PinyinInitial, std::vector<std::pair<PinyinFinal, bool>>>>
580 result;
581 std::string pinyin(pinyinView);
582 std::transform(pinyin.begin(), pinyin.end(), pinyin.begin(),
583 fcitx::charutils::tolower);
584 const auto &map = getPinyinMap();
585 // we only want {M,N,R}/Invalid instead of {M,N,R}/Zero, so we could get
586 // match for everything.
587 if (pinyin != "m" && pinyin != "n" && pinyin != "r") {
588 auto iterPair = map.equal_range(pinyin);
589 for (const auto &item :
590 boost::make_iterator_range(iterPair.first, iterPair.second)) {
591 if (flags.test(item.flags())) {
592 getFuzzy(result, {item.initial(), item.final()}, flags);
593 }
594 }
595 }
596
597 auto iter = initialMap.right.find(std::string{pinyin});
598 if (initialMap.right.end() != iter) {
599 getFuzzy(result, {iter->second, PinyinFinal::Invalid}, flags);
600 }
601
602 if (result.empty()) {
603 result.emplace_back(
604 std::piecewise_construct,
605 std::forward_as_tuple(PinyinInitial::Invalid),
606 std::forward_as_tuple(1,
607 std::make_pair(PinyinFinal::Invalid, false)));
608 }
609
610 #if 0
611 else {
612 // replace invalid
613 for (auto &p : result) {
614 if (p.second.size() == 1 && p.second[0] == PinyinFinal::Invalid) {
615 p.second.clear();
616 for (char test = PinyinEncoder::firstFinal;
617 test <= PinyinEncoder::lastFinal; test++) {
618 auto final = static_cast<PinyinFinal>(test);
619 if (PinyinEncoder::isValidInitialFinal(p.first, final)) {
620 p.second.push_back(final);
621 }
622 }
623 }
624 }
625 }
626 #endif
627
628 return result;
629 }
630
631 MatchedPinyinSyllables
shuangpinToSyllables(std::string_view pinyinView,const ShuangpinProfile & sp,PinyinFuzzyFlags flags)632 PinyinEncoder::shuangpinToSyllables(std::string_view pinyinView,
633 const ShuangpinProfile &sp,
634 PinyinFuzzyFlags flags) {
635 assert(pinyinView.size() <= 2);
636 std::string pinyin(pinyinView);
637 std::transform(pinyin.begin(), pinyin.end(), pinyin.begin(),
638 fcitx::charutils::tolower);
639 const auto &table = sp.table();
640 auto iter = table.find(pinyin);
641
642 // Don't match partial final if our shuangpin is full size.
643 if (pinyinView.size() > 1) {
644 // This option is somewhat meaningless in full Shuangpin.
645 flags = flags.unset(PinyinFuzzyFlag::PartialFinal);
646 }
647
648 std::vector<
649 std::pair<PinyinInitial, std::vector<std::pair<PinyinFinal, bool>>>>
650 result;
651 if (iter != table.end()) {
652 for (const auto &p : iter->second) {
653 if (flags.test(p.second)) {
654 getFuzzy(result, {p.first.initial(), p.first.final()}, flags);
655 }
656 }
657 }
658
659 if (result.empty()) {
660 result.emplace_back(
661 std::piecewise_construct,
662 std::forward_as_tuple(PinyinInitial::Invalid),
663 std::forward_as_tuple(1,
664 std::make_pair(PinyinFinal::Invalid, false)));
665 }
666
667 return result;
668 }
669
670 std::string
shuangpinToPinyin(std::string_view pinyinView,const libime::ShuangpinProfile & sp)671 PinyinEncoder::shuangpinToPinyin(std::string_view pinyinView,
672 const libime::ShuangpinProfile &sp) {
673 assert(pinyinView.size() <= 2);
674 auto syls = shuangpinToSyllables(pinyinView, sp, PinyinFuzzyFlag::None);
675 if (!syls.empty() && !syls[0].second.empty() && !syls[0].second[0].second) {
676 auto initial = syls[0].first;
677 auto final = syls[0].second[0].first;
678 return initialToString(initial) + finalToString(final);
679 }
680 return "";
681 }
682
683 } // namespace libime
684