1 /*
2 * SPDX-FileCopyrightText: 2018~2018 CSSlayer <wengxt@gmail.com>
3 *
4 * SPDX-License-Identifier: LGPL-2.1-or-later
5 *
6 */
7 #include "jyutpingcontext.h"
8 #include "jyutpingdecoder.h"
9 #include "jyutpingencoder.h"
10 #include "jyutpingime.h"
11 #include "jyutpingmatchstate.h"
12 #include "libime/core/historybigram.h"
13 #include "libime/core/userlanguagemodel.h"
14 #include <algorithm>
15 #include <fcitx-utils/log.h>
16 #include <iostream>
17
18 namespace libime {
19 namespace jyutping {
20
21 struct SelectedJyutping {
SelectedJyutpinglibime::jyutping::SelectedJyutping22 SelectedJyutping(size_t s, WordNode word, std::string encodedJyutping)
23 : offset_(s), word_(std::move(word)),
24 encodedJyutping_(std::move(encodedJyutping)) {}
25 size_t offset_;
26 WordNode word_;
27 std::string encodedJyutping_;
28 };
29
30 class JyutpingContextPrivate {
31 public:
JyutpingContextPrivate(JyutpingContext * q,JyutpingIME * ime)32 JyutpingContextPrivate(JyutpingContext *q, JyutpingIME *ime)
33 : ime_(ime), matchState_(q) {}
34
35 std::vector<std::vector<SelectedJyutping>> selected_;
36
37 JyutpingIME *ime_;
38 SegmentGraph segs_;
39 Lattice lattice_;
40 JyutpingMatchState matchState_;
41 std::vector<SentenceResult> candidates_;
42 std::vector<fcitx::ScopedConnection> conn_;
43 };
44
JyutpingContext(JyutpingIME * ime)45 JyutpingContext::JyutpingContext(JyutpingIME *ime)
46 : InputBuffer(fcitx::InputBufferOption::AsciiOnly),
47 d_ptr(std::make_unique<JyutpingContextPrivate>(this, ime)) {
48 FCITX_D();
49 d->conn_.emplace_back(
50 ime->connect<JyutpingIME::optionChanged>([this]() { clear(); }));
51 d->conn_.emplace_back(
52 ime->dict()->connect<JyutpingDictionary::dictionaryChanged>(
53 [this](size_t) {
54 FCITX_D();
55 d->matchState_.clear();
56 }));
57 }
58
~JyutpingContext()59 JyutpingContext::~JyutpingContext() {}
60
typeImpl(const char * s,size_t length)61 bool JyutpingContext::typeImpl(const char *s, size_t length) {
62 bool changed = cancelTill(cursor());
63 changed = InputBuffer::typeImpl(s, length) || changed;
64 if (changed) {
65 update();
66 }
67 return changed;
68 }
69
erase(size_t from,size_t to)70 void JyutpingContext::erase(size_t from, size_t to) {
71 if (from == to) {
72 return;
73 }
74
75 // check if erase everything
76 if (from == 0 && to >= size()) {
77 FCITX_D();
78 d->candidates_.clear();
79 d->selected_.clear();
80 d->lattice_.clear();
81 d->matchState_.clear();
82 d->segs_ = SegmentGraph();
83 } else {
84 cancelTill(from);
85 }
86 InputBuffer::erase(from, to);
87
88 if (size()) {
89 update();
90 }
91 }
92
setCursor(size_t pos)93 void JyutpingContext::setCursor(size_t pos) {
94 auto cancelled = cancelTill(pos);
95 InputBuffer::setCursor(pos);
96 if (cancelled) {
97 update();
98 }
99 }
100
jyutpingBeforeCursor() const101 int JyutpingContext::jyutpingBeforeCursor() const {
102 FCITX_D();
103 auto len = selectedLength();
104 auto c = cursor();
105 if (c < len) {
106 return -1;
107 }
108 c -= len;
109 if (d->candidates_.size()) {
110 for (auto &s : d->candidates_[0].sentence()) {
111 for (auto iter = s->path().begin(),
112 end = std::prev(s->path().end());
113 iter < end; iter++) {
114 auto from = (*iter)->index(), to = (*std::next(iter))->index();
115 if (to >= c) {
116 return from + len;
117 }
118 }
119 }
120 }
121 return -1;
122 }
123
jyutpingAfterCursor() const124 int JyutpingContext::jyutpingAfterCursor() const {
125 FCITX_D();
126 auto len = selectedLength();
127 auto c = cursor();
128 if (c < len) {
129 return -1;
130 }
131 c -= len;
132 if (d->candidates_.size()) {
133 for (auto &s : d->candidates_[0].sentence()) {
134 for (auto iter = s->path().begin(),
135 end = std::prev(s->path().end());
136 iter < end; iter++) {
137 auto to = (*std::next(iter))->index();
138 if (to > c) {
139 return to + len;
140 }
141 }
142 }
143 }
144 return -1;
145 }
146
candidates() const147 const std::vector<SentenceResult> &JyutpingContext::candidates() const {
148 FCITX_D();
149 return d->candidates_;
150 }
151
select(size_t idx)152 void JyutpingContext::select(size_t idx) {
153 FCITX_D();
154 assert(idx < d->candidates_.size());
155
156 auto offset = selectedLength();
157
158 d->selected_.emplace_back();
159
160 auto &selection = d->selected_.back();
161 for (auto &p : d->candidates_[idx].sentence()) {
162 selection.emplace_back(
163 offset + p->to()->index(),
164 WordNode{p->word(), d->ime_->model()->index(p->word())},
165 static_cast<const JyutpingLatticeNode *>(p)->encodedJyutping());
166 }
167 // add some special code for handling separator at the end
168 auto remain = std::string_view(userInput()).substr(selectedLength());
169 if (!remain.empty()) {
170 if (std::all_of(remain.begin(), remain.end(),
171 [](char c) { return c == '\''; })) {
172 selection.emplace_back(size(), WordNode("", 0), "");
173 }
174 }
175
176 update();
177 }
178
cancelTill(size_t pos)179 bool JyutpingContext::cancelTill(size_t pos) {
180 bool cancelled = false;
181 while (selectedLength() > pos) {
182 cancel();
183 cancelled = true;
184 }
185 return cancelled;
186 }
187
cancel()188 void JyutpingContext::cancel() {
189 FCITX_D();
190 if (d->selected_.size()) {
191 d->selected_.pop_back();
192 }
193 update();
194 }
195
state() const196 State JyutpingContext::state() const {
197 FCITX_D();
198 auto model = d->ime_->model();
199 State state = model->nullState();
200 if (d->selected_.size()) {
201 for (auto &s : d->selected_) {
202 for (auto &item : s) {
203 if (item.word_.word().empty()) {
204 continue;
205 }
206 State temp;
207 model->score(state, item.word_, temp);
208 state = std::move(temp);
209 }
210 }
211 }
212 return state;
213 }
214
update()215 void JyutpingContext::update() {
216 FCITX_D();
217 if (size() == 0) {
218 clear();
219 return;
220 }
221
222 if (selected()) {
223 d->candidates_.clear();
224 } else {
225 size_t start = 0;
226 auto model = d->ime_->model();
227 State state = model->nullState();
228 if (d->selected_.size()) {
229 start = d->selected_.back().back().offset_;
230
231 for (auto &s : d->selected_) {
232 for (auto &item : s) {
233 if (item.word_.word().empty()) {
234 continue;
235 }
236 State temp;
237 model->score(state, item.word_, temp);
238 state = std::move(temp);
239 }
240 }
241 }
242 SegmentGraph newGraph = JyutpingEncoder::parseUserJyutping(
243 userInput().substr(start), d->ime_->innerSegment());
244 d->segs_.merge(
245 newGraph,
246 [d](const std::unordered_set<const SegmentGraphNode *> &nodes) {
247 d->lattice_.discardNode(nodes);
248 d->matchState_.discardNode(nodes);
249 });
250 assert(d->segs_.checkGraph());
251
252 auto &graph = d->segs_;
253
254 d->ime_->decoder()->decode(d->lattice_, d->segs_, d->ime_->nbest(),
255 state, d->ime_->maxDistance(),
256 d->ime_->minPath(), d->ime_->beamSize(),
257 d->ime_->frameSize(), &d->matchState_);
258
259 d->candidates_.clear();
260 std::unordered_set<std::string> dup;
261 for (size_t i = 0, e = d->lattice_.sentenceSize(); i < e; i++) {
262 d->candidates_.push_back(d->lattice_.sentence(i));
263 dup.insert(d->candidates_.back().toString());
264 }
265
266 auto bos = &graph.start();
267
268 auto beginSize = d->candidates_.size();
269 for (size_t i = graph.size(); i > 0; i--) {
270 float min = 0;
271 float max = -std::numeric_limits<float>::max();
272 auto distancePenalty = d->ime_->model()->unknownPenalty() / 3;
273 for (auto &graphNode : graph.nodes(i)) {
274 auto distance = graph.distanceToEnd(graphNode);
275 auto adjust = static_cast<float>(distance) * distancePenalty;
276 for (auto &latticeNode : d->lattice_.nodes(&graphNode)) {
277 if (latticeNode.from() == bos) {
278 if (!d->ime_->model()->isNodeUnknown(latticeNode)) {
279 if (latticeNode.score() < min) {
280 min = latticeNode.score();
281 }
282 if (latticeNode.score() > max) {
283 max = latticeNode.score();
284 }
285 }
286 if (dup.count(latticeNode.word())) {
287 continue;
288 }
289 d->candidates_.push_back(
290 latticeNode.toSentenceResult(adjust));
291 dup.insert(latticeNode.word());
292 }
293 }
294 }
295 for (auto &graphNode : graph.nodes(i)) {
296 auto distance = graph.distanceToEnd(graphNode);
297 auto adjust = static_cast<float>(distance) * distancePenalty;
298 for (auto &latticeNode : d->lattice_.nodes(&graphNode)) {
299 if (latticeNode.from() != bos &&
300 latticeNode.score() > min &&
301 latticeNode.score() + d->ime_->maxDistance() > max) {
302 auto fullWord = latticeNode.fullWord();
303 if (dup.count(fullWord)) {
304 continue;
305 }
306 d->candidates_.push_back(
307 latticeNode.toSentenceResult(adjust));
308 }
309 }
310 }
311 }
312 std::sort(d->candidates_.begin() + beginSize, d->candidates_.end(),
313 std::greater<SentenceResult>());
314 }
315
316 if (cursor() < selectedLength()) {
317 setCursor(selectedLength());
318 }
319 }
320
selected() const321 bool JyutpingContext::selected() const {
322 FCITX_D();
323 if (userInput().empty()) {
324 return false;
325 }
326
327 if (d->selected_.size()) {
328 if (d->selected_.back().back().offset_ == size()) {
329 return true;
330 }
331 }
332
333 return false;
334 }
335
selectedSentence() const336 std::string JyutpingContext::selectedSentence() const {
337 FCITX_D();
338 std::string ss;
339 for (auto &s : d->selected_) {
340 for (auto &item : s) {
341 ss += item.word_.word();
342 }
343 }
344 return ss;
345 }
346
selectedLength() const347 size_t JyutpingContext::selectedLength() const {
348 FCITX_D();
349 if (d->selected_.size()) {
350 return d->selected_.back().back().offset_;
351 }
352 return 0;
353 }
354
preedit() const355 std::string JyutpingContext::preedit() const {
356 return preeditWithCursor().first;
357 }
358
preeditWithCursor() const359 std::pair<std::string, size_t> JyutpingContext::preeditWithCursor() const {
360 FCITX_D();
361 std::string ss = selectedSentence();
362 auto len = selectedLength();
363 auto c = cursor();
364 size_t actualCursor = ss.size();
365 // should not happen
366 if (c < len) {
367 c = len;
368 }
369
370 auto resultSize = ss.size();
371
372 if (d->candidates_.size()) {
373 bool first = true;
374 for (auto &s : d->candidates_[0].sentence()) {
375 for (auto iter = s->path().begin(),
376 end = std::prev(s->path().end());
377 iter < end; iter++) {
378 if (!first) {
379 ss += " ";
380 resultSize += 1;
381 } else {
382 first = false;
383 }
384 auto from = (*iter)->index(), to = (*std::next(iter))->index();
385 if (c >= from + len && c < to + len) {
386 actualCursor = resultSize + c - from - len;
387 }
388 auto jyutping = d->segs_.segment(from, to);
389 ss.append(jyutping.data(), jyutping.size());
390 resultSize += jyutping.size();
391 }
392 }
393 }
394 if (c == size()) {
395 actualCursor = resultSize;
396 }
397 return {ss, actualCursor};
398 }
399
selectedWords() const400 std::vector<std::string> JyutpingContext::selectedWords() const {
401 FCITX_D();
402 std::vector<std::string> newSentence;
403 for (auto &s : d->selected_) {
404 for (auto &item : s) {
405 if (!item.word_.word().empty()) {
406 newSentence.push_back(item.word_.word());
407 }
408 }
409 }
410 return newSentence;
411 }
412
selectedFullJyutping() const413 std::string JyutpingContext::selectedFullJyutping() const {
414 FCITX_D();
415 std::string jyutping;
416 for (auto &s : d->selected_) {
417 for (auto &item : s) {
418 if (!item.word_.word().empty()) {
419 if (!jyutping.empty()) {
420 jyutping.push_back('\'');
421 }
422 jyutping +=
423 JyutpingEncoder::decodeFullJyutping(item.encodedJyutping_);
424 }
425 }
426 }
427 return jyutping;
428 }
429
candidateFullJyutping(size_t idx) const430 std::string JyutpingContext::candidateFullJyutping(size_t idx) const {
431 FCITX_D();
432 std::string jyutping;
433 for (auto &p : d->candidates_[idx].sentence()) {
434 if (!p->word().empty()) {
435 if (!jyutping.empty()) {
436 jyutping.push_back('\'');
437 }
438 jyutping += JyutpingEncoder::decodeFullJyutping(
439 static_cast<const JyutpingLatticeNode *>(p)->encodedJyutping());
440 }
441 }
442 return jyutping;
443 }
444
learn()445 void JyutpingContext::learn() {
446 FCITX_D();
447 if (!selected()) {
448 return;
449 }
450
451 if (learnWord()) {
452 std::vector<std::string> newSentence{sentence()};
453 d->ime_->model()->history().add(newSentence);
454 } else {
455 std::vector<std::string> newSentence;
456 for (auto &s : d->selected_) {
457 for (auto &item : s) {
458 if (!item.word_.word().empty()) {
459 newSentence.push_back(item.word_.word());
460 }
461 }
462 }
463 d->ime_->model()->history().add(newSentence);
464 }
465 }
466
learnWord()467 bool JyutpingContext::learnWord() {
468 FCITX_D();
469 std::string ss;
470 std::string jyutping;
471 if (d->selected_.empty()) {
472 return false;
473 }
474 // don't learn single character.
475 if (d->selected_.size() == 1 && d->selected_[0].size() == 1) {
476 return false;
477 }
478 for (auto &s : d->selected_) {
479 bool first = true;
480 for (auto &item : s) {
481 if (!item.word_.word().empty()) {
482 if (item.encodedJyutping_.size() != 2) {
483 return false;
484 }
485 if (first) {
486 first = false;
487 ss += item.word_.word();
488 if (!jyutping.empty()) {
489 jyutping.push_back('\'');
490 }
491 jyutping += JyutpingEncoder::decodeFullJyutping(
492 item.encodedJyutping_);
493 } else {
494 return false;
495 }
496 }
497 }
498 }
499
500 d->ime_->dict()->addWord(JyutpingDictionary::UserDict, jyutping, ss);
501
502 return true;
503 }
504
ime() const505 JyutpingIME *JyutpingContext::ime() const {
506 FCITX_D();
507 return d->ime_;
508 }
509
510 } // namespace jyutping
511 } // namespace libime
512