1 /*
2  * SPDX-FileCopyrightText: 2018~2018 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  *
6  */
7 #include "jyutpingcontext.h"
8 #include "jyutpingdecoder.h"
9 #include "jyutpingencoder.h"
10 #include "jyutpingime.h"
11 #include "jyutpingmatchstate.h"
12 #include "libime/core/historybigram.h"
13 #include "libime/core/userlanguagemodel.h"
14 #include <algorithm>
15 #include <fcitx-utils/log.h>
16 #include <iostream>
17 
18 namespace libime {
19 namespace jyutping {
20 
21 struct SelectedJyutping {
SelectedJyutpinglibime::jyutping::SelectedJyutping22     SelectedJyutping(size_t s, WordNode word, std::string encodedJyutping)
23         : offset_(s), word_(std::move(word)),
24           encodedJyutping_(std::move(encodedJyutping)) {}
25     size_t offset_;
26     WordNode word_;
27     std::string encodedJyutping_;
28 };
29 
30 class JyutpingContextPrivate {
31 public:
JyutpingContextPrivate(JyutpingContext * q,JyutpingIME * ime)32     JyutpingContextPrivate(JyutpingContext *q, JyutpingIME *ime)
33         : ime_(ime), matchState_(q) {}
34 
35     std::vector<std::vector<SelectedJyutping>> selected_;
36 
37     JyutpingIME *ime_;
38     SegmentGraph segs_;
39     Lattice lattice_;
40     JyutpingMatchState matchState_;
41     std::vector<SentenceResult> candidates_;
42     std::vector<fcitx::ScopedConnection> conn_;
43 };
44 
JyutpingContext(JyutpingIME * ime)45 JyutpingContext::JyutpingContext(JyutpingIME *ime)
46     : InputBuffer(fcitx::InputBufferOption::AsciiOnly),
47       d_ptr(std::make_unique<JyutpingContextPrivate>(this, ime)) {
48     FCITX_D();
49     d->conn_.emplace_back(
50         ime->connect<JyutpingIME::optionChanged>([this]() { clear(); }));
51     d->conn_.emplace_back(
52         ime->dict()->connect<JyutpingDictionary::dictionaryChanged>(
53             [this](size_t) {
54                 FCITX_D();
55                 d->matchState_.clear();
56             }));
57 }
58 
~JyutpingContext()59 JyutpingContext::~JyutpingContext() {}
60 
typeImpl(const char * s,size_t length)61 bool JyutpingContext::typeImpl(const char *s, size_t length) {
62     bool changed = cancelTill(cursor());
63     changed = InputBuffer::typeImpl(s, length) || changed;
64     if (changed) {
65         update();
66     }
67     return changed;
68 }
69 
erase(size_t from,size_t to)70 void JyutpingContext::erase(size_t from, size_t to) {
71     if (from == to) {
72         return;
73     }
74 
75     // check if erase everything
76     if (from == 0 && to >= size()) {
77         FCITX_D();
78         d->candidates_.clear();
79         d->selected_.clear();
80         d->lattice_.clear();
81         d->matchState_.clear();
82         d->segs_ = SegmentGraph();
83     } else {
84         cancelTill(from);
85     }
86     InputBuffer::erase(from, to);
87 
88     if (size()) {
89         update();
90     }
91 }
92 
setCursor(size_t pos)93 void JyutpingContext::setCursor(size_t pos) {
94     auto cancelled = cancelTill(pos);
95     InputBuffer::setCursor(pos);
96     if (cancelled) {
97         update();
98     }
99 }
100 
jyutpingBeforeCursor() const101 int JyutpingContext::jyutpingBeforeCursor() const {
102     FCITX_D();
103     auto len = selectedLength();
104     auto c = cursor();
105     if (c < len) {
106         return -1;
107     }
108     c -= len;
109     if (d->candidates_.size()) {
110         for (auto &s : d->candidates_[0].sentence()) {
111             for (auto iter = s->path().begin(),
112                       end = std::prev(s->path().end());
113                  iter < end; iter++) {
114                 auto from = (*iter)->index(), to = (*std::next(iter))->index();
115                 if (to >= c) {
116                     return from + len;
117                 }
118             }
119         }
120     }
121     return -1;
122 }
123 
jyutpingAfterCursor() const124 int JyutpingContext::jyutpingAfterCursor() const {
125     FCITX_D();
126     auto len = selectedLength();
127     auto c = cursor();
128     if (c < len) {
129         return -1;
130     }
131     c -= len;
132     if (d->candidates_.size()) {
133         for (auto &s : d->candidates_[0].sentence()) {
134             for (auto iter = s->path().begin(),
135                       end = std::prev(s->path().end());
136                  iter < end; iter++) {
137                 auto to = (*std::next(iter))->index();
138                 if (to > c) {
139                     return to + len;
140                 }
141             }
142         }
143     }
144     return -1;
145 }
146 
candidates() const147 const std::vector<SentenceResult> &JyutpingContext::candidates() const {
148     FCITX_D();
149     return d->candidates_;
150 }
151 
select(size_t idx)152 void JyutpingContext::select(size_t idx) {
153     FCITX_D();
154     assert(idx < d->candidates_.size());
155 
156     auto offset = selectedLength();
157 
158     d->selected_.emplace_back();
159 
160     auto &selection = d->selected_.back();
161     for (auto &p : d->candidates_[idx].sentence()) {
162         selection.emplace_back(
163             offset + p->to()->index(),
164             WordNode{p->word(), d->ime_->model()->index(p->word())},
165             static_cast<const JyutpingLatticeNode *>(p)->encodedJyutping());
166     }
167     // add some special code for handling separator at the end
168     auto remain = std::string_view(userInput()).substr(selectedLength());
169     if (!remain.empty()) {
170         if (std::all_of(remain.begin(), remain.end(),
171                         [](char c) { return c == '\''; })) {
172             selection.emplace_back(size(), WordNode("", 0), "");
173         }
174     }
175 
176     update();
177 }
178 
cancelTill(size_t pos)179 bool JyutpingContext::cancelTill(size_t pos) {
180     bool cancelled = false;
181     while (selectedLength() > pos) {
182         cancel();
183         cancelled = true;
184     }
185     return cancelled;
186 }
187 
cancel()188 void JyutpingContext::cancel() {
189     FCITX_D();
190     if (d->selected_.size()) {
191         d->selected_.pop_back();
192     }
193     update();
194 }
195 
state() const196 State JyutpingContext::state() const {
197     FCITX_D();
198     auto model = d->ime_->model();
199     State state = model->nullState();
200     if (d->selected_.size()) {
201         for (auto &s : d->selected_) {
202             for (auto &item : s) {
203                 if (item.word_.word().empty()) {
204                     continue;
205                 }
206                 State temp;
207                 model->score(state, item.word_, temp);
208                 state = std::move(temp);
209             }
210         }
211     }
212     return state;
213 }
214 
update()215 void JyutpingContext::update() {
216     FCITX_D();
217     if (size() == 0) {
218         clear();
219         return;
220     }
221 
222     if (selected()) {
223         d->candidates_.clear();
224     } else {
225         size_t start = 0;
226         auto model = d->ime_->model();
227         State state = model->nullState();
228         if (d->selected_.size()) {
229             start = d->selected_.back().back().offset_;
230 
231             for (auto &s : d->selected_) {
232                 for (auto &item : s) {
233                     if (item.word_.word().empty()) {
234                         continue;
235                     }
236                     State temp;
237                     model->score(state, item.word_, temp);
238                     state = std::move(temp);
239                 }
240             }
241         }
242         SegmentGraph newGraph = JyutpingEncoder::parseUserJyutping(
243             userInput().substr(start), d->ime_->innerSegment());
244         d->segs_.merge(
245             newGraph,
246             [d](const std::unordered_set<const SegmentGraphNode *> &nodes) {
247                 d->lattice_.discardNode(nodes);
248                 d->matchState_.discardNode(nodes);
249             });
250         assert(d->segs_.checkGraph());
251 
252         auto &graph = d->segs_;
253 
254         d->ime_->decoder()->decode(d->lattice_, d->segs_, d->ime_->nbest(),
255                                    state, d->ime_->maxDistance(),
256                                    d->ime_->minPath(), d->ime_->beamSize(),
257                                    d->ime_->frameSize(), &d->matchState_);
258 
259         d->candidates_.clear();
260         std::unordered_set<std::string> dup;
261         for (size_t i = 0, e = d->lattice_.sentenceSize(); i < e; i++) {
262             d->candidates_.push_back(d->lattice_.sentence(i));
263             dup.insert(d->candidates_.back().toString());
264         }
265 
266         auto bos = &graph.start();
267 
268         auto beginSize = d->candidates_.size();
269         for (size_t i = graph.size(); i > 0; i--) {
270             float min = 0;
271             float max = -std::numeric_limits<float>::max();
272             auto distancePenalty = d->ime_->model()->unknownPenalty() / 3;
273             for (auto &graphNode : graph.nodes(i)) {
274                 auto distance = graph.distanceToEnd(graphNode);
275                 auto adjust = static_cast<float>(distance) * distancePenalty;
276                 for (auto &latticeNode : d->lattice_.nodes(&graphNode)) {
277                     if (latticeNode.from() == bos) {
278                         if (!d->ime_->model()->isNodeUnknown(latticeNode)) {
279                             if (latticeNode.score() < min) {
280                                 min = latticeNode.score();
281                             }
282                             if (latticeNode.score() > max) {
283                                 max = latticeNode.score();
284                             }
285                         }
286                         if (dup.count(latticeNode.word())) {
287                             continue;
288                         }
289                         d->candidates_.push_back(
290                             latticeNode.toSentenceResult(adjust));
291                         dup.insert(latticeNode.word());
292                     }
293                 }
294             }
295             for (auto &graphNode : graph.nodes(i)) {
296                 auto distance = graph.distanceToEnd(graphNode);
297                 auto adjust = static_cast<float>(distance) * distancePenalty;
298                 for (auto &latticeNode : d->lattice_.nodes(&graphNode)) {
299                     if (latticeNode.from() != bos &&
300                         latticeNode.score() > min &&
301                         latticeNode.score() + d->ime_->maxDistance() > max) {
302                         auto fullWord = latticeNode.fullWord();
303                         if (dup.count(fullWord)) {
304                             continue;
305                         }
306                         d->candidates_.push_back(
307                             latticeNode.toSentenceResult(adjust));
308                     }
309                 }
310             }
311         }
312         std::sort(d->candidates_.begin() + beginSize, d->candidates_.end(),
313                   std::greater<SentenceResult>());
314     }
315 
316     if (cursor() < selectedLength()) {
317         setCursor(selectedLength());
318     }
319 }
320 
selected() const321 bool JyutpingContext::selected() const {
322     FCITX_D();
323     if (userInput().empty()) {
324         return false;
325     }
326 
327     if (d->selected_.size()) {
328         if (d->selected_.back().back().offset_ == size()) {
329             return true;
330         }
331     }
332 
333     return false;
334 }
335 
selectedSentence() const336 std::string JyutpingContext::selectedSentence() const {
337     FCITX_D();
338     std::string ss;
339     for (auto &s : d->selected_) {
340         for (auto &item : s) {
341             ss += item.word_.word();
342         }
343     }
344     return ss;
345 }
346 
selectedLength() const347 size_t JyutpingContext::selectedLength() const {
348     FCITX_D();
349     if (d->selected_.size()) {
350         return d->selected_.back().back().offset_;
351     }
352     return 0;
353 }
354 
preedit() const355 std::string JyutpingContext::preedit() const {
356     return preeditWithCursor().first;
357 }
358 
preeditWithCursor() const359 std::pair<std::string, size_t> JyutpingContext::preeditWithCursor() const {
360     FCITX_D();
361     std::string ss = selectedSentence();
362     auto len = selectedLength();
363     auto c = cursor();
364     size_t actualCursor = ss.size();
365     // should not happen
366     if (c < len) {
367         c = len;
368     }
369 
370     auto resultSize = ss.size();
371 
372     if (d->candidates_.size()) {
373         bool first = true;
374         for (auto &s : d->candidates_[0].sentence()) {
375             for (auto iter = s->path().begin(),
376                       end = std::prev(s->path().end());
377                  iter < end; iter++) {
378                 if (!first) {
379                     ss += " ";
380                     resultSize += 1;
381                 } else {
382                     first = false;
383                 }
384                 auto from = (*iter)->index(), to = (*std::next(iter))->index();
385                 if (c >= from + len && c < to + len) {
386                     actualCursor = resultSize + c - from - len;
387                 }
388                 auto jyutping = d->segs_.segment(from, to);
389                 ss.append(jyutping.data(), jyutping.size());
390                 resultSize += jyutping.size();
391             }
392         }
393     }
394     if (c == size()) {
395         actualCursor = resultSize;
396     }
397     return {ss, actualCursor};
398 }
399 
selectedWords() const400 std::vector<std::string> JyutpingContext::selectedWords() const {
401     FCITX_D();
402     std::vector<std::string> newSentence;
403     for (auto &s : d->selected_) {
404         for (auto &item : s) {
405             if (!item.word_.word().empty()) {
406                 newSentence.push_back(item.word_.word());
407             }
408         }
409     }
410     return newSentence;
411 }
412 
selectedFullJyutping() const413 std::string JyutpingContext::selectedFullJyutping() const {
414     FCITX_D();
415     std::string jyutping;
416     for (auto &s : d->selected_) {
417         for (auto &item : s) {
418             if (!item.word_.word().empty()) {
419                 if (!jyutping.empty()) {
420                     jyutping.push_back('\'');
421                 }
422                 jyutping +=
423                     JyutpingEncoder::decodeFullJyutping(item.encodedJyutping_);
424             }
425         }
426     }
427     return jyutping;
428 }
429 
candidateFullJyutping(size_t idx) const430 std::string JyutpingContext::candidateFullJyutping(size_t idx) const {
431     FCITX_D();
432     std::string jyutping;
433     for (auto &p : d->candidates_[idx].sentence()) {
434         if (!p->word().empty()) {
435             if (!jyutping.empty()) {
436                 jyutping.push_back('\'');
437             }
438             jyutping += JyutpingEncoder::decodeFullJyutping(
439                 static_cast<const JyutpingLatticeNode *>(p)->encodedJyutping());
440         }
441     }
442     return jyutping;
443 }
444 
learn()445 void JyutpingContext::learn() {
446     FCITX_D();
447     if (!selected()) {
448         return;
449     }
450 
451     if (learnWord()) {
452         std::vector<std::string> newSentence{sentence()};
453         d->ime_->model()->history().add(newSentence);
454     } else {
455         std::vector<std::string> newSentence;
456         for (auto &s : d->selected_) {
457             for (auto &item : s) {
458                 if (!item.word_.word().empty()) {
459                     newSentence.push_back(item.word_.word());
460                 }
461             }
462         }
463         d->ime_->model()->history().add(newSentence);
464     }
465 }
466 
learnWord()467 bool JyutpingContext::learnWord() {
468     FCITX_D();
469     std::string ss;
470     std::string jyutping;
471     if (d->selected_.empty()) {
472         return false;
473     }
474     // don't learn single character.
475     if (d->selected_.size() == 1 && d->selected_[0].size() == 1) {
476         return false;
477     }
478     for (auto &s : d->selected_) {
479         bool first = true;
480         for (auto &item : s) {
481             if (!item.word_.word().empty()) {
482                 if (item.encodedJyutping_.size() != 2) {
483                     return false;
484                 }
485                 if (first) {
486                     first = false;
487                     ss += item.word_.word();
488                     if (!jyutping.empty()) {
489                         jyutping.push_back('\'');
490                     }
491                     jyutping += JyutpingEncoder::decodeFullJyutping(
492                         item.encodedJyutping_);
493                 } else {
494                     return false;
495                 }
496             }
497         }
498     }
499 
500     d->ime_->dict()->addWord(JyutpingDictionary::UserDict, jyutping, ss);
501 
502     return true;
503 }
504 
ime() const505 JyutpingIME *JyutpingContext::ime() const {
506     FCITX_D();
507     return d->ime_;
508 }
509 
510 } // namespace jyutping
511 } // namespace libime
512