1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  *  SPDX-FileCopyrightText: 2005 Takuro Ashie
4  *  SPDX-FileCopyrightText: 2012 CSSlayer <wengxt@gmail.com>
5  *
6  *  SPDX-License-Identifier: GPL-2.0-or-later
7  */
8 
9 #include "reading.h"
10 #include "engine.h"
11 #include "state.h"
12 #include "utils.h"
13 #include <fcitx-utils/utf8.h>
14 
ReadingSegment()15 ReadingSegment::ReadingSegment() {}
16 
~ReadingSegment()17 ReadingSegment::~ReadingSegment() {}
18 
find_romaji(std::string c)19 static const char *find_romaji(std::string c) {
20     ConvRule *table = fcitx_anthy_romaji_typing_rule;
21 
22     for (unsigned int i = 0; table[i].string; i++) {
23         std::string kana = table[i].result;
24         if (c == kana)
25             return table[i].string;
26     }
27 
28     return "";
29 }
30 
to_half(std::string & dest,std::string & src)31 static void to_half(std::string &dest, std::string &src) {
32     WideRule *table = fcitx_anthy_wide_table;
33 
34     for (unsigned int i = 0; i < fcitx::utf8::length(src); i++) {
35         bool found = false;
36         std::string kana1 = util::utf8_string_substr(src, i, 1);
37         for (unsigned int i = 0; table[i].code; i++) {
38             std::string kana2 = table[i].wide;
39             if (kana1 == kana2) {
40                 dest += table[i].code;
41                 found = true;
42                 break;
43             }
44         }
45         if (!found)
46             dest += kana1;
47     }
48 }
49 
50 // Only a romaji string can be splited with raw key string.
51 // Other typing method aren't supported splitting raw key string.
split(ReadingSegments & segments)52 void ReadingSegment::split(ReadingSegments &segments) {
53     if (fcitx::utf8::length(kana) <= 1)
54         segments.push_back(*this);
55 
56     std::string half;
57     to_half(half, kana);
58     bool same_with_raw = half == raw;
59 
60     std::string::iterator it;
61     for (unsigned int i = 0; i < fcitx::utf8::length(kana); i++) {
62         std::string c = util::utf8_string_substr(kana, i, 1);
63         ReadingSegment seg;
64         seg.kana = c;
65         if (same_with_raw)
66             to_half(seg.raw, c);
67         else
68             seg.raw = find_romaji(c);
69         segments.push_back(seg);
70     }
71 }
72 
// Construct an empty reading bound to `anthy`.
//
// The three convertors are created here: key2kanaNormal_ works on
// key2kanaTables_, nicola_ works on nicolaTables_, and kana_ only takes the
// state. key2kana_ initially points at key2kanaNormal_, and the caret starts
// at segment 0 with no offset.
Reading::Reading(AnthyState &anthy)
    : state_(anthy), key2kanaNormal_(anthy, key2kanaTables_), kana_(anthy),
      nicola_(anthy, nicolaTables_), key2kana_(&key2kanaNormal_),
      segmentPos_(0), caretOffset_(0) {
    // The table set handed to nicola_ above is switched to the NICOLA method.
    nicolaTables_.setTypingMethod(TypingMethod::NICOLA);
}
79 
~Reading()80 Reading::~Reading() {}
81 
canProcesKeyEvent(const fcitx::KeyEvent & key)82 bool Reading::canProcesKeyEvent(const fcitx::KeyEvent &key) {
83     if (kana_.canAppend(key))
84         return true;
85 
86     return key2kana_->canAppend(key);
87 }
88 
processKeyEvent(const fcitx::KeyEvent & key)89 bool Reading::processKeyEvent(const fcitx::KeyEvent &key) {
90     if (!canProcesKeyEvent(key))
91         return false;
92 
93     if (caretOffset_ != 0) {
94         splitSegment(segmentPos_);
95         resetPending();
96     }
97 
98     bool was_pending;
99     if (kana_.canAppend(key))
100         was_pending = kana_.isPending();
101     else
102         was_pending = key2kana_->isPending();
103 
104     std::string raw;
105     std::string result, pending;
106     bool need_commiting;
107     if (kana_.canAppend(key))
108         need_commiting = kana_.append(key, result, pending, raw);
109     else
110         need_commiting = key2kana_->append(key, result, pending, raw);
111 
112     ReadingSegments::iterator begin = segments_.begin();
113 
114     // fix previous segment and prepare next segment if needed
115     if (!result.empty() || !pending.empty()) {
116         if (!was_pending || // previous segment was already fixed
117             need_commiting) // previous segment has been fixed
118         {
119             ReadingSegment c;
120             segments_.insert(begin + segmentPos_, c);
121             segmentPos_++;
122         }
123     }
124 
125     // fill segment
126     if (!result.empty() && !pending.empty()) {
127         segments_[segmentPos_ - 1].kana = result;
128 
129         ReadingSegment c;
130         c.raw += raw;
131         c.kana = pending;
132         segments_.insert(begin + segmentPos_, c);
133         segmentPos_++;
134 
135     } else if (!result.empty()) {
136         segments_[segmentPos_ - 1].raw += raw;
137         segments_[segmentPos_ - 1].kana = result;
138 
139     } else if (!pending.empty()) {
140         segments_[segmentPos_ - 1].raw += raw;
141         segments_[segmentPos_ - 1].kana = pending;
142 
143     } else {
144     }
145 
146     return false;
147 }
148 
finish()149 void Reading::finish() {
150     if (!key2kana_->isPending())
151         return;
152 
153     std::string result = key2kana_->flushPending();
154     if (!result.empty()) {
155         segments_[segmentPos_ - 1].kana = result;
156     }
157 }
158 
// Reset the reading to its empty state: clear all three convertors, drop
// every segment and move the caret back to the very beginning.
void Reading::clear() {
    key2kanaNormal_.clear();
    kana_.clear();
    nicola_.clear();
    segments_.clear();
    segmentPos_ = 0;
    caretOffset_ = 0;
}
167 
getByChar(unsigned int start,int len,StringType type)168 std::string Reading::getByChar(unsigned int start, int len, StringType type) {
169     std::string str;
170     unsigned int pos = 0, end = len > 0 ? start + len : utf8Length() - start;
171     std::string kana;
172     std::string raw;
173 
174     if (start >= end)
175         return str;
176     if (start >= utf8Length())
177         return str;
178 
179     switch (type) {
180     case FCITX_ANTHY_STRING_LATIN:
181         raw = getRawByChar(start, len);
182         str = raw;
183         return str;
184 
185     case FCITX_ANTHY_STRING_WIDE_LATIN:
186         raw = getRawByChar(start, len);
187         str = util::convert_to_wide(raw);
188         return str;
189 
190     default:
191         break;
192     }
193 
194     for (unsigned int i = 0; i < segments_.size(); i++) {
195         if (pos >= start ||
196             pos + fcitx::utf8::length(segments_[i].kana) > start) {
197             unsigned int startstart = 0, len;
198 
199             if (pos >= start)
200                 startstart = 0;
201             else
202                 startstart = pos - start;
203 
204             if (pos + fcitx::utf8::length(segments_[i].kana) > end)
205                 len = end - start;
206             else
207                 len = fcitx::utf8::length(segments_[i].kana);
208 
209             kana +=
210                 util::utf8_string_substr(segments_[i].kana, startstart, len);
211         }
212 
213         pos += fcitx::utf8::length(segments_[i].kana);
214         if (pos >= end)
215             break;
216     }
217 
218     switch (type) {
219     case FCITX_ANTHY_STRING_HIRAGANA:
220         str = kana;
221         break;
222 
223     case FCITX_ANTHY_STRING_KATAKANA:
224         str = util::convert_to_katakana(kana);
225         break;
226 
227     case FCITX_ANTHY_STRING_HALF_KATAKANA:
228         str = util::convert_to_katakana(kana, true);
229         break;
230 
231     default:
232         break;
233     }
234 
235     return str;
236 }
237 
getRawByChar(unsigned int start,int len)238 std::string Reading::getRawByChar(unsigned int start, int len) {
239     std::string str;
240     unsigned int pos = 0, end = len > 0 ? start + len : utf8Length() - start;
241 
242     if (start >= end)
243         return str;
244 
245     for (unsigned int i = 0; i < segments_.size(); i++) {
246         if (pos >= start ||
247             pos + fcitx::utf8::length(segments_[i].kana) > start) {
248             // FIXME!
249             str += segments_[i].raw;
250         }
251 
252         pos += fcitx::utf8::length(segments_[i].kana);
253 
254         if (pos >= end)
255             break;
256     }
257 
258     return str;
259 }
260 
splitSegment(unsigned int seg_id)261 void Reading::splitSegment(unsigned int seg_id) {
262     if (seg_id >= segments_.size())
263         return;
264 
265     unsigned int pos = 0;
266     for (unsigned int i = 0; i < seg_id && i < segments_.size(); i++)
267         pos += segments_[i].kana.length();
268 
269     unsigned int caret = caretPos();
270     unsigned int seg_len = segments_[seg_id].kana.length();
271     bool caret_was_in_the_segment = false;
272     if (caret > pos && caret < pos + seg_len)
273         caret_was_in_the_segment = true;
274 
275     ReadingSegments segments;
276     segments_[seg_id].split(segments);
277     segments_.erase(segments_.begin() + seg_id);
278     for (int j = segments.size() - 1; j >= 0; j--) {
279         segments_.insert(segments_.begin() + seg_id, segments[j]);
280         if (segmentPos_ > seg_id)
281             segmentPos_++;
282     }
283 
284     if (caret_was_in_the_segment) {
285         segmentPos_ += caretOffset_;
286         caretOffset_ = 0;
287     }
288 }
289 
append(const fcitx::KeyEvent & key,const std::string & string)290 bool Reading::append(const fcitx::KeyEvent &key, const std::string &string) {
291     bool was_pending;
292     std::string result, pending;
293     bool need_commiting;
294 
295     if (!kana_.canAppend(key, true) && !key2kana_->canAppend(key, true))
296         return false;
297 
298     if (caretOffset_ != 0) {
299         splitSegment(segmentPos_);
300         resetPending();
301     }
302 
303     if (kana_.canAppend(key))
304         was_pending = kana_.isPending();
305     else
306         was_pending = key2kana_->isPending();
307 
308     if (kana_.canAppend(key))
309         need_commiting = kana_.append(string, result, pending);
310     else
311         need_commiting = key2kana_->append(string, result, pending);
312 
313     ReadingSegments::iterator begin = segments_.begin();
314 
315     // fix previous segment and prepare next segment if needed
316     if (!result.empty() || !pending.empty()) {
317         if (!was_pending || // previous segment was already fixed
318             need_commiting) // previous segment has been fixed
319         {
320             ReadingSegment c;
321             segments_.insert(begin + segmentPos_, c);
322             segmentPos_++;
323         }
324     }
325 
326     // fill segment
327     if (!result.empty() && !pending.empty()) {
328         segments_[segmentPos_ - 1].kana = result;
329 
330         ReadingSegment c;
331         c.raw += string;
332         c.kana = pending;
333         segments_.insert(begin + segmentPos_, c);
334         segmentPos_++;
335 
336     } else if (!result.empty()) {
337         segments_[segmentPos_ - 1].raw += string;
338         segments_[segmentPos_ - 1].kana = result;
339 
340     } else if (!pending.empty()) {
341         segments_[segmentPos_ - 1].raw += string;
342         segments_[segmentPos_ - 1].kana = pending;
343 
344     } else {
345     }
346 
347     return false;
348 }
349 
// Erase `len` characters of the reading starting at character index `start`.
//
// start:       index of the first character to remove (UTF-8 characters).
// len:         number of characters to remove; a negative value means
//              "everything from start to the end".
// allow_split: when true, a segment that only partially overlaps the range
//              is split into single-character segments so that exactly the
//              requested characters are removed. When false, a segment that
//              straddles the start position is removed as a whole.
//
// Does nothing when there are no segments or `start` lies beyond the end.
// Afterwards the reading is either fully cleared (no segments left) or the
// pending state of the convertors is rebuilt via resetPending().
void Reading::erase(unsigned int start, int len, bool allow_split) {
    if (segments_.size() <= 0)
        return;

    if (utf8Length() < start)
        return;

    if (len < 0)
        len = utf8Length() - start;

    // erase
    // `pos` is the character position of the beginning of segment `i`. The
    // loop deliberately runs one step past the last segment so that an
    // overshoot caused by the final segment can still be handled.
    unsigned int pos = 0;
    for (int i = 0; i <= (int)segments_.size(); i++) {
        if (pos < start) {
            // we have not yet reached the start position.

            if (i == (int)segments_.size())
                break;

            pos += fcitx::utf8::length(segments_[i].kana);

        } else if (pos == start) {
            // we have reached the start position.

            if (i == (int)segments_.size())
                break;

            if (allow_split &&
                pos + fcitx::utf8::length(segments_[i].kana) > start + len) {
                // this segment extends beyond the end position,
                // so it has to be split first
                splitSegment(i);

            } else {
                // This segment is completely in the range, erase it!
                len -= fcitx::utf8::length(segments_[i].kana);
                segments_.erase(segments_.begin() + i);
                if ((int)segmentPos_ > i)
                    segmentPos_--;
            }

            // retry from the same position (undoes the loop's i++)
            i--;

        } else {
            // we have overshot the start position: the previous segment
            // (i - 1) straddles it.

            if (allow_split) {
                pos -= fcitx::utf8::length(segments_[i - 1].kana);
                splitSegment(i - 1);

                // retry from the previous position
                i -= 2;

            } else {
                // we have overshot the start position, but are not allowed
                // to split the segment.
                // So remove the whole previous segment.
                len -= pos - start;
                pos -= fcitx::utf8::length(segments_[i - 1].kana);
                segments_.erase(segments_.begin() + i - 1);
                if ((int)segmentPos_ > i - 1)
                    segmentPos_--;

                // retry from the previous position
                i -= 2;
            }
        }

        // Now all letters in the range are removed.
        // Exit the loop.
        if (len <= 0)
            break;
    }

    // reset values
    if (segments_.size() <= 0) {
        clear();
    } else {
        resetPending();
    }
}
432 
resetPending()433 void Reading::resetPending() {
434     if (key2kana_->isPending())
435         key2kana_->clear();
436     if (kana_.isPending())
437         kana_.clear();
438 
439     if (segmentPos_ <= 0)
440         return;
441 
442     key2kana_->resetPending(segments_[segmentPos_ - 1].kana,
443                             segments_[segmentPos_ - 1].raw);
444     kana_.resetPending(segments_[segmentPos_ - 1].kana,
445                        segments_[segmentPos_ - 1].raw);
446 
447     // FIXME! this code breaks pending state on normal input mode.
448     key2kana_->resetPseudoAsciiMode();
449     for (unsigned int i = 0; i < segmentPos_; i++) {
450         key2kana_->processPseudoAsciiMode(segments_[i].kana);
451     }
452 }
453 
length()454 unsigned int Reading::length() {
455     unsigned int len = 0;
456     for (unsigned int i = 0; i < segments_.size(); i++)
457         len += segments_[i].kana.length();
458     return len;
459 }
460 
utf8Length()461 unsigned int Reading::utf8Length() {
462     unsigned int len = 0;
463     for (unsigned int i = 0; i < segments_.size(); i++)
464         len += fcitx::utf8::length(segments_[i].kana);
465     return len;
466 }
467 
caretPosByChar()468 unsigned int Reading::caretPosByChar() {
469     unsigned int pos = 0;
470 
471     unsigned int i;
472     for (i = 0; i < segmentPos_ && i < segments_.size(); i++) {
473         pos += fcitx::utf8::length(segments_[i].kana);
474     }
475 
476     pos += caretOffset_;
477 
478     return pos;
479 }
480 
caretPos()481 unsigned int Reading::caretPos() {
482     unsigned int pos = 0;
483 
484     unsigned int i;
485     for (i = 0; i < segmentPos_ && i < segments_.size(); i++) {
486         pos += segments_[i].kana.length();
487     }
488 
489     if (i < segments_.size() && caretOffset_) {
490         auto iter = segments_[i].kana.begin();
491         pos += fcitx::utf8::ncharByteLength(iter, caretOffset_);
492     }
493 
494     return pos;
495 }
496 
// Move the caret to character position `pos` (counted in UTF-8 characters).
//
// Nothing happens when the caret is already there. Otherwise both convertors
// are cleared, segmentPos_ is updated and the pending state is rebuilt with
// resetPending(). This function never splits a segment and never writes
// caretOffset_.
//
// FIXME! add "allow_split" argument.
void Reading::setCaretPosByChar(unsigned int pos) {
    if (pos == caretPosByChar())
        return;

    key2kana_->clear();
    kana_.clear();

    if (pos >= utf8Length()) {
        // at or past the end: caret goes after the last segment
        segmentPos_ = segments_.size();

    } else if (pos == 0 || segments_.size() <= 0) {
        segmentPos_ = 0;

    } else {
        unsigned int i, tmp_pos = 0;

        // Accumulate segment lengths until the running total exceeds pos.
        // Since pos < utf8Length() here, the loop stops within segments_.
        for (i = 0; tmp_pos <= pos; i++)
            tmp_pos += fcitx::utf8::length(segments_[i].kana);

        // NOTE(review): the loop above only exits once tmp_pos > pos, so the
        // first branch below can never be taken. The remaining branches
        // compare against the caret position from before this call
        // (segmentPos_ has not been changed yet), and `i + 1` can be larger
        // than segments_.size(). The intended snapping behavior should be
        // confirmed before changing this.
        if (tmp_pos == pos) {
            segmentPos_ = i + 1;
        } else if (tmp_pos < caretPosByChar()) {
            segmentPos_ = i;
        } else if (tmp_pos > caretPosByChar()) {
            segmentPos_ = i + 1;
        }
    }

    resetPending();
}
528 
moveCaret(int step,bool allow_split)529 void Reading::moveCaret(int step, bool allow_split) {
530     if (step == 0)
531         return;
532 
533     key2kana_->clear();
534     kana_.clear();
535 
536     if (allow_split) {
537         unsigned int pos = caretPosByChar();
538         if (step < 0 && pos < static_cast<unsigned int>(abs(step))) {
539             // lower limit
540             segmentPos_ = 0;
541 
542         } else if (step > 0 && pos + step > utf8Length()) {
543             // upper limit
544             segmentPos_ = segments_.size();
545 
546         } else {
547             unsigned int new_pos = pos + step;
548             ReadingSegments::iterator it;
549             pos = 0;
550             segmentPos_ = 0;
551             caretOffset_ = 0;
552             for (it = segments_.begin(); pos < new_pos; it++) {
553                 if (pos + fcitx::utf8::length(it->kana) > new_pos) {
554                     caretOffset_ = new_pos - pos;
555                     break;
556                 } else {
557                     segmentPos_++;
558                     pos += fcitx::utf8::length(it->kana);
559                 }
560             }
561         }
562 
563     } else {
564         if (step < 0 && segmentPos_ < static_cast<unsigned int>(abs(step))) {
565             // lower limit
566             segmentPos_ = 0;
567 
568         } else if (step > 0 && segmentPos_ + step > segments_.size()) {
569             // upper limit
570             segmentPos_ = segments_.size();
571 
572         } else {
573             // other
574             segmentPos_ += step;
575         }
576     }
577 
578     resetPending();
579 }
580 
setTypingMethod(TypingMethod method)581 void Reading::setTypingMethod(TypingMethod method) {
582     Key2KanaTable *fundamental_table = nullptr;
583 
584     if (method == TypingMethod::NICOLA) {
585         fundamental_table = state_.engine()->customNicolaTable();
586         key2kana_ = &nicola_;
587         nicolaTables_.setTypingMethod(method, fundamental_table);
588         nicola_.setCaseSensitive(true);
589     } else if (method == TypingMethod::KANA) {
590         fundamental_table = state_.engine()->customKanaTable();
591         key2kana_ = &key2kanaNormal_;
592         key2kanaTables_.setTypingMethod(method, fundamental_table);
593         key2kanaNormal_.setCaseSensitive(true);
594     } else {
595         fundamental_table = state_.engine()->customRomajiTable();
596         key2kana_ = &key2kanaNormal_;
597         key2kanaTables_.setTypingMethod(method, fundamental_table);
598         key2kanaNormal_.setCaseSensitive(false);
599     }
600 }
601 
typingMethod()602 TypingMethod Reading::typingMethod() {
603     if (key2kana_ == &nicola_)
604         return TypingMethod::NICOLA;
605     else
606         return key2kanaTables_.typingMethod();
607 }
608 
// Forward the period style to key2kanaTables_.
void Reading::setPeriodStyle(PeriodStyle style) {
    key2kanaTables_.setPeriodStyle(style);
}
612 
// Period style currently reported by key2kanaTables_.
PeriodStyle Reading::periodStyle() { return key2kanaTables_.periodStyle(); }
614 
// Forward the comma style to key2kanaTables_.
void Reading::setCommaStyle(CommaStyle style) {
    key2kanaTables_.setCommaStyle(style);
}
618 
// Comma style currently reported by key2kanaTables_.
CommaStyle Reading::commaStyle() { return key2kanaTables_.commaStyle(); }
620 
// Forward the bracket style to key2kanaTables_.
void Reading::setBracketStyle(BracketStyle style) {
    key2kanaTables_.setBracketStyle(style);
}
624 
// Bracket style currently reported by key2kanaTables_.
BracketStyle Reading::bracketStyle() { return key2kanaTables_.bracketStyle(); }
626 
// Forward the slash style to key2kanaTables_.
void Reading::setSlashStyle(SlashStyle style) {
    key2kanaTables_.setSlashStyle(style);
}
630 
// Slash style currently reported by key2kanaTables_.
SlashStyle Reading::slashStyle() { return key2kanaTables_.slashStyle(); }
632 
// Forward the "symbols are half-width" flag to key2kanaTables_.
void Reading::setSymbolHalf(bool half) { key2kanaTables_.setSymbolHalf(half); }
634 
// Value of symbol_is_half() as reported by key2kanaTables_.
bool Reading::isSymbolHalf() { return key2kanaTables_.symbol_is_half(); }
636 
// Forward the "numbers are half-width" flag to key2kanaTables_.
void Reading::setNumberHalf(bool half) { key2kanaTables_.setNumberHalf(half); }
638 
// Value of isNumberHalf() as reported by key2kanaTables_.
bool Reading::isNumberHalf() { return key2kanaTables_.isNumberHalf(); }
640 
// Forward the pseudo-ASCII mode value to key2kanaNormal_. Note that this
// always targets key2kanaNormal_, not the convertor key2kana_ points at.
void Reading::setPseudoAsciiMode(int mode) {
    key2kanaNormal_.setPseudoAsciiMode(mode);
}
644 
// Whether key2kanaNormal_ reports that it is in pseudo-ASCII mode.
bool Reading::isPseudoAsciiMode() {
    return key2kanaNormal_.isPseudoAsciiMode();
}
648 
resetPseudoAsciiMode()649 void Reading::resetPseudoAsciiMode() {
650     if (key2kanaNormal_.isPseudoAsciiMode() && key2kanaNormal_.isPending()) {
651         ReadingSegment c;
652         ReadingSegments::iterator it = segments_.begin();
653 
654         /* separate to another segment */
655         key2kanaNormal_.resetPseudoAsciiMode();
656         segments_.insert(it + segmentPos_, c);
657         segmentPos_++;
658     }
659 }
660