1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * SPDX-FileCopyrightText: 2005 Takuro Ashie
4 * SPDX-FileCopyrightText: 2012 CSSlayer <wengxt@gmail.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-or-later
7 */
8
9 #include "reading.h"
10 #include "engine.h"
11 #include "state.h"
12 #include "utils.h"
13 #include <fcitx-utils/utf8.h>
14
ReadingSegment()15 ReadingSegment::ReadingSegment() {}
16
~ReadingSegment()17 ReadingSegment::~ReadingSegment() {}
18
find_romaji(std::string c)19 static const char *find_romaji(std::string c) {
20 ConvRule *table = fcitx_anthy_romaji_typing_rule;
21
22 for (unsigned int i = 0; table[i].string; i++) {
23 std::string kana = table[i].result;
24 if (c == kana)
25 return table[i].string;
26 }
27
28 return "";
29 }
30
to_half(std::string & dest,std::string & src)31 static void to_half(std::string &dest, std::string &src) {
32 WideRule *table = fcitx_anthy_wide_table;
33
34 for (unsigned int i = 0; i < fcitx::utf8::length(src); i++) {
35 bool found = false;
36 std::string kana1 = util::utf8_string_substr(src, i, 1);
37 for (unsigned int i = 0; table[i].code; i++) {
38 std::string kana2 = table[i].wide;
39 if (kana1 == kana2) {
40 dest += table[i].code;
41 found = true;
42 break;
43 }
44 }
45 if (!found)
46 dest += kana1;
47 }
48 }
49
50 // Only a romaji string can be splited with raw key string.
51 // Other typing method aren't supported splitting raw key string.
split(ReadingSegments & segments)52 void ReadingSegment::split(ReadingSegments &segments) {
53 if (fcitx::utf8::length(kana) <= 1)
54 segments.push_back(*this);
55
56 std::string half;
57 to_half(half, kana);
58 bool same_with_raw = half == raw;
59
60 std::string::iterator it;
61 for (unsigned int i = 0; i < fcitx::utf8::length(kana); i++) {
62 std::string c = util::utf8_string_substr(kana, i, 1);
63 ReadingSegment seg;
64 seg.kana = c;
65 if (same_with_raw)
66 to_half(seg.raw, c);
67 else
68 seg.raw = find_romaji(c);
69 segments.push_back(seg);
70 }
71 }
72
Reading(AnthyState & anthy)73 Reading::Reading(AnthyState &anthy)
74 : state_(anthy), key2kanaNormal_(anthy, key2kanaTables_), kana_(anthy),
75 nicola_(anthy, nicolaTables_), key2kana_(&key2kanaNormal_),
76 segmentPos_(0), caretOffset_(0) {
77 nicolaTables_.setTypingMethod(TypingMethod::NICOLA);
78 }
79
~Reading()80 Reading::~Reading() {}
81
canProcesKeyEvent(const fcitx::KeyEvent & key)82 bool Reading::canProcesKeyEvent(const fcitx::KeyEvent &key) {
83 if (kana_.canAppend(key))
84 return true;
85
86 return key2kana_->canAppend(key);
87 }
88
processKeyEvent(const fcitx::KeyEvent & key)89 bool Reading::processKeyEvent(const fcitx::KeyEvent &key) {
90 if (!canProcesKeyEvent(key))
91 return false;
92
93 if (caretOffset_ != 0) {
94 splitSegment(segmentPos_);
95 resetPending();
96 }
97
98 bool was_pending;
99 if (kana_.canAppend(key))
100 was_pending = kana_.isPending();
101 else
102 was_pending = key2kana_->isPending();
103
104 std::string raw;
105 std::string result, pending;
106 bool need_commiting;
107 if (kana_.canAppend(key))
108 need_commiting = kana_.append(key, result, pending, raw);
109 else
110 need_commiting = key2kana_->append(key, result, pending, raw);
111
112 ReadingSegments::iterator begin = segments_.begin();
113
114 // fix previous segment and prepare next segment if needed
115 if (!result.empty() || !pending.empty()) {
116 if (!was_pending || // previous segment was already fixed
117 need_commiting) // previous segment has been fixed
118 {
119 ReadingSegment c;
120 segments_.insert(begin + segmentPos_, c);
121 segmentPos_++;
122 }
123 }
124
125 // fill segment
126 if (!result.empty() && !pending.empty()) {
127 segments_[segmentPos_ - 1].kana = result;
128
129 ReadingSegment c;
130 c.raw += raw;
131 c.kana = pending;
132 segments_.insert(begin + segmentPos_, c);
133 segmentPos_++;
134
135 } else if (!result.empty()) {
136 segments_[segmentPos_ - 1].raw += raw;
137 segments_[segmentPos_ - 1].kana = result;
138
139 } else if (!pending.empty()) {
140 segments_[segmentPos_ - 1].raw += raw;
141 segments_[segmentPos_ - 1].kana = pending;
142
143 } else {
144 }
145
146 return false;
147 }
148
finish()149 void Reading::finish() {
150 if (!key2kana_->isPending())
151 return;
152
153 std::string result = key2kana_->flushPending();
154 if (!result.empty()) {
155 segments_[segmentPos_ - 1].kana = result;
156 }
157 }
158
clear()159 void Reading::clear() {
160 key2kanaNormal_.clear();
161 kana_.clear();
162 nicola_.clear();
163 segments_.clear();
164 segmentPos_ = 0;
165 caretOffset_ = 0;
166 }
167
getByChar(unsigned int start,int len,StringType type)168 std::string Reading::getByChar(unsigned int start, int len, StringType type) {
169 std::string str;
170 unsigned int pos = 0, end = len > 0 ? start + len : utf8Length() - start;
171 std::string kana;
172 std::string raw;
173
174 if (start >= end)
175 return str;
176 if (start >= utf8Length())
177 return str;
178
179 switch (type) {
180 case FCITX_ANTHY_STRING_LATIN:
181 raw = getRawByChar(start, len);
182 str = raw;
183 return str;
184
185 case FCITX_ANTHY_STRING_WIDE_LATIN:
186 raw = getRawByChar(start, len);
187 str = util::convert_to_wide(raw);
188 return str;
189
190 default:
191 break;
192 }
193
194 for (unsigned int i = 0; i < segments_.size(); i++) {
195 if (pos >= start ||
196 pos + fcitx::utf8::length(segments_[i].kana) > start) {
197 unsigned int startstart = 0, len;
198
199 if (pos >= start)
200 startstart = 0;
201 else
202 startstart = pos - start;
203
204 if (pos + fcitx::utf8::length(segments_[i].kana) > end)
205 len = end - start;
206 else
207 len = fcitx::utf8::length(segments_[i].kana);
208
209 kana +=
210 util::utf8_string_substr(segments_[i].kana, startstart, len);
211 }
212
213 pos += fcitx::utf8::length(segments_[i].kana);
214 if (pos >= end)
215 break;
216 }
217
218 switch (type) {
219 case FCITX_ANTHY_STRING_HIRAGANA:
220 str = kana;
221 break;
222
223 case FCITX_ANTHY_STRING_KATAKANA:
224 str = util::convert_to_katakana(kana);
225 break;
226
227 case FCITX_ANTHY_STRING_HALF_KATAKANA:
228 str = util::convert_to_katakana(kana, true);
229 break;
230
231 default:
232 break;
233 }
234
235 return str;
236 }
237
getRawByChar(unsigned int start,int len)238 std::string Reading::getRawByChar(unsigned int start, int len) {
239 std::string str;
240 unsigned int pos = 0, end = len > 0 ? start + len : utf8Length() - start;
241
242 if (start >= end)
243 return str;
244
245 for (unsigned int i = 0; i < segments_.size(); i++) {
246 if (pos >= start ||
247 pos + fcitx::utf8::length(segments_[i].kana) > start) {
248 // FIXME!
249 str += segments_[i].raw;
250 }
251
252 pos += fcitx::utf8::length(segments_[i].kana);
253
254 if (pos >= end)
255 break;
256 }
257
258 return str;
259 }
260
splitSegment(unsigned int seg_id)261 void Reading::splitSegment(unsigned int seg_id) {
262 if (seg_id >= segments_.size())
263 return;
264
265 unsigned int pos = 0;
266 for (unsigned int i = 0; i < seg_id && i < segments_.size(); i++)
267 pos += segments_[i].kana.length();
268
269 unsigned int caret = caretPos();
270 unsigned int seg_len = segments_[seg_id].kana.length();
271 bool caret_was_in_the_segment = false;
272 if (caret > pos && caret < pos + seg_len)
273 caret_was_in_the_segment = true;
274
275 ReadingSegments segments;
276 segments_[seg_id].split(segments);
277 segments_.erase(segments_.begin() + seg_id);
278 for (int j = segments.size() - 1; j >= 0; j--) {
279 segments_.insert(segments_.begin() + seg_id, segments[j]);
280 if (segmentPos_ > seg_id)
281 segmentPos_++;
282 }
283
284 if (caret_was_in_the_segment) {
285 segmentPos_ += caretOffset_;
286 caretOffset_ = 0;
287 }
288 }
289
append(const fcitx::KeyEvent & key,const std::string & string)290 bool Reading::append(const fcitx::KeyEvent &key, const std::string &string) {
291 bool was_pending;
292 std::string result, pending;
293 bool need_commiting;
294
295 if (!kana_.canAppend(key, true) && !key2kana_->canAppend(key, true))
296 return false;
297
298 if (caretOffset_ != 0) {
299 splitSegment(segmentPos_);
300 resetPending();
301 }
302
303 if (kana_.canAppend(key))
304 was_pending = kana_.isPending();
305 else
306 was_pending = key2kana_->isPending();
307
308 if (kana_.canAppend(key))
309 need_commiting = kana_.append(string, result, pending);
310 else
311 need_commiting = key2kana_->append(string, result, pending);
312
313 ReadingSegments::iterator begin = segments_.begin();
314
315 // fix previous segment and prepare next segment if needed
316 if (!result.empty() || !pending.empty()) {
317 if (!was_pending || // previous segment was already fixed
318 need_commiting) // previous segment has been fixed
319 {
320 ReadingSegment c;
321 segments_.insert(begin + segmentPos_, c);
322 segmentPos_++;
323 }
324 }
325
326 // fill segment
327 if (!result.empty() && !pending.empty()) {
328 segments_[segmentPos_ - 1].kana = result;
329
330 ReadingSegment c;
331 c.raw += string;
332 c.kana = pending;
333 segments_.insert(begin + segmentPos_, c);
334 segmentPos_++;
335
336 } else if (!result.empty()) {
337 segments_[segmentPos_ - 1].raw += string;
338 segments_[segmentPos_ - 1].kana = result;
339
340 } else if (!pending.empty()) {
341 segments_[segmentPos_ - 1].raw += string;
342 segments_[segmentPos_ - 1].kana = pending;
343
344 } else {
345 }
346
347 return false;
348 }
349
erase(unsigned int start,int len,bool allow_split)350 void Reading::erase(unsigned int start, int len, bool allow_split) {
351 if (segments_.size() <= 0)
352 return;
353
354 if (utf8Length() < start)
355 return;
356
357 if (len < 0)
358 len = utf8Length() - start;
359
360 // erase
361 unsigned int pos = 0;
362 for (int i = 0; i <= (int)segments_.size(); i++) {
363 if (pos < start) {
364 // we have not yet reached start position.
365
366 if (i == (int)segments_.size())
367 break;
368
369 pos += fcitx::utf8::length(segments_[i].kana);
370
371 } else if (pos == start) {
372 // we have reached start position.
373
374 if (i == (int)segments_.size())
375 break;
376
377 if (allow_split &&
378 pos + fcitx::utf8::length(segments_[i].kana) > start + len) {
379 // we have overshooted the end position!
380 // we have to split this segment
381 splitSegment(i);
382
383 } else {
384 // This segment is completely in the rage, erase it!
385 len -= fcitx::utf8::length(segments_[i].kana);
386 segments_.erase(segments_.begin() + i);
387 if ((int)segmentPos_ > i)
388 segmentPos_--;
389 }
390
391 // retry from the same position
392 i--;
393
394 } else {
395 // we have overshooted the start position!
396
397 if (allow_split) {
398 pos -= fcitx::utf8::length(segments_[i - 1].kana);
399 splitSegment(i - 1);
400
401 // retry from the previous position
402 i -= 2;
403
404 } else {
405 // we have overshooted the start position, but have not been
406 // allowed to split the segment.
407 // So remove all string of previous segment.
408 len -= pos - start;
409 pos -= fcitx::utf8::length(segments_[i - 1].kana);
410 segments_.erase(segments_.begin() + i - 1);
411 if ((int)segmentPos_ > i - 1)
412 segmentPos_--;
413
414 // retry from the previous position
415 i -= 2;
416 }
417 }
418
419 // Now all letters in the range are removed.
420 // Exit the loop.
421 if (len <= 0)
422 break;
423 }
424
425 // reset values
426 if (segments_.size() <= 0) {
427 clear();
428 } else {
429 resetPending();
430 }
431 }
432
resetPending()433 void Reading::resetPending() {
434 if (key2kana_->isPending())
435 key2kana_->clear();
436 if (kana_.isPending())
437 kana_.clear();
438
439 if (segmentPos_ <= 0)
440 return;
441
442 key2kana_->resetPending(segments_[segmentPos_ - 1].kana,
443 segments_[segmentPos_ - 1].raw);
444 kana_.resetPending(segments_[segmentPos_ - 1].kana,
445 segments_[segmentPos_ - 1].raw);
446
447 // FIXME! this code breaks pending state on normal input mode.
448 key2kana_->resetPseudoAsciiMode();
449 for (unsigned int i = 0; i < segmentPos_; i++) {
450 key2kana_->processPseudoAsciiMode(segments_[i].kana);
451 }
452 }
453
length()454 unsigned int Reading::length() {
455 unsigned int len = 0;
456 for (unsigned int i = 0; i < segments_.size(); i++)
457 len += segments_[i].kana.length();
458 return len;
459 }
460
utf8Length()461 unsigned int Reading::utf8Length() {
462 unsigned int len = 0;
463 for (unsigned int i = 0; i < segments_.size(); i++)
464 len += fcitx::utf8::length(segments_[i].kana);
465 return len;
466 }
467
caretPosByChar()468 unsigned int Reading::caretPosByChar() {
469 unsigned int pos = 0;
470
471 unsigned int i;
472 for (i = 0; i < segmentPos_ && i < segments_.size(); i++) {
473 pos += fcitx::utf8::length(segments_[i].kana);
474 }
475
476 pos += caretOffset_;
477
478 return pos;
479 }
480
caretPos()481 unsigned int Reading::caretPos() {
482 unsigned int pos = 0;
483
484 unsigned int i;
485 for (i = 0; i < segmentPos_ && i < segments_.size(); i++) {
486 pos += segments_[i].kana.length();
487 }
488
489 if (i < segments_.size() && caretOffset_) {
490 auto iter = segments_[i].kana.begin();
491 pos += fcitx::utf8::ncharByteLength(iter, caretOffset_);
492 }
493
494 return pos;
495 }
496
497 // FIXME! add "allow_split" argument.
setCaretPosByChar(unsigned int pos)498 void Reading::setCaretPosByChar(unsigned int pos) {
499 if (pos == caretPosByChar())
500 return;
501
502 key2kana_->clear();
503 kana_.clear();
504
505 if (pos >= utf8Length()) {
506 segmentPos_ = segments_.size();
507
508 } else if (pos == 0 || segments_.size() <= 0) {
509 segmentPos_ = 0;
510
511 } else {
512 unsigned int i, tmp_pos = 0;
513
514 for (i = 0; tmp_pos <= pos; i++)
515 tmp_pos += fcitx::utf8::length(segments_[i].kana);
516
517 if (tmp_pos == pos) {
518 segmentPos_ = i + 1;
519 } else if (tmp_pos < caretPosByChar()) {
520 segmentPos_ = i;
521 } else if (tmp_pos > caretPosByChar()) {
522 segmentPos_ = i + 1;
523 }
524 }
525
526 resetPending();
527 }
528
moveCaret(int step,bool allow_split)529 void Reading::moveCaret(int step, bool allow_split) {
530 if (step == 0)
531 return;
532
533 key2kana_->clear();
534 kana_.clear();
535
536 if (allow_split) {
537 unsigned int pos = caretPosByChar();
538 if (step < 0 && pos < static_cast<unsigned int>(abs(step))) {
539 // lower limit
540 segmentPos_ = 0;
541
542 } else if (step > 0 && pos + step > utf8Length()) {
543 // upper limit
544 segmentPos_ = segments_.size();
545
546 } else {
547 unsigned int new_pos = pos + step;
548 ReadingSegments::iterator it;
549 pos = 0;
550 segmentPos_ = 0;
551 caretOffset_ = 0;
552 for (it = segments_.begin(); pos < new_pos; it++) {
553 if (pos + fcitx::utf8::length(it->kana) > new_pos) {
554 caretOffset_ = new_pos - pos;
555 break;
556 } else {
557 segmentPos_++;
558 pos += fcitx::utf8::length(it->kana);
559 }
560 }
561 }
562
563 } else {
564 if (step < 0 && segmentPos_ < static_cast<unsigned int>(abs(step))) {
565 // lower limit
566 segmentPos_ = 0;
567
568 } else if (step > 0 && segmentPos_ + step > segments_.size()) {
569 // upper limit
570 segmentPos_ = segments_.size();
571
572 } else {
573 // other
574 segmentPos_ += step;
575 }
576 }
577
578 resetPending();
579 }
580
setTypingMethod(TypingMethod method)581 void Reading::setTypingMethod(TypingMethod method) {
582 Key2KanaTable *fundamental_table = nullptr;
583
584 if (method == TypingMethod::NICOLA) {
585 fundamental_table = state_.engine()->customNicolaTable();
586 key2kana_ = &nicola_;
587 nicolaTables_.setTypingMethod(method, fundamental_table);
588 nicola_.setCaseSensitive(true);
589 } else if (method == TypingMethod::KANA) {
590 fundamental_table = state_.engine()->customKanaTable();
591 key2kana_ = &key2kanaNormal_;
592 key2kanaTables_.setTypingMethod(method, fundamental_table);
593 key2kanaNormal_.setCaseSensitive(true);
594 } else {
595 fundamental_table = state_.engine()->customRomajiTable();
596 key2kana_ = &key2kanaNormal_;
597 key2kanaTables_.setTypingMethod(method, fundamental_table);
598 key2kanaNormal_.setCaseSensitive(false);
599 }
600 }
601
typingMethod()602 TypingMethod Reading::typingMethod() {
603 if (key2kana_ == &nicola_)
604 return TypingMethod::NICOLA;
605 else
606 return key2kanaTables_.typingMethod();
607 }
608
setPeriodStyle(PeriodStyle style)609 void Reading::setPeriodStyle(PeriodStyle style) {
610 key2kanaTables_.setPeriodStyle(style);
611 }
612
periodStyle()613 PeriodStyle Reading::periodStyle() { return key2kanaTables_.periodStyle(); }
614
setCommaStyle(CommaStyle style)615 void Reading::setCommaStyle(CommaStyle style) {
616 key2kanaTables_.setCommaStyle(style);
617 }
618
commaStyle()619 CommaStyle Reading::commaStyle() { return key2kanaTables_.commaStyle(); }
620
setBracketStyle(BracketStyle style)621 void Reading::setBracketStyle(BracketStyle style) {
622 key2kanaTables_.setBracketStyle(style);
623 }
624
bracketStyle()625 BracketStyle Reading::bracketStyle() { return key2kanaTables_.bracketStyle(); }
626
setSlashStyle(SlashStyle style)627 void Reading::setSlashStyle(SlashStyle style) {
628 key2kanaTables_.setSlashStyle(style);
629 }
630
slashStyle()631 SlashStyle Reading::slashStyle() { return key2kanaTables_.slashStyle(); }
632
setSymbolHalf(bool half)633 void Reading::setSymbolHalf(bool half) { key2kanaTables_.setSymbolHalf(half); }
634
isSymbolHalf()635 bool Reading::isSymbolHalf() { return key2kanaTables_.symbol_is_half(); }
636
setNumberHalf(bool half)637 void Reading::setNumberHalf(bool half) { key2kanaTables_.setNumberHalf(half); }
638
isNumberHalf()639 bool Reading::isNumberHalf() { return key2kanaTables_.isNumberHalf(); }
640
setPseudoAsciiMode(int mode)641 void Reading::setPseudoAsciiMode(int mode) {
642 key2kanaNormal_.setPseudoAsciiMode(mode);
643 }
644
isPseudoAsciiMode()645 bool Reading::isPseudoAsciiMode() {
646 return key2kanaNormal_.isPseudoAsciiMode();
647 }
648
resetPseudoAsciiMode()649 void Reading::resetPseudoAsciiMode() {
650 if (key2kanaNormal_.isPseudoAsciiMode() && key2kanaNormal_.isPending()) {
651 ReadingSegment c;
652 ReadingSegments::iterator it = segments_.begin();
653
654 /* separate to another segment */
655 key2kanaNormal_.resetPseudoAsciiMode();
656 segments_.insert(it + segmentPos_, c);
657 segmentPos_++;
658 }
659 }
660