1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // Interactive composer from a Roman string to a Hiragana string
31
32 #include "composer/composer.h"
33
34 #include "base/flags.h"
35 #include "base/logging.h"
36 #include "base/util.h"
37 #include "composer/internal/composition.h"
38 #include "composer/internal/composition_input.h"
39 #include "composer/internal/mode_switching_handler.h"
40 #include "composer/internal/transliterators.h"
41 #include "composer/internal/typing_corrector.h"
42 #include "composer/key_event_util.h"
43 #include "composer/table.h"
44 #include "composer/type_corrected_query.h"
45 #include "config/character_form_manager.h"
46 #include "config/config_handler.h"
47 #include "protocol/commands.pb.h"
48 #include "protocol/config.pb.h"
49
// Use flags instead of constants for performance evaluation.
// Both values are passed to the typing corrector in Composer's constructor.
DEFINE_uint64(max_typing_correction_query_candidates, 40,
              "Maximum # of typing correction query temporary candidates.");
DEFINE_uint64(max_typing_correction_query_results, 8,
              "Maximum # of typing correction query results.");
// Declared here, defined elsewhere.  Consulted together with the config in
// Composer::InsertCharacterKeyEvent().
DECLARE_bool(enable_typing_correction);
56
57 namespace mozc {
58 namespace composer {
59
60 using ::mozc::config::CharacterFormManager;
61
62 namespace {
63
GetTransliterator(transliteration::TransliterationType comp_mode)64 const Transliterators::Transliterator GetTransliterator(
65 transliteration::TransliterationType comp_mode) {
66 switch (comp_mode) {
67 case transliteration::HALF_ASCII:
68 case transliteration::HALF_ASCII_UPPER:
69 case transliteration::HALF_ASCII_LOWER:
70 case transliteration::HALF_ASCII_CAPITALIZED:
71 return Transliterators::HALF_ASCII;
72
73 case transliteration::FULL_ASCII:
74 case transliteration::FULL_ASCII_UPPER:
75 case transliteration::FULL_ASCII_LOWER:
76 case transliteration::FULL_ASCII_CAPITALIZED:
77 return Transliterators::FULL_ASCII;
78
79 case transliteration::HALF_KATAKANA:
80 return Transliterators::HALF_KATAKANA;
81
82 case transliteration::FULL_KATAKANA:
83 return Transliterators::FULL_KATAKANA;
84
85 case transliteration::HIRAGANA:
86 return Transliterators::HIRAGANA;
87
88 default:
89 LOG(ERROR) << "Unknown TransliterationType: " << comp_mode;
90 return Transliterators::CONVERSION_STRING;
91 }
92 }
93
GetTransliterationType(Transliterators::Transliterator transliterator,const transliteration::TransliterationType default_type)94 transliteration::TransliterationType GetTransliterationType(
95 Transliterators::Transliterator transliterator,
96 const transliteration::TransliterationType default_type) {
97 if (transliterator == Transliterators::HIRAGANA) {
98 return transliteration::HIRAGANA;
99 }
100 if (transliterator == Transliterators::HALF_ASCII) {
101 return transliteration::HALF_ASCII;
102 }
103 if (transliterator == Transliterators::FULL_ASCII) {
104 return transliteration::FULL_ASCII;
105 }
106 if (transliterator == Transliterators::FULL_KATAKANA) {
107 return transliteration::FULL_KATAKANA;
108 }
109 if (transliterator == Transliterators::HALF_KATAKANA) {
110 return transliteration::HALF_KATAKANA;
111 }
112 return default_type;
113 }
114
Transliterate(const transliteration::TransliterationType mode,const string & input,string * output)115 void Transliterate(const transliteration::TransliterationType mode,
116 const string &input,
117 string *output) {
118 // When the mode is HALF_KATAKANA, Full width ASCII is also
119 // transformed.
120 if (mode == transliteration::HALF_KATAKANA) {
121 string tmp_input;
122 Util::HiraganaToKatakana(input, &tmp_input);
123 Util::FullWidthToHalfWidth(tmp_input, output);
124 return;
125 }
126
127 switch (mode) {
128 case transliteration::HALF_ASCII:
129 Util::FullWidthAsciiToHalfWidthAscii(input, output);
130 break;
131 case transliteration::HALF_ASCII_UPPER:
132 Util::FullWidthAsciiToHalfWidthAscii(input, output);
133 Util::UpperString(output);
134 break;
135 case transliteration::HALF_ASCII_LOWER:
136 Util::FullWidthAsciiToHalfWidthAscii(input, output);
137 Util::LowerString(output);
138 break;
139 case transliteration::HALF_ASCII_CAPITALIZED:
140 Util::FullWidthAsciiToHalfWidthAscii(input, output);
141 Util::CapitalizeString(output);
142 break;
143
144 case transliteration::FULL_ASCII:
145 Util::HalfWidthAsciiToFullWidthAscii(input, output);
146 break;
147 case transliteration::FULL_ASCII_UPPER:
148 Util::HalfWidthAsciiToFullWidthAscii(input, output);
149 Util::UpperString(output);
150 break;
151 case transliteration::FULL_ASCII_LOWER:
152 Util::HalfWidthAsciiToFullWidthAscii(input, output);
153 Util::LowerString(output);
154 break;
155 case transliteration::FULL_ASCII_CAPITALIZED:
156 Util::HalfWidthAsciiToFullWidthAscii(input, output);
157 Util::CapitalizeString(output);
158 break;
159
160 case transliteration::FULL_KATAKANA:
161 Util::HiraganaToKatakana(input, output);
162 break;
163 case transliteration::HIRAGANA:
164 *output = input;
165 break;
166 default:
167 LOG(ERROR) << "Unknown TransliterationType: " << mode;
168 *output = input;
169 break;
170 }
171 }
172
GetTransliterationTypeFromCompositionMode(const commands::CompositionMode mode)173 transliteration::TransliterationType GetTransliterationTypeFromCompositionMode(
174 const commands::CompositionMode mode) {
175 switch (mode) {
176 case commands::HIRAGANA:
177 return transliteration::HIRAGANA;
178 case commands::FULL_KATAKANA:
179 return transliteration::FULL_KATAKANA;
180 case commands::HALF_ASCII:
181 return transliteration::HALF_ASCII;
182 case commands::FULL_ASCII:
183 return transliteration::FULL_ASCII;
184 case commands::HALF_KATAKANA:
185 return transliteration::HALF_KATAKANA;
186 default:
187 // commands::DIRECT or invalid mode.
188 LOG(ERROR) << "Invalid CompositionMode: " << mode;
189 return transliteration::HIRAGANA;
190 }
191 }
192
// Initial value of Composer::max_length_; EnableInsert() refuses further
// input once the composition length reaches it.
const size_t kMaxPreeditLength = 256;
194
195 } // namespace
196
// Builds a composer over |table| that starts with an empty composition in
// HIRAGANA input and output mode.
//
// |table| is handed to the new Composition and to the typing corrector.
// |request| and |config| are stored as given.
// NOTE(review): presumably none of the three pointers is owned by the
// composer -- confirm against the header.
Composer::Composer(const Table *table,
                   const commands::Request *request,
                   const config::Config *config)
    : position_(0),
      is_new_input_(true),
      input_mode_(transliteration::HIRAGANA),
      output_mode_(transliteration::HIRAGANA),
      comeback_input_mode_(transliteration::HIRAGANA),
      input_field_type_(commands::Context::NORMAL),
      shifted_sequence_count_(0),
      composition_(new Composition(table)),
      typing_corrector_(table,
                        FLAGS_max_typing_correction_query_candidates,
                        FLAGS_max_typing_correction_query_results),
      max_length_(kMaxPreeditLength),
      request_(request),
      config_(config) {
  // Propagate the initial mode to the composition as well.
  SetInputMode(transliteration::HIRAGANA);
  typing_corrector_.SetConfig(config);
  Reset();
}
218
// No explicit cleanup is performed here.
Composer::~Composer() {}
220
Reset()221 void Composer::Reset() {
222 EditErase();
223 ResetInputMode();
224 SetOutputMode(transliteration::HIRAGANA);
225 source_text_.assign("");
226 typing_corrector_.Reset();
227 }
228
ResetInputMode()229 void Composer::ResetInputMode() {
230 SetInputMode(comeback_input_mode_);
231 }
232
// Placeholder: currently has no effect.
void Composer::ReloadConfig() {
  // Do nothing at this moment.
}
236
Empty() const237 bool Composer::Empty() const {
238 return (GetLength() == 0);
239 }
240
// Replaces the table used by both the composition and the typing
// corrector.
void Composer::SetTable(const Table *table) {
  composition_->SetTable(table);

  typing_corrector_.SetTable(table);
}
246
// Replaces the stored request pointer (read by UpdateInputMode()).
void Composer::SetRequest(const commands::Request *request) {
  request_ = request;
}
250
// Replaces the stored config pointer and forwards it to the typing
// corrector.
void Composer::SetConfig(const config::Config *config) {
  config_ = config;
  typing_corrector_.SetConfig(config);
}
255
SetInputMode(transliteration::TransliterationType mode)256 void Composer::SetInputMode(transliteration::TransliterationType mode) {
257 comeback_input_mode_ = mode;
258 input_mode_ = mode;
259 shifted_sequence_count_ = 0;
260 is_new_input_ = true;
261 composition_->SetInputMode(GetTransliterator(mode));
262 }
263
SetTemporaryInputMode(transliteration::TransliterationType mode)264 void Composer::SetTemporaryInputMode(
265 transliteration::TransliterationType mode) {
266 // Set comeback_input_mode_ to revert back the current input mode.
267 comeback_input_mode_ = input_mode_;
268 input_mode_ = mode;
269 shifted_sequence_count_ = 0;
270 is_new_input_ = true;
271 composition_->SetInputMode(GetTransliterator(mode));
272 }
273
UpdateInputMode()274 void Composer::UpdateInputMode() {
275 if (position_ != 0 &&
276 request_->update_input_mode_from_surrounding_text()) {
277 const Transliterators::Transliterator current_t12r =
278 composition_->GetTransliterator(position_);
279 if (position_ == composition_->GetLength() ||
280 current_t12r == composition_->GetTransliterator(position_ + 1)) {
281 // - The cursor is at the tail of composition.
282 // Use last character's transliterator as the input mode.
283 // - If the current cursor is between the same character type like
284 // "A|B" and "あ|い", the input mode follows the character type.
285 input_mode_ = GetTransliterationType(current_t12r, comeback_input_mode_);
286 shifted_sequence_count_ = 0;
287 is_new_input_ = true;
288 composition_->SetInputMode(GetTransliterator(input_mode_));
289 return;
290 }
291 }
292
293 // Set the default input mode.
294 SetInputMode(comeback_input_mode_);
295 }
296
// Returns the currently active input mode.
transliteration::TransliterationType Composer::GetInputMode() const {
  return input_mode_;
}
300
// Returns the input mode that is restored when a temporary input mode ends.
transliteration::TransliterationType Composer::GetComebackInputMode() const {
  return comeback_input_mode_;
}
304
ToggleInputMode()305 void Composer::ToggleInputMode() {
306 if (input_mode_ == transliteration::HIRAGANA) {
307 // TODO(komatsu): Refer user's perference.
308 SetInputMode(transliteration::HALF_ASCII);
309 } else {
310 SetInputMode(transliteration::HIRAGANA);
311 }
312 }
313
// Returns the output mode last set through SetOutputMode().
transliteration::TransliterationType Composer::GetOutputMode() const {
  return output_mode_;
}
317
SetOutputMode(transliteration::TransliterationType mode)318 void Composer::SetOutputMode(transliteration::TransliterationType mode) {
319 output_mode_ = mode;
320 composition_->SetTransliterator(
321 0, composition_->GetLength(), GetTransliterator(mode));
322 position_ = composition_->GetLength();
323 }
324
ApplyTemporaryInputMode(const string & input,bool caps_locked)325 void Composer::ApplyTemporaryInputMode(const string &input, bool caps_locked) {
326 DCHECK(!input.empty());
327
328 const config::Config::ShiftKeyModeSwitch switch_mode =
329 config_->shift_key_mode_switch();
330
331 // When input is not an ASCII code, reset the input mode to the one before
332 // temporary input mode.
333 if (Util::OneCharLen(input.c_str()) != 1) {
334 // Call SetInputMode() only when the current input mode is temporary, which
335 // is detected by the if-condition below. Without this check,
336 // SetInputMode() is called always for multi-byte charactesrs. This causes
337 // a bug that multi-byte characters is inserted to a new chunk because
338 // |is_new_input_| is set to true in SetInputMode(); see b/31444698.
339 if (comeback_input_mode_ != input_mode_) {
340 SetInputMode(comeback_input_mode_);
341 }
342 return;
343 }
344
345 // Input is an ASCII code.
346 // we use first character to determin temporary input mode.
347 const char key = input[0];
348 const bool alpha_with_shift =
349 (!caps_locked && ('A' <= key && key <= 'Z')) ||
350 (caps_locked && ('a' <= key && key <= 'z'));
351 const bool alpha_without_shift =
352 (caps_locked && ('A' <= key && key <= 'Z')) ||
353 (!caps_locked && ('a' <= key && key <= 'z'));
354
355 if (alpha_with_shift) {
356 if (switch_mode == config::Config::ASCII_INPUT_MODE) {
357 if (input_mode_ == transliteration::HALF_ASCII ||
358 input_mode_ == transliteration::FULL_ASCII) {
359 // Do nothing.
360 } else {
361 SetTemporaryInputMode(transliteration::HALF_ASCII);
362 }
363 } else if (switch_mode == config::Config::KATAKANA_INPUT_MODE) {
364 if (input_mode_ == transliteration::HIRAGANA) {
365 SetTemporaryInputMode(transliteration::FULL_KATAKANA);
366 } else {
367 // Do nothing.
368 }
369 }
370 ++shifted_sequence_count_;
371 } else if (alpha_without_shift) {
372 // When shifted input continues, the next lower input is the end
373 // of temporary half-width Ascii input.
374 if (shifted_sequence_count_ > 1 &&
375 switch_mode == config::Config::ASCII_INPUT_MODE) {
376 SetInputMode(comeback_input_mode_);
377 }
378 if (switch_mode == config::Config::KATAKANA_INPUT_MODE) {
379 SetInputMode(comeback_input_mode_);
380 }
381 shifted_sequence_count_ = 0;
382 } else {
383 // If the key is not an alphabet, reset shifted_sequence_count_
384 // because "Continuous shifted input" feature should be reset
385 // when the input meets non-alphabet character.
386 shifted_sequence_count_ = 0;
387 }
388 }
389
InsertCharacterInternal(const string & key)390 bool Composer::InsertCharacterInternal(const string &key) {
391 if (!EnableInsert()) {
392 return false;
393 }
394 CompositionInput input;
395 input.set_raw(key);
396 input.set_is_new_input(is_new_input_);
397 position_ = composition_->InsertInput(position_, input);
398 is_new_input_ = false;
399 return true;
400 }
401
InsertCharacter(const string & key)402 void Composer::InsertCharacter(const string &key) {
403 if (!InsertCharacterInternal(key)) {
404 return;
405 }
406 const ProbableKeyEvents empty_events;
407 typing_corrector_.InsertCharacter(key, empty_events);
408 }
409
InsertCharacterForProbableKeyEvents(const string & key,const ProbableKeyEvents & probable_key_events)410 void Composer::InsertCharacterForProbableKeyEvents(
411 const string &key,
412 const ProbableKeyEvents &probable_key_events) {
413 if (!InsertCharacterInternal(key)) {
414 return;
415 }
416 typing_corrector_.InsertCharacter(key, probable_key_events);
417 }
418
InsertCommandCharacter(const InternalCommand internal_command)419 void Composer::InsertCommandCharacter(const InternalCommand internal_command) {
420 switch (internal_command) {
421 case REWIND:
422 InsertCharacter(Table::ParseSpecialKey("{<}"));
423 break;
424 default:
425 LOG(ERROR) << "Unkown command : " << internal_command;
426 }
427 }
428
InsertCharacterPreedit(const string & input)429 void Composer::InsertCharacterPreedit(const string &input) {
430 size_t begin = 0;
431 const size_t end = input.size();
432 while (begin < end) {
433 const size_t mblen = Util::OneCharLen(input.c_str() + begin);
434 const string character(input, begin, mblen);
435 if (!InsertCharacterKeyAndPreedit(character, character)) {
436 return;
437 }
438 begin += mblen;
439 }
440 DCHECK_EQ(begin, end);
441 }
442
// Note: This method is only for test.
// Switches to a temporary HALF_ASCII input mode and inserts |input| as
// preedit text.
void Composer::SetPreeditTextForTestOnly(const string &input) {
  SetTemporaryInputMode(transliteration::HALF_ASCII);
  InsertCharacterPreedit(input);
}
448
// Inserts |input| as both the raw key and the preedit text, passing
// |probable_key_events| along.  Unlike InsertCharacterPreedit(), |input| is
// inserted as a whole rather than character by character.
void Composer::InsertCharacterPreeditForProbableKeyEvents(
    const string &input,
    const ProbableKeyEvents &probable_key_events) {
  InsertCharacterKeyAndPreeditForProbableKeyEvents(input,
                                                   input,
                                                   probable_key_events);
}
456
InsertCharacterKeyAndPreeditInternal(const string & key,const string & preedit)457 bool Composer::InsertCharacterKeyAndPreeditInternal(const string &key,
458 const string &preedit) {
459 if (!EnableInsert()) {
460 return false;
461 }
462 CompositionInput input;
463 input.set_raw(key);
464 input.set_conversion(preedit);
465 input.set_is_new_input(is_new_input_);
466 position_ = composition_->InsertInput(position_, input);
467 is_new_input_ = false;
468 return true;
469 }
470
InsertCharacterKeyAndPreedit(const string & key,const string & preedit)471 bool Composer::InsertCharacterKeyAndPreedit(const string &key,
472 const string &preedit) {
473 if (!InsertCharacterKeyAndPreeditInternal(key, preedit)) {
474 return false;
475 }
476 const ProbableKeyEvents empty_events;
477 typing_corrector_.InsertCharacter(key, empty_events);
478 return true;
479 }
480
InsertCharacterKeyAndPreeditForProbableKeyEvents(const string & key,const string & preedit,const ProbableKeyEvents & probable_key_events)481 void Composer::InsertCharacterKeyAndPreeditForProbableKeyEvents(
482 const string &key,
483 const string &preedit,
484 const ProbableKeyEvents &probable_key_events) {
485 if (!InsertCharacterKeyAndPreeditInternal(key, preedit)) {
486 return;
487 }
488 typing_corrector_.InsertCharacter(key, probable_key_events);
489 }
490
InsertCharacterKeyEvent(const commands::KeyEvent & key)491 bool Composer::InsertCharacterKeyEvent(const commands::KeyEvent &key) {
492 if (!EnableInsert()) {
493 return false;
494 }
495 if (key.has_mode()) {
496 const transliteration::TransliterationType new_input_mode =
497 GetTransliterationTypeFromCompositionMode(key.mode());
498 if (new_input_mode != input_mode_) {
499 // Only when the new input mode is different from the current
500 // input mode, SetInputMode is called. Otherwise the value of
501 // comeback_input_mode_ is lost.
502 SetInputMode(new_input_mode);
503 }
504 }
505
506 // If only SHIFT is pressed, this is used to revert back to the
507 // previous input mode.
508 if (!key.has_key_code()) {
509 for (size_t i = 0; key.modifier_keys_size(); ++i) {
510 if (key.modifier_keys(i) == commands::KeyEvent::SHIFT) {
511 // TODO(komatsu): Enable to customize the behavior.
512 SetInputMode(comeback_input_mode_);
513 return true;
514 }
515 }
516 }
517
518 // Fill input representing user's raw input.
519 string input;
520 if (key.has_key_code()) {
521 Util::UCS4ToUTF8(key.key_code(), &input);
522 } else if (key.has_key_string()) {
523 input = key.key_string();
524 } else {
525 LOG(WARNING) << "input is empty";
526 return false;
527 }
528
529 bool is_typing_correction_enabled = config_->use_typing_correction() ||
530 FLAGS_enable_typing_correction;
531 if (key.has_key_string()) {
532 if (key.input_style() == commands::KeyEvent::AS_IS ||
533 key.input_style() == commands::KeyEvent::DIRECT_INPUT) {
534 composition_->SetInputMode(Transliterators::CONVERSION_STRING);
535 if (is_typing_correction_enabled) {
536 InsertCharacterKeyAndPreeditForProbableKeyEvents(
537 input,
538 key.key_string(),
539 key.probable_key_event());
540 } else {
541 InsertCharacterKeyAndPreedit(input, key.key_string());
542 }
543 SetInputMode(comeback_input_mode_);
544 } else {
545 // Kana input usually has key_string. Note that, the existence of
546 // key_string never determine if the input mode is Kana or Romaji.
547 if (is_typing_correction_enabled) {
548 InsertCharacterKeyAndPreeditForProbableKeyEvents(
549 input,
550 key.key_string(),
551 key.probable_key_event());
552 } else {
553 InsertCharacterKeyAndPreedit(input, key.key_string());
554 }
555 }
556 } else {
557 // Romaji input usually does not has key_string. Note that, the
558 // existence of key_string never determines if the input mode is
559 // Kana or Romaji.
560 const uint32 modifiers = KeyEventUtil::GetModifiers(key);
561 ApplyTemporaryInputMode(input, KeyEventUtil::HasCaps(modifiers));
562 if (is_typing_correction_enabled) {
563 InsertCharacterForProbableKeyEvents(input, key.probable_key_event());
564 } else {
565 InsertCharacter(input);
566 }
567 }
568
569 if (comeback_input_mode_ == input_mode_) {
570 AutoSwitchMode();
571 }
572 return true;
573 }
574
DeleteAt(size_t pos)575 void Composer::DeleteAt(size_t pos) {
576 composition_->DeleteAt(pos);
577 // Adjust cursor position for composition mode.
578 if (position_ > pos) {
579 position_--;
580 }
581 // We do not call UpdateInputMode() here.
582 // 1. In composition mode, UpdateInputMode finalizes pending chunk.
583 // 2. In conversion mode, InputMode needs not to change.
584 typing_corrector_.Invalidate();
585 }
586
// Deletes the character at the cursor, takes the new cursor position from
// the composition, re-derives the input mode, and invalidates the typing
// corrector.
void Composer::Delete() {
  position_ = composition_->DeleteAt(position_);
  UpdateInputMode();

  typing_corrector_.Invalidate();
}
593
DeleteRange(size_t pos,size_t length)594 void Composer::DeleteRange(size_t pos, size_t length) {
595 for (int i = 0; i < length && pos < composition_->GetLength(); ++i) {
596 DeleteAt(pos);
597 }
598 typing_corrector_.Invalidate();
599 }
600
// Erases the whole composition, moves the cursor to the head, restores the
// comeback input mode, and resets the typing corrector.
void Composer::EditErase() {
  composition_->Erase();
  position_ = 0;
  SetInputMode(comeback_input_mode_);
  typing_corrector_.Reset();
}
607
// Deletes the character immediately before the cursor.  Does nothing when
// the cursor is at the head of the composition.
void Composer::Backspace() {
  if (position_ == 0) {
    return;
  }

  // From the viewpoint of updating the input mode, backspace is a special
  // case because the new input mode is based on both the new current
  // character and the *character to be deleted*.

  // At first, move to the left.
  // Now the cursor is between the 'new current character'
  // and the 'character to be deleted'.
  --position_;

  // Update the input mode based on both the 'new current character' and
  // the 'character to be deleted'.
  UpdateInputMode();

  // Delete the 'character to be deleted'.
  position_ = composition_->DeleteAt(position_);

  typing_corrector_.Invalidate();
}
631
// Moves the cursor one position to the left unless it is already at the
// head.  The input mode is re-derived and the typing corrector invalidated
// in either case.
void Composer::MoveCursorLeft() {
  if (position_ > 0) {
    --position_;
  }
  UpdateInputMode();

  typing_corrector_.Invalidate();
}
640
// Moves the cursor one position to the right unless it is already at the
// tail.  The input mode is re-derived and the typing corrector invalidated
// in either case.
void Composer::MoveCursorRight() {
  if (position_ < composition_->GetLength()) {
    ++position_;
  }
  UpdateInputMode();

  typing_corrector_.Invalidate();
}
649
// Moves the cursor to the head of the composition and restores the comeback
// input mode.  The typing corrector is invalidated.
void Composer::MoveCursorToBeginning() {
  position_ = 0;
  SetInputMode(comeback_input_mode_);

  typing_corrector_.Invalidate();
}
656
// Moves the cursor to the tail of the composition.  The typing corrector is
// invalidated.
void Composer::MoveCursorToEnd() {
  position_ = composition_->GetLength();
  // The behavior differs from MoveCursorRight(): MoveCursorToEnd() always
  // restores the default (comeback) input mode instead of deriving it from
  // the surrounding text.
  SetInputMode(comeback_input_mode_);

  typing_corrector_.Invalidate();
}
665
MoveCursorTo(uint32 new_position)666 void Composer::MoveCursorTo(uint32 new_position) {
667 if (new_position <= composition_->GetLength()) {
668 position_ = new_position;
669 UpdateInputMode();
670 }
671 typing_corrector_.Invalidate();
672 }
673
GetPreedit(string * left,string * focused,string * right) const674 void Composer::GetPreedit(string *left, string *focused, string *right) const {
675 DCHECK(left);
676 DCHECK(focused);
677 DCHECK(right);
678 composition_->GetPreedit(position_, left, focused, right);
679
680 // TODO(komatsu): This function can be obsolete.
681 string preedit = *left + *focused + *right;
682 if (TransformCharactersForNumbers(&preedit)) {
683 const size_t left_size = Util::CharsLen(*left);
684 const size_t focused_size = Util::CharsLen(*focused);
685 *left = Util::SubString(preedit, 0, left_size);
686 *focused = Util::SubString(preedit, left_size, focused_size);
687 *right = Util::SubString(preedit, left_size + focused_size, string::npos);
688 }
689 }
690
GetStringForPreedit(string * output) const691 void Composer::GetStringForPreedit(string *output) const {
692 composition_->GetString(output);
693 TransformCharactersForNumbers(output);
694 // If the input field type needs half ascii characters,
695 // perform conversion here.
696 // Note that this purpose is also achieved by the client by setting
697 // input type as "half ascii".
698 // But the architecture of Mozc expects the server to handle such character
699 // width management.
700 // In addition, we also think about PASSWORD field type.
701 // we can prepare NUMBER and TEL keyboard layout, which has
702 // "half ascii" composition mode. This works.
703 // But we will not have PASSWORD only keyboard. We will share the basic
704 // keyboard on usual and password mode
705 // so such hacky code cannot be applicable.
706 // TODO(matsuzakit): Move this logic to another appopriate location.
707 // SetOutputMode() is not currently applicable but ideally it is
708 // better location than here.
709 const commands::Context::InputFieldType field_type =
710 GetInputFieldType();
711 if (field_type == commands::Context::NUMBER ||
712 field_type == commands::Context::PASSWORD ||
713 field_type == commands::Context::TEL) {
714 const string tmp = *output;
715 Util::FullWidthAsciiToHalfWidthAscii(tmp, output);
716 }
717 }
718
// Writes the string to be submitted to |output|.  Currently identical to
// GetStringForPreedit().
void Composer::GetStringForSubmission(string *output) const {
  // TODO(komatsu): We should make sure if we can integrate this
  // function to GetStringForPreedit after a while.
  GetStringForPreedit(output);
}
724
GetQueryForConversion(string * output) const725 void Composer::GetQueryForConversion(string *output) const {
726 string base_output;
727 composition_->GetStringWithTrimMode(FIX, &base_output);
728 TransformCharactersForNumbers(&base_output);
729 Util::FullWidthAsciiToHalfWidthAscii(base_output, output);
730 }
731
732 namespace {
733 // Determine which query is suitable for a prediction query and return
734 // its pointer.
735 // Exmaple:
736 // = Romanji Input =
737 // ("もz", "も") -> "も" // a part of romanji should be trimed.
738 // ("もzky", "もz") -> "もzky" // a user might intentionally typed them.
739 // ("z", "") -> "z" // ditto.
740 // = Kana Input =
741 // ("か", "") -> "か" // a part of kana (it can be "が") should not be trimed.
GetBaseQueryForPrediction(string * asis_query,string * trimed_query)742 string *GetBaseQueryForPrediction(string *asis_query,
743 string *trimed_query) {
744 // If the sizes are equal, there is no matter.
745 if (asis_query->size() == trimed_query->size()) {
746 return asis_query;
747 }
748
749 // Get the different part between asis_query and trimed_query. For
750 // example, "ky" is the different part where asis_query is "もzky"
751 // and trimed_query is "もz".
752 DCHECK_GT(asis_query->size(), trimed_query->size());
753 const string asis_tail(*asis_query, trimed_query->size());
754 DCHECK(!asis_tail.empty());
755
756 // If the different part is not an alphabet, asis_query is used.
757 // This check is mainly used for Kana Input.
758 const Util::ScriptType asis_tail_type = Util::GetScriptType(asis_tail);
759 if (asis_tail_type != Util::ALPHABET) {
760 return asis_query;
761 }
762
763 // If the trimed_query is empty and asis_query is alphabet, an asis
764 // string is used because the query may be typed intentionally.
765 if (trimed_query->empty()) { // alphabet???
766 const Util::ScriptType asis_type = Util::GetScriptType(*asis_query);
767 if (asis_type == Util::ALPHABET) {
768 return asis_query;
769 } else {
770 return trimed_query;
771 }
772 }
773
774 // Now there are two patterns: ("もzk", "もz") and ("もずk", "もず").
775 // We assume "もzk" is user's intentional query, but "もずk" is not.
776 // So our results are:
777 // ("もzk", "もz") => "もzk" and ("もずk", "もず") => "もず".
778 const string trimed_tail = Util::SubString(*trimed_query,
779 Util::CharsLen(*trimed_query) - 1,
780 string::npos);
781 DCHECK(!trimed_tail.empty());
782 const Util::ScriptType trimed_tail_type = Util::GetScriptType(trimed_tail);
783 if (trimed_tail_type == Util::ALPHABET) {
784 return asis_query;
785 } else {
786 return trimed_query;
787 }
788 }
789 } // namespace
790
GetQueryForPrediction(string * output) const791 void Composer::GetQueryForPrediction(string *output) const {
792 string asis_query;
793 composition_->GetStringWithTrimMode(ASIS, &asis_query);
794
795 switch (input_mode_) {
796 case transliteration::HALF_ASCII: {
797 output->assign(asis_query);
798 return;
799 }
800 case transliteration::FULL_ASCII: {
801 Util::FullWidthAsciiToHalfWidthAscii(asis_query, output);
802 return;
803 }
804 default: {}
805 }
806
807 string trimed_query;
808 composition_->GetStringWithTrimMode(TRIM, &trimed_query);
809
810 // NOTE(komatsu): This is a hack to go around the difference
811 // expectation between Romanji-Input and Kana-Input. "かn" in
812 // Romaji-Input should be "か" while "あか" in Kana-Input should be
813 // "あか", although "かn" and "あか" have the same properties. An
814 // ideal solution is to expand the ambguity and pass all of them to
815 // the converter. (e.g. "かn" -> ["かな",..."かの", "かん", ...] /
816 // "あか" -> ["あか", "あが"])
817 string *base_query = GetBaseQueryForPrediction(&asis_query, &trimed_query);
818 TransformCharactersForNumbers(base_query);
819 Util::FullWidthAsciiToHalfWidthAscii(*base_query, output);
820 }
821
GetQueriesForPrediction(string * base,std::set<string> * expanded) const822 void Composer::GetQueriesForPrediction(
823 string *base, std::set<string> *expanded) const {
824 DCHECK(base);
825 DCHECK(expanded);
826 DCHECK(composition_.get());
827 // In case of the Latin input modes, we don't perform expansion.
828 switch (input_mode_) {
829 case transliteration::HALF_ASCII:
830 case transliteration::FULL_ASCII: {
831 GetQueryForPrediction(base);
832 expanded->clear();
833 return;
834 }
835 default: {}
836 }
837 composition_->GetExpandedStrings(base, expanded);
838 }
839
// Fills |queries| with the prediction queries produced by the typing
// corrector.
void Composer::GetTypeCorrectedQueriesForPrediction(
    std::vector<TypeCorrectedQuery> *queries) const {
  typing_corrector_.GetQueriesForPrediction(queries);
}
844
// Returns the length reported by the underlying composition.
size_t Composer::GetLength() const {
  return composition_->GetLength();
}
848
// Returns the current cursor position within the composition.
size_t Composer::GetCursor() const {
  return position_;
}
852
GetTransliteratedText(Transliterators::Transliterator t12r,const size_t position,const size_t size,string * result) const853 void Composer::GetTransliteratedText(
854 Transliterators::Transliterator t12r,
855 const size_t position,
856 const size_t size,
857 string *result) const {
858 DCHECK(result);
859 string full_base;
860 composition_->GetStringWithTransliterator(t12r, &full_base);
861
862 const size_t t13n_start =
863 composition_->ConvertPosition(position, Transliterators::LOCAL, t12r);
864 const size_t t13n_end =
865 composition_->ConvertPosition(position + size,
866 Transliterators::LOCAL, t12r);
867 const size_t t13n_size = t13n_end - t13n_start;
868
869 Util::SubString(full_base, t13n_start, t13n_size, result);
870 }
871
// Writes the raw string of the whole composition to |raw_string|.
void Composer::GetRawString(string *raw_string) const {
  GetRawSubString(0, GetLength(), raw_string);
}
875
// Writes to |raw_sub_string| the RAW_STRING rendering of the composition
// range [position, position + size).
void Composer::GetRawSubString(
    const size_t position,
    const size_t size,
    string *raw_sub_string) const {
  DCHECK(raw_sub_string);
  GetTransliteratedText(Transliterators::RAW_STRING, position, size,
                        raw_sub_string);
}
884
GetTransliterations(transliteration::Transliterations * t13ns) const885 void Composer::GetTransliterations(
886 transliteration::Transliterations *t13ns) const {
887 GetSubTransliterations(0, GetLength(), t13ns);
888 }
889
GetSubTransliteration(const transliteration::TransliterationType type,const size_t position,const size_t size,string * transliteration) const890 void Composer::GetSubTransliteration(
891 const transliteration::TransliterationType type,
892 const size_t position,
893 const size_t size,
894 string *transliteration) const {
895 const Transliterators::Transliterator t12r = GetTransliterator(type);
896 string result;
897 GetTransliteratedText(t12r, position, size, &result);
898 transliteration->clear();
899 Transliterate(type, result, transliteration);
900 }
901
GetSubTransliterations(const size_t position,const size_t size,transliteration::Transliterations * transliterations) const902 void Composer::GetSubTransliterations(
903 const size_t position,
904 const size_t size,
905 transliteration::Transliterations *transliterations) const {
906 string t13n;
907 for (size_t i = 0; i < transliteration::NUM_T13N_TYPES; ++i) {
908 const transliteration::TransliterationType t13n_type =
909 transliteration::TransliterationTypeArray[i];
910 GetSubTransliteration(t13n_type, position, size, &t13n);
911 transliterations->push_back(t13n);
912 }
913 }
914
EnableInsert() const915 bool Composer::EnableInsert() const {
916 if (GetLength() >= max_length_) {
917 // do not accept long chars to prevent DOS attack.
918 LOG(WARNING) << "The length is too long.";
919 return false;
920 }
921 return true;
922 }
923
AutoSwitchMode()924 void Composer::AutoSwitchMode() {
925 if (!config_->use_auto_ime_turn_off()) {
926 return;
927 }
928
929 // AutoSwitchMode is only available on Roma input
930 if (config_->preedit_method() != config::Config::ROMAN) {
931 return;
932 }
933
934 string key;
935 // Key should be in half-width alphanumeric.
936 composition_->GetStringWithTransliterator(
937 GetTransliterator(transliteration::HALF_ASCII), &key);
938
939 ModeSwitchingHandler::ModeSwitching display_mode =
940 ModeSwitchingHandler::NO_CHANGE;
941 ModeSwitchingHandler::ModeSwitching input_mode =
942 ModeSwitchingHandler::NO_CHANGE;
943 if (!ModeSwitchingHandler::GetModeSwitchingHandler()->GetModeSwitchingRule(
944 key, &display_mode, &input_mode)) {
945 // If the key is not a pattern of mode switch rule, the procedure
946 // stops here.
947 return;
948 }
949
950 // |display_mode| affects the existing composition the user typed.
951 switch (display_mode) {
952 case ModeSwitchingHandler::NO_CHANGE:
953 // Do nothing.
954 break;
955 case ModeSwitchingHandler::REVERT_TO_PREVIOUS_MODE:
956 // Invalid value for display_mode
957 LOG(ERROR) << "REVERT_TO_PREVIOUS_MODE is an invalid value "
958 << "for display_mode.";
959 break;
960 case ModeSwitchingHandler::PREFERRED_ALPHANUMERIC:
961 if (input_mode_ == transliteration::FULL_ASCII) {
962 SetOutputMode(transliteration::FULL_ASCII);
963 } else {
964 SetOutputMode(transliteration::HALF_ASCII);
965 }
966 break;
967 case ModeSwitchingHandler::HALF_ALPHANUMERIC:
968 SetOutputMode(transliteration::HALF_ASCII);
969 break;
970 case ModeSwitchingHandler::FULL_ALPHANUMERIC:
971 SetOutputMode(transliteration::FULL_ASCII);
972 break;
973 default:
974 LOG(ERROR) << "Unkown value: " << display_mode;
975 break;
976 }
977
978 // |input_mode| affects the current input mode used for the user's
979 // new typing.
980 switch (input_mode) {
981 case ModeSwitchingHandler::NO_CHANGE:
982 // Do nothing.
983 break;
984 case ModeSwitchingHandler::REVERT_TO_PREVIOUS_MODE:
985 SetInputMode(comeback_input_mode_);
986 break;
987 case ModeSwitchingHandler::PREFERRED_ALPHANUMERIC:
988 if (input_mode_ != transliteration::HALF_ASCII &&
989 input_mode_ != transliteration::FULL_ASCII) {
990 SetTemporaryInputMode(transliteration::HALF_ASCII);
991 }
992 break;
993 case ModeSwitchingHandler::HALF_ALPHANUMERIC:
994 if (input_mode_ != transliteration::HALF_ASCII) {
995 SetTemporaryInputMode(transliteration::HALF_ASCII);
996 }
997 break;
998 case ModeSwitchingHandler::FULL_ALPHANUMERIC:
999 if (input_mode_ != transliteration::FULL_ASCII) {
1000 SetTemporaryInputMode(transliteration::FULL_ASCII);
1001 }
1002 break;
1003 default:
1004 LOG(ERROR) << "Unkown value: " << display_mode;
1005 break;
1006 }
1007 }
1008
ShouldCommit() const1009 bool Composer::ShouldCommit() const {
1010 return composition_->ShouldCommit();
1011 }
1012
ShouldCommitHead(size_t * length_to_commit) const1013 bool Composer::ShouldCommitHead(size_t *length_to_commit) const {
1014 size_t max_remaining_composition_length;
1015 switch (GetInputFieldType()) {
1016 case commands::Context::PASSWORD:
1017 max_remaining_composition_length = 1;
1018 break;
1019 case commands::Context::TEL:
1020 case commands::Context::NUMBER:
1021 max_remaining_composition_length = 0;
1022 break;
1023 default:
1024 // No need to commit. Return here.
1025 return false;
1026 }
1027 if (GetLength() > max_remaining_composition_length) {
1028 *length_to_commit = GetLength() - max_remaining_composition_length;
1029 return true;
1030 }
1031 return false;
1032 }
1033
namespace {
// Coarse character classes assigned to each character of a query by
// Composer::TransformCharactersForNumbers.
enum Script {
  ALPHABET,   // alphabet characters or symbols
  NUMBER,     // numeric characters
  JA_HYPHEN,  // "ー"
  JA_COMMA,   // "、"
  JA_PERIOD,  // "。"
  OTHER,
};

// Returns true iff |script| is ALPHABET or NUMBER.
bool IsAlphabetOrNumber(const Script script) {
  switch (script) {
    case ALPHABET:
    case NUMBER:
      return true;
    default:
      return false;
  }
}
}  // namespace
1048
1049 // static
TransformCharactersForNumbers(string * query)1050 bool Composer::TransformCharactersForNumbers(string *query) {
1051 if (query == NULL) {
1052 LOG(ERROR) << "query is NULL";
1053 return false;
1054 }
1055
1056 // Create a vector of scripts of query characters to avoid
1057 // processing query string many times.
1058 const size_t chars_len = Util::CharsLen(*query);
1059 std::vector<Script> char_scripts;
1060 char_scripts.reserve(chars_len);
1061
1062 // flags to determine whether continue to the next step.
1063 bool has_symbols = false;
1064 bool has_alphanumerics = false;
1065 for (ConstChar32Iterator iter(*query); !iter.Done(); iter.Next()) {
1066 const char32 one_char = iter.Get();
1067 switch (one_char) {
1068 case 0x30FC: // "ー"
1069 has_symbols = true;
1070 char_scripts.push_back(JA_HYPHEN);
1071 break;
1072 case 0x3001: // "、"
1073 has_symbols = true;
1074 char_scripts.push_back(JA_COMMA);
1075 break;
1076 case 0x3002: // "。"
1077 has_symbols = true;
1078 char_scripts.push_back(JA_PERIOD);
1079 break;
1080 case '+':
1081 case '*':
1082 case '/':
1083 case '=':
1084 case '(':
1085 case ')':
1086 case '<':
1087 case '>':
1088 case 0xFF0B: // "+"
1089 case 0xFF0A: // "*"
1090 case 0xFF0F: // "/"
1091 case 0xFF1D: // "="
1092 case 0xFF08: // "("
1093 case 0xFF09: // ")"
1094 case 0xFF1C: // "<"
1095 case 0xFF1E: // ">"
1096 char_scripts.push_back(ALPHABET);
1097 break;
1098 default: {
1099 Util::ScriptType script_type = Util::GetScriptType(one_char);
1100 if (script_type == Util::NUMBER) {
1101 has_alphanumerics = true;
1102 char_scripts.push_back(NUMBER);
1103 } else if (script_type == Util::ALPHABET) {
1104 has_alphanumerics = true;
1105 char_scripts.push_back(ALPHABET);
1106 } else {
1107 char_scripts.push_back(OTHER);
1108 }
1109 }
1110 }
1111 }
1112
1113 DCHECK_EQ(chars_len, char_scripts.size());
1114 if (!has_alphanumerics || !has_symbols) {
1115 VLOG(1) << "The query contains neither alphanumeric nor symbol.";
1116 return false;
1117 }
1118
1119 string transformed_query;
1120 bool transformed = false;
1121 size_t i = 0;
1122 string append_char;
1123 for (ConstChar32Iterator iter(*query); !iter.Done(); iter.Next(), ++i) {
1124 append_char.clear();
1125 switch (char_scripts[i]) {
1126 case JA_HYPHEN: {
1127 // JA_HYPHEN(s) "ー" is/are transformed to "−" if:
1128 // (i) query has one and only one leading JA_HYPHEN followed by a
1129 // number,
1130 // (ii) JA_HYPHEN(s) follow(s) after an alphanumeric (ex. 0-, 0----,
1131 // etc).
1132 // Note that rule (i) implies that if query starts with more than
1133 // one JA_HYPHENs, those JA_HYPHENs are not transformed.
1134 bool check = false;
1135 if (i == 0 && chars_len > 1) {
1136 check = (char_scripts[1] == NUMBER);
1137 } else {
1138 for (size_t j = i; j > 0; --j) {
1139 if (char_scripts[j - 1] == JA_HYPHEN) {
1140 continue;
1141 }
1142 check = IsAlphabetOrNumber(char_scripts[j - 1]);
1143 break;
1144 }
1145 }
1146
1147 // JA_HYPHEN should be transformed to MINUS.
1148 if (check) {
1149 CharacterFormManager::GetCharacterFormManager()->ConvertPreeditString(
1150 "−", // U+2212
1151 &append_char);
1152 DCHECK(!append_char.empty());
1153 }
1154 break;
1155 }
1156
1157 case JA_COMMA: {
1158 // "、" should be "," if the previous character is alphanumerics.
1159 // character are both alphanumerics.
1160 // Previous char should exist and be a number.
1161 const bool lhs_check =
1162 (i > 0 && IsAlphabetOrNumber(char_scripts[i - 1]));
1163 // JA_COMMA should be transformed to COMMA.
1164 if (lhs_check) {
1165 CharacterFormManager::GetCharacterFormManager()->
1166 ConvertPreeditString(",", &append_char);
1167 DCHECK(!append_char.empty());
1168 }
1169 break;
1170 }
1171
1172 case JA_PERIOD: {
1173 // "。" should be "." if the previous character and the next
1174 // character are both alphanumerics.
1175 // Previous char should exist and be a number.
1176 const bool lhs_check =
1177 (i > 0 && IsAlphabetOrNumber(char_scripts[i - 1]));
1178 // JA_PRERIOD should be transformed to PRERIOD.
1179 if (lhs_check) {
1180 CharacterFormManager::GetCharacterFormManager()->
1181 ConvertPreeditString(".", &append_char);
1182 DCHECK(!append_char.empty());
1183 }
1184 break;
1185 }
1186
1187 default: {
1188 // Do nothing.
1189 }
1190 }
1191
1192 if (append_char.empty()) {
1193 // Append one character.
1194 Util::UCS4ToUTF8Append(iter.Get(), &transformed_query);
1195 } else {
1196 // Append the transformed character.
1197 transformed_query.append(append_char);
1198 transformed = true;
1199 }
1200 }
1201 if (!transformed) {
1202 return false;
1203 }
1204
1205 // It is possible that the query's size in byte differs from the
1206 // orig_query's size in byte.
1207 DCHECK_EQ(Util::CharsLen(*query), Util::CharsLen(transformed_query));
1208 *query = transformed_query;
1209 return true;
1210 }
1211
SetNewInput()1212 void Composer::SetNewInput() {
1213 is_new_input_ = true;
1214 }
1215
CopyFrom(const Composer & src)1216 void Composer::CopyFrom(const Composer &src) {
1217 Reset();
1218
1219 input_mode_ = src.input_mode_;
1220 comeback_input_mode_ = src.comeback_input_mode_;
1221 output_mode_ = src.output_mode_;
1222 input_field_type_ = src.input_field_type_;
1223
1224 position_ = src.position_;
1225 is_new_input_ = src.is_new_input_;
1226 shifted_sequence_count_ = src.shifted_sequence_count_;
1227 source_text_.assign(src.source_text_);
1228 max_length_ = src.max_length_;
1229
1230 composition_.reset(src.composition_->Clone());
1231 request_ = src.request_;
1232 config_ = src.config_;
1233
1234 typing_corrector_.CopyFrom(src.typing_corrector_);
1235 }
1236
is_new_input() const1237 bool Composer::is_new_input() const {
1238 return is_new_input_;
1239 }
1240
shifted_sequence_count() const1241 size_t Composer::shifted_sequence_count() const {
1242 return shifted_sequence_count_;
1243 }
1244
source_text() const1245 const string &Composer::source_text() const {
1246 return source_text_;
1247 }
mutable_source_text()1248 string *Composer::mutable_source_text() {
1249 return &source_text_;
1250 }
set_source_text(const string & source_text)1251 void Composer::set_source_text(const string &source_text) {
1252 source_text_.assign(source_text);
1253 }
1254
max_length() const1255 size_t Composer::max_length() const {
1256 return max_length_;
1257 }
set_max_length(size_t length)1258 void Composer::set_max_length(size_t length) {
1259 max_length_ = length;
1260 }
1261
SetInputFieldType(commands::Context::InputFieldType type)1262 void Composer::SetInputFieldType(commands::Context::InputFieldType type) {
1263 input_field_type_ = type;
1264 }
1265
GetInputFieldType() const1266 commands::Context::InputFieldType Composer::GetInputFieldType() const {
1267 return input_field_type_;
1268 }
1269 } // namespace composer
1270 } // namespace mozc
1271