1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // Interactive composer from a Roman string to a Hiragana string
31 
32 #include "composer/composer.h"
33 
34 #include "base/flags.h"
35 #include "base/logging.h"
36 #include "base/util.h"
37 #include "composer/internal/composition.h"
38 #include "composer/internal/composition_input.h"
39 #include "composer/internal/mode_switching_handler.h"
40 #include "composer/internal/transliterators.h"
41 #include "composer/internal/typing_corrector.h"
42 #include "composer/key_event_util.h"
43 #include "composer/table.h"
44 #include "composer/type_corrected_query.h"
45 #include "config/character_form_manager.h"
46 #include "config/config_handler.h"
47 #include "protocol/commands.pb.h"
48 #include "protocol/config.pb.h"
49 
// Use flags instead of constant for performance evaluation.
// Both values are handed to the typing corrector when a Composer is
// constructed.
DEFINE_uint64(max_typing_correction_query_candidates, 40,
              "Maximum # of typing correction query temporary candidates.");
DEFINE_uint64(max_typing_correction_query_results, 8,
              "Maximum # of typing correction query results.");
// Only declared here (presumably defined in another translation unit).  It is
// OR-ed with the config setting when deciding whether typing correction is
// applied to a key event.
DECLARE_bool(enable_typing_correction);
56 
57 namespace mozc {
58 namespace composer {
59 
60 using ::mozc::config::CharacterFormManager;
61 
62 namespace {
63 
GetTransliterator(transliteration::TransliterationType comp_mode)64 const Transliterators::Transliterator GetTransliterator(
65     transliteration::TransliterationType comp_mode) {
66   switch (comp_mode) {
67     case transliteration::HALF_ASCII:
68     case transliteration::HALF_ASCII_UPPER:
69     case transliteration::HALF_ASCII_LOWER:
70     case transliteration::HALF_ASCII_CAPITALIZED:
71       return Transliterators::HALF_ASCII;
72 
73     case transliteration::FULL_ASCII:
74     case transliteration::FULL_ASCII_UPPER:
75     case transliteration::FULL_ASCII_LOWER:
76     case transliteration::FULL_ASCII_CAPITALIZED:
77       return Transliterators::FULL_ASCII;
78 
79     case transliteration::HALF_KATAKANA:
80       return Transliterators::HALF_KATAKANA;
81 
82     case transliteration::FULL_KATAKANA:
83       return Transliterators::FULL_KATAKANA;
84 
85     case transliteration::HIRAGANA:
86       return Transliterators::HIRAGANA;
87 
88     default:
89       LOG(ERROR) << "Unknown TransliterationType: " << comp_mode;
90       return Transliterators::CONVERSION_STRING;
91   }
92 }
93 
GetTransliterationType(Transliterators::Transliterator transliterator,const transliteration::TransliterationType default_type)94 transliteration::TransliterationType GetTransliterationType(
95     Transliterators::Transliterator transliterator,
96     const transliteration::TransliterationType default_type) {
97   if (transliterator == Transliterators::HIRAGANA) {
98     return transliteration::HIRAGANA;
99   }
100   if (transliterator == Transliterators::HALF_ASCII) {
101     return transliteration::HALF_ASCII;
102   }
103   if (transliterator == Transliterators::FULL_ASCII) {
104     return transliteration::FULL_ASCII;
105   }
106   if (transliterator == Transliterators::FULL_KATAKANA) {
107     return transliteration::FULL_KATAKANA;
108   }
109   if (transliterator == Transliterators::HALF_KATAKANA) {
110     return transliteration::HALF_KATAKANA;
111   }
112   return default_type;
113 }
114 
Transliterate(const transliteration::TransliterationType mode,const string & input,string * output)115 void Transliterate(const transliteration::TransliterationType mode,
116                    const string &input,
117                    string *output) {
118   // When the mode is HALF_KATAKANA, Full width ASCII is also
119   // transformed.
120   if (mode == transliteration::HALF_KATAKANA) {
121     string tmp_input;
122     Util::HiraganaToKatakana(input, &tmp_input);
123     Util::FullWidthToHalfWidth(tmp_input, output);
124     return;
125   }
126 
127   switch (mode) {
128     case transliteration::HALF_ASCII:
129       Util::FullWidthAsciiToHalfWidthAscii(input, output);
130       break;
131     case transliteration::HALF_ASCII_UPPER:
132       Util::FullWidthAsciiToHalfWidthAscii(input, output);
133       Util::UpperString(output);
134       break;
135     case transliteration::HALF_ASCII_LOWER:
136       Util::FullWidthAsciiToHalfWidthAscii(input, output);
137       Util::LowerString(output);
138       break;
139     case transliteration::HALF_ASCII_CAPITALIZED:
140       Util::FullWidthAsciiToHalfWidthAscii(input, output);
141       Util::CapitalizeString(output);
142       break;
143 
144     case transliteration::FULL_ASCII:
145       Util::HalfWidthAsciiToFullWidthAscii(input, output);
146       break;
147     case transliteration::FULL_ASCII_UPPER:
148       Util::HalfWidthAsciiToFullWidthAscii(input, output);
149       Util::UpperString(output);
150       break;
151     case transliteration::FULL_ASCII_LOWER:
152       Util::HalfWidthAsciiToFullWidthAscii(input, output);
153       Util::LowerString(output);
154       break;
155     case transliteration::FULL_ASCII_CAPITALIZED:
156       Util::HalfWidthAsciiToFullWidthAscii(input, output);
157       Util::CapitalizeString(output);
158       break;
159 
160     case transliteration::FULL_KATAKANA:
161       Util::HiraganaToKatakana(input, output);
162       break;
163     case transliteration::HIRAGANA:
164       *output = input;
165       break;
166     default:
167       LOG(ERROR) << "Unknown TransliterationType: " << mode;
168       *output = input;
169       break;
170   }
171 }
172 
GetTransliterationTypeFromCompositionMode(const commands::CompositionMode mode)173 transliteration::TransliterationType GetTransliterationTypeFromCompositionMode(
174     const commands::CompositionMode mode) {
175   switch (mode) {
176     case commands::HIRAGANA:
177       return transliteration::HIRAGANA;
178     case commands::FULL_KATAKANA:
179       return transliteration::FULL_KATAKANA;
180     case commands::HALF_ASCII:
181       return transliteration::HALF_ASCII;
182     case commands::FULL_ASCII:
183       return transliteration::FULL_ASCII;
184     case commands::HALF_KATAKANA:
185       return transliteration::HALF_KATAKANA;
186     default:
187       // commands::DIRECT or invalid mode.
188       LOG(ERROR) << "Invalid CompositionMode: " << mode;
189       return transliteration::HIRAGANA;
190   }
191 }
192 
// Initial value of Composer::max_length_, set in the constructor.
const size_t kMaxPreeditLength = 256;
194 
195 }  // namespace
196 
// Builds a composer in HIRAGANA input/output mode with an empty composition.
// |table| is passed to both the new Composition and the typing corrector;
// |request| and |config| are stored as given.  The typing-corrector limits
// come from the command-line flags defined at the top of this file.
Composer::Composer(const Table *table,
                   const commands::Request *request,
                   const config::Config *config)
    : position_(0),
      is_new_input_(true),
      input_mode_(transliteration::HIRAGANA),
      output_mode_(transliteration::HIRAGANA),
      comeback_input_mode_(transliteration::HIRAGANA),
      input_field_type_(commands::Context::NORMAL),
      shifted_sequence_count_(0),
      composition_(new Composition(table)),
      typing_corrector_(table,
                        FLAGS_max_typing_correction_query_candidates,
                        FLAGS_max_typing_correction_query_results),
      max_length_(kMaxPreeditLength),
      request_(request),
      config_(config) {
  // Propagate the initial input mode to the composition as well.
  SetInputMode(transliteration::HIRAGANA);
  typing_corrector_.SetConfig(config);
  Reset();
}
218 
// Nothing to release explicitly here.
Composer::~Composer() {}
220 
Reset()221 void Composer::Reset() {
222   EditErase();
223   ResetInputMode();
224   SetOutputMode(transliteration::HIRAGANA);
225   source_text_.assign("");
226   typing_corrector_.Reset();
227 }
228 
// Switches the input mode back to the comeback input mode.
void Composer::ResetInputMode() {
  SetInputMode(comeback_input_mode_);
}
232 
// Intentionally a no-op for now.
void Composer::ReloadConfig() {
  // Do nothing at this moment.
}
236 
Empty() const237 bool Composer::Empty() const {
238   return (GetLength() == 0);
239 }
240 
// Replaces the table used by both the composition and the typing corrector.
void Composer::SetTable(const Table *table) {
  composition_->SetTable(table);

  typing_corrector_.SetTable(table);
}
246 
// Stores |request|; it is consulted later (e.g. by UpdateInputMode()).
void Composer::SetRequest(const commands::Request *request) {
  request_ = request;
}
250 
// Stores |config| and forwards it to the typing corrector.
void Composer::SetConfig(const config::Config *config) {
  config_ = config;
  typing_corrector_.SetConfig(config);
}
255 
SetInputMode(transliteration::TransliterationType mode)256 void Composer::SetInputMode(transliteration::TransliterationType mode) {
257   comeback_input_mode_ = mode;
258   input_mode_ = mode;
259   shifted_sequence_count_ = 0;
260   is_new_input_ = true;
261   composition_->SetInputMode(GetTransliterator(mode));
262 }
263 
SetTemporaryInputMode(transliteration::TransliterationType mode)264 void Composer::SetTemporaryInputMode(
265     transliteration::TransliterationType mode) {
266   // Set comeback_input_mode_ to revert back the current input mode.
267   comeback_input_mode_ = input_mode_;
268   input_mode_ = mode;
269   shifted_sequence_count_ = 0;
270   is_new_input_ = true;
271   composition_->SetInputMode(GetTransliterator(mode));
272 }
273 
UpdateInputMode()274 void Composer::UpdateInputMode() {
275   if (position_ != 0 &&
276       request_->update_input_mode_from_surrounding_text()) {
277     const Transliterators::Transliterator current_t12r =
278         composition_->GetTransliterator(position_);
279     if (position_ == composition_->GetLength() ||
280         current_t12r == composition_->GetTransliterator(position_ + 1)) {
281       // - The cursor is at the tail of composition.
282       //   Use last character's transliterator as the input mode.
283       // - If the current cursor is between the same character type like
284       //   "A|B" and "あ|い", the input mode follows the character type.
285       input_mode_ = GetTransliterationType(current_t12r, comeback_input_mode_);
286       shifted_sequence_count_ = 0;
287       is_new_input_ = true;
288       composition_->SetInputMode(GetTransliterator(input_mode_));
289       return;
290     }
291   }
292 
293   // Set the default input mode.
294   SetInputMode(comeback_input_mode_);
295 }
296 
// Returns the current input mode.
transliteration::TransliterationType Composer::GetInputMode() const {
  return input_mode_;
}
300 
// Returns the mode that a temporary input mode reverts to.
transliteration::TransliterationType Composer::GetComebackInputMode() const {
  return comeback_input_mode_;
}
304 
ToggleInputMode()305 void Composer::ToggleInputMode() {
306   if (input_mode_ == transliteration::HIRAGANA) {
307     // TODO(komatsu): Refer user's perference.
308     SetInputMode(transliteration::HALF_ASCII);
309   } else {
310     SetInputMode(transliteration::HIRAGANA);
311   }
312 }
313 
// Returns the output mode last set through SetOutputMode().
transliteration::TransliterationType Composer::GetOutputMode() const {
  return output_mode_;
}
317 
SetOutputMode(transliteration::TransliterationType mode)318 void Composer::SetOutputMode(transliteration::TransliterationType mode) {
319   output_mode_ = mode;
320   composition_->SetTransliterator(
321       0, composition_->GetLength(), GetTransliterator(mode));
322   position_ = composition_->GetLength();
323 }
324 
ApplyTemporaryInputMode(const string & input,bool caps_locked)325 void Composer::ApplyTemporaryInputMode(const string &input, bool caps_locked) {
326   DCHECK(!input.empty());
327 
328   const config::Config::ShiftKeyModeSwitch switch_mode =
329       config_->shift_key_mode_switch();
330 
331   // When input is not an ASCII code, reset the input mode to the one before
332   // temporary input mode.
333   if (Util::OneCharLen(input.c_str()) != 1) {
334     // Call SetInputMode() only when the current input mode is temporary, which
335     // is detected by the if-condition below.  Without this check,
336     // SetInputMode() is called always for multi-byte charactesrs.  This causes
337     // a bug that multi-byte characters is inserted to a new chunk because
338     // |is_new_input_| is set to true in SetInputMode(); see b/31444698.
339     if (comeback_input_mode_ != input_mode_) {
340       SetInputMode(comeback_input_mode_);
341     }
342     return;
343   }
344 
345   // Input is an ASCII code.
346   // we use first character to determin temporary input mode.
347   const char key = input[0];
348   const bool alpha_with_shift =
349       (!caps_locked && ('A' <= key && key <= 'Z')) ||
350       (caps_locked && ('a' <= key && key <= 'z'));
351   const bool alpha_without_shift =
352       (caps_locked && ('A' <= key && key <= 'Z')) ||
353       (!caps_locked && ('a' <= key && key <= 'z'));
354 
355   if (alpha_with_shift) {
356     if (switch_mode == config::Config::ASCII_INPUT_MODE) {
357       if (input_mode_ == transliteration::HALF_ASCII ||
358           input_mode_ == transliteration::FULL_ASCII) {
359         // Do nothing.
360       } else {
361         SetTemporaryInputMode(transliteration::HALF_ASCII);
362       }
363     } else if (switch_mode == config::Config::KATAKANA_INPUT_MODE) {
364       if (input_mode_ == transliteration::HIRAGANA) {
365         SetTemporaryInputMode(transliteration::FULL_KATAKANA);
366       } else {
367         // Do nothing.
368       }
369     }
370     ++shifted_sequence_count_;
371   } else if (alpha_without_shift) {
372     // When shifted input continues, the next lower input is the end
373     // of temporary half-width Ascii input.
374     if (shifted_sequence_count_ > 1 &&
375         switch_mode == config::Config::ASCII_INPUT_MODE) {
376       SetInputMode(comeback_input_mode_);
377     }
378     if (switch_mode == config::Config::KATAKANA_INPUT_MODE) {
379       SetInputMode(comeback_input_mode_);
380     }
381     shifted_sequence_count_ = 0;
382   } else {
383     // If the key is not an alphabet, reset shifted_sequence_count_
384     // because "Continuous shifted input" feature should be reset
385     // when the input meets non-alphabet character.
386     shifted_sequence_count_ = 0;
387   }
388 }
389 
InsertCharacterInternal(const string & key)390 bool Composer::InsertCharacterInternal(const string &key) {
391   if (!EnableInsert()) {
392     return false;
393   }
394   CompositionInput input;
395   input.set_raw(key);
396   input.set_is_new_input(is_new_input_);
397   position_ = composition_->InsertInput(position_, input);
398   is_new_input_ = false;
399   return true;
400 }
401 
InsertCharacter(const string & key)402 void Composer::InsertCharacter(const string &key) {
403   if (!InsertCharacterInternal(key)) {
404     return;
405   }
406   const ProbableKeyEvents empty_events;
407   typing_corrector_.InsertCharacter(key, empty_events);
408 }
409 
InsertCharacterForProbableKeyEvents(const string & key,const ProbableKeyEvents & probable_key_events)410 void Composer::InsertCharacterForProbableKeyEvents(
411     const string &key,
412     const ProbableKeyEvents &probable_key_events) {
413   if (!InsertCharacterInternal(key)) {
414     return;
415   }
416   typing_corrector_.InsertCharacter(key, probable_key_events);
417 }
418 
InsertCommandCharacter(const InternalCommand internal_command)419 void Composer::InsertCommandCharacter(const InternalCommand internal_command) {
420   switch (internal_command) {
421     case REWIND:
422       InsertCharacter(Table::ParseSpecialKey("{<}"));
423       break;
424     default:
425       LOG(ERROR) << "Unkown command : " << internal_command;
426   }
427 }
428 
InsertCharacterPreedit(const string & input)429 void Composer::InsertCharacterPreedit(const string &input) {
430   size_t begin = 0;
431   const size_t end = input.size();
432   while (begin < end) {
433     const size_t mblen = Util::OneCharLen(input.c_str() + begin);
434     const string character(input, begin, mblen);
435     if (!InsertCharacterKeyAndPreedit(character, character)) {
436       return;
437     }
438     begin += mblen;
439   }
440   DCHECK_EQ(begin, end);
441 }
442 
// Note: This method is only for test.
// Switches temporarily to HALF_ASCII and inserts |input| character by
// character as preedit text.
void Composer::SetPreeditTextForTestOnly(const string &input) {
  SetTemporaryInputMode(transliteration::HALF_ASCII);
  InsertCharacterPreedit(input);
}
448 
// Inserts |input| as both key and preedit in one piece (it is not split
// into characters here), passing |probable_key_events| along.
void Composer::InsertCharacterPreeditForProbableKeyEvents(
    const string &input,
    const ProbableKeyEvents &probable_key_events) {
  InsertCharacterKeyAndPreeditForProbableKeyEvents(input,
                                                   input,
                                                   probable_key_events);
}
456 
InsertCharacterKeyAndPreeditInternal(const string & key,const string & preedit)457 bool Composer::InsertCharacterKeyAndPreeditInternal(const string &key,
458                                                     const string &preedit) {
459   if (!EnableInsert()) {
460     return false;
461   }
462   CompositionInput input;
463   input.set_raw(key);
464   input.set_conversion(preedit);
465   input.set_is_new_input(is_new_input_);
466   position_ = composition_->InsertInput(position_, input);
467   is_new_input_ = false;
468   return true;
469 }
470 
InsertCharacterKeyAndPreedit(const string & key,const string & preedit)471 bool Composer::InsertCharacterKeyAndPreedit(const string &key,
472                                             const string &preedit) {
473   if (!InsertCharacterKeyAndPreeditInternal(key, preedit)) {
474     return false;
475   }
476   const ProbableKeyEvents empty_events;
477   typing_corrector_.InsertCharacter(key, empty_events);
478   return true;
479 }
480 
InsertCharacterKeyAndPreeditForProbableKeyEvents(const string & key,const string & preedit,const ProbableKeyEvents & probable_key_events)481 void Composer::InsertCharacterKeyAndPreeditForProbableKeyEvents(
482     const string &key,
483     const string &preedit,
484     const ProbableKeyEvents &probable_key_events) {
485   if (!InsertCharacterKeyAndPreeditInternal(key, preedit)) {
486     return;
487   }
488   typing_corrector_.InsertCharacter(key, probable_key_events);
489 }
490 
InsertCharacterKeyEvent(const commands::KeyEvent & key)491 bool Composer::InsertCharacterKeyEvent(const commands::KeyEvent &key) {
492   if (!EnableInsert()) {
493     return false;
494   }
495   if (key.has_mode()) {
496     const transliteration::TransliterationType new_input_mode =
497         GetTransliterationTypeFromCompositionMode(key.mode());
498     if (new_input_mode != input_mode_) {
499       // Only when the new input mode is different from the current
500       // input mode, SetInputMode is called.  Otherwise the value of
501       // comeback_input_mode_ is lost.
502       SetInputMode(new_input_mode);
503     }
504   }
505 
506   // If only SHIFT is pressed, this is used to revert back to the
507   // previous input mode.
508   if (!key.has_key_code()) {
509     for (size_t i = 0; key.modifier_keys_size(); ++i) {
510       if (key.modifier_keys(i) == commands::KeyEvent::SHIFT) {
511         // TODO(komatsu): Enable to customize the behavior.
512         SetInputMode(comeback_input_mode_);
513         return true;
514       }
515     }
516   }
517 
518   // Fill input representing user's raw input.
519   string input;
520   if (key.has_key_code()) {
521     Util::UCS4ToUTF8(key.key_code(), &input);
522   } else if (key.has_key_string()) {
523     input = key.key_string();
524   } else {
525     LOG(WARNING) << "input is empty";
526     return false;
527   }
528 
529   bool is_typing_correction_enabled = config_->use_typing_correction() ||
530                                       FLAGS_enable_typing_correction;
531   if (key.has_key_string()) {
532     if (key.input_style() == commands::KeyEvent::AS_IS ||
533         key.input_style() == commands::KeyEvent::DIRECT_INPUT) {
534       composition_->SetInputMode(Transliterators::CONVERSION_STRING);
535       if (is_typing_correction_enabled) {
536         InsertCharacterKeyAndPreeditForProbableKeyEvents(
537             input,
538             key.key_string(),
539             key.probable_key_event());
540       } else {
541         InsertCharacterKeyAndPreedit(input, key.key_string());
542       }
543       SetInputMode(comeback_input_mode_);
544     } else {
545       // Kana input usually has key_string.  Note that, the existence of
546       // key_string never determine if the input mode is Kana or Romaji.
547       if (is_typing_correction_enabled) {
548         InsertCharacterKeyAndPreeditForProbableKeyEvents(
549             input,
550             key.key_string(),
551             key.probable_key_event());
552       } else {
553         InsertCharacterKeyAndPreedit(input, key.key_string());
554       }
555     }
556   } else {
557     // Romaji input usually does not has key_string.  Note that, the
558     // existence of key_string never determines if the input mode is
559     // Kana or Romaji.
560     const uint32 modifiers = KeyEventUtil::GetModifiers(key);
561     ApplyTemporaryInputMode(input, KeyEventUtil::HasCaps(modifiers));
562     if (is_typing_correction_enabled) {
563       InsertCharacterForProbableKeyEvents(input, key.probable_key_event());
564     } else {
565       InsertCharacter(input);
566     }
567   }
568 
569   if (comeback_input_mode_ == input_mode_) {
570     AutoSwitchMode();
571   }
572   return true;
573 }
574 
DeleteAt(size_t pos)575 void Composer::DeleteAt(size_t pos) {
576   composition_->DeleteAt(pos);
577   // Adjust cursor position for composition mode.
578   if (position_ > pos) {
579     position_--;
580   }
581   // We do not call UpdateInputMode() here.
582   // 1. In composition mode, UpdateInputMode finalizes pending chunk.
583   // 2. In conversion mode, InputMode needs not to change.
584   typing_corrector_.Invalidate();
585 }
586 
// Deletes the character at the cursor, takes the new cursor position from
// the composition, refreshes the input mode, and invalidates the typing
// corrector.
void Composer::Delete() {
  position_ = composition_->DeleteAt(position_);
  UpdateInputMode();

  typing_corrector_.Invalidate();
}
593 
DeleteRange(size_t pos,size_t length)594 void Composer::DeleteRange(size_t pos, size_t length) {
595   for (int i = 0; i < length && pos < composition_->GetLength(); ++i) {
596     DeleteAt(pos);
597   }
598   typing_corrector_.Invalidate();
599 }
600 
// Erases the whole composition, moves the cursor to the head, restores the
// comeback input mode, and resets the typing corrector.
void Composer::EditErase() {
  composition_->Erase();
  position_ = 0;
  SetInputMode(comeback_input_mode_);
  typing_corrector_.Reset();
}
607 
// Deletes the character left of the cursor.  Does nothing when the cursor is
// already at the head of the composition.
void Composer::Backspace() {
  if (position_ == 0) {
    return;
  }

  // From the viewpoint of updating the input mode, backspace is a special
  // case because the new input mode is based on both the new current
  // character and the *character to be deleted*.

  // At first, move to left.
  // Now the cursor is between 'new current character'
  // and 'character to be deleted'.
  --position_;

  // Update input mode based on both 'new current character' and
  // 'character to be deleted'.  This must happen before the deletion below.
  UpdateInputMode();

  // Delete 'character to be deleted'
  position_ = composition_->DeleteAt(position_);

  typing_corrector_.Invalidate();
}
631 
MoveCursorLeft()632 void Composer::MoveCursorLeft() {
633   if (position_ > 0) {
634     --position_;
635   }
636   UpdateInputMode();
637 
638   typing_corrector_.Invalidate();
639 }
640 
MoveCursorRight()641 void Composer::MoveCursorRight() {
642   if (position_ < composition_->GetLength()) {
643     ++position_;
644   }
645   UpdateInputMode();
646 
647   typing_corrector_.Invalidate();
648 }
649 
// Moves the cursor to the head of the composition and restores the comeback
// input mode (the surrounding text is not consulted here).
void Composer::MoveCursorToBeginning() {
  position_ = 0;
  SetInputMode(comeback_input_mode_);

  typing_corrector_.Invalidate();
}
656 
// Moves the cursor to the tail of the composition and restores the comeback
// input mode.
void Composer::MoveCursorToEnd() {
  position_ = composition_->GetLength();
  // The behavior of MoveCursorToEnd and MoveCursorRight differs:
  // MoveCursorToEnd always restores the default (comeback) input mode.
  SetInputMode(comeback_input_mode_);

  typing_corrector_.Invalidate();
}
665 
MoveCursorTo(uint32 new_position)666 void Composer::MoveCursorTo(uint32 new_position) {
667   if (new_position <= composition_->GetLength()) {
668     position_ = new_position;
669     UpdateInputMode();
670   }
671   typing_corrector_.Invalidate();
672 }
673 
GetPreedit(string * left,string * focused,string * right) const674 void Composer::GetPreedit(string *left, string *focused, string *right) const {
675   DCHECK(left);
676   DCHECK(focused);
677   DCHECK(right);
678   composition_->GetPreedit(position_, left, focused, right);
679 
680   // TODO(komatsu): This function can be obsolete.
681   string preedit = *left + *focused + *right;
682   if (TransformCharactersForNumbers(&preedit)) {
683     const size_t left_size = Util::CharsLen(*left);
684     const size_t focused_size = Util::CharsLen(*focused);
685     *left = Util::SubString(preedit, 0, left_size);
686     *focused = Util::SubString(preedit, left_size, focused_size);
687     *right = Util::SubString(preedit, left_size + focused_size, string::npos);
688   }
689 }
690 
GetStringForPreedit(string * output) const691 void Composer::GetStringForPreedit(string *output) const {
692   composition_->GetString(output);
693   TransformCharactersForNumbers(output);
694   // If the input field type needs half ascii characters,
695   // perform conversion here.
696   // Note that this purpose is also achieved by the client by setting
697   // input type as "half ascii".
698   // But the architecture of Mozc expects the server to handle such character
699   // width management.
700   // In addition, we also think about PASSWORD field type.
701   // we can prepare NUMBER and TEL keyboard layout, which has
702   // "half ascii" composition mode. This works.
703   // But we will not have PASSWORD only keyboard. We will share the basic
704   // keyboard on usual and password mode
705   // so such hacky code cannot be applicable.
706   // TODO(matsuzakit): Move this logic to another appopriate location.
707   // SetOutputMode() is not currently applicable but ideally it is
708   // better location than here.
709   const commands::Context::InputFieldType field_type =
710       GetInputFieldType();
711   if (field_type == commands::Context::NUMBER ||
712       field_type == commands::Context::PASSWORD ||
713       field_type == commands::Context::TEL) {
714     const string tmp = *output;
715     Util::FullWidthAsciiToHalfWidthAscii(tmp, output);
716   }
717 }
718 
// Currently yields exactly the same string as GetStringForPreedit().
void Composer::GetStringForSubmission(string *output) const {
  // TODO(komatsu): We should make sure if we can integrate this
  // function to GetStringForPreedit after a while.
  GetStringForPreedit(output);
}
724 
GetQueryForConversion(string * output) const725 void Composer::GetQueryForConversion(string *output) const {
726   string base_output;
727   composition_->GetStringWithTrimMode(FIX, &base_output);
728   TransformCharactersForNumbers(&base_output);
729   Util::FullWidthAsciiToHalfWidthAscii(base_output, output);
730 }
731 
732 namespace {
733 // Determine which query is suitable for a prediction query and return
734 // its pointer.
735 // Exmaple:
736 // = Romanji Input =
737 // ("もz", "も") -> "も"  // a part of romanji should be trimed.
738 // ("もzky", "もz") -> "もzky"  // a user might intentionally typed them.
739 // ("z", "") -> "z"      // ditto.
740 // = Kana Input =
741 // ("か", "") -> "か"  // a part of kana (it can be "が") should not be trimed.
GetBaseQueryForPrediction(string * asis_query,string * trimed_query)742 string *GetBaseQueryForPrediction(string *asis_query,
743                                   string *trimed_query) {
744   // If the sizes are equal, there is no matter.
745   if (asis_query->size() == trimed_query->size()) {
746     return asis_query;
747   }
748 
749   // Get the different part between asis_query and trimed_query.  For
750   // example, "ky" is the different part where asis_query is "もzky"
751   // and trimed_query is "もz".
752   DCHECK_GT(asis_query->size(), trimed_query->size());
753   const string asis_tail(*asis_query, trimed_query->size());
754   DCHECK(!asis_tail.empty());
755 
756   // If the different part is not an alphabet, asis_query is used.
757   // This check is mainly used for Kana Input.
758   const Util::ScriptType asis_tail_type = Util::GetScriptType(asis_tail);
759   if (asis_tail_type != Util::ALPHABET) {
760     return asis_query;
761   }
762 
763   // If the trimed_query is empty and asis_query is alphabet, an asis
764   // string is used because the query may be typed intentionally.
765   if (trimed_query->empty()) {  // alphabet???
766     const Util::ScriptType asis_type = Util::GetScriptType(*asis_query);
767     if (asis_type == Util::ALPHABET) {
768       return asis_query;
769     } else {
770       return trimed_query;
771     }
772   }
773 
774   // Now there are two patterns: ("もzk", "もz") and ("もずk", "もず").
775   // We assume "もzk" is user's intentional query, but "もずk" is not.
776   // So our results are:
777   // ("もzk", "もz") => "もzk" and ("もずk", "もず") => "もず".
778   const string trimed_tail = Util::SubString(*trimed_query,
779                                              Util::CharsLen(*trimed_query) - 1,
780                                              string::npos);
781   DCHECK(!trimed_tail.empty());
782   const Util::ScriptType trimed_tail_type = Util::GetScriptType(trimed_tail);
783   if (trimed_tail_type == Util::ALPHABET) {
784     return asis_query;
785   } else {
786     return trimed_query;
787   }
788 }
789 }  // namespace
790 
GetQueryForPrediction(string * output) const791 void Composer::GetQueryForPrediction(string *output) const {
792   string asis_query;
793   composition_->GetStringWithTrimMode(ASIS, &asis_query);
794 
795   switch (input_mode_) {
796     case transliteration::HALF_ASCII: {
797       output->assign(asis_query);
798       return;
799     }
800     case transliteration::FULL_ASCII: {
801       Util::FullWidthAsciiToHalfWidthAscii(asis_query, output);
802       return;
803     }
804     default: {}
805   }
806 
807   string trimed_query;
808   composition_->GetStringWithTrimMode(TRIM, &trimed_query);
809 
810   // NOTE(komatsu): This is a hack to go around the difference
811   // expectation between Romanji-Input and Kana-Input.  "かn" in
812   // Romaji-Input should be "か" while "あか" in Kana-Input should be
813   // "あか", although "かn" and "あか" have the same properties.  An
814   // ideal solution is to expand the ambguity and pass all of them to
815   // the converter. (e.g. "かn" -> ["かな",..."かの", "かん", ...] /
816   // "あか" -> ["あか", "あが"])
817   string *base_query = GetBaseQueryForPrediction(&asis_query, &trimed_query);
818   TransformCharactersForNumbers(base_query);
819   Util::FullWidthAsciiToHalfWidthAscii(*base_query, output);
820 }
821 
GetQueriesForPrediction(string * base,std::set<string> * expanded) const822 void Composer::GetQueriesForPrediction(
823     string *base, std::set<string> *expanded) const {
824   DCHECK(base);
825   DCHECK(expanded);
826   DCHECK(composition_.get());
827   // In case of the Latin input modes, we don't perform expansion.
828   switch (input_mode_) {
829     case transliteration::HALF_ASCII:
830     case transliteration::FULL_ASCII: {
831       GetQueryForPrediction(base);
832       expanded->clear();
833       return;
834     }
835     default: {}
836   }
837   composition_->GetExpandedStrings(base, expanded);
838 }
839 
GetTypeCorrectedQueriesForPrediction(std::vector<TypeCorrectedQuery> * queries) const840 void Composer::GetTypeCorrectedQueriesForPrediction(
841     std::vector<TypeCorrectedQuery> *queries) const {
842   typing_corrector_.GetQueriesForPrediction(queries);
843 }
844 
GetLength() const845 size_t Composer::GetLength() const {
846   return composition_->GetLength();
847 }
848 
GetCursor() const849 size_t Composer::GetCursor() const {
850   return position_;
851 }
852 
GetTransliteratedText(Transliterators::Transliterator t12r,const size_t position,const size_t size,string * result) const853 void Composer::GetTransliteratedText(
854     Transliterators::Transliterator t12r,
855     const size_t position,
856     const size_t size,
857     string *result) const {
858   DCHECK(result);
859   string full_base;
860   composition_->GetStringWithTransliterator(t12r, &full_base);
861 
862   const size_t t13n_start =
863     composition_->ConvertPosition(position, Transliterators::LOCAL, t12r);
864   const size_t t13n_end =
865     composition_->ConvertPosition(position + size,
866                                   Transliterators::LOCAL, t12r);
867   const size_t t13n_size = t13n_end - t13n_start;
868 
869   Util::SubString(full_base, t13n_start, t13n_size, result);
870 }
871 
GetRawString(string * raw_string) const872 void Composer::GetRawString(string *raw_string) const {
873   GetRawSubString(0, GetLength(), raw_string);
874 }
875 
GetRawSubString(const size_t position,const size_t size,string * raw_sub_string) const876 void Composer::GetRawSubString(
877     const size_t position,
878     const size_t size,
879     string *raw_sub_string) const {
880   DCHECK(raw_sub_string);
881   GetTransliteratedText(Transliterators::RAW_STRING, position, size,
882                         raw_sub_string);
883 }
884 
GetTransliterations(transliteration::Transliterations * t13ns) const885 void Composer::GetTransliterations(
886     transliteration::Transliterations *t13ns) const {
887   GetSubTransliterations(0, GetLength(), t13ns);
888 }
889 
GetSubTransliteration(const transliteration::TransliterationType type,const size_t position,const size_t size,string * transliteration) const890 void Composer::GetSubTransliteration(
891     const transliteration::TransliterationType type,
892     const size_t position,
893     const size_t size,
894     string *transliteration) const {
895   const Transliterators::Transliterator t12r = GetTransliterator(type);
896   string result;
897   GetTransliteratedText(t12r, position, size, &result);
898   transliteration->clear();
899   Transliterate(type, result, transliteration);
900 }
901 
GetSubTransliterations(const size_t position,const size_t size,transliteration::Transliterations * transliterations) const902 void Composer::GetSubTransliterations(
903     const size_t position,
904     const size_t size,
905     transliteration::Transliterations *transliterations) const {
906   string t13n;
907   for (size_t i = 0; i < transliteration::NUM_T13N_TYPES; ++i) {
908     const transliteration::TransliterationType t13n_type =
909       transliteration::TransliterationTypeArray[i];
910     GetSubTransliteration(t13n_type, position, size, &t13n);
911     transliterations->push_back(t13n);
912   }
913 }
914 
EnableInsert() const915 bool Composer::EnableInsert() const {
916   if (GetLength() >= max_length_) {
917     // do not accept long chars to prevent DOS attack.
918     LOG(WARNING) << "The length is too long.";
919     return false;
920   }
921   return true;
922 }
923 
AutoSwitchMode()924 void Composer::AutoSwitchMode() {
925   if (!config_->use_auto_ime_turn_off()) {
926     return;
927   }
928 
929   // AutoSwitchMode is only available on Roma input
930   if (config_->preedit_method() != config::Config::ROMAN) {
931     return;
932   }
933 
934   string key;
935   // Key should be in half-width alphanumeric.
936   composition_->GetStringWithTransliterator(
937       GetTransliterator(transliteration::HALF_ASCII), &key);
938 
939   ModeSwitchingHandler::ModeSwitching display_mode =
940       ModeSwitchingHandler::NO_CHANGE;
941   ModeSwitchingHandler::ModeSwitching input_mode =
942       ModeSwitchingHandler::NO_CHANGE;
943   if (!ModeSwitchingHandler::GetModeSwitchingHandler()->GetModeSwitchingRule(
944           key, &display_mode, &input_mode)) {
945     // If the key is not a pattern of mode switch rule, the procedure
946     // stops here.
947     return;
948   }
949 
950   // |display_mode| affects the existing composition the user typed.
951   switch (display_mode) {
952     case ModeSwitchingHandler::NO_CHANGE:
953       // Do nothing.
954       break;
955     case ModeSwitchingHandler::REVERT_TO_PREVIOUS_MODE:
956       // Invalid value for display_mode
957       LOG(ERROR) << "REVERT_TO_PREVIOUS_MODE is an invalid value "
958                  << "for display_mode.";
959       break;
960     case ModeSwitchingHandler::PREFERRED_ALPHANUMERIC:
961       if (input_mode_ == transliteration::FULL_ASCII) {
962         SetOutputMode(transliteration::FULL_ASCII);
963       } else {
964         SetOutputMode(transliteration::HALF_ASCII);
965       }
966       break;
967     case ModeSwitchingHandler::HALF_ALPHANUMERIC:
968       SetOutputMode(transliteration::HALF_ASCII);
969       break;
970     case ModeSwitchingHandler::FULL_ALPHANUMERIC:
971       SetOutputMode(transliteration::FULL_ASCII);
972       break;
973     default:
974       LOG(ERROR) << "Unkown value: " << display_mode;
975       break;
976   }
977 
978   // |input_mode| affects the current input mode used for the user's
979   // new typing.
980   switch (input_mode) {
981     case ModeSwitchingHandler::NO_CHANGE:
982       // Do nothing.
983       break;
984     case ModeSwitchingHandler::REVERT_TO_PREVIOUS_MODE:
985       SetInputMode(comeback_input_mode_);
986       break;
987     case ModeSwitchingHandler::PREFERRED_ALPHANUMERIC:
988       if (input_mode_ != transliteration::HALF_ASCII &&
989           input_mode_ != transliteration::FULL_ASCII) {
990         SetTemporaryInputMode(transliteration::HALF_ASCII);
991       }
992       break;
993     case ModeSwitchingHandler::HALF_ALPHANUMERIC:
994       if (input_mode_ != transliteration::HALF_ASCII) {
995         SetTemporaryInputMode(transliteration::HALF_ASCII);
996       }
997       break;
998     case ModeSwitchingHandler::FULL_ALPHANUMERIC:
999       if (input_mode_ != transliteration::FULL_ASCII) {
1000         SetTemporaryInputMode(transliteration::FULL_ASCII);
1001       }
1002       break;
1003     default:
1004       LOG(ERROR) << "Unkown value: " << display_mode;
1005       break;
1006   }
1007 }
1008 
ShouldCommit() const1009 bool Composer::ShouldCommit() const {
1010   return composition_->ShouldCommit();
1011 }
1012 
ShouldCommitHead(size_t * length_to_commit) const1013 bool Composer::ShouldCommitHead(size_t *length_to_commit) const {
1014   size_t max_remaining_composition_length;
1015   switch (GetInputFieldType()) {
1016     case commands::Context::PASSWORD:
1017       max_remaining_composition_length = 1;
1018       break;
1019     case commands::Context::TEL:
1020     case commands::Context::NUMBER:
1021       max_remaining_composition_length = 0;
1022       break;
1023     default:
1024       // No need to commit. Return here.
1025       return false;
1026   }
1027   if (GetLength() > max_remaining_composition_length) {
1028     *length_to_commit = GetLength() - max_remaining_composition_length;
1029     return true;
1030   }
1031   return false;
1032 }
1033 
namespace {
// Coarse per-character classification used when rewriting Japanese
// punctuation that appears next to alphanumeric characters.
enum Script {
  ALPHABET,   // alphabet characters or symbols
  NUMBER,     // digits
  JA_HYPHEN,  // "ー"
  JA_COMMA,   // "、"
  JA_PERIOD,  // "。"
  OTHER,
};

// Returns true only for the ALPHABET and NUMBER classifications.
bool IsAlphabetOrNumber(const Script script) {
  switch (script) {
    case ALPHABET:
    case NUMBER:
      return true;
    default:
      return false;
  }
}
}  // namespace
1048 
1049 // static
TransformCharactersForNumbers(string * query)1050 bool Composer::TransformCharactersForNumbers(string *query) {
1051   if (query == NULL) {
1052     LOG(ERROR) << "query is NULL";
1053     return false;
1054   }
1055 
1056   // Create a vector of scripts of query characters to avoid
1057   // processing query string many times.
1058   const size_t chars_len = Util::CharsLen(*query);
1059   std::vector<Script> char_scripts;
1060   char_scripts.reserve(chars_len);
1061 
1062   // flags to determine whether continue to the next step.
1063   bool has_symbols = false;
1064   bool has_alphanumerics = false;
1065   for (ConstChar32Iterator iter(*query); !iter.Done(); iter.Next()) {
1066     const char32 one_char = iter.Get();
1067     switch (one_char) {
1068       case 0x30FC:  // "ー"
1069         has_symbols = true;
1070         char_scripts.push_back(JA_HYPHEN);
1071         break;
1072       case 0x3001:  // "、"
1073         has_symbols = true;
1074         char_scripts.push_back(JA_COMMA);
1075         break;
1076       case 0x3002:  // "。"
1077         has_symbols = true;
1078         char_scripts.push_back(JA_PERIOD);
1079         break;
1080       case '+':
1081       case '*':
1082       case '/':
1083       case '=':
1084       case '(':
1085       case ')':
1086       case '<':
1087       case '>':
1088       case 0xFF0B:  // "+"
1089       case 0xFF0A:  // "*"
1090       case 0xFF0F:  // "/"
1091       case 0xFF1D:  // "="
1092       case 0xFF08:  // "("
1093       case 0xFF09:  // ")"
1094       case 0xFF1C:  // "<"
1095       case 0xFF1E:  // ">"
1096         char_scripts.push_back(ALPHABET);
1097         break;
1098       default: {
1099         Util::ScriptType script_type = Util::GetScriptType(one_char);
1100         if (script_type == Util::NUMBER) {
1101           has_alphanumerics = true;
1102           char_scripts.push_back(NUMBER);
1103         } else if (script_type == Util::ALPHABET) {
1104           has_alphanumerics = true;
1105           char_scripts.push_back(ALPHABET);
1106         } else {
1107           char_scripts.push_back(OTHER);
1108         }
1109       }
1110     }
1111   }
1112 
1113   DCHECK_EQ(chars_len, char_scripts.size());
1114   if (!has_alphanumerics || !has_symbols) {
1115     VLOG(1) << "The query contains neither alphanumeric nor symbol.";
1116     return false;
1117   }
1118 
1119   string transformed_query;
1120   bool transformed = false;
1121   size_t i = 0;
1122   string append_char;
1123   for (ConstChar32Iterator iter(*query); !iter.Done(); iter.Next(), ++i) {
1124     append_char.clear();
1125     switch (char_scripts[i]) {
1126       case JA_HYPHEN: {
1127         // JA_HYPHEN(s) "ー" is/are transformed to "−" if:
1128         // (i) query has one and only one leading JA_HYPHEN followed by a
1129         //     number,
1130         // (ii) JA_HYPHEN(s) follow(s) after an alphanumeric (ex. 0-, 0----,
1131         //     etc).
1132         // Note that rule (i) implies that if query starts with more than
1133         // one JA_HYPHENs, those JA_HYPHENs are not transformed.
1134         bool check = false;
1135         if (i == 0 && chars_len > 1) {
1136           check = (char_scripts[1] == NUMBER);
1137         } else {
1138           for (size_t j = i; j > 0; --j) {
1139             if (char_scripts[j - 1] == JA_HYPHEN) {
1140               continue;
1141             }
1142             check = IsAlphabetOrNumber(char_scripts[j - 1]);
1143             break;
1144           }
1145         }
1146 
1147         // JA_HYPHEN should be transformed to MINUS.
1148         if (check) {
1149           CharacterFormManager::GetCharacterFormManager()->ConvertPreeditString(
1150               "−",  // U+2212
1151               &append_char);
1152           DCHECK(!append_char.empty());
1153         }
1154         break;
1155       }
1156 
1157       case JA_COMMA: {
1158         // "、" should be "," if the previous character is alphanumerics.
1159         // character are both alphanumerics.
1160         // Previous char should exist and be a number.
1161         const bool lhs_check =
1162             (i > 0 && IsAlphabetOrNumber(char_scripts[i - 1]));
1163         // JA_COMMA should be transformed to COMMA.
1164         if (lhs_check) {
1165           CharacterFormManager::GetCharacterFormManager()->
1166               ConvertPreeditString(",", &append_char);
1167           DCHECK(!append_char.empty());
1168         }
1169         break;
1170       }
1171 
1172       case JA_PERIOD: {
1173         // "。" should be "." if the previous character and the next
1174         // character are both alphanumerics.
1175         // Previous char should exist and be a number.
1176         const bool lhs_check =
1177             (i > 0 && IsAlphabetOrNumber(char_scripts[i - 1]));
1178         // JA_PRERIOD should be transformed to PRERIOD.
1179         if (lhs_check) {
1180           CharacterFormManager::GetCharacterFormManager()->
1181               ConvertPreeditString(".", &append_char);
1182           DCHECK(!append_char.empty());
1183         }
1184         break;
1185       }
1186 
1187       default: {
1188         // Do nothing.
1189       }
1190     }
1191 
1192     if (append_char.empty()) {
1193       // Append one character.
1194       Util::UCS4ToUTF8Append(iter.Get(), &transformed_query);
1195     } else {
1196       // Append the transformed character.
1197       transformed_query.append(append_char);
1198       transformed = true;
1199     }
1200   }
1201   if (!transformed) {
1202     return false;
1203   }
1204 
1205   // It is possible that the query's size in byte differs from the
1206   // orig_query's size in byte.
1207   DCHECK_EQ(Util::CharsLen(*query), Util::CharsLen(transformed_query));
1208   *query = transformed_query;
1209   return true;
1210 }
1211 
SetNewInput()1212 void Composer::SetNewInput() {
1213   is_new_input_ = true;
1214 }
1215 
CopyFrom(const Composer & src)1216 void Composer::CopyFrom(const Composer &src) {
1217   Reset();
1218 
1219   input_mode_ = src.input_mode_;
1220   comeback_input_mode_ = src.comeback_input_mode_;
1221   output_mode_ = src.output_mode_;
1222   input_field_type_ = src.input_field_type_;
1223 
1224   position_ = src.position_;
1225   is_new_input_ = src.is_new_input_;
1226   shifted_sequence_count_ = src.shifted_sequence_count_;
1227   source_text_.assign(src.source_text_);
1228   max_length_ = src.max_length_;
1229 
1230   composition_.reset(src.composition_->Clone());
1231   request_ = src.request_;
1232   config_ = src.config_;
1233 
1234   typing_corrector_.CopyFrom(src.typing_corrector_);
1235 }
1236 
is_new_input() const1237 bool Composer::is_new_input() const {
1238   return is_new_input_;
1239 }
1240 
shifted_sequence_count() const1241 size_t Composer::shifted_sequence_count() const {
1242   return shifted_sequence_count_;
1243 }
1244 
source_text() const1245 const string &Composer::source_text() const {
1246   return source_text_;
1247 }
mutable_source_text()1248 string *Composer::mutable_source_text() {
1249   return &source_text_;
1250 }
set_source_text(const string & source_text)1251 void Composer::set_source_text(const string &source_text) {
1252   source_text_.assign(source_text);
1253 }
1254 
max_length() const1255 size_t Composer::max_length() const {
1256   return max_length_;
1257 }
set_max_length(size_t length)1258 void Composer::set_max_length(size_t length) {
1259   max_length_ = length;
1260 }
1261 
SetInputFieldType(commands::Context::InputFieldType type)1262 void Composer::SetInputFieldType(commands::Context::InputFieldType type) {
1263   input_field_type_ = type;
1264 }
1265 
GetInputFieldType() const1266 commands::Context::InputFieldType Composer::GetInputFieldType() const {
1267   return input_field_type_;
1268 }
1269 }  // namespace composer
1270 }  // namespace mozc
1271