1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "ui/base/ime/character_composer.h"
6 
7 #include <algorithm>
8 #include <iterator>
9 #include <string>
10 
11 #include "base/check.h"
12 #include "base/notreached.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversion_utils.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/third_party/icu/icu_utf.h"
17 #include "ui/events/event.h"
18 #include "ui/events/keycodes/dom/dom_key.h"
19 #include "ui/events/keycodes/dom/keycode_converter.h"
20 #include "ui/events/keycodes/keyboard_codes.h"
21 
22 namespace {
23 
24 #include "ui/base/ime/character_composer_data.h"
25 
CheckCharacterComposeTable(const ui::CharacterComposer::ComposeBuffer & compose_sequence,uint32_t * composed_character)26 bool CheckCharacterComposeTable(
27     const ui::CharacterComposer::ComposeBuffer& compose_sequence,
28     uint32_t* composed_character) {
29   const ui::TreeComposeChecker kTreeComposeChecker(kCompositions);
30   return kTreeComposeChecker.CheckSequence(compose_sequence,
31                                            composed_character) !=
32          ui::ComposeChecker::CheckSequenceResult::NO_MATCH;
33 }
34 
35 // Converts |character| to UTF16 string.
36 // Returns false when |character| is not a valid character.
UTF32CharacterToUTF16(uint32_t character,base::string16 * output)37 bool UTF32CharacterToUTF16(uint32_t character, base::string16* output) {
38   output->clear();
39   // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
40   if (!CBU_IS_UNICODE_CHAR(character))
41     return false;
42   if (character) {
43     output->resize(CBU16_LENGTH(character));
44     size_t i = 0;
45     CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
46   }
47   return true;
48 }
49 
50 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|.
51 // -1 is returned when |keycode| cannot be a hexadecimal digit.
KeycodeToHexDigit(unsigned int keycode)52 int KeycodeToHexDigit(unsigned int keycode) {
53   if (ui::VKEY_0 <= keycode && keycode <= ui::VKEY_9)
54     return keycode - ui::VKEY_0;
55   if (ui::VKEY_A <= keycode && keycode <= ui::VKEY_F)
56     return keycode - ui::VKEY_A + 10;
57   return -1;  // |keycode| cannot be a hexadecimal digit.
58 }
59 
60 }  // namespace
61 
62 namespace ui {
63 
CharacterComposer()64 CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}
65 
~CharacterComposer()66 CharacterComposer::~CharacterComposer() {}
67 
Reset()68 void CharacterComposer::Reset() {
69   compose_buffer_.clear();
70   hex_buffer_.clear();
71   composed_character_.clear();
72   preedit_string_.clear();
73   composition_mode_ = KEY_SEQUENCE_MODE;
74 }
75 
FilterKeyPress(const ui::KeyEvent & event)76 bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) {
77   if (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED)
78     return false;
79 
80   // We don't care about modifier key presses.
81   if (KeycodeConverter::IsDomKeyForModifier(event.GetDomKey()))
82     return false;
83 
84   composed_character_.clear();
85   preedit_string_.clear();
86 
87   // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
88   // We don't care about other modifiers like Alt.  When CapsLock is on, we do
89   // nothing because what we receive is Ctrl+Shift+u (not U).
90   if (event.key_code() == VKEY_U &&
91       (event.flags() & (EF_SHIFT_DOWN | EF_CONTROL_DOWN | EF_CAPS_LOCK_ON)) ==
92           (EF_SHIFT_DOWN | EF_CONTROL_DOWN)) {
93     if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
94       // There is no ongoing composition.  Let's switch to HEX_MODE.
95       composition_mode_ = HEX_MODE;
96       UpdatePreeditStringHexMode();
97       return true;
98     }
99   }
100 
101   // Filter key press in an appropriate manner.
102   switch (composition_mode_) {
103     case KEY_SEQUENCE_MODE:
104       return FilterKeyPressSequenceMode(event);
105     case HEX_MODE:
106       return FilterKeyPressHexMode(event);
107     default:
108       NOTREACHED();
109       return false;
110   }
111 }
112 
FilterKeyPressSequenceMode(const KeyEvent & event)113 bool CharacterComposer::FilterKeyPressSequenceMode(const KeyEvent& event) {
114   DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
115   compose_buffer_.push_back(event.GetDomKey());
116 
117   // Check compose table.
118   uint32_t composed_character_utf32 = 0;
119   if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
120     // Key press is recognized as a part of composition.
121     if (composed_character_utf32 != 0) {
122       // We get a composed character.
123       compose_buffer_.clear();
124       UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
125     }
126     return true;
127   }
128   // Key press is not a part of composition.
129   compose_buffer_.pop_back();  // Remove the keypress added this time.
130   if (!compose_buffer_.empty()) {
131     // Check for Windows-style composition fallback: If the dead key encodes
132     // a printable ASCII character, output that followed by the new keypress.
133     // (This could be extended to allow any printable Unicode character in
134     // the dead key, and/or for longer sequences, but there is no current use
135     // for that, so we keep it simple.)
136     if ((compose_buffer_.size() == 1) && (compose_buffer_[0].IsDeadKey())) {
137       int32_t dead_character = compose_buffer_[0].ToDeadKeyCombiningCharacter();
138       if (dead_character >= 0x20 && dead_character <= 0x7E) {
139         DomKey current_key = event.GetDomKey();
140         int32_t current_character = 0;
141         if (current_key.IsCharacter())
142           current_character = current_key.ToCharacter();
143         else if (current_key.IsDeadKey())
144           current_character = current_key.ToDeadKeyCombiningCharacter();
145         if (current_character) {
146           base::WriteUnicodeCharacter(dead_character, &composed_character_);
147           base::WriteUnicodeCharacter(current_character, &composed_character_);
148         }
149       }
150     }
151     compose_buffer_.clear();
152     return true;
153   }
154   return false;
155 }
156 
FilterKeyPressHexMode(const KeyEvent & event)157 bool CharacterComposer::FilterKeyPressHexMode(const KeyEvent& event) {
158   DCHECK(composition_mode_ == HEX_MODE);
159   const size_t kMaxHexSequenceLength = 8;
160   base::char16 c = event.GetCharacter();
161   int hex_digit = 0;
162   if (base::IsHexDigit(c)) {
163     hex_digit = base::HexDigitToInt(c);
164   } else {
165     // With 101 keyboard, control + shift + 3 produces '#', but a user may
166     // have intended to type '3'.  So, if a hexadecimal character was not found,
167     // suppose a user is holding shift key (and possibly control key, too) and
168     // try a character with modifier keys removed.
169     hex_digit = KeycodeToHexDigit(event.key_code());
170   }
171   if (hex_digit >= 0) {
172     if (hex_buffer_.size() < kMaxHexSequenceLength) {
173       // Add the key to the buffer if it is a hex digit.
174       hex_buffer_.push_back(hex_digit);
175     }
176   } else {
177     DomKey key = event.GetDomKey();
178     if (key == DomKey::ESCAPE) {
179       // Cancel composition when ESC is pressed.
180       Reset();
181     } else if (key == DomKey::ENTER || c == ' ') {
182       // Commit the composed character when Enter or space is pressed.
183       CommitHex();
184     } else if (key == DomKey::BACKSPACE) {
185       // Pop back the buffer when Backspace is pressed.
186       if (!hex_buffer_.empty()) {
187         hex_buffer_.pop_back();
188       } else {
189         // If there is no character in |hex_buffer_|, cancel composition.
190         Reset();
191       }
192     }
193     // Other keystrokes are ignored in hex composition mode.
194   }
195   UpdatePreeditStringHexMode();
196   return true;
197 }
198 
CommitHex()199 void CharacterComposer::CommitHex() {
200   DCHECK(composition_mode_ == HEX_MODE);
201   uint32_t composed_character_utf32 = 0;
202   for (size_t i = 0; i != hex_buffer_.size(); ++i) {
203     const uint32_t digit = hex_buffer_[i];
204     DCHECK(0 <= digit && digit < 16);
205     composed_character_utf32 <<= 4;
206     composed_character_utf32 |= digit;
207   }
208   Reset();
209   UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
210 }
211 
UpdatePreeditStringHexMode()212 void CharacterComposer::UpdatePreeditStringHexMode() {
213   if (composition_mode_ != HEX_MODE) {
214     preedit_string_.clear();
215     return;
216   }
217   std::string preedit_string_ascii("u");
218   for (size_t i = 0; i != hex_buffer_.size(); ++i) {
219     const int digit = hex_buffer_[i];
220     DCHECK(0 <= digit && digit < 16);
221     preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
222   }
223   preedit_string_ = base::ASCIIToUTF16(preedit_string_ascii);
224 }
225 
CheckSequence(const ui::CharacterComposer::ComposeBuffer & sequence,uint32_t * composed_character) const226 ComposeChecker::CheckSequenceResult TreeComposeChecker::CheckSequence(
227     const ui::CharacterComposer::ComposeBuffer& sequence,
228     uint32_t* composed_character) const {
229   *composed_character = 0;
230   if (sequence.size() > data_.maximum_sequence_length)
231     return CheckSequenceResult::NO_MATCH;
232 
233   uint16_t tree_index = 0;
234   for (const auto& keystroke : sequence) {
235     DCHECK(tree_index < data_.tree_entries);
236 
237     // If we are looking up a dead key, skip over the character tables.
238     int32_t character = -1;
239     if (keystroke.IsDeadKey()) {
240       tree_index += 2 * data_.tree[tree_index] + 1;  // internal unicode table
241       tree_index += 2 * data_.tree[tree_index] + 1;  // leaf unicode table
242       character = keystroke.ToDeadKeyCombiningCharacter();
243     } else if (keystroke.IsCharacter()) {
244       character = keystroke.ToCharacter();
245     }
246     if (character < 0 || character > 0xFFFF)
247       return CheckSequenceResult::NO_MATCH;
248 
249     // Check the internal subtree table.
250     uint16_t result = 0;
251     uint16_t entries = data_.tree[tree_index++];
252     if (entries &&
253         Find(tree_index, entries, static_cast<uint16_t>(character), &result)) {
254       tree_index = result;
255       continue;
256     }
257 
258     // Skip over the internal subtree table and check the leaf table.
259     tree_index += 2 * entries;
260     entries = data_.tree[tree_index++];
261     if (entries &&
262         Find(tree_index, entries, static_cast<uint16_t>(character), &result)) {
263       *composed_character = result;
264       return CheckSequenceResult::FULL_MATCH;
265     }
266     return CheckSequenceResult::NO_MATCH;
267   }
268   return CheckSequenceResult::PREFIX_MATCH;
269 }
270 
Find(uint16_t index,uint16_t size,uint16_t key,uint16_t * value) const271 bool TreeComposeChecker::Find(uint16_t index,
272                               uint16_t size,
273                               uint16_t key,
274                               uint16_t* value) const {
275   struct TableEntry {
276     uint16_t key;
277     uint16_t value;
278     bool operator<(const TableEntry& other) const {
279       return this->key < other.key;
280     }
281   };
282   const TableEntry* a = reinterpret_cast<const TableEntry*>(&data_.tree[index]);
283   const TableEntry* z = a + size;
284   const TableEntry target = {key, 0};
285   const TableEntry* it = std::lower_bound(a, z, target);
286   if ((it != z) && (it->key == key)) {
287     *value = it->value;
288     return true;
289   }
290   return false;
291 }
292 
293 }  // namespace ui
294