1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "ui/base/ime/character_composer.h"
6
7 #include <algorithm>
8 #include <iterator>
9 #include <string>
10
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversion_utils.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "base/third_party/icu/icu_utf.h"
15 #include "ui/events/event.h"
16 #include "ui/events/keycodes/dom/dom_key.h"
17 #include "ui/events/keycodes/dom/keycode_converter.h"
18 #include "ui/events/keycodes/keyboard_codes.h"
19
20 namespace {
21
22 #include "ui/base/ime/character_composer_data.h"
23
CheckCharacterComposeTable(const ui::CharacterComposer::ComposeBuffer & compose_sequence,uint32_t * composed_character)24 bool CheckCharacterComposeTable(
25 const ui::CharacterComposer::ComposeBuffer& compose_sequence,
26 uint32_t* composed_character) {
27 const ui::TreeComposeChecker kTreeComposeChecker(kCompositions);
28 return kTreeComposeChecker.CheckSequence(compose_sequence,
29 composed_character) !=
30 ui::ComposeChecker::CheckSequenceResult::NO_MATCH;
31 }
32
33 // Converts |character| to UTF16 string.
34 // Returns false when |character| is not a valid character.
UTF32CharacterToUTF16(uint32_t character,base::string16 * output)35 bool UTF32CharacterToUTF16(uint32_t character, base::string16* output) {
36 output->clear();
37 // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
38 if (!CBU_IS_UNICODE_CHAR(character))
39 return false;
40 if (character) {
41 output->resize(CBU16_LENGTH(character));
42 size_t i = 0;
43 CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
44 }
45 return true;
46 }
47
48 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|.
49 // -1 is returned when |keycode| cannot be a hexadecimal digit.
KeycodeToHexDigit(unsigned int keycode)50 int KeycodeToHexDigit(unsigned int keycode) {
51 if (ui::VKEY_0 <= keycode && keycode <= ui::VKEY_9)
52 return keycode - ui::VKEY_0;
53 if (ui::VKEY_A <= keycode && keycode <= ui::VKEY_F)
54 return keycode - ui::VKEY_A + 10;
55 return -1; // |keycode| cannot be a hexadecimal digit.
56 }
57
58 } // namespace
59
60 namespace ui {
61
// A new composer starts out in KEY_SEQUENCE_MODE.
CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}
63
~CharacterComposer()64 CharacterComposer::~CharacterComposer() {}
65
Reset()66 void CharacterComposer::Reset() {
67 compose_buffer_.clear();
68 hex_buffer_.clear();
69 composed_character_.clear();
70 preedit_string_.clear();
71 composition_mode_ = KEY_SEQUENCE_MODE;
72 }
73
FilterKeyPress(const ui::KeyEvent & event)74 bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) {
75 if (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED)
76 return false;
77
78 // We don't care about modifier key presses.
79 if (KeycodeConverter::IsDomKeyForModifier(event.GetDomKey()))
80 return false;
81
82 composed_character_.clear();
83 preedit_string_.clear();
84
85 // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
86 // We don't care about other modifiers like Alt. When CapsLock is on, we do
87 // nothing because what we receive is Ctrl+Shift+u (not U).
88 if (event.key_code() == VKEY_U &&
89 (event.flags() & (EF_SHIFT_DOWN | EF_CONTROL_DOWN | EF_CAPS_LOCK_ON)) ==
90 (EF_SHIFT_DOWN | EF_CONTROL_DOWN)) {
91 if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
92 // There is no ongoing composition. Let's switch to HEX_MODE.
93 composition_mode_ = HEX_MODE;
94 UpdatePreeditStringHexMode();
95 return true;
96 }
97 }
98
99 // Filter key press in an appropriate manner.
100 switch (composition_mode_) {
101 case KEY_SEQUENCE_MODE:
102 return FilterKeyPressSequenceMode(event);
103 case HEX_MODE:
104 return FilterKeyPressHexMode(event);
105 default:
106 NOTREACHED();
107 return false;
108 }
109 }
110
FilterKeyPressSequenceMode(const KeyEvent & event)111 bool CharacterComposer::FilterKeyPressSequenceMode(const KeyEvent& event) {
112 DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
113 compose_buffer_.push_back(event.GetDomKey());
114
115 // Check compose table.
116 uint32_t composed_character_utf32 = 0;
117 if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
118 // Key press is recognized as a part of composition.
119 if (composed_character_utf32 != 0) {
120 // We get a composed character.
121 compose_buffer_.clear();
122 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
123 }
124 return true;
125 }
126 // Key press is not a part of composition.
127 compose_buffer_.pop_back(); // Remove the keypress added this time.
128 if (!compose_buffer_.empty()) {
129 // Check for Windows-style composition fallback: If the dead key encodes
130 // a printable ASCII character, output that followed by the new keypress.
131 // (This could be extended to allow any printable Unicode character in
132 // the dead key, and/or for longer sequences, but there is no current use
133 // for that, so we keep it simple.)
134 if ((compose_buffer_.size() == 1) && (compose_buffer_[0].IsDeadKey())) {
135 int32_t dead_character = compose_buffer_[0].ToDeadKeyCombiningCharacter();
136 if (dead_character >= 0x20 && dead_character <= 0x7E) {
137 DomKey current_key = event.GetDomKey();
138 int32_t current_character = 0;
139 if (current_key.IsCharacter())
140 current_character = current_key.ToCharacter();
141 else if (current_key.IsDeadKey())
142 current_character = current_key.ToDeadKeyCombiningCharacter();
143 if (current_character) {
144 base::WriteUnicodeCharacter(dead_character, &composed_character_);
145 base::WriteUnicodeCharacter(current_character, &composed_character_);
146 }
147 }
148 }
149 compose_buffer_.clear();
150 return true;
151 }
152 return false;
153 }
154
FilterKeyPressHexMode(const KeyEvent & event)155 bool CharacterComposer::FilterKeyPressHexMode(const KeyEvent& event) {
156 DCHECK(composition_mode_ == HEX_MODE);
157 const size_t kMaxHexSequenceLength = 8;
158 base::char16 c = event.GetCharacter();
159 int hex_digit = 0;
160 if (base::IsHexDigit(c)) {
161 hex_digit = base::HexDigitToInt(c);
162 } else {
163 // With 101 keyboard, control + shift + 3 produces '#', but a user may
164 // have intended to type '3'. So, if a hexadecimal character was not found,
165 // suppose a user is holding shift key (and possibly control key, too) and
166 // try a character with modifier keys removed.
167 hex_digit = KeycodeToHexDigit(event.key_code());
168 }
169 if (hex_digit >= 0) {
170 if (hex_buffer_.size() < kMaxHexSequenceLength) {
171 // Add the key to the buffer if it is a hex digit.
172 hex_buffer_.push_back(hex_digit);
173 }
174 } else {
175 DomKey key = event.GetDomKey();
176 if (key == DomKey::ESCAPE) {
177 // Cancel composition when ESC is pressed.
178 Reset();
179 } else if (key == DomKey::ENTER || c == ' ') {
180 // Commit the composed character when Enter or space is pressed.
181 CommitHex();
182 } else if (key == DomKey::BACKSPACE) {
183 // Pop back the buffer when Backspace is pressed.
184 if (!hex_buffer_.empty()) {
185 hex_buffer_.pop_back();
186 } else {
187 // If there is no character in |hex_buffer_|, cancel composition.
188 Reset();
189 }
190 }
191 // Other keystrokes are ignored in hex composition mode.
192 }
193 UpdatePreeditStringHexMode();
194 return true;
195 }
196
CommitHex()197 void CharacterComposer::CommitHex() {
198 DCHECK(composition_mode_ == HEX_MODE);
199 uint32_t composed_character_utf32 = 0;
200 for (size_t i = 0; i != hex_buffer_.size(); ++i) {
201 const uint32_t digit = hex_buffer_[i];
202 DCHECK(0 <= digit && digit < 16);
203 composed_character_utf32 <<= 4;
204 composed_character_utf32 |= digit;
205 }
206 Reset();
207 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
208 }
209
UpdatePreeditStringHexMode()210 void CharacterComposer::UpdatePreeditStringHexMode() {
211 if (composition_mode_ != HEX_MODE) {
212 preedit_string_.clear();
213 return;
214 }
215 std::string preedit_string_ascii("u");
216 for (size_t i = 0; i != hex_buffer_.size(); ++i) {
217 const int digit = hex_buffer_[i];
218 DCHECK(0 <= digit && digit < 16);
219 preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
220 }
221 preedit_string_ = base::ASCIIToUTF16(preedit_string_ascii);
222 }
223
CheckSequence(const ui::CharacterComposer::ComposeBuffer & sequence,uint32_t * composed_character) const224 ComposeChecker::CheckSequenceResult TreeComposeChecker::CheckSequence(
225 const ui::CharacterComposer::ComposeBuffer& sequence,
226 uint32_t* composed_character) const {
227 *composed_character = 0;
228 if (sequence.size() > data_.maximum_sequence_length)
229 return CheckSequenceResult::NO_MATCH;
230
231 uint16_t tree_index = 0;
232 for (const auto& keystroke : sequence) {
233 DCHECK(tree_index < data_.tree_entries);
234
235 // If we are looking up a dead key, skip over the character tables.
236 int32_t character = -1;
237 if (keystroke.IsDeadKey()) {
238 tree_index += 2 * data_.tree[tree_index] + 1; // internal unicode table
239 tree_index += 2 * data_.tree[tree_index] + 1; // leaf unicode table
240 character = keystroke.ToDeadKeyCombiningCharacter();
241 } else if (keystroke.IsCharacter()) {
242 character = keystroke.ToCharacter();
243 }
244 if (character < 0 || character > 0xFFFF)
245 return CheckSequenceResult::NO_MATCH;
246
247 // Check the internal subtree table.
248 uint16_t result = 0;
249 uint16_t entries = data_.tree[tree_index++];
250 if (entries &&
251 Find(tree_index, entries, static_cast<uint16_t>(character), &result)) {
252 tree_index = result;
253 continue;
254 }
255
256 // Skip over the internal subtree table and check the leaf table.
257 tree_index += 2 * entries;
258 entries = data_.tree[tree_index++];
259 if (entries &&
260 Find(tree_index, entries, static_cast<uint16_t>(character), &result)) {
261 *composed_character = result;
262 return CheckSequenceResult::FULL_MATCH;
263 }
264 return CheckSequenceResult::NO_MATCH;
265 }
266 return CheckSequenceResult::PREFIX_MATCH;
267 }
268
Find(uint16_t index,uint16_t size,uint16_t key,uint16_t * value) const269 bool TreeComposeChecker::Find(uint16_t index,
270 uint16_t size,
271 uint16_t key,
272 uint16_t* value) const {
273 struct TableEntry {
274 uint16_t key;
275 uint16_t value;
276 bool operator<(const TableEntry& other) const {
277 return this->key < other.key;
278 }
279 };
280 const TableEntry* a = reinterpret_cast<const TableEntry*>(&data_.tree[index]);
281 const TableEntry* z = a + size;
282 const TableEntry target = {key, 0};
283 const TableEntry* it = std::lower_bound(a, z, target);
284 if ((it != z) && (it->key == key)) {
285 *value = it->value;
286 return true;
287 }
288 return false;
289 }
290
291 } // namespace ui
292