1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // System dictionary maintains following sections
31 // (1) Key trie
32 // Trie containing encoded key. Returns ids for lookup.
33 // We can get the key using the id by performing reverse lookup
34 // against the trie.
35 // (2) Value trie
36 // Trie containing encoded value. Returns ids for lookup.
37 // We can get the value using the id by performing reverse lookup
38 // against the trie.
39 // (3) Token array
40 // Array containing encoded tokens. Array index is the id in key trie.
41 // Token contains cost, POS, the id in key trie, etc.
42 // (4) Table for high frequent POS(left/right ID)
//     Frequently appearing POSs are stored as POS ids in token info for
44 // reducing binary size. This table is the map from the id to the
45 // actual ids.
46
47 #include "dictionary/system/system_dictionary.h"
48
#include <algorithm>
#include <cstring>
#include <limits>
#include <map>
#include <memory>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>
58
59 #include "base/logging.h"
60 #include "base/mmap.h"
61 #include "base/port.h"
62 #include "base/string_piece.h"
63 #include "base/util.h"
64 #include "dictionary/dictionary_token.h"
65 #include "dictionary/file/codec_factory.h"
66 #include "dictionary/file/dictionary_file.h"
67 #include "dictionary/system/codec_interface.h"
68 #include "dictionary/system/token_decode_iterator.h"
69 #include "dictionary/system/words_info.h"
70 #include "storage/louds/bit_vector_based_array.h"
71 #include "storage/louds/louds_trie.h"
72
73 namespace mozc {
74 namespace dictionary {
75
76 using ::mozc::storage::louds::BitVectorBasedArray;
77 using ::mozc::storage::louds::LoudsTrie;
78
79 namespace {
80
// Minimum size (in bytes) of a token blob for one key in the token array.
// Shorter blobs are padded up to this size; see
// TokenScanIterator::NextInternal().
constexpr int kMinTokenArrayBlobSize = 4;

// TODO(noriyukit): The following parameters may not be well optimized. In our
// experiments, Select1 is computational burden, so increasing cache size for
// lb1/select1 may improve performance.
constexpr size_t kKeyTrieLb0CacheSize = 1 * 1024;
constexpr size_t kKeyTrieLb1CacheSize = 1 * 1024;
constexpr size_t kKeyTrieSelect0CacheSize = 4 * 1024;
constexpr size_t kKeyTrieSelect1CacheSize = 4 * 1024;
constexpr size_t kKeyTrieTermvecCacheSize = 1 * 1024;

constexpr size_t kValueTrieLb0CacheSize = 1 * 1024;
constexpr size_t kValueTrieLb1CacheSize = 1 * 1024;
constexpr size_t kValueTrieSelect0CacheSize = 1 * 1024;
constexpr size_t kValueTrieSelect1CacheSize = 16 * 1024;
constexpr size_t kValueTrieTermvecCacheSize = 4 * 1024;
97
// Expansion table format:
// "<Character to expand>[<Expanded character 1><Expanded character 2>...]"
//
// In each entry, the first character is the base character and the remaining
// characters are its expansions; see BuildHiraganaExpansionTable(), which
// splits an encoded entry into (encoded[0], encoded.substr(1)).
//
// Only characters that will be encoded into 1-byte ASCII char are allowed in
// the table.
//
// Note that this implementation has potential issue that the key/values may
// be mixed.
// TODO(hidehiko): Clean up this hacky implementation.
const char *kHiraganaExpansionTable[] = {
    "ああぁ",
    "いいぃ",
    "ううぅゔ",
    "ええぇ",
    "おおぉ",
    "かかが",
    "ききぎ",
    "くくぐ",
    "けけげ",
    "ここご",
    "ささざ",
    "ししじ",
    "すすず",
    "せせぜ",
    "そそぞ",
    "たただ",
    "ちちぢ",
    "つつっづ",
    "ててで",
    "ととど",
    "ははばぱ",
    "ひひびぴ",
    "ふふぶぷ",
    "へへべぺ",
    "ほほぼぽ",
    "ややゃ",
    "ゆゆゅ",
    "よよょ",
    "わわゎ",
};

// Exclusive upper bound of 1-byte ASCII code points.
const uint32 kAsciiRange = 0x80;
140
141 // Confirm that all the characters are within ASCII range.
ContainsAsciiCodeOnly(const string & str)142 bool ContainsAsciiCodeOnly(const string &str) {
143 for (string::const_iterator it = str.begin(); it != str.end(); ++it) {
144 if (static_cast<unsigned char>(*it) >= kAsciiRange) {
145 return false;
146 }
147 }
148 return true;
149 }
150
// Thin wrapper: registers |expansion| as the expansion characters of |key|
// in |key_expansion_table|.
void SetKeyExpansion(char key, const string &expansion,
                     KeyExpansionTable *key_expansion_table) {
  key_expansion_table->Add(key, expansion);
}
155
BuildHiraganaExpansionTable(const SystemDictionaryCodecInterface & codec,KeyExpansionTable * encoded_table)156 void BuildHiraganaExpansionTable(
157 const SystemDictionaryCodecInterface &codec,
158 KeyExpansionTable *encoded_table) {
159 for (size_t index = 0; index < arraysize(kHiraganaExpansionTable); ++index) {
160 string encoded;
161 codec.EncodeKey(kHiraganaExpansionTable[index], &encoded);
162 DCHECK(ContainsAsciiCodeOnly(encoded)) <<
163 "Encoded expansion data are supposed to fit within ASCII";
164
165 DCHECK_GT(encoded.size(), 0) << "Expansion data is empty";
166
167 if (encoded.size() == 1) {
168 continue;
169 } else {
170 SetKeyExpansion(encoded[0], encoded.substr(1), encoded_table);
171 }
172 }
173 }
174
// Returns the address of the encoded token blob for |key_id| in
// |token_array|.  The length output parameter of Get() is mandatory but not
// needed by callers here, so it is received into a local and discarded.
inline const uint8 *GetTokenArrayPtr(const BitVectorBasedArray &token_array,
                                     int key_id) {
  size_t length = 0;
  return reinterpret_cast<const uint8*>(token_array.Get(key_id, &length));
}
180
// Iterator for scanning token array.
// This iterator does not return actual token info but returns
// id data and the position only.
// This will be used only for reverse lookup.
// Forward lookup does not need such iterator because it can access
// a token directly without linear scan.
//
// Usage:
//   for (TokenScanIterator iter(codec_, token_array_);
//        !iter.Done(); iter.Next()) {
//     const TokenScanIterator::Result &result = iter.Get();
//     // Do something with |result|.
//   }
class TokenScanIterator {
 public:
  struct Result {
    // Value id for the current token
    int value_id;
    // Index (= key id) for the current token
    int index;
    // Offset from the tokens section beginning.
    // (token_array_->Get(id_in_key_trie) ==
    //  token_array_->Get(0) + tokens_offset)
    int tokens_offset;
  };

  TokenScanIterator(
      const SystemDictionaryCodecInterface *codec,
      const BitVectorBasedArray &token_array)
      : codec_(codec),
        termination_flag_(codec->GetTokensTerminationFlag()),
        state_(HAS_NEXT),
        offset_(0),
        tokens_offset_(0),
        index_(0) {
    encoded_tokens_ptr_ = GetTokenArrayPtr(token_array, 0);
    // Position at the first token so that Get() is valid immediately after
    // construction.
    NextInternal();
  }

  ~TokenScanIterator() {
  }

  const Result &Get() const { return result_; }

  bool Done() const { return state_ == DONE; }

  void Next() {
    DCHECK_NE(state_, DONE);
    NextInternal();
  }

 private:
  enum State {
    HAS_NEXT,
    DONE,
  };

  // Decodes the token at |offset_| into |result_| and advances the cursor.
  // Sets |state_| to DONE when the termination flag byte is reached.
  void NextInternal() {
    if (encoded_tokens_ptr_[offset_] == termination_flag_) {
      state_ = DONE;
      return;
    }
    int read_bytes;
    result_.value_id = -1;
    result_.index = index_;
    result_.tokens_offset = tokens_offset_;
    // The codec call returns false when this is the last token for the
    // current key (hence the negation).
    const bool is_last_token =
        !(codec_->ReadTokenForReverseLookup(encoded_tokens_ptr_ + offset_,
                                            &result_.value_id, &read_bytes));
    if (is_last_token) {
      // A key's token blob occupies at least kMinTokenArrayBlobSize bytes;
      // skip past the padding if the encoded tokens were shorter.
      int tokens_size = offset_ + read_bytes - tokens_offset_;
      if (tokens_size < kMinTokenArrayBlobSize) {
        tokens_size = kMinTokenArrayBlobSize;
      }
      tokens_offset_ += tokens_size;
      ++index_;
      offset_ = tokens_offset_;
    } else {
      offset_ += read_bytes;
    }
  }

  const SystemDictionaryCodecInterface *codec_;
  const uint8 *encoded_tokens_ptr_;
  const uint8 termination_flag_;
  State state_;
  Result result_;
  int offset_;     // Current read position, relative to the section start.
  int tokens_offset_;  // Start offset of the current key's token blob.
  int index_;      // Key id of the current token.

 private:
  DISALLOW_COPY_AND_ASSIGN(TokenScanIterator);
};
275
// One hit of a reverse (value -> key) lookup.  Both fields default to -1,
// which marks a slot as "not yet filled" (see ReverseLookupIndex).
struct ReverseLookupResult {
  ReverseLookupResult() = default;
  // Offset from the tokens section beginning.
  // (token_array_.Get(id_in_key_trie) == token_array_.Get(0) + tokens_offset)
  int tokens_offset = -1;
  // Id in key trie
  int id_in_key_trie = -1;
};
284
285 } // namespace
286
287 class SystemDictionary::ReverseLookupCache {
288 public:
ReverseLookupCache()289 ReverseLookupCache() {}
290
IsAvailable(const std::set<int> & id_set) const291 bool IsAvailable(const std::set<int> &id_set) const {
292 for (std::set<int>::const_iterator itr = id_set.begin();
293 itr != id_set.end();
294 ++itr) {
295 if (results.find(*itr) == results.end()) {
296 return false;
297 }
298 }
299 return true;
300 }
301
302 std::multimap<int, ReverseLookupResult> results;
303
304 private:
305 DISALLOW_COPY_AND_ASSIGN(ReverseLookupCache);
306 };
307
// Dense index for reverse (value id -> key) lookup.  For each value id it
// stores the array of (tokens_offset, id_in_key_trie) pairs of all tokens
// having that value.  Built by three linear scans over the token array.
class SystemDictionary::ReverseLookupIndex {
 public:
  ReverseLookupIndex(
      const SystemDictionaryCodecInterface *codec,
      const BitVectorBasedArray &token_array) {
    // Pass 1: gets id size (the maximum value id, to size the index).
    int value_id_max = -1;
    for (TokenScanIterator iter(codec, token_array);
         !iter.Done(); iter.Next()) {
      const TokenScanIterator::Result &result = iter.Get();
      value_id_max = std::max(value_id_max, result.value_id);
    }

    CHECK_GE(value_id_max, 0);
    index_size_ = value_id_max + 1;
    index_.reset(new ReverseLookupResultArray[index_size_]);

    // Pass 2: gets result size for each ids.
    for (TokenScanIterator iter(codec, token_array);
         !iter.Done(); iter.Next()) {
      const TokenScanIterator::Result &result = iter.Get();
      if (result.value_id != -1) {
        DCHECK_LT(result.value_id, index_size_);
        ++(index_[result.value_id].size);
      }
    }

    for (size_t i = 0; i < index_size_; ++i) {
      index_[i].results.reset(new ReverseLookupResult[index_[i].size]);
    }

    // Pass 3: builds index.
    for (TokenScanIterator iter(codec, token_array);
         !iter.Done(); iter.Next()) {
      const TokenScanIterator::Result &result = iter.Get();
      if (result.value_id == -1) {
        continue;
      }

      DCHECK_LT(result.value_id, index_size_);
      ReverseLookupResultArray *result_array = &index_[result.value_id];

      // Finds uninitialized result.  Freshly allocated entries still have
      // the default (-1, -1) values, so the first such slot is the next
      // free one.
      size_t result_index = 0;
      for (result_index = 0;
           result_index < result_array->size;
           ++result_index) {
        const ReverseLookupResult &lookup_result =
            result_array->results[result_index];
        if (lookup_result.tokens_offset == -1 &&
            lookup_result.id_in_key_trie == -1) {
          result_array->results[result_index].tokens_offset =
              result.tokens_offset;
          result_array->results[result_index].id_in_key_trie = result.index;
          break;
        }
      }
    }

    // Sanity check: the index array must have been allocated above.
    CHECK(index_ != nullptr);
  }

  ~ReverseLookupIndex() {}

  // Appends every indexed result for each id in |id_set| to |result_map|,
  // keyed by the value id.
  void FillResultMap(const std::set<int> &id_set,
                     std::multimap<int, ReverseLookupResult> *result_map) {
    for (std::set<int>::const_iterator id_itr = id_set.begin();
         id_itr != id_set.end(); ++id_itr) {
      const ReverseLookupResultArray &result_array = index_[*id_itr];
      for (size_t i = 0; i < result_array.size; ++i) {
        result_map->insert(std::make_pair(*id_itr, result_array.results[i]));
      }
    }
  }

 private:
  struct ReverseLookupResultArray {
    ReverseLookupResultArray() : size(0) {}
    // Use std::unique_ptr for reducing memory consumption as possible.
    // Using vector requires 90 MB even when we call resize explicitly.
    // On the other hand, std::unique_ptr requires 57 MB.
    std::unique_ptr<ReverseLookupResult[]> results;
    size_t size;
  };

  // Use scoped array for reducing memory consumption as possible.
  std::unique_ptr<ReverseLookupResultArray[]> index_;
  size_t index_size_;

  DISALLOW_COPY_AND_ASSIGN(ReverseLookupIndex);
};
399
// Traversal state for CollectPredictiveNodesInBfsOrder(): |node| is the
// current position in the key trie, |key_pos| its depth (number of edges
// followed so far), and |is_expanded| is true when some edge on the path was
// matched through key expansion rather than exactly.
struct SystemDictionary::PredictiveLookupSearchState {
  PredictiveLookupSearchState() : key_pos(0), is_expanded(false) {}
  PredictiveLookupSearchState(const storage::louds::LoudsTrie::Node &n,
                              size_t pos, bool expanded)
      : node(n), key_pos(pos), is_expanded(expanded) {}

  storage::louds::LoudsTrie::Node node;
  size_t key_pos;
  bool is_expanded;
};
410
// Accumulates all parameters needed to build a SystemDictionary; filled in
// by the Builder constructors and setters and consumed by Build().
struct SystemDictionary::Builder::Specification {
  enum InputType {
    FILENAME,  // Load the dictionary from a file on disk.
    IMAGE,     // Load the dictionary from an in-memory image.
  };

  Specification(InputType t, const string &fn, const char *p, int l, Options o,
                const SystemDictionaryCodecInterface *codec,
                const DictionaryFileCodecInterface *file_codec)
      : type(t), filename(fn), ptr(p), len(l), options(o), codec(codec),
        file_codec(file_codec) {}

  InputType type;

  // For InputType::FILENAME
  const string filename;

  // For InputType::IMAGE
  const char *ptr;
  const int len;

  Options options;
  // Codecs to use; when nullptr, Build() falls back to the factory defaults.
  const SystemDictionaryCodecInterface *codec;
  const DictionaryFileCodecInterface *file_codec;
};
436
// Prepares to build the dictionary from the file at |filename|.
SystemDictionary::Builder::Builder(const string &filename)
    : spec_(new Specification(Specification::FILENAME,
                              filename, nullptr, -1, NONE, nullptr, nullptr)) {}
440
// Prepares to build the dictionary from the |len|-byte image at |ptr|.
SystemDictionary::Builder::Builder(const char *ptr, int len)
    : spec_(new Specification(Specification::IMAGE,
                              "", ptr, len, NONE, nullptr, nullptr)) {}
444
// Out-of-line empty destructor (Specification is defined only in this file).
SystemDictionary::Builder::~Builder() {}
446
// Sets lookup options (e.g. ENABLE_REVERSE_LOOKUP_INDEX).  Returns *this
// for call chaining.
SystemDictionary::Builder & SystemDictionary::Builder::SetOptions(
    Options options) {
  spec_->options = options;
  return *this;
}
452
// Overrides the system dictionary codec.  When not called, Build() uses the
// codec from SystemDictionaryCodecFactory.  Returns *this for call chaining.
SystemDictionary::Builder &SystemDictionary::Builder::SetCodec(
    const SystemDictionaryCodecInterface *codec) {
  spec_->codec = codec;
  return *this;
}
458
// Creates a SystemDictionary according to the accumulated specification.
// Returns nullptr on any failure; on success the caller owns the returned
// instance.
SystemDictionary *SystemDictionary::Builder::Build() {
  // Fall back to the factory-provided codecs when none were set explicitly.
  if (spec_->codec == nullptr) {
    spec_->codec = SystemDictionaryCodecFactory::GetCodec();
  }

  if (spec_->file_codec == nullptr) {
    spec_->file_codec = DictionaryFileCodecFactory::GetCodec();
  }

  std::unique_ptr<SystemDictionary> instance(
      new SystemDictionary(spec_->codec, spec_->file_codec));

  switch (spec_->type) {
    case Specification::FILENAME:
      if (!instance->dictionary_file_->OpenFromFile(spec_->filename)) {
        LOG(ERROR) << "Failed to open system dictionary file";
        return nullptr;
      }
      break;
    case Specification::IMAGE:
      // Make the dictionary not to be paged out.
      // We don't check the return value because the process doesn't
      // necessarily have the privilege to mlock.
      // Note that we don't munlock the space because it's always better to
      // keep the singleton system dictionary paged in as long as the process
      // runs.
      Mmap::MaybeMLock(spec_->ptr, spec_->len);
      if (!instance->dictionary_file_->OpenFromImage(spec_->ptr, spec_->len)) {
        LOG(ERROR) << "Failed to open system dictionary image";
        return nullptr;
      }
      break;
    default:
      LOG(ERROR) << "Invalid input type.";
      return nullptr;
  }

  if (!instance->OpenDictionaryFile(
          (spec_->options & ENABLE_REVERSE_LOOKUP_INDEX) != 0)) {
    LOG(ERROR) << "Failed to create system dictionary";
    return nullptr;
  }

  return instance.release();
}
503
// |codec| and |file_codec| are held as raw pointers and are not owned by
// this instance.
SystemDictionary::SystemDictionary(
    const SystemDictionaryCodecInterface *codec,
    const DictionaryFileCodecInterface *file_codec)
    : frequent_pos_(nullptr),
      codec_(codec),
      dictionary_file_(new DictionaryFile(file_codec)) {}
510
// All owned resources are released by the member destructors.
SystemDictionary::~SystemDictionary() {}
512
// Opens all sections of the dictionary file (key trie, value trie, token
// array, frequent POS table) and initializes the in-memory structures.
// When |enable_reverse_lookup_index| is true, also precomputes the reverse
// lookup index.  Returns false if any section fails to open.
bool SystemDictionary::OpenDictionaryFile(bool enable_reverse_lookup_index) {
  // |len| is required by GetSection() but its value is not used here.
  int len;

  const uint8 *key_image = reinterpret_cast<const uint8 *>(
      dictionary_file_->GetSection(codec_->GetSectionNameForKey(), &len));
  if (!key_trie_.Open(key_image,
                      kKeyTrieLb0CacheSize,
                      kKeyTrieLb1CacheSize,
                      kKeyTrieSelect0CacheSize,
                      kKeyTrieSelect1CacheSize,
                      kKeyTrieTermvecCacheSize)) {
    LOG(ERROR) << "cannot open key trie";
    return false;
  }

  BuildHiraganaExpansionTable(*codec_, &hiragana_expansion_table_);

  const uint8 *value_image = reinterpret_cast<const uint8 *>(
      dictionary_file_->GetSection(codec_->GetSectionNameForValue(), &len));
  if (!value_trie_.Open(value_image,
                        kValueTrieLb0CacheSize,
                        kValueTrieLb1CacheSize,
                        kValueTrieSelect0CacheSize,
                        kValueTrieSelect1CacheSize,
                        kValueTrieTermvecCacheSize)) {
    LOG(ERROR) << "can not open value trie";
    return false;
  }

  const unsigned char *token_image = reinterpret_cast<const unsigned char *>(
      dictionary_file_->GetSection(codec_->GetSectionNameForTokens(), &len));
  token_array_.Open(token_image);

  frequent_pos_ = reinterpret_cast<const uint32*>(
      dictionary_file_->GetSection(codec_->GetSectionNameForPos(), &len));
  if (frequent_pos_ == nullptr) {
    LOG(ERROR) << "can not find frequent pos section";
    return false;
  }

  if (enable_reverse_lookup_index) {
    InitReverseLookupIndex();
  }

  return true;
}
559
InitReverseLookupIndex()560 void SystemDictionary::InitReverseLookupIndex() {
561 if (reverse_lookup_index_ != nullptr) {
562 return;
563 }
564 reverse_lookup_index_.reset(new ReverseLookupIndex(codec_, token_array_));
565 }
566
// Returns true if the key trie contains exactly the encoded form of |key|.
bool SystemDictionary::HasKey(StringPiece key) const {
  string encoded_key;
  codec_->EncodeKey(key, &encoded_key);
  return key_trie_.HasKey(encoded_key);
}
572
// Returns true if some token in the dictionary has the surface form |value|,
// checking both the value trie and (for values stored only via the key trie)
// the tokens of the normalized key.
bool SystemDictionary::HasValue(StringPiece value) const {
  string encoded_value;
  codec_->EncodeValue(value, &encoded_value);
  if (value_trie_.HasKey(encoded_value)) {
    return true;
  }

  // Because Hiragana, Katakana and Alphabet words are not stored in the
  // value_trie for the data compression.  They are only stored in the
  // key_trie with flags.  So we also need to check the existence in
  // the key_trie.

  // Normalize the value as the key.  This process depends on the
  // implementation of SystemDictionaryBuilder::BuildValueTrie.
  string key;
  Util::KatakanaToHiragana(value, &key);

  string encoded_key;
  codec_->EncodeKey(key, &encoded_key);
  const int key_id = key_trie_.ExactSearch(encoded_key);
  if (key_id == -1) {
    return false;
  }

  // We need to check the contents of value_trie for Katakana values.
  // If (key, value) = (かな, カナ) is in the dictionary, "カナ" is
  // not used as a key for value_trie or key_trie.  Only "かな" is
  // used as a key for key_trie.  If we accept this limitation, we can
  // skip the following code.
  //
  // If we add "if (key == value) return true;" here, we can check
  // almost all cases of Hiragana and Alphabet words without the
  // following iteration.  However, when (mozc, MOZC) is stored but
  // (mozc, mozc) is NOT stored, HasValue("mozc") wrongly returns
  // true.

  // Get the block of tokens for this key.
  const uint8 *encoded_tokens_ptr = GetTokenArrayPtr(token_array_, key_id);

  // Check whether any token of this key decodes to exactly |value|.
  for (TokenDecodeIterator iter(codec_, value_trie_, frequent_pos_, key,
                                encoded_tokens_ptr);
       !iter.Done(); iter.Next()) {
    const Token *token = iter.Get().token;
    if (value == token->value) {
      return true;
    }
  }
  return false;
}
623
// Collects into |result| the terminal trie nodes whose keys start with
// |encoded_key| (allowing per-character expansion by |table|), traversing
// the key trie breadth-first.  Once more than |limit| nodes are collected,
// the queue is drained only for keys of the current (longest) length, so
// all keys of that length are reported before stopping.
void SystemDictionary::CollectPredictiveNodesInBfsOrder(
    StringPiece encoded_key,
    const KeyExpansionTable &table,
    size_t limit,
    std::vector<PredictiveLookupSearchState> *result) const {
  std::queue<PredictiveLookupSearchState> queue;
  queue.push(PredictiveLookupSearchState(LoudsTrie::Node(), 0, false));
  do {
    PredictiveLookupSearchState state = queue.front();
    queue.pop();

    // Update traversal state for |encoded_key| and its expanded keys.
    if (state.key_pos < encoded_key.size()) {
      const char target_char = encoded_key[state.key_pos];
      const ExpandedKey &chars = table.ExpandKey(target_char);

      for (key_trie_.MoveToFirstChild(&state.node);
           key_trie_.IsValidNode(state.node);
           key_trie_.MoveToNextSibling(&state.node)) {
        const char c = key_trie_.GetEdgeLabelToParentNode(state.node);
        if (!chars.IsHit(c)) {
          continue;
        }
        // The path stays "expanded" once any edge differs from the exact
        // target character.
        const bool is_expanded = state.is_expanded || c != target_char;
        queue.push(PredictiveLookupSearchState(state.node,
                                               state.key_pos + 1,
                                               is_expanded));
      }
      continue;
    }

    // Collect prediction keys (state.key_pos >= encoded_key.size()).
    if (key_trie_.IsTerminalNode(state.node)) {
      result->push_back(state);
    }

    // Collected enough entries.  Collect all the remaining keys that have the
    // same length as the longest key.
    if (result->size() > limit) {
      // The current key is the longest because of BFS property.
      const size_t max_key_len = state.key_pos;
      while (!queue.empty()) {
        state = queue.front();
        queue.pop();
        if (state.key_pos > max_key_len) {
          // Key length in the queue is monotonically increasing because of BFS
          // property.  So we don't need to check all the elements in the
          // queue.
          break;
        }
        DCHECK_EQ(state.key_pos, max_key_len);
        if (key_trie_.IsTerminalNode(state.node)) {
          result->push_back(state);
        }
      }
      break;
    }

    // Update traversal state for children.
    for (key_trie_.MoveToFirstChild(&state.node);
         key_trie_.IsValidNode(state.node);
         key_trie_.MoveToNextSibling(&state.node)) {
      queue.push(PredictiveLookupSearchState(state.node,
                                             state.key_pos + 1,
                                             state.is_expanded));
    }
  } while (!queue.empty());
}
691
LookupPredictive(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const692 void SystemDictionary::LookupPredictive(
693 StringPiece key,
694 const ConversionRequest &conversion_request,
695 Callback *callback) const {
696 // Do nothing for empty key, although looking up all the entries with empty
697 // string seems natural.
698 if (key.empty()) {
699 return;
700 }
701
702 string encoded_key;
703 codec_->EncodeKey(key, &encoded_key);
704 if (encoded_key.size() > LoudsTrie::kMaxDepth) {
705 return;
706 }
707
708 const KeyExpansionTable &table =
709 conversion_request.IsKanaModifierInsensitiveConversion() ?
710 hiragana_expansion_table_ : KeyExpansionTable::GetDefaultInstance();
711
712 // TODO(noriyukit): Lookup limit should be implemented at caller side by using
713 // callback mechanism. This hard-coding limits the capability and generality
714 // of dictionary module. CollectPredictiveNodesInBfsOrder() and the following
715 // loop for callback should be integrated for this purpose.
716 const size_t kLookupLimit = 64;
717 std::vector<PredictiveLookupSearchState> result;
718 result.reserve(kLookupLimit);
719 CollectPredictiveNodesInBfsOrder(encoded_key, table, kLookupLimit, &result);
720
721 // Reused buffer and instances inside the following loop.
722 char encoded_actual_key_buffer[LoudsTrie::kMaxDepth + 1];
723 string decoded_key, actual_key_str;
724 decoded_key.reserve(key.size() * 2);
725 actual_key_str.reserve(key.size() * 2);
726 for (size_t i = 0; i < result.size(); ++i) {
727 const PredictiveLookupSearchState &state = result[i];
728
729 // Computes the actual key. For example:
730 // key = "くー"
731 // encoded_actual_key = encode("ぐーぐる") [expanded]
732 // encoded_actual_key_prediction_suffix = encode("ぐる")
733 const StringPiece encoded_actual_key =
734 key_trie_.RestoreKeyString(state.node, encoded_actual_key_buffer);
735 const StringPiece encoded_actual_key_prediction_suffix =
736 ClippedSubstr(encoded_actual_key, encoded_key.size(),
737 encoded_actual_key.size() - encoded_key.size());
738
739 // decoded_key = "くーぐる" (= key + prediction suffix)
740 decoded_key.clear();
741 decoded_key.assign(key.data(), key.size());
742 codec_->DecodeKey(encoded_actual_key_prediction_suffix, &decoded_key);
743 switch (callback->OnKey(decoded_key)) {
744 case Callback::TRAVERSE_DONE:
745 return;
746 case Callback::TRAVERSE_NEXT_KEY:
747 continue;
748 case DictionaryInterface::Callback::TRAVERSE_CULL:
749 LOG(FATAL) << "Culling is not implemented.";
750 continue;
751 default:
752 break;
753 }
754
755 StringPiece actual_key;
756 if (state.is_expanded) {
757 actual_key_str.clear();
758 codec_->DecodeKey(encoded_actual_key, &actual_key_str);
759 actual_key = actual_key_str;
760 } else {
761 actual_key = decoded_key;
762 }
763 switch (callback->OnActualKey(decoded_key, actual_key, state.is_expanded)) {
764 case Callback::TRAVERSE_DONE:
765 return;
766 case Callback::TRAVERSE_NEXT_KEY:
767 continue;
768 case Callback::TRAVERSE_CULL:
769 LOG(FATAL) << "Culling is not implemented.";
770 continue;
771 default:
772 break;
773 }
774
775 const int key_id = key_trie_.GetKeyIdOfTerminalNode(state.node);
776 for (TokenDecodeIterator iter(codec_, value_trie_,
777 frequent_pos_, actual_key,
778 GetTokenArrayPtr(token_array_, key_id));
779 !iter.Done(); iter.Next()) {
780 const TokenInfo &token_info = iter.Get();
781 const Callback::ResultType result =
782 callback->OnToken(decoded_key, actual_key, *token_info.token);
783 if (result == Callback::TRAVERSE_DONE) {
784 return;
785 }
786 if (result == Callback::TRAVERSE_NEXT_KEY) {
787 break;
788 }
789 DCHECK_NE(Callback::TRAVERSE_CULL, result) << "Not implemented";
790 }
791 }
792 }
793
794 namespace {
795
// An implementation of prefix search without key expansion.  Runs |callback|
// for prefixes of |encoded_key| in |key_trie|.
// Args:
//   key_trie, value_trie, token_array, codec, frequent_pos:
//     Members in SystemDictionary.
//   key:
//     The head address of the original key before applying codec.
//   encoded_key:
//     The encoded |key|.
//   callback:
//     A callback function to be called.
//   token_filter:
//     A functor of signature bool(const TokenInfo &).  Only tokens for which
//     this functor returns true are passed to callback function.
template <typename Func>
void RunCallbackOnEachPrefix(const LoudsTrie &key_trie,
                             const LoudsTrie &value_trie,
                             const BitVectorBasedArray &token_array,
                             const SystemDictionaryCodecInterface *codec,
                             const uint32 *frequent_pos,
                             const char *key,
                             StringPiece encoded_key,
                             DictionaryInterface::Callback *callback,
                             Func token_filter) {
  typedef DictionaryInterface::Callback Callback;
  LoudsTrie::Node node;
  // Walk down the trie one label at a time; every terminal node passed on
  // the way corresponds to a prefix of |encoded_key|.
  for (StringPiece::size_type i = 0; i < encoded_key.size(); ) {
    if (!key_trie.MoveToChildByLabel(encoded_key[i], &node)) {
      return;
    }
    ++i;  // Increment here for next loop and |encoded_prefix| defined below.
    if (!key_trie.IsTerminalNode(node)) {
      continue;
    }
    const StringPiece encoded_prefix = encoded_key.substr(0, i);
    const StringPiece prefix(key, codec->GetDecodedKeyLength(encoded_prefix));

    switch (callback->OnKey(prefix)) {
      case Callback::TRAVERSE_DONE:
      case Callback::TRAVERSE_CULL:
        return;
      case Callback::TRAVERSE_NEXT_KEY:
        continue;
      default:
        break;
    }

    // No key expansion here, so the actual key always equals the prefix.
    switch (callback->OnActualKey(prefix, prefix, false)) {
      case Callback::TRAVERSE_DONE:
      case Callback::TRAVERSE_CULL:
        return;
      case Callback::TRAVERSE_NEXT_KEY:
        continue;
      default:
        break;
    }

    const int key_id = key_trie.GetKeyIdOfTerminalNode(node);
    for (TokenDecodeIterator iter(codec, value_trie, frequent_pos, prefix,
                                  GetTokenArrayPtr(token_array, key_id));
         !iter.Done(); iter.Next()) {
      const TokenInfo &token_info = iter.Get();
      if (!token_filter(token_info)) {
        continue;
      }
      const Callback::ResultType res =
          callback->OnToken(prefix, prefix, *token_info.token);
      if (res == Callback::TRAVERSE_DONE || res == Callback::TRAVERSE_CULL) {
        return;
      }
      if (res == Callback::TRAVERSE_NEXT_KEY) {
        break;
      }
    }
  }
}
872
// Token filter for RunCallbackOnEachPrefix() that accepts every token.
struct SelectAllTokens {
  bool operator()(const TokenInfo &token_info) const { return true; }
};
876
877 class ReverseLookupCallbackWrapper : public DictionaryInterface::Callback {
878 public:
ReverseLookupCallbackWrapper(DictionaryInterface::Callback * callback)879 explicit ReverseLookupCallbackWrapper(DictionaryInterface::Callback *callback)
880 : callback_(callback) {}
~ReverseLookupCallbackWrapper()881 virtual ~ReverseLookupCallbackWrapper() {}
OnToken(StringPiece key,StringPiece actual_key,const Token & token)882 virtual SystemDictionary::Callback::ResultType OnToken(
883 StringPiece key, StringPiece actual_key, const Token &token) {
884 Token modified_token = token;
885 modified_token.key.swap(modified_token.value);
886 return callback_->OnToken(key, actual_key, modified_token);
887 }
888
889 DictionaryInterface::Callback *callback_;
890 };
891
892 } // namespace
893
// Recursive implementation of depth-first prefix search with key expansion.
895 // Input parameters:
896 // key:
897 // The head address of the original key before applying codec.
898 // encoded_key:
899 // The encoded |key|.
900 // table:
901 // Key expansion table.
902 // callback:
903 // A callback function to be called.
904 // Parameters for recursion:
905 // node:
906 // Stores the current location in |key_trie_|.
907 // key_pos:
908 // Depth of node, i.e., encoded_key.substr(0, key_pos) is the current prefix
909 // for search.
910 // is_expanded:
911 // true is stored if the current node is reached by key expansion.
912 // actual_key_buffer:
913 // Buffer for storing actually used characters to reach this node, i.e.,
914 // StringPiece(actual_key_buffer, key_pos) is the matched prefix using key
915 // expansion.
916 // actual_prefix:
917 // A reused string for decoded actual key. This is just for performance
918 // purpose.
// Walks |key_trie_| depth-first, matching |encoded_key| while allowing each
// character to be substituted by its expansions in |table|.  At every
// terminal node reached, runs the OnKey / OnActualKey / OnToken callback
// protocol; the callback's ResultType steers the traversal (DONE/CULL stop
// the whole search, NEXT_KEY skips to child traversal).  See the comment
// block above for the meaning of each parameter.
DictionaryInterface::Callback::ResultType
SystemDictionary::LookupPrefixWithKeyExpansionImpl(
    const char *key,
    StringPiece encoded_key,
    const KeyExpansionTable &table,
    Callback *callback,
    LoudsTrie::Node node,
    StringPiece::size_type key_pos,
    bool is_expanded,
    char *actual_key_buffer,
    string *actual_prefix) const {
  // This do-block handles a terminal node and callback. do-block is used to
  // break the block and continue to the subsequent traversal phase.
  do {
    if (!key_trie_.IsTerminalNode(node)) {
      break;
    }

    // |prefix| is the original (pre-encoding) key prefix matched so far;
    // its byte length is recovered from the encoded prefix via the codec.
    const StringPiece encoded_prefix = encoded_key.substr(0, key_pos);
    const StringPiece prefix(key, codec_->GetDecodedKeyLength(encoded_prefix));
    Callback::ResultType result = callback->OnKey(prefix);
    if (result == Callback::TRAVERSE_DONE ||
        result == Callback::TRAVERSE_CULL) {
      return result;
    }
    if (result == Callback::TRAVERSE_NEXT_KEY) {
      break;  // Go to the traversal phase.
    }

    // Decode the actually matched (possibly expanded) characters collected
    // in |actual_key_buffer| for the OnActualKey callback.
    const StringPiece encoded_actual_prefix(actual_key_buffer, key_pos);
    actual_prefix->clear();
    codec_->DecodeKey(encoded_actual_prefix, actual_prefix);
    result = callback->OnActualKey(prefix, *actual_prefix, is_expanded);
    if (result == Callback::TRAVERSE_DONE ||
        result == Callback::TRAVERSE_CULL) {
      return result;
    }
    if (result == Callback::TRAVERSE_NEXT_KEY) {
      break;  // Go to the traversal phase.
    }

    // Enumerate every token stored for this terminal node.
    const int key_id = key_trie_.GetKeyIdOfTerminalNode(node);
    for (TokenDecodeIterator iter(codec_, value_trie_, frequent_pos_,
                                  *actual_prefix,
                                  GetTokenArrayPtr(token_array_, key_id));
         !iter.Done(); iter.Next()) {
      const TokenInfo &token_info = iter.Get();
      result = callback->OnToken(prefix, *actual_prefix, *token_info.token);
      if (result == Callback::TRAVERSE_DONE ||
          result == Callback::TRAVERSE_CULL) {
        return result;
      }
      if (result == Callback::TRAVERSE_NEXT_KEY) {
        break;  // Go to the traversal phase.
      }
    }
  } while (false);

  // Traversal phase.
  if (key_pos == encoded_key.size()) {
    return Callback::TRAVERSE_CONTINUE;
  }
  // Descend into each child whose edge label is the next input character or
  // one of its expansions.
  const char current_char = encoded_key[key_pos];
  const ExpandedKey &chars = table.ExpandKey(current_char);
  for (key_trie_.MoveToFirstChild(&node); key_trie_.IsValidNode(node);
       key_trie_.MoveToNextSibling(&node)) {
    const char c = key_trie_.GetEdgeLabelToParentNode(node);
    if (!chars.IsHit(c)) {
      continue;
    }
    // Record the actually used character; mark expansion if it differs from
    // the original input character.
    actual_key_buffer[key_pos] = c;
    const Callback::ResultType result = LookupPrefixWithKeyExpansionImpl(
        key, encoded_key, table, callback, node, key_pos + 1,
        is_expanded || c != current_char,
        actual_key_buffer, actual_prefix);
    if (result == Callback::TRAVERSE_DONE) {
      return Callback::TRAVERSE_DONE;
    }
  }

  return Callback::TRAVERSE_CONTINUE;
}
1001
LookupPrefix(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const1002 void SystemDictionary::LookupPrefix(
1003 StringPiece key,
1004 const ConversionRequest &conversion_request,
1005 Callback *callback) const {
1006 string encoded_key;
1007 codec_->EncodeKey(key, &encoded_key);
1008
1009 if (!conversion_request.IsKanaModifierInsensitiveConversion()) {
1010 RunCallbackOnEachPrefix(key_trie_, value_trie_, token_array_, codec_,
1011 frequent_pos_, key.data(), encoded_key, callback,
1012 SelectAllTokens());
1013 return;
1014 }
1015
1016 char actual_key_buffer[LoudsTrie::kMaxDepth + 1];
1017 string actual_prefix;
1018 actual_prefix.reserve(key.size() * 3);
1019 LookupPrefixWithKeyExpansionImpl(key.data(), encoded_key,
1020 hiragana_expansion_table_, callback,
1021 LoudsTrie::Node(), 0, false,
1022 actual_key_buffer, &actual_prefix);
1023 }
1024
LookupExact(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const1025 void SystemDictionary::LookupExact(
1026 StringPiece key,
1027 const ConversionRequest &conversion_request,
1028 Callback *callback) const {
1029 // Find the key in the key trie.
1030 string encoded_key;
1031 codec_->EncodeKey(key, &encoded_key);
1032 const int key_id = key_trie_.ExactSearch(encoded_key);
1033 if (key_id == -1) {
1034 return;
1035 }
1036 if (callback->OnKey(key) != Callback::TRAVERSE_CONTINUE) {
1037 return;
1038 }
1039
1040 // Callback on each token.
1041 for (TokenDecodeIterator iter(codec_, value_trie_, frequent_pos_, key,
1042 GetTokenArrayPtr(token_array_, key_id));
1043 !iter.Done(); iter.Next()) {
1044 if (callback->OnToken(key, key, *iter.Get().token) !=
1045 Callback::TRAVERSE_CONTINUE) {
1046 break;
1047 }
1048 }
1049 }
1050
LookupReverse(StringPiece str,const ConversionRequest & conversion_request,Callback * callback) const1051 void SystemDictionary::LookupReverse(
1052 StringPiece str,
1053 const ConversionRequest &conversion_request,
1054 Callback *callback) const {
1055 // 1st step: Hiragana/Katakana are not in the value trie
1056 // 2nd step: Reverse lookup in value trie
1057 ReverseLookupCallbackWrapper callback_wrapper(callback);
1058 RegisterReverseLookupTokensForT13N(str, &callback_wrapper);
1059 RegisterReverseLookupTokensForValue(str, &callback_wrapper);
1060 }
1061
1062 namespace {
1063
1064 class AddKeyIdsToSet {
1065 public:
AddKeyIdsToSet(std::set<int> * output)1066 explicit AddKeyIdsToSet(std::set<int> *output) : output_(output) {}
1067
operator ()(StringPiece key,size_t prefix_len,const LoudsTrie & trie,LoudsTrie::Node node)1068 void operator()(StringPiece key, size_t prefix_len,
1069 const LoudsTrie &trie, LoudsTrie::Node node) {
1070 output_->insert(trie.GetKeyIdOfTerminalNode(node));
1071 }
1072
1073 private:
1074 std::set<int> *output_;
1075 };
1076
AddKeyIdsOfAllPrefixes(const LoudsTrie & trie,StringPiece key,std::set<int> * key_ids)1077 inline void AddKeyIdsOfAllPrefixes(const LoudsTrie &trie, StringPiece key,
1078 std::set<int> *key_ids) {
1079 trie.PrefixSearch(key, AddKeyIdsToSet(key_ids));
1080 }
1081
1082 } // namespace
1083
PopulateReverseLookupCache(StringPiece str) const1084 void SystemDictionary::PopulateReverseLookupCache(StringPiece str) const {
1085 if (reverse_lookup_index_ != nullptr) {
1086 // We don't need to prepare cache for the current reverse conversion,
1087 // as we have already built the index for reverse lookup.
1088 return;
1089 }
1090 reverse_lookup_cache_.reset(new ReverseLookupCache);
1091 DCHECK(reverse_lookup_cache_.get());
1092
1093 // Iterate each suffix and collect IDs of all substrings.
1094 std::set<int> id_set;
1095 int pos = 0;
1096 string lookup_key;
1097 lookup_key.reserve(str.size());
1098 while (pos < str.size()) {
1099 const StringPiece suffix = ClippedSubstr(str, pos);
1100 lookup_key.clear();
1101 codec_->EncodeValue(suffix, &lookup_key);
1102 AddKeyIdsOfAllPrefixes(value_trie_, lookup_key, &id_set);
1103 pos += Util::OneCharLen(suffix.data());
1104 }
1105 // Collect tokens for all IDs.
1106 ScanTokens(id_set, reverse_lookup_cache_.get());
1107 }
1108
ClearReverseLookupCache() const1109 void SystemDictionary::ClearReverseLookupCache() const {
1110 reverse_lookup_cache_.reset();
1111 }
1112
1113 namespace {
1114
1115 class FilterTokenForRegisterReverseLookupTokensForT13N {
1116 public:
FilterTokenForRegisterReverseLookupTokensForT13N()1117 FilterTokenForRegisterReverseLookupTokensForT13N() {
1118 tmp_str_.reserve(LoudsTrie::kMaxDepth * 3);
1119 }
1120
operator ()(const TokenInfo & token_info)1121 bool operator()(const TokenInfo &token_info) {
1122 // Skip spelling corrections.
1123 if (token_info.token->attributes & Token::SPELLING_CORRECTION) {
1124 return false;
1125 }
1126 if (token_info.value_type != TokenInfo::AS_IS_HIRAGANA &&
1127 token_info.value_type != TokenInfo::AS_IS_KATAKANA) {
1128 // SAME_AS_PREV_VALUE may be t13n token.
1129 tmp_str_.clear();
1130 Util::KatakanaToHiragana(token_info.token->value, &tmp_str_);
1131 if (token_info.token->key != tmp_str_) {
1132 return false;
1133 }
1134 }
1135 return true;
1136 }
1137
1138 private:
1139 string tmp_str_;
1140 };
1141
1142 } // namespace
1143
RegisterReverseLookupTokensForT13N(StringPiece value,Callback * callback) const1144 void SystemDictionary::RegisterReverseLookupTokensForT13N(
1145 StringPiece value, Callback *callback) const {
1146 string hiragana_value, encoded_key;
1147 Util::KatakanaToHiragana(value, &hiragana_value);
1148 codec_->EncodeKey(hiragana_value, &encoded_key);
1149 RunCallbackOnEachPrefix(key_trie_, value_trie_, token_array_, codec_,
1150 frequent_pos_, hiragana_value.data(),
1151 encoded_key, callback,
1152 FilterTokenForRegisterReverseLookupTokensForT13N());
1153 }
1154
RegisterReverseLookupTokensForValue(StringPiece value,Callback * callback) const1155 void SystemDictionary::RegisterReverseLookupTokensForValue(
1156 StringPiece value, Callback *callback) const {
1157 string lookup_key;
1158 codec_->EncodeValue(value, &lookup_key);
1159
1160 std::set<int> id_set;
1161 AddKeyIdsOfAllPrefixes(value_trie_, lookup_key, &id_set);
1162
1163 ReverseLookupCache *results = nullptr;
1164 ReverseLookupCache non_cached_results;
1165 if (reverse_lookup_index_ != nullptr) {
1166 reverse_lookup_index_->FillResultMap(id_set, &non_cached_results.results);
1167 results = &non_cached_results;
1168 } else if (reverse_lookup_cache_ != nullptr &&
1169 reverse_lookup_cache_->IsAvailable(id_set)) {
1170 results = reverse_lookup_cache_.get();
1171 } else {
1172 // Cache is not available. Get token for each ID.
1173 ScanTokens(id_set, &non_cached_results);
1174 results = &non_cached_results;
1175 }
1176 DCHECK(results != nullptr);
1177
1178 RegisterReverseLookupResults(id_set, *results, callback);
1179 }
1180
ScanTokens(const std::set<int> & id_set,ReverseLookupCache * cache) const1181 void SystemDictionary::ScanTokens(
1182 const std::set<int> &id_set, ReverseLookupCache *cache) const {
1183 for (TokenScanIterator iter(codec_, token_array_);
1184 !iter.Done(); iter.Next()) {
1185 const TokenScanIterator::Result &result = iter.Get();
1186 if (result.value_id != -1 &&
1187 id_set.find(result.value_id) != id_set.end()) {
1188 ReverseLookupResult lookup_result;
1189 lookup_result.tokens_offset = result.tokens_offset;
1190 lookup_result.id_in_key_trie = result.index;
1191 cache->results.insert(std::make_pair(result.value_id, lookup_result));
1192 }
1193 }
1194 }
1195
RegisterReverseLookupResults(const std::set<int> & id_set,const ReverseLookupCache & cache,Callback * callback) const1196 void SystemDictionary::RegisterReverseLookupResults(
1197 const std::set<int> &id_set,
1198 const ReverseLookupCache &cache,
1199 Callback *callback) const {
1200 const uint8 *encoded_tokens_ptr = GetTokenArrayPtr(token_array_, 0);
1201 char buffer[LoudsTrie::kMaxDepth + 1];
1202 for (std::set<int>::const_iterator set_itr = id_set.begin();
1203 set_itr != id_set.end();
1204 ++set_itr) {
1205 const int value_id = *set_itr;
1206 typedef std::multimap<int, ReverseLookupResult>::const_iterator ResultItr;
1207 std::pair<ResultItr, ResultItr> range = cache.results.equal_range(*set_itr);
1208 for (ResultItr result_itr = range.first;
1209 result_itr != range.second;
1210 ++result_itr) {
1211 const ReverseLookupResult &reverse_result = result_itr->second;
1212
1213 const StringPiece encoded_key =
1214 key_trie_.RestoreKeyString(reverse_result.id_in_key_trie, buffer);
1215 string tokens_key;
1216 codec_->DecodeKey(encoded_key, &tokens_key);
1217 if (callback->OnKey(tokens_key) != Callback::TRAVERSE_CONTINUE) {
1218 continue;
1219 }
1220 for (TokenDecodeIterator iter(
1221 codec_, value_trie_, frequent_pos_, tokens_key,
1222 encoded_tokens_ptr + reverse_result.tokens_offset);
1223 !iter.Done(); iter.Next()) {
1224 const TokenInfo &token_info = iter.Get();
1225 if (token_info.token->attributes & Token::SPELLING_CORRECTION ||
1226 token_info.id_in_value_trie != value_id) {
1227 continue;
1228 }
1229 callback->OnToken(tokens_key, tokens_key, *token_info.token);
1230 }
1231 }
1232 }
1233 }
1234
1235 } // namespace dictionary
1236 } // namespace mozc
1237