1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "prediction/dictionary_predictor.h"
31
32 #include <algorithm>
33 #include <memory>
34 #include <random>
35 #include <set>
36 #include <string>
37 #include <utility>
38 #include <vector>
39
40 #include "base/flags.h"
41 #include "base/logging.h"
42 #include "base/port.h"
43 #include "base/serialized_string_array.h"
44 #include "base/system_util.h"
45 #include "base/util.h"
46 #include "composer/composer.h"
47 #include "composer/internal/typing_model.h"
48 #include "composer/table.h"
49 #include "config/config_handler.h"
50 #include "converter/connector.h"
51 #include "converter/converter_interface.h"
52 #include "converter/converter_mock.h"
53 #include "converter/immutable_converter.h"
54 #include "converter/immutable_converter_interface.h"
55 #include "converter/node_allocator.h"
56 #include "converter/segmenter.h"
57 #include "converter/segments.h"
58 #include "data_manager/data_manager_interface.h"
59 #include "data_manager/testing/mock_data_manager.h"
60 #include "dictionary/dictionary_interface.h"
61 #include "dictionary/dictionary_mock.h"
62 #include "dictionary/pos_group.h"
63 #include "dictionary/pos_matcher.h"
64 #include "dictionary/suffix_dictionary.h"
65 #include "dictionary/suppression_dictionary.h"
66 #include "dictionary/system/system_dictionary.h"
67 #include "prediction/suggestion_filter.h"
68 #include "prediction/zero_query_dict.h"
69 #include "protocol/commands.pb.h"
70 #include "protocol/config.pb.h"
71 #include "request/conversion_request.h"
72 #include "session/request_test_util.h"
73 #include "testing/base/public/gmock.h"
74 #include "testing/base/public/googletest.h"
75 #include "testing/base/public/gunit.h"
76 #include "transliteration/transliteration.h"
77 #include "usage_stats/usage_stats.h"
78 #include "usage_stats/usage_stats_testing_util.h"
79
80 DECLARE_bool(enable_expansion_for_dictionary_predictor);
81
82 namespace mozc {
83 namespace {
84
85 using std::unique_ptr;
86
87 using dictionary::DictionaryInterface;
88 using dictionary::DictionaryMock;
89 using dictionary::POSMatcher;
90 using dictionary::PosGroup;
91 using dictionary::SuffixDictionary;
92 using dictionary::SuppressionDictionary;
93 using dictionary::Token;
94 using ::testing::_;
95
// Large sentinel constant: 2 << 20 == 2097152.
// NOTE(review): its users are outside this part of the file; presumably it
// stands for an "effectively unbounded" cost -- confirm at the call sites.
constexpr int kInfinity = 2 << 20;
97
CreateSystemDictionaryFromDataManager(const DataManagerInterface & data_manager)98 DictionaryInterface *CreateSystemDictionaryFromDataManager(
99 const DataManagerInterface &data_manager) {
100 const char *data = NULL;
101 int size = 0;
102 data_manager.GetSystemDictionaryData(&data, &size);
103 using mozc::dictionary::SystemDictionary;
104 return SystemDictionary::Builder(data, size).Build();
105 }
106
CreateSuffixDictionaryFromDataManager(const DataManagerInterface & data_manager)107 DictionaryInterface *CreateSuffixDictionaryFromDataManager(
108 const DataManagerInterface &data_manager) {
109 StringPiece suffix_key_array_data, suffix_value_array_data;
110 const uint32 *token_array;
111 data_manager.GetSuffixDictionaryData(&suffix_key_array_data,
112 &suffix_value_array_data,
113 &token_array);
114 return new SuffixDictionary(suffix_key_array_data,
115 suffix_value_array_data,
116 token_array);
117 }
118
CreateSuggestionFilter(const DataManagerInterface & data_manager)119 SuggestionFilter *CreateSuggestionFilter(
120 const DataManagerInterface &data_manager) {
121 const char *data = NULL;
122 size_t size = 0;
123 data_manager.GetSuggestionFilterData(&data, &size);
124 return new SuggestionFilter(data, size);
125 }
126
127 // Simple immutable converter mock for the realtime conversion test
128 class ImmutableConverterMock : public ImmutableConverterInterface {
129 public:
ImmutableConverterMock()130 ImmutableConverterMock() {
131 Segment *segment = segments_.add_segment();
132 segment->set_key("わたしのなまえはなかのです");
133 Segment::Candidate *candidate = segment->add_candidate();
134 candidate->value = "私の名前は中野です";
135 candidate->key = ("わたしのなまえはなかのです");
136 // "わたしの, 私の", "わたし, 私"
137 candidate->PushBackInnerSegmentBoundary(12, 6, 9, 3);
138 // "なまえは, 名前は", "なまえ, 名前"
139 candidate->PushBackInnerSegmentBoundary(12, 9, 9, 6);
140 // "なかのです, 中野です", "なかの, 中野"
141 candidate->PushBackInnerSegmentBoundary(15, 12, 9, 6);
142 }
143
ConvertForRequest(const ConversionRequest & request,Segments * segments) const144 bool ConvertForRequest(
145 const ConversionRequest &request, Segments *segments) const override {
146 segments->CopyFrom(segments_);
147 return true;
148 }
149
150 private:
151 Segments segments_;
152 };
153
154 class TestableDictionaryPredictor : public DictionaryPredictor {
155 // Test-only subclass: Just changing access levels
156 public:
TestableDictionaryPredictor(const DataManagerInterface & data_manager,const ConverterInterface * converter,const ImmutableConverterInterface * immutable_converter,const DictionaryInterface * dictionary,const DictionaryInterface * suffix_dictionary,const Connector * connector,const Segmenter * segmenter,const POSMatcher * pos_matcher,const SuggestionFilter * suggestion_filter)157 TestableDictionaryPredictor(
158 const DataManagerInterface &data_manager,
159 const ConverterInterface *converter,
160 const ImmutableConverterInterface *immutable_converter,
161 const DictionaryInterface *dictionary,
162 const DictionaryInterface *suffix_dictionary,
163 const Connector *connector,
164 const Segmenter *segmenter,
165 const POSMatcher *pos_matcher,
166 const SuggestionFilter *suggestion_filter)
167 : DictionaryPredictor(data_manager,
168 converter,
169 immutable_converter,
170 dictionary,
171 suffix_dictionary,
172 connector,
173 segmenter,
174 pos_matcher,
175 suggestion_filter) {}
176
177 using DictionaryPredictor::PredictionTypes;
178 using DictionaryPredictor::NO_PREDICTION;
179 using DictionaryPredictor::UNIGRAM;
180 using DictionaryPredictor::BIGRAM;
181 using DictionaryPredictor::REALTIME;
182 using DictionaryPredictor::REALTIME_TOP;
183 using DictionaryPredictor::SUFFIX;
184 using DictionaryPredictor::ENGLISH;
185 using DictionaryPredictor::Result;
186 using DictionaryPredictor::MakeEmptyResult;
187 using DictionaryPredictor::AddPredictionToCandidates;
188 using DictionaryPredictor::AggregateRealtimeConversion;
189 using DictionaryPredictor::AggregateUnigramPrediction;
190 using DictionaryPredictor::AggregateBigramPrediction;
191 using DictionaryPredictor::AggregateSuffixPrediction;
192 using DictionaryPredictor::AggregateEnglishPrediction;
193 using DictionaryPredictor::ApplyPenaltyForKeyExpansion;
194 using DictionaryPredictor::TYPING_CORRECTION;
195 using DictionaryPredictor::AggregateTypeCorrectingPrediction;
196 };
197
198 // Helper class to hold dictionary data and predictor objects.
199 class MockDataAndPredictor {
200 public:
201 // Initializes predictor with given dictionary and suffix_dictionary. When
202 // NULL is passed to the first argument |dictionary|, the default
203 // DictionaryMock is used. For the second, the default is MockDataManager's
204 // suffix dictionary. Note that |dictionary| is owned by this class but
205 // |suffix_dictionary| is NOT owned because the current design assumes that
206 // suffix dictionary is singleton.
Init(const DictionaryInterface * dictionary=NULL,const DictionaryInterface * suffix_dictionary=NULL)207 void Init(const DictionaryInterface *dictionary = NULL,
208 const DictionaryInterface *suffix_dictionary = NULL) {
209 pos_matcher_.Set(data_manager_.GetPOSMatcherData());
210 suppression_dictionary_.reset(new SuppressionDictionary);
211 if (!dictionary) {
212 dictionary_mock_ = new DictionaryMock;
213 dictionary_.reset(dictionary_mock_);
214 } else {
215 dictionary_mock_ = NULL;
216 dictionary_.reset(dictionary);
217 }
218 if (!suffix_dictionary) {
219 suffix_dictionary_.reset(
220 CreateSuffixDictionaryFromDataManager(data_manager_));
221 } else {
222 suffix_dictionary_.reset(suffix_dictionary);
223 }
224 CHECK(suffix_dictionary_.get());
225
226 connector_.reset(Connector::CreateFromDataManager(data_manager_));
227 CHECK(connector_.get());
228
229 segmenter_.reset(Segmenter::CreateFromDataManager(data_manager_));
230 CHECK(segmenter_.get());
231
232 pos_group_.reset(new PosGroup(data_manager_.GetPosGroupData()));
233 suggestion_filter_.reset(CreateSuggestionFilter(data_manager_));
234 immutable_converter_.reset(
235 new ImmutableConverterImpl(dictionary_.get(),
236 suffix_dictionary_.get(),
237 suppression_dictionary_.get(),
238 connector_.get(),
239 segmenter_.get(),
240 &pos_matcher_,
241 pos_group_.get(),
242 suggestion_filter_.get()));
243 converter_.reset(new ConverterMock());
244 dictionary_predictor_.reset(
245 new TestableDictionaryPredictor(data_manager_,
246 converter_.get(),
247 immutable_converter_.get(),
248 dictionary_.get(),
249 suffix_dictionary_.get(),
250 connector_.get(),
251 segmenter_.get(),
252 &pos_matcher_,
253 suggestion_filter_.get()));
254 }
255
pos_matcher() const256 const POSMatcher &pos_matcher() const {
257 return pos_matcher_;
258 }
259
mutable_dictionary()260 DictionaryMock *mutable_dictionary() {
261 return dictionary_mock_;
262 }
263
mutable_converter_mock()264 ConverterMock *mutable_converter_mock() {
265 return converter_.get();
266 }
267
dictionary_predictor()268 const TestableDictionaryPredictor *dictionary_predictor() {
269 return dictionary_predictor_.get();
270 }
271
mutable_dictionary_predictor()272 TestableDictionaryPredictor *mutable_dictionary_predictor() {
273 return dictionary_predictor_.get();
274 }
275
276 private:
277 const testing::MockDataManager data_manager_;
278 POSMatcher pos_matcher_;
279 unique_ptr<SuppressionDictionary> suppression_dictionary_;
280 unique_ptr<const Connector> connector_;
281 unique_ptr<const Segmenter> segmenter_;
282 unique_ptr<const DictionaryInterface> suffix_dictionary_;
283 unique_ptr<const DictionaryInterface> dictionary_;
284 DictionaryMock *dictionary_mock_;
285 unique_ptr<const PosGroup> pos_group_;
286 unique_ptr<ImmutableConverterInterface> immutable_converter_;
287 unique_ptr<ConverterMock> converter_;
288 unique_ptr<const SuggestionFilter> suggestion_filter_;
289 unique_ptr<TestableDictionaryPredictor> dictionary_predictor_;
290 };
291
292 class CallCheckDictionary : public DictionaryInterface {
293 public:
294 CallCheckDictionary() = default;
295 ~CallCheckDictionary() override = default;
296
297 MOCK_CONST_METHOD1(HasKey,
298 bool(StringPiece));
299 MOCK_CONST_METHOD1(HasValue,
300 bool(StringPiece));
301 MOCK_CONST_METHOD3(LookupPredictive,
302 void(StringPiece key,
303 const ConversionRequest& convreq,
304 Callback *callback));
305 MOCK_CONST_METHOD3(LookupPrefix,
306 void(StringPiece key,
307 const ConversionRequest& convreq,
308 Callback *callback));
309 MOCK_CONST_METHOD3(LookupExact,
310 void(StringPiece key,
311 const ConversionRequest& convreq,
312 Callback *callback));
313 MOCK_CONST_METHOD3(LookupReverse,
314 void(StringPiece str,
315 const ConversionRequest& convreq,
316 Callback *callback));
317 };
318
319 // Action to call the third argument of LookupPrefix with the token
320 // <key, value>.
ACTION_P4(LookupPrefixOneToken,key,value,lid,rid)321 ACTION_P4(LookupPrefixOneToken, key, value, lid, rid) {
322 Token token;
323 token.key = key;
324 token.value = value;
325 token.lid = lid;
326 token.rid = rid;
327 arg2->OnToken(key, key, token);
328 }
329
MakeSegmentsForSuggestion(const string key,Segments * segments)330 void MakeSegmentsForSuggestion(const string key, Segments *segments) {
331 segments->Clear();
332 segments->set_max_prediction_candidates_size(10);
333 segments->set_request_type(Segments::SUGGESTION);
334 Segment *seg = segments->add_segment();
335 seg->set_key(key);
336 seg->set_segment_type(Segment::FREE);
337 }
338
MakeSegmentsForPrediction(const string key,Segments * segments)339 void MakeSegmentsForPrediction(const string key, Segments *segments) {
340 segments->Clear();
341 segments->set_max_prediction_candidates_size(50);
342 segments->set_request_type(Segments::PREDICTION);
343 Segment *seg = segments->add_segment();
344 seg->set_key(key);
345 seg->set_segment_type(Segment::FREE);
346 }
347
PrependHistorySegments(const string & key,const string & value,Segments * segments)348 void PrependHistorySegments(const string &key,
349 const string &value,
350 Segments *segments) {
351 Segment *seg = segments->push_front_segment();
352 seg->set_segment_type(Segment::HISTORY);
353 seg->set_key(key);
354 Segment::Candidate *c = seg->add_candidate();
355 c->key = key;
356 c->content_key = key;
357 c->value = value;
358 c->content_value = value;
359 }
360
361 class MockTypingModel : public mozc::composer::TypingModel {
362 public:
MockTypingModel()363 MockTypingModel() : TypingModel(nullptr, 0, nullptr, 0, nullptr) {}
364 ~MockTypingModel() override = default;
GetCost(StringPiece key) const365 int GetCost(StringPiece key) const override {
366 return 10;
367 }
368 };
369
370 } // namespace
371
372 class DictionaryPredictorTest : public ::testing::Test {
373 public:
DictionaryPredictorTest()374 DictionaryPredictorTest() :
375 default_expansion_flag_(
376 FLAGS_enable_expansion_for_dictionary_predictor) {
377 }
378
~DictionaryPredictorTest()379 ~DictionaryPredictorTest() override {
380 FLAGS_enable_expansion_for_dictionary_predictor = default_expansion_flag_;
381 }
382
383 protected:
SetUp()384 void SetUp() override {
385 FLAGS_enable_expansion_for_dictionary_predictor = false;
386 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
387 request_.reset(new commands::Request);
388 config_.reset(new config::Config);
389 config::ConfigHandler::GetDefaultConfig(config_.get());
390 table_.reset(new composer::Table);
391 composer_.reset(
392 new composer::Composer(table_.get(), request_.get(), config_.get()));
393 convreq_.reset(
394 new ConversionRequest(composer_.get(), request_.get(), config_.get()));
395
396 mozc::usage_stats::UsageStats::ClearAllStatsForTest();
397 }
398
TearDown()399 void TearDown() override {
400 FLAGS_enable_expansion_for_dictionary_predictor = false;
401 mozc::usage_stats::UsageStats::ClearAllStatsForTest();
402 }
403
AddWordsToMockDic(DictionaryMock * mock)404 static void AddWordsToMockDic(DictionaryMock *mock) {
405 const char kGoogleA[] = "ぐーぐるあ";
406
407 const char kGoogleAdsenseHiragana[] = "ぐーぐるあどせんす";
408 const char kGoogleAdsenseKatakana[] = "グーグルアドセンス";
409 mock->AddLookupPredictive(kGoogleA, kGoogleAdsenseHiragana,
410 kGoogleAdsenseKatakana, Token::NONE);
411
412 const char kGoogleAdwordsHiragana[] = "ぐーぐるあどわーず";
413 const char kGoogleAdwordsKatakana[] = "グーグルアドワーズ";
414 mock->AddLookupPredictive(kGoogleA, kGoogleAdwordsHiragana,
415 kGoogleAdwordsKatakana, Token::NONE);
416
417 const char kGoogle[] = "ぐーぐる";
418 mock->AddLookupPredictive(kGoogle, kGoogleAdsenseHiragana,
419 kGoogleAdsenseKatakana, Token::NONE);
420 mock->AddLookupPredictive(kGoogle, kGoogleAdwordsHiragana,
421 kGoogleAdwordsKatakana, Token::NONE);
422
423 const char kGoogleKatakana[] = "グーグル";
424 mock->AddLookupPrefix(kGoogle, kGoogleKatakana, kGoogleKatakana,
425 Token::NONE);
426
427 const char kAdsense[] = "あどせんす";
428 const char kAdsenseKatakana[] = "アドセンス";
429 mock->AddLookupPrefix(kAdsense, kAdsenseKatakana, kAdsenseKatakana,
430 Token::NONE);
431
432 const char kTestHiragana[] = "てすと";
433 const char kTestKatakana[] = "テスト";
434 mock->AddLookupPrefix(kTestHiragana, kTestHiragana, kTestKatakana,
435 Token::NONE);
436
437 const char kFilterHiragana[] = "ふぃるたーたいしょう";
438 const char kFilterPrefixHiragana[] = "ふぃるたーたいし";
439
440 // Note: This is in the filter
441 const char kFilterWord[] = "フィルター対象";
442
443 // Note: This is NOT in the filter
444 const char kNonFilterWord[] = "フィルター大将";
445
446 mock->AddLookupPrefix(kFilterHiragana, kFilterHiragana, kFilterWord,
447 Token::NONE);
448
449 mock->AddLookupPrefix(kFilterHiragana, kFilterHiragana, kNonFilterWord,
450 Token::NONE);
451
452 mock->AddLookupPredictive(kFilterHiragana, kFilterHiragana, kFilterWord,
453 Token::NONE);
454
455 mock->AddLookupPredictive(kFilterHiragana, kFilterPrefixHiragana,
456 kFilterWord, Token::NONE);
457
458 const char kWrongCapriHiragana[] = "かぷりちょうざ";
459 const char kRightCapriHiragana[] = "かぷりちょーざ";
460 const char kCapriKatakana[] = "カプリチョーザ";
461
462 mock->AddLookupPrefix(kWrongCapriHiragana, kRightCapriHiragana,
463 kCapriKatakana, Token::SPELLING_CORRECTION);
464
465 mock->AddLookupPredictive(kWrongCapriHiragana, kRightCapriHiragana,
466 kCapriKatakana, Token::SPELLING_CORRECTION);
467
468 const char kDe[] = "で";
469
470 mock->AddLookupPrefix(kDe, kDe, kDe, Token::NONE);
471
472 const char kHirosueHiragana[] = "ひろすえ";
473 const char kHirosue[] = "広末";
474
475 mock->AddLookupPrefix(kHirosueHiragana, kHirosueHiragana, kHirosue,
476 Token::NONE);
477
478 const char kYuzaHiragana[] = "ゆーざー";
479 const char kYuza[] = "ユーザー";
480 // For dictionary suggestion
481 mock->AddLookupPredictive(kYuzaHiragana, kYuzaHiragana, kYuza,
482 Token::USER_DICTIONARY);
483 // For realtime conversion
484 mock->AddLookupPrefix(kYuzaHiragana, kYuzaHiragana, kYuza,
485 Token::USER_DICTIONARY);
486
487 // Some English entries
488 mock->AddLookupPredictive("conv", "converge", "converge", Token::NONE);
489 mock->AddLookupPredictive("conv", "converged", "converged", Token::NONE);
490 mock->AddLookupPredictive("conv", "convergent", "convergent", Token::NONE);
491 mock->AddLookupPredictive("con", "contraction", "contraction", Token::NONE);
492 mock->AddLookupPredictive("con", "control", "control", Token::NONE);
493 }
494
CreateDictionaryPredictorWithMockData()495 MockDataAndPredictor *CreateDictionaryPredictorWithMockData() {
496 MockDataAndPredictor *ret = new MockDataAndPredictor;
497 ret->Init();
498 AddWordsToMockDic(ret->mutable_dictionary());
499 return ret;
500 }
501
GenerateKeyEvents(const string & text,std::vector<commands::KeyEvent> * keys)502 void GenerateKeyEvents(const string &text,
503 std::vector<commands::KeyEvent> *keys) {
504 keys->clear();
505
506 const char *begin = text.data();
507 const char *end = text.data() + text.size();
508 size_t mblen = 0;
509
510 while (begin < end) {
511 commands::KeyEvent key;
512 const char32 w = Util::UTF8ToUCS4(begin, end, &mblen);
513 if (Util::GetCharacterSet(w) == Util::ASCII) {
514 key.set_key_code(*begin);
515 } else {
516 key.set_key_code('?');
517 key.set_key_string(string(begin, mblen));
518 }
519 begin += mblen;
520 keys->push_back(key);
521 }
522 }
523
InsertInputSequence(const string & text,composer::Composer * composer)524 void InsertInputSequence(const string &text, composer::Composer *composer) {
525 std::vector<commands::KeyEvent> keys;
526 GenerateKeyEvents(text, &keys);
527
528 for (size_t i = 0; i < keys.size(); ++i) {
529 composer->InsertCharacterKeyEvent(keys[i]);
530 }
531 }
532
InsertInputSequenceForProbableKeyEvent(const string & text,const uint32 * corrected_key_codes,composer::Composer * composer)533 void InsertInputSequenceForProbableKeyEvent(const string &text,
534 const uint32 *corrected_key_codes,
535 composer::Composer *composer) {
536 std::vector<commands::KeyEvent> keys;
537 GenerateKeyEvents(text, &keys);
538
539 for (size_t i = 0; i < keys.size(); ++i) {
540 if (keys[i].key_code() != corrected_key_codes[i]) {
541 commands::KeyEvent::ProbableKeyEvent *probable_key_event;
542
543 probable_key_event = keys[i].add_probable_key_event();
544 probable_key_event->set_key_code(keys[i].key_code());
545 probable_key_event->set_probability(0.9f);
546
547 probable_key_event = keys[i].add_probable_key_event();
548 probable_key_event->set_key_code(corrected_key_codes[i]);
549 probable_key_event->set_probability(0.1f);
550 }
551 composer->InsertCharacterKeyEvent(keys[i]);
552 }
553 }
554
ExpansionForUnigramTestHelper(bool use_expansion)555 void ExpansionForUnigramTestHelper(bool use_expansion) {
556 config_->set_use_dictionary_suggest(true);
557 config_->set_use_realtime_conversion(false);
558 config_->set_use_kana_modifier_insensitive_conversion(use_expansion);
559
560 table_->LoadFromFile("system://romanji-hiragana.tsv");
561 composer_->SetTable(table_.get());
562 unique_ptr<MockDataAndPredictor> data_and_predictor(
563 new MockDataAndPredictor);
564 // CallCheckDictionary is managed by data_and_predictor;
565 CallCheckDictionary *check_dictionary = new CallCheckDictionary;
566 data_and_predictor->Init(check_dictionary, NULL);
567 const TestableDictionaryPredictor *predictor =
568 data_and_predictor->dictionary_predictor();
569
570 {
571 Segments segments;
572 segments.set_request_type(Segments::PREDICTION);
573 request_->set_kana_modifier_insensitive_conversion(use_expansion);
574 InsertInputSequence("gu-g", composer_.get());
575 Segment *segment = segments.add_segment();
576 CHECK(segment);
577 string query;
578 composer_->GetQueryForPrediction(&query);
579 segment->set_key(query);
580
581 EXPECT_CALL(*check_dictionary,
582 LookupPredictive(::testing::Ne(""),
583 ::testing::Ref(*convreq_), _))
584 .Times(::testing::AtLeast(1));
585
586 std::vector<TestableDictionaryPredictor::Result> results;
587 predictor->AggregateUnigramPrediction(
588 TestableDictionaryPredictor::UNIGRAM,
589 *convreq_, segments, &results);
590 }
591 }
592
ExpansionForBigramTestHelper(bool use_expansion)593 void ExpansionForBigramTestHelper(bool use_expansion) {
594 config_->set_use_dictionary_suggest(true);
595 config_->set_use_realtime_conversion(false);
596 config_->set_use_kana_modifier_insensitive_conversion(use_expansion);
597
598 table_->LoadFromFile("system://romanji-hiragana.tsv");
599 composer_->SetTable(table_.get());
600 unique_ptr<MockDataAndPredictor> data_and_predictor(
601 new MockDataAndPredictor);
602 // CallCheckDictionary is managed by data_and_predictor;
603 CallCheckDictionary *check_dictionary = new CallCheckDictionary;
604 data_and_predictor->Init(check_dictionary, NULL);
605 const TestableDictionaryPredictor *predictor =
606 data_and_predictor->dictionary_predictor();
607
608 {
609 Segments segments;
610 segments.set_request_type(Segments::PREDICTION);
611 // History segment's key and value should be in the dictionary
612 Segment *segment = segments.add_segment();
613 CHECK(segment);
614 segment->set_segment_type(Segment::HISTORY);
615 segment->set_key("ぐーぐる");
616 Segment::Candidate *cand = segment->add_candidate();
617 cand->key = "ぐーぐる";
618 cand->content_key = "ぐーぐる";
619 cand->value = "グーグル";
620 cand->content_value = "グーグル";
621
622 segment = segments.add_segment();
623 CHECK(segment);
624
625 request_->set_kana_modifier_insensitive_conversion(use_expansion);
626 InsertInputSequence("m", composer_.get());
627 string query;
628 composer_->GetQueryForPrediction(&query);
629 segment->set_key(query);
630
631 // History key and value should be in the dictionary.
632 EXPECT_CALL(*check_dictionary,
633 LookupPrefix(_, ::testing::Ref(*convreq_), _))
634 .WillOnce(LookupPrefixOneToken("ぐーぐる", "グーグル", 1, 1));
635 EXPECT_CALL(*check_dictionary,
636 LookupPredictive(_, ::testing::Ref(*convreq_), _));
637
638 std::vector<TestableDictionaryPredictor::Result> results;
639 predictor->AggregateBigramPrediction(TestableDictionaryPredictor::BIGRAM,
640 *convreq_, segments, &results);
641 }
642 }
643
ExpansionForSuffixTestHelper(bool use_expansion)644 void ExpansionForSuffixTestHelper(bool use_expansion) {
645 config_->set_use_dictionary_suggest(true);
646 config_->set_use_realtime_conversion(false);
647 config_->set_use_kana_modifier_insensitive_conversion(use_expansion);
648
649 table_->LoadFromFile("system://romanji-hiragana.tsv");
650 composer_->SetTable(table_.get());
651 unique_ptr<MockDataAndPredictor> data_and_predictor(
652 new MockDataAndPredictor);
653 // CallCheckDictionary is managed by data_and_predictor.
654 CallCheckDictionary *check_dictionary = new CallCheckDictionary;
655 data_and_predictor->Init(NULL, check_dictionary);
656 const TestableDictionaryPredictor *predictor =
657 data_and_predictor->dictionary_predictor();
658
659 {
660 Segments segments;
661 segments.set_request_type(Segments::PREDICTION);
662 Segment *segment = segments.add_segment();
663 CHECK(segment);
664
665 request_->set_kana_modifier_insensitive_conversion(use_expansion);
666 InsertInputSequence("des", composer_.get());
667 string query;
668 composer_->GetQueryForPrediction(&query);
669 segment->set_key(query);
670
671 EXPECT_CALL(*check_dictionary,
672 LookupPredictive(::testing::Ne(""),
673 ::testing::Ref(*convreq_), _))
674 .Times(::testing::AtLeast(1));
675
676 std::vector<TestableDictionaryPredictor::Result> results;
677 predictor->AggregateSuffixPrediction(
678 TestableDictionaryPredictor::SUFFIX,
679 *convreq_, segments, &results);
680 }
681 }
682
FindCandidateByValue(const Segment & segment,const string & value)683 bool FindCandidateByValue(
684 const Segment &segment,
685 const string &value) {
686 for (size_t i = 0; i < segment.candidates_size(); ++i) {
687 const Segment::Candidate &c = segment.candidate(i);
688 if (c.value == value) {
689 return true;
690 }
691 }
692 return false;
693 }
694
FindResultByValue(const std::vector<TestableDictionaryPredictor::Result> & results,const string & value)695 bool FindResultByValue(
696 const std::vector<TestableDictionaryPredictor::Result> &results,
697 const string &value) {
698 for (size_t i = 0; i < results.size(); ++i) {
699 if (results[i].value == value) {
700 return true;
701 }
702 }
703 return false;
704 }
705
AggregateEnglishPredictionTestHelper(transliteration::TransliterationType input_mode,const char * key,const char * expected_prefix,const char * expected_values[],size_t expected_values_size)706 void AggregateEnglishPredictionTestHelper(
707 transliteration::TransliterationType input_mode,
708 const char *key, const char *expected_prefix,
709 const char *expected_values[], size_t expected_values_size) {
710 unique_ptr<MockDataAndPredictor> data_and_predictor(
711 CreateDictionaryPredictorWithMockData());
712 const TestableDictionaryPredictor *predictor =
713 data_and_predictor->dictionary_predictor();
714
715 table_->LoadFromFile("system://romanji-hiragana.tsv");
716 composer_->Reset();
717 composer_->SetTable(table_.get());
718 composer_->SetInputMode(input_mode);
719 InsertInputSequence(key, composer_.get());
720
721 Segments segments;
722 MakeSegmentsForPrediction(key, &segments);
723
724 std::vector<TestableDictionaryPredictor::Result> results;
725 predictor->AggregateEnglishPrediction(
726 TestableDictionaryPredictor::ENGLISH,
727 *convreq_, segments, &results);
728
729 std::set<string> values;
730 for (size_t i = 0; i < results.size(); ++i) {
731 EXPECT_EQ(TestableDictionaryPredictor::ENGLISH, results[i].types);
732 EXPECT_TRUE(Util::StartsWith(results[i].value, expected_prefix))
733 << results[i].value
734 << " doesn't start with " << expected_prefix;
735 values.insert(results[i].value);
736 }
737 for (size_t i = 0; i < expected_values_size; ++i) {
738 EXPECT_TRUE(values.find(expected_values[i]) != values.end())
739 << expected_values[i] << " isn't in the results";
740 }
741 }
742
AggregateTypeCorrectingTestHelper(const char * key,const uint32 * corrected_key_codes,const char * expected_values[],size_t expected_values_size)743 void AggregateTypeCorrectingTestHelper(
744 const char *key,
745 const uint32 *corrected_key_codes,
746 const char *expected_values[],
747 size_t expected_values_size) {
748 request_->set_special_romanji_table(
749 commands::Request::QWERTY_MOBILE_TO_HIRAGANA);
750
751 unique_ptr<MockDataAndPredictor> data_and_predictor(
752 CreateDictionaryPredictorWithMockData());
753 const TestableDictionaryPredictor *predictor =
754 data_and_predictor->dictionary_predictor();
755
756 table_->LoadFromFile("system://qwerty_mobile-hiragana.tsv");
757 table_->typing_model_.reset(new MockTypingModel());
758 InsertInputSequenceForProbableKeyEvent(
759 key, corrected_key_codes, composer_.get());
760
761 Segments segments;
762 MakeSegmentsForPrediction(key, &segments);
763
764 std::vector<TestableDictionaryPredictor::Result> results;
765 predictor->AggregateTypeCorrectingPrediction(
766 TestableDictionaryPredictor::TYPING_CORRECTION,
767 *convreq_, segments, &results);
768
769 std::set<string> values;
770 for (size_t i = 0; i < results.size(); ++i) {
771 EXPECT_EQ(TestableDictionaryPredictor::TYPING_CORRECTION,
772 results[i].types);
773 values.insert(results[i].value);
774 }
775 for (size_t i = 0; i < expected_values_size; ++i) {
776 EXPECT_TRUE(values.find(expected_values[i]) != values.end())
777 << expected_values[i] << " isn't in the results";
778 }
779 }
780
781 unique_ptr<composer::Composer> composer_;
782 unique_ptr<composer::Table> table_;
783 unique_ptr<ConversionRequest> convreq_;
784 unique_ptr<config::Config> config_;
785 unique_ptr<commands::Request> request_;
786
787 private:
788 const bool default_expansion_flag_;
789 unique_ptr<ImmutableConverterInterface> immutable_converter_;
790 mozc::usage_stats::scoped_usage_stats_enabler usage_stats_enabler_;
791 };
792
// PredictForRequest() must fail while dictionary suggestion is disabled,
// succeed once it is enabled, and fail again for an empty query.
TEST_F(DictionaryPredictorTest, OnOffTest) {
  unique_ptr<MockDataAndPredictor> holder(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor = holder->dictionary_predictor();

  const char kQuery[] = "ぐーぐるあ";
  Segments segments;

  // turn off
  config_->set_use_dictionary_suggest(false);
  config_->set_use_realtime_conversion(false);
  MakeSegmentsForSuggestion(kQuery, &segments);
  EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));

  // turn on
  config_->set_use_dictionary_suggest(true);
  MakeSegmentsForSuggestion(kQuery, &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

  // empty query
  MakeSegmentsForSuggestion("", &segments);
  EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
}
816
// With realtime conversion and mixed conversion (mobile mode) enabled, a
// PARTIAL_SUGGESTION request must yield a prediction.
TEST_F(DictionaryPredictorTest, PartialSuggestion) {
  unique_ptr<MockDataAndPredictor> holder(
      CreateDictionaryPredictorWithMockData());
  {
    // Set up mock converter.
    Segments canned;
    Segment::Candidate *top = canned.add_segment()->add_candidate();
    top->value = "Realtime top result";
    holder->mutable_converter_mock()->SetStartConversionForRequest(&canned,
                                                                   true);
  }
  const DictionaryPredictor *predictor = holder->dictionary_predictor();

  Segments segments;
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(true);
  // turn on mobile mode
  request_->set_mixed_conversion(true);

  segments.Clear();
  segments.set_max_prediction_candidates_size(10);
  segments.set_request_type(Segments::PARTIAL_SUGGESTION);
  Segment *target = segments.add_segment();
  target->set_key("ぐーぐるあ");
  target->set_segment_type(Segment::FREE);

  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
}
846
// Checks that prediction succeeds for the query "あ" when the history segment
// is "ぐーぐる" / "グーグル".
TEST_F(DictionaryPredictorTest, BigramTest) {
  Segments segments;
  config_->set_use_dictionary_suggest(true);

  // Current query first, then the history segment in front of it.
  MakeSegmentsForSuggestion("あ", &segments);
  PrependHistorySegments("ぐーぐる", "グーグル", &segments);

  unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const target = mock->dictionary_predictor();

  // The original author expects "グーグルアドセンス" to be produced here; only
  // the success flag is verified.
  EXPECT_TRUE(target->PredictForRequest(*convreq_, &segments));
}
863
// Checks that, with zero-query suggestion requested, prediction succeeds for
// an empty query that follows the history segment "ぐーぐる" / "グーグル".
TEST_F(DictionaryPredictorTest, BigramTestWithZeroQuery) {
  Segments segments;
  config_->set_use_dictionary_suggest(true);
  request_->set_zero_query_suggestion(true);

  // Empty current query, preceded by the history segment.
  MakeSegmentsForSuggestion("", &segments);
  PrependHistorySegments("ぐーぐる", "グーグル", &segments);

  unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const target = mock->dictionary_predictor();

  EXPECT_TRUE(target->PredictForRequest(*convreq_, &segments));
}
881
882 // Check that previous candidate never be shown at the current candidate.
TEST_F(DictionaryPredictorTest,Regression3042706)883 TEST_F(DictionaryPredictorTest, Regression3042706) {
884 Segments segments;
885 config_->set_use_dictionary_suggest(true);
886
887 MakeSegmentsForSuggestion("だい", &segments);
888
889 // history is "きょうと/京都"
890 PrependHistorySegments("きょうと", "京都", &segments);
891
892 unique_ptr<MockDataAndPredictor> data_and_predictor(
893 CreateDictionaryPredictorWithMockData());
894 const DictionaryPredictor *predictor =
895 data_and_predictor->dictionary_predictor();
896 EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
897 EXPECT_EQ(2, segments.segments_size()); // history + current
898 for (int i = 0; i < segments.segment(1).candidates_size(); ++i) {
899 const Segment::Candidate &candidate = segments.segment(1).candidate(i);
900 EXPECT_FALSE(Util::StartsWith(candidate.content_value, "京都"));
901 EXPECT_TRUE(Util::StartsWith(candidate.content_key, "だい"));
902 }
903 }
904
// Walks DictionaryPredictor::GetPredictionTypes() through combinations of
// request type, key length, history, input mode and special romaji table, and
// checks the returned prediction-type bits for each combination.
TEST_F(DictionaryPredictorTest, GetPredictionTypes) {
  Segments segments;
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(false);

  // Prediction types for the fixture's current request and |segments|.
  const auto current_types = [&]() {
    return DictionaryPredictor::GetPredictionTypes(*convreq_, segments);
  };
  // Only the ENGLISH bit of the current prediction types.
  const auto english_bit = [&]() {
    return DictionaryPredictor::GetPredictionTypes(*convreq_, segments) &
           DictionaryPredictor::ENGLISH;
  };

  // No segments at all.
  EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, current_types());

  // A key of ordinary length: UNIGRAM, except for CONVERSION requests.
  MakeSegmentsForSuggestion("てすとだよ", &segments);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM, current_types());

  segments.set_request_type(Segments::PREDICTION);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM, current_types());

  segments.set_request_type(Segments::CONVERSION);
  EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, current_types());

  // A short key: nothing right after MakeSegmentsForSuggestion(), UNIGRAM
  // once the request type is PREDICTION.
  MakeSegmentsForSuggestion("てす", &segments);
  EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, current_types());

  segments.set_request_type(Segments::PREDICTION);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM, current_types());

  // A zipcode-like key.
  MakeSegmentsForSuggestion("0123", &segments);
  EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, current_types());

  // Short history with a long current key => UNIGRAM only.
  MakeSegmentsForSuggestion("てすとだよ", &segments);
  PrependHistorySegments("A", "A", &segments);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM, current_types());

  // Long history and long current key => UNIGRAM | BIGRAM.
  MakeSegmentsForSuggestion("てすとだよ", &segments);
  PrependHistorySegments("てすとだよ", "abc", &segments);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM | DictionaryPredictor::BIGRAM,
            current_types());

  // Long history with a short current key => BIGRAM only.
  MakeSegmentsForSuggestion("A", &segments);
  PrependHistorySegments("てすとだよ", "abc", &segments);
  EXPECT_EQ(DictionaryPredictor::BIGRAM, current_types());

  // TYPING_CORRECTION must not be set here (the config flag for it is not
  // turned on in this test).
  MakeSegmentsForSuggestion("pはよう", &segments);
  EXPECT_FALSE(DictionaryPredictor::TYPING_CORRECTION & current_types());

  // Input mode HALF_ASCII or FULL_ASCII => ENGLISH.
  {
    config_->set_use_dictionary_suggest(true);

    MakeSegmentsForSuggestion("hel", &segments);

    composer_->SetInputMode(transliteration::HALF_ASCII);
    EXPECT_EQ(DictionaryPredictor::ENGLISH, current_types());

    composer_->SetInputMode(transliteration::FULL_ASCII);
    EXPECT_EQ(DictionaryPredictor::ENGLISH, current_types());

    // Turning dictionary suggestion off disables English prediction as well.
    config_->set_use_dictionary_suggest(false);

    composer_->SetInputMode(transliteration::HALF_ASCII);
    EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, current_types());

    composer_->SetInputMode(transliteration::FULL_ASCII);
    EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, current_types());

    // PARTIAL_SUGGESTION with dictionary suggestion on adds REALTIME to
    // ENGLISH.
    config_->set_use_dictionary_suggest(true);

    segments.set_request_type(Segments::PARTIAL_SUGGESTION);
    composer_->SetInputMode(transliteration::HALF_ASCII);
    EXPECT_EQ(DictionaryPredictor::ENGLISH | DictionaryPredictor::REALTIME,
              current_types());

    composer_->SetInputMode(transliteration::FULL_ASCII);
    EXPECT_EQ(DictionaryPredictor::ENGLISH | DictionaryPredictor::REALTIME,
              current_types());

    // PARTIAL_SUGGESTION with dictionary suggestion off leaves only REALTIME.
    config_->set_use_dictionary_suggest(false);

    composer_->SetInputMode(transliteration::HALF_ASCII);
    EXPECT_EQ(DictionaryPredictor::REALTIME, current_types());

    composer_->SetInputMode(transliteration::FULL_ASCII);
    EXPECT_EQ(DictionaryPredictor::REALTIME, current_types());
  }

  // Special romaji tables: ENGLISH is reported only for the qwerty mobile
  // tables, and only when language aware suggestion is requested.
  {
    // Remember the settings this section modifies; they are restored at the
    // end of the section.
    const auto orig_input_mode = composer_->GetInputMode();
    const auto orig_table = request_->special_romanji_table();
    const auto orig_lang_aware = request_->language_aware_input();
    const bool orig_use_dictionary_suggest = config_->use_dictionary_suggest();

    composer_->SetInputMode(transliteration::HIRAGANA);
    config_->set_use_dictionary_suggest(true);

    // Qwerty tables.
    for (const auto table :
         {commands::Request::QWERTY_MOBILE_TO_HIRAGANA,
          commands::Request::QWERTY_MOBILE_TO_HALFWIDTHASCII}) {
      request_->set_special_romanji_table(table);

      // Default behavior: no English prediction.
      request_->set_language_aware_input(
          commands::Request::DEFAULT_LANGUAGE_AWARE_BEHAVIOR);
      EXPECT_EQ(0, english_bit());

      // Language aware input off: no English prediction.
      request_->set_language_aware_input(
          commands::Request::NO_LANGUAGE_AWARE_INPUT);
      EXPECT_EQ(0, english_bit());

      // Language aware suggestion on: English prediction is included.
      request_->set_language_aware_input(
          commands::Request::LANGUAGE_AWARE_SUGGESTION);
      EXPECT_EQ(DictionaryPredictor::ENGLISH, english_bit());
    }

    // Non-qwerty tables: ENGLISH is never reported, whatever the language
    // aware input setting is.
    for (const auto table : {
             commands::Request::FLICK_TO_HALFWIDTHASCII,
             commands::Request::FLICK_TO_HIRAGANA,
             commands::Request::GODAN_TO_HALFWIDTHASCII,
             commands::Request::GODAN_TO_HIRAGANA,
             commands::Request::NOTOUCH_TO_HALFWIDTHASCII,
             commands::Request::NOTOUCH_TO_HIRAGANA,
             commands::Request::TOGGLE_FLICK_TO_HALFWIDTHASCII,
             commands::Request::TOGGLE_FLICK_TO_HIRAGANA,
             commands::Request::TWELVE_KEYS_TO_HALFWIDTHASCII,
             commands::Request::TWELVE_KEYS_TO_HIRAGANA,
         }) {
      request_->set_special_romanji_table(table);

      request_->set_language_aware_input(
          commands::Request::DEFAULT_LANGUAGE_AWARE_BEHAVIOR);
      EXPECT_EQ(0, english_bit());

      request_->set_language_aware_input(
          commands::Request::NO_LANGUAGE_AWARE_INPUT);
      EXPECT_EQ(0, english_bit());

      request_->set_language_aware_input(
          commands::Request::LANGUAGE_AWARE_SUGGESTION);
      EXPECT_EQ(0, english_bit());
    }

    // Restore the settings saved at the top of this section.
    config_->set_use_dictionary_suggest(orig_use_dictionary_suggest);
    request_->set_language_aware_input(orig_lang_aware);
    request_->set_special_romanji_table(orig_table);
    composer_->SetInputMode(orig_input_mode);
  }
}
1109
// Checks that enabling use_typing_correction adds TYPING_CORRECTION to the
// UNIGRAM prediction type for the key "pはよう".
TEST_F(DictionaryPredictorTest, GetPredictionTypesTestWithTypingCorrection) {
  Segments segments;
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(false);
  config_->set_use_typing_correction(true);

  MakeSegmentsForSuggestion("pはよう", &segments);

  const auto expected_types =
      DictionaryPredictor::UNIGRAM | DictionaryPredictor::TYPING_CORRECTION;
  EXPECT_EQ(expected_types,
            DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
}
1121
// Checks the prediction types reported by GetPredictionTypes() when the
// request asks for zero-query suggestion, for several combinations of current
// key and history.
TEST_F(DictionaryPredictorTest, GetPredictionTypesTestWithZeroQuerySuggestion) {
  Segments segments;
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(false);
  request_->set_zero_query_suggestion(true);

  unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *target = mock->dictionary_predictor();

  // Prediction types for the fixture's current request and |segments|.
  const auto current_types = [&]() {
    return target->GetPredictionTypes(*convreq_, segments);
  };

  // No segments at all.
  EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, current_types());

  // A key of ordinary length: UNIGRAM, except for CONVERSION requests.
  MakeSegmentsForSuggestion("てすとだよ", &segments);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM, current_types());

  segments.set_request_type(Segments::PREDICTION);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM, current_types());

  segments.set_request_type(Segments::CONVERSION);
  EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, current_types());

  // A one-character key yields UNIGRAM both before and after switching to
  // PREDICTION.
  MakeSegmentsForSuggestion("て", &segments);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM, current_types());

  segments.set_request_type(Segments::PREDICTION);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM, current_types());

  // Short history, long current key => UNIGRAM | SUFFIX.
  MakeSegmentsForSuggestion("てすとだよ", &segments);
  PrependHistorySegments("A", "A", &segments);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM | DictionaryPredictor::SUFFIX,
            current_types());

  // Long history, long current key => UNIGRAM | BIGRAM | SUFFIX.
  MakeSegmentsForSuggestion("てすとだよ", &segments);
  PrependHistorySegments("てすとだよ", "abc", &segments);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM | DictionaryPredictor::BIGRAM |
                DictionaryPredictor::SUFFIX,
            current_types());

  // Long history, short current key => BIGRAM | UNIGRAM | SUFFIX.
  MakeSegmentsForSuggestion("A", &segments);
  PrependHistorySegments("てすとだよ", "abc", &segments);
  EXPECT_EQ(DictionaryPredictor::BIGRAM | DictionaryPredictor::UNIGRAM |
                DictionaryPredictor::SUFFIX,
            current_types());

  // Short history, empty current key => SUFFIX only.
  MakeSegmentsForSuggestion("", &segments);
  PrependHistorySegments("て", "abc", &segments);
  EXPECT_EQ(DictionaryPredictor::SUFFIX, current_types());

  // Short history, short current key => UNIGRAM | SUFFIX.
  MakeSegmentsForSuggestion("A", &segments);
  PrependHistorySegments("て", "abc", &segments);
  EXPECT_EQ(DictionaryPredictor::UNIGRAM | DictionaryPredictor::SUFFIX,
            current_types());

  // Long history, empty current key => BIGRAM | SUFFIX.
  MakeSegmentsForSuggestion("", &segments);
  PrependHistorySegments("てすとだよ", "abc", &segments);
  EXPECT_EQ(DictionaryPredictor::BIGRAM | DictionaryPredictor::SUFFIX,
            current_types());
}
1213
// Checks that AggregateUnigramPrediction() produces results only when the
// UNIGRAM type is requested, and that each result is a UNIGRAM entry whose key
// starts with the query.
TEST_F(DictionaryPredictorTest, AggregateUnigramPrediction) {
  Segments segments;
  unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const target = mock->dictionary_predictor();

  const char kKey[] = "ぐーぐるあ";
  MakeSegmentsForSuggestion(kKey, &segments);

  std::vector<DictionaryPredictor::Result> results;

  // Requests for other prediction types must leave |results| untouched.
  target->AggregateUnigramPrediction(DictionaryPredictor::BIGRAM, *convreq_,
                                     segments, &results);
  EXPECT_TRUE(results.empty());

  target->AggregateUnigramPrediction(DictionaryPredictor::REALTIME, *convreq_,
                                     segments, &results);
  EXPECT_TRUE(results.empty());

  // The UNIGRAM request is the one that fills |results|.
  target->AggregateUnigramPrediction(DictionaryPredictor::UNIGRAM, *convreq_,
                                     segments, &results);
  EXPECT_FALSE(results.empty());

  for (const DictionaryPredictor::Result &result : results) {
    EXPECT_EQ(DictionaryPredictor::UNIGRAM, result.types);
    EXPECT_TRUE(Util::StartsWith(result.key, kKey));
  }

  // Aggregation must leave the single conversion segment in place.
  EXPECT_EQ(1, segments.conversion_segments_size());
}
1249
// Checks that AggregateUnigramCandidateForMixedConversion() keeps a user
// dictionary entry even when it sits among system dictionary entries that
// share its prefix.
TEST_F(DictionaryPredictorTest, AggregateUnigramCandidateForMixedConversion) {
  const char kHiraganaA[] = "あ";

  DictionaryMock mock_dict;
  // System dictionary entry "a".
  mock_dict.AddLookupPredictive(kHiraganaA, kHiraganaA, "a", Token::NONE);
  // System dictionary entries "a0" ... "a9". Per the original author, these
  // are detected as redundant by MaybeRedundant() in dictionary_predictor.cc.
  for (int digit = 0; digit < 10; ++digit) {
    mock_dict.AddLookupPredictive(kHiraganaA, kHiraganaA,
                                  Util::StringPrintf("a%d", digit),
                                  Token::NONE);
  }
  // User dictionary entry "aaa". Per the original author, MaybeRedundant()
  // flags it as redundant too, but it must survive in prediction.
  mock_dict.AddLookupPredictive(kHiraganaA, kHiraganaA, "aaa",
                                Token::USER_DICTIONARY);

  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(false);
  table_->LoadFromFile("system://12keys-hiragana.tsv");
  composer_->SetTable(table_.get());
  InsertInputSequence(kHiraganaA, composer_.get());

  Segments segments;
  segments.set_request_type(Segments::PREDICTION);
  segments.add_segment()->set_key(kHiraganaA);

  std::vector<DictionaryPredictor::Result> results;
  DictionaryPredictor::AggregateUnigramCandidateForMixedConversion(
      mock_dict, *convreq_, segments, &results);

  // Look for the user dictionary entry "aaa" among the results.
  const auto iter = std::find_if(
      results.begin(), results.end(),
      [&kHiraganaA](const DictionaryPredictor::Result &result) {
        return result.key == kHiraganaA && result.value == "aaa" &&
               result.IsUserDictionaryResult();
      });
  EXPECT_NE(results.end(), iter);
}
1291
// Checks that AggregateBigramPrediction() produces results only when the
// BIGRAM type is requested, that every result extends the history key and
// value, and that a history without a bigram continuation yields nothing.
TEST_F(DictionaryPredictorTest, AggregateBigramPrediction) {
  unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const target = mock->dictionary_predictor();

  // History "ぐーぐる" / "グーグル" followed by the query "あ".
  {
    const char kHistoryKey[] = "ぐーぐる";
    const char kHistoryValue[] = "グーグル";

    Segments segments;
    MakeSegmentsForSuggestion("あ", &segments);
    PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);

    std::vector<DictionaryPredictor::Result> results;

    // Requests for other prediction types must leave |results| untouched.
    target->AggregateBigramPrediction(DictionaryPredictor::UNIGRAM, *convreq_,
                                      segments, &results);
    EXPECT_TRUE(results.empty());

    target->AggregateBigramPrediction(DictionaryPredictor::REALTIME, *convreq_,
                                      segments, &results);
    EXPECT_TRUE(results.empty());

    target->AggregateBigramPrediction(DictionaryPredictor::BIGRAM, *convreq_,
                                      segments, &results);
    EXPECT_FALSE(results.empty());

    for (const DictionaryPredictor::Result &result : results) {
      // Per the original author, "グーグルアドセンス", "グーグル" and
      // "アドセンス" are in the dictionary. Only the first must be typed
      // BIGRAM; any other result must be typed NO_PREDICTION.
      const auto expected_type = (result.value == "グーグルアドセンス")
                                     ? DictionaryPredictor::BIGRAM
                                     : DictionaryPredictor::NO_PREDICTION;
      EXPECT_EQ(expected_type, result.types);

      EXPECT_TRUE(Util::StartsWith(result.key, kHistoryKey));
      EXPECT_TRUE(Util::StartsWith(result.value, kHistoryValue));
      // The query is not empty, so the zero-query suffix flag must be clear.
      EXPECT_FALSE(result.source_info &
                   Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX);
    }

    EXPECT_EQ(1, segments.conversion_segments_size());
  }

  // History "てす" / "テス": the BIGRAM request is expected to yield nothing.
  {
    const char kHistoryKey[] = "てす";
    const char kHistoryValue[] = "テス";

    Segments segments;
    MakeSegmentsForSuggestion("あ", &segments);
    PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);

    std::vector<DictionaryPredictor::Result> results;
    target->AggregateBigramPrediction(DictionaryPredictor::BIGRAM, *convreq_,
                                      segments, &results);
    EXPECT_TRUE(results.empty());
  }
}
1358
// Checks AggregateBigramPrediction() for an empty (zero) query under the
// mobile request: results appear only for the BIGRAM type and each of them
// extends the history key and value.
TEST_F(DictionaryPredictorTest, AggregateZeroQueryBigramPrediction) {
  unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const target = mock->dictionary_predictor();
  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  {
    const char kHistoryKey[] = "ぐーぐる";
    const char kHistoryValue[] = "グーグル";

    // Empty current query preceded by the history "グーグル".
    Segments segments;
    MakeSegmentsForSuggestion("", &segments);
    PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);

    std::vector<DictionaryPredictor::Result> results;

    // Requests for other prediction types must leave |results| untouched.
    target->AggregateBigramPrediction(DictionaryPredictor::UNIGRAM, *convreq_,
                                      segments, &results);
    EXPECT_TRUE(results.empty());

    target->AggregateBigramPrediction(DictionaryPredictor::REALTIME, *convreq_,
                                      segments, &results);
    EXPECT_TRUE(results.empty());

    target->AggregateBigramPrediction(DictionaryPredictor::BIGRAM, *convreq_,
                                      segments, &results);
    EXPECT_FALSE(results.empty());

    for (const DictionaryPredictor::Result &result : results) {
      EXPECT_TRUE(Util::StartsWith(result.key, kHistoryKey));
      EXPECT_TRUE(Util::StartsWith(result.value, kHistoryValue));
      // Even for a zero query, bigram results must not carry the zero-query
      // *suffix* flag.
      EXPECT_FALSE(result.source_info &
                   Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX);
    }
  }
}
1401
// Checks the ordering invariants of GetRealtimeCandidateMaxSize() across the
// four request types, with and without mixed conversion.
TEST_F(DictionaryPredictorTest, GetRealtimeCandidateMaxSize) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();
  Segments segments;

  // GetRealtimeCandidateMaxSize has some heuristics so here we test following
  // conditions.
  // - The result must be equal or less than kMaxSize;
  // - If mixed_conversion is the same, the result of SUGGESTION is
  //   equal or less than PREDICTION.
  // - If mixed_conversion is the same, the result of PARTIAL_SUGGESTION is
  //   equal or less than PARTIAL_PREDICTION.
  // - Partial version has equal or greater than non-partial version.

  const size_t kMaxSize = 100;

  // non-partial, non-mixed-conversion
  segments.set_request_type(Segments::PREDICTION);
  const size_t prediction_no_mixed =
      predictor->GetRealtimeCandidateMaxSize(segments, false, kMaxSize);
  EXPECT_GE(kMaxSize, prediction_no_mixed);

  segments.set_request_type(Segments::SUGGESTION);
  const size_t suggestion_no_mixed =
      predictor->GetRealtimeCandidateMaxSize(segments, false, kMaxSize);
  EXPECT_GE(kMaxSize, suggestion_no_mixed);
  EXPECT_LE(suggestion_no_mixed, prediction_no_mixed);

  // non-partial, mixed-conversion
  segments.set_request_type(Segments::PREDICTION);
  const size_t prediction_mixed =
      predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  EXPECT_GE(kMaxSize, prediction_mixed);

  segments.set_request_type(Segments::SUGGESTION);
  const size_t suggestion_mixed =
      predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  EXPECT_GE(kMaxSize, suggestion_mixed);
  // The SUGGESTION <= PREDICTION invariant listed above also has to hold in
  // mixed-conversion mode; it was previously asserted for the non-mixed case
  // only.
  EXPECT_LE(suggestion_mixed, prediction_mixed);

  // partial, non-mixed-conversion
  segments.set_request_type(Segments::PARTIAL_PREDICTION);
  const size_t partial_prediction_no_mixed =
      predictor->GetRealtimeCandidateMaxSize(segments, false, kMaxSize);
  EXPECT_GE(kMaxSize, partial_prediction_no_mixed);

  segments.set_request_type(Segments::PARTIAL_SUGGESTION);
  const size_t partial_suggestion_no_mixed =
      predictor->GetRealtimeCandidateMaxSize(segments, false, kMaxSize);
  EXPECT_GE(kMaxSize, partial_suggestion_no_mixed);
  EXPECT_LE(partial_suggestion_no_mixed, partial_prediction_no_mixed);

  // partial, mixed-conversion
  segments.set_request_type(Segments::PARTIAL_PREDICTION);
  const size_t partial_prediction_mixed =
      predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  EXPECT_GE(kMaxSize, partial_prediction_mixed);

  segments.set_request_type(Segments::PARTIAL_SUGGESTION);
  const size_t partial_suggestion_mixed =
      predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  EXPECT_GE(kMaxSize, partial_suggestion_mixed);
  EXPECT_LE(partial_suggestion_mixed, partial_prediction_mixed);

  // Partial requests get at least as many candidates as their non-partial
  // counterparts.
  EXPECT_GE(partial_prediction_no_mixed, prediction_no_mixed);
  EXPECT_GE(partial_prediction_mixed, prediction_mixed);
  EXPECT_GE(partial_suggestion_no_mixed, suggestion_no_mixed);
  EXPECT_GE(partial_suggestion_mixed, suggestion_mixed);
}
1472
// Checks that, in mixed-conversion mode, GetRealtimeCandidateMaxSize() allows
// strictly more realtime candidates for a short key than for a long key.
TEST_F(DictionaryPredictorTest, GetRealtimeCandidateMaxSizeForMixed) {
  unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const target = mock->dictionary_predictor();

  Segments segments;
  Segment *const query = segments.add_segment();

  const size_t kMaxSize = 100;

  // Short key: as many results as possible should be offered.
  query->set_key("short");

  segments.set_request_type(Segments::SUGGESTION);
  const size_t suggestion_for_short =
      target->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  EXPECT_GE(kMaxSize, suggestion_for_short);

  segments.set_request_type(Segments::PREDICTION);
  const size_t prediction_for_short =
      target->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  EXPECT_GE(kMaxSize, prediction_for_short);

  // Long key: only a few results should be offered.
  query->set_key("long_request_key");

  segments.set_request_type(Segments::SUGGESTION);
  const size_t suggestion_for_long =
      target->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  EXPECT_GE(kMaxSize, suggestion_for_long);
  EXPECT_GT(suggestion_for_short, suggestion_for_long);

  segments.set_request_type(Segments::PREDICTION);
  const size_t prediction_for_long =
      target->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  EXPECT_GE(kMaxSize, prediction_for_long);
  // The two long-key limits together must still stay below kMaxSize.
  EXPECT_GT(kMaxSize, prediction_for_long + suggestion_for_long);
  EXPECT_GT(prediction_for_short, prediction_for_long);
}
1510
// Checks AggregateRealtimeConversion() both with and without
// use_actual_converter_for_realtime_conversion: results appear only when the
// REALTIME type is requested, and using the actual converter adds a
// REALTIME_TOP result built from the mock converter's segments.
TEST_F(DictionaryPredictorTest, AggregateRealtimeConversion) {
  testing::MockDataManager data_manager;
  unique_ptr<const DictionaryInterface> dictionary(new DictionaryMock);
  unique_ptr<ConverterMock> converter(new ConverterMock);
  unique_ptr<ImmutableConverterInterface> immutable_converter(
      new ImmutableConverterMock);
  unique_ptr<const DictionaryInterface> suffix_dictionary(
      CreateSuffixDictionaryFromDataManager(data_manager));
  unique_ptr<const Connector> connector(
      Connector::CreateFromDataManager(data_manager));
  unique_ptr<const Segmenter> segmenter(
      Segmenter::CreateFromDataManager(data_manager));
  unique_ptr<const SuggestionFilter> suggestion_filter(
      CreateSuggestionFilter(data_manager));
  const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData());
  unique_ptr<TestableDictionaryPredictor> predictor(
      new TestableDictionaryPredictor(
          data_manager, converter.get(), immutable_converter.get(),
          dictionary.get(), suffix_dictionary.get(), connector.get(),
          segmenter.get(), &pos_matcher, suggestion_filter.get()));

  const char kKey[] = "わたしのなまえはなかのです";

  // Teach the mock converter a three-segment conversion of kKey:
  //   "わたしの"  | "なまえは" | "なかのです"
  //   "Watashino" | "Namaeha"  | "Nakanodesu"
  {
    const struct {
      const char *key;
      const char *value;
    } kConverted[] = {
        {"わたしの", "Watashino"},
        {"なまえは", "Namaeha"},
        {"なかのです", "Nakanodesu"},
    };

    Segments converted;
    for (const auto &part : kConverted) {
      Segment *const segment = converted.add_segment();
      segment->set_key(part.key);
      segment->add_candidate()->value = part.value;
    }
    converter->SetStartConversionForRequest(&converted, true);
  }

  // use_actual_converter_for_realtime_conversion == false: the realtime
  // result comes from ImmutableConverterMock.
  {
    Segments segments;
    MakeSegmentsForSuggestion(kKey, &segments);

    std::vector<TestableDictionaryPredictor::Result> results;
    convreq_->set_use_actual_converter_for_realtime_conversion(false);

    // Requests for other prediction types must leave |results| untouched.
    predictor->AggregateRealtimeConversion(
        TestableDictionaryPredictor::UNIGRAM, *convreq_, &segments, &results);
    EXPECT_TRUE(results.empty());

    predictor->AggregateRealtimeConversion(
        TestableDictionaryPredictor::BIGRAM, *convreq_, &segments, &results);
    EXPECT_TRUE(results.empty());

    predictor->AggregateRealtimeConversion(
        TestableDictionaryPredictor::REALTIME, *convreq_, &segments, &results);

    ASSERT_EQ(1, results.size());
    const TestableDictionaryPredictor::Result &only = results[0];
    EXPECT_EQ(TestableDictionaryPredictor::REALTIME, only.types);
    EXPECT_EQ(kKey, only.key);
    EXPECT_EQ(3, only.inner_segment_boundary.size());
  }

  // use_actual_converter_for_realtime_conversion == true: the realtime
  // result comes from ConverterMock.
  {
    Segments segments;
    MakeSegmentsForSuggestion(kKey, &segments);

    std::vector<TestableDictionaryPredictor::Result> results;
    convreq_->set_use_actual_converter_for_realtime_conversion(true);

    // Requests for other prediction types must leave |results| untouched.
    predictor->AggregateRealtimeConversion(
        TestableDictionaryPredictor::UNIGRAM, *convreq_, &segments, &results);
    EXPECT_TRUE(results.empty());

    predictor->AggregateRealtimeConversion(
        TestableDictionaryPredictor::BIGRAM, *convreq_, &segments, &results);
    EXPECT_TRUE(results.empty());

    predictor->AggregateRealtimeConversion(
        TestableDictionaryPredictor::REALTIME, *convreq_, &segments, &results);

    // With the actual converter in use, a second result labelled
    // REALTIME_TOP is expected in addition to the regular one.
    ASSERT_EQ(2, results.size());
    bool realtime_top_found = false;
    // Results are type-checked in order until the concatenated converter
    // output is found; the scan stops there.
    for (const TestableDictionaryPredictor::Result &result : results) {
      EXPECT_EQ(TestableDictionaryPredictor::REALTIME |
                    TestableDictionaryPredictor::REALTIME_TOP,
                result.types);
      const bool is_converter_output =
          result.key == kKey &&
          result.value == "WatashinoNamaehaNakanodesu" &&
          result.inner_segment_boundary.size() == 3;
      if (is_converter_output) {
        realtime_top_found = true;
        break;
      }
    }
    EXPECT_TRUE(realtime_top_found);
  }
}
1626
1627 namespace {
1628
1629 struct SimpleSuffixToken {
1630 const char *key;
1631 const char *value;
1632 };
1633
1634 const SimpleSuffixToken kSuffixTokens[] = {
1635 {"いか", "以下"}
1636 };
1637
1638 class TestSuffixDictionary : public DictionaryInterface {
1639 public:
1640 TestSuffixDictionary() = default;
1641 ~TestSuffixDictionary() override = default;
1642
HasKey(StringPiece value) const1643 bool HasKey(StringPiece value) const override { return false; }
1644
HasValue(StringPiece value) const1645 bool HasValue(StringPiece value) const override { return false; }
1646
LookupPredictive(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const1647 void LookupPredictive(StringPiece key,
1648 const ConversionRequest &conversion_request,
1649 Callback *callback) const override {
1650 Token token;
1651 for (size_t i = 0; i < arraysize(kSuffixTokens); ++i) {
1652 const SimpleSuffixToken &suffix_token = kSuffixTokens[i];
1653 if (!key.empty() && !Util::StartsWith(suffix_token.key, key)) {
1654 continue;
1655 }
1656 switch (callback->OnKey(suffix_token.key)) {
1657 case Callback::TRAVERSE_DONE:
1658 return;
1659 case Callback::TRAVERSE_NEXT_KEY:
1660 continue;
1661 case Callback::TRAVERSE_CULL:
1662 LOG(FATAL) << "Culling is not supported.";
1663 break;
1664 default:
1665 break;
1666 }
1667 token.key = suffix_token.key;
1668 token.value = suffix_token.value;
1669 token.cost = 1000;
1670 token.lid = token.rid = 0;
1671 if (callback->OnToken(token.key, token.key, token) ==
1672 Callback::TRAVERSE_DONE) {
1673 break;
1674 }
1675 }
1676 }
1677
LookupPrefix(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const1678 void LookupPrefix(StringPiece key,
1679 const ConversionRequest &conversion_request,
1680 Callback *callback) const override {}
1681
LookupExact(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const1682 void LookupExact(StringPiece key, const ConversionRequest &conversion_request,
1683 Callback *callback) const override {}
1684
LookupReverse(StringPiece str,const ConversionRequest & conversion_request,Callback * callback) const1685 void LookupReverse(StringPiece str,
1686 const ConversionRequest &conversion_request,
1687 Callback *callback) const override {}
1688 };
1689
1690 } // namespace
1691
// The cutoff threshold used for SUGGESTION requests must not exceed the one
// used for PREDICTION requests.
TEST_F(DictionaryPredictorTest, GetCandidateCutoffThreshold) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  segments.set_request_type(Segments::PREDICTION);
  const size_t prediction_threshold =
      predictor->GetCandidateCutoffThreshold(segments);

  segments.set_request_type(Segments::SUGGESTION);
  const size_t suggestion_threshold =
      predictor->GetCandidateCutoffThreshold(segments);

  EXPECT_LE(suggestion_threshold, prediction_threshold);
}
1708
// Exercises AggregateSuffixPrediction() against TestSuffixDictionary, whose
// only entry is "いか" -> "以下".
TEST_F(DictionaryPredictorTest, AggregateSuffixPrediction) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(new MockDataAndPredictor);
  data_and_predictor->Init(NULL, new TestSuffixDictionary());

  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  MakeSegmentsForSuggestion("あ", &segments);

  // The history segment is "グーグル".
  const char kPreviousKey[] = "ぐーぐる";
  const char kPreviousValue[] = "グーグル";
  PrependHistorySegments(kPreviousKey, kPreviousValue, &segments);

  std::vector<DictionaryPredictor::Result> results;

  // The suffix dictionary has no entry whose key starts with "あ", so nothing
  // is aggregated.
  predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
                                       segments, &results);
  EXPECT_TRUE(results.empty());

  // With an empty key, results are produced.
  results.clear();
  segments.mutable_conversion_segment(0)->set_key("");
  predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
                                       segments, &results);
  EXPECT_FALSE(results.empty());

  // Prediction types other than SUFFIX must not add any result.
  results.clear();
  predictor->AggregateSuffixPrediction(DictionaryPredictor::UNIGRAM, *convreq_,
                                       segments, &results);
  EXPECT_TRUE(results.empty());

  predictor->AggregateSuffixPrediction(DictionaryPredictor::REALTIME, *convreq_,
                                       segments, &results);
  EXPECT_TRUE(results.empty());

  predictor->AggregateSuffixPrediction(DictionaryPredictor::BIGRAM, *convreq_,
                                       segments, &results);
  EXPECT_TRUE(results.empty());

  // Even when several types are requested, the candidates produced here carry
  // the SUFFIX type only.
  results.clear();
  segments.mutable_conversion_segment(0)->set_key("い");
  predictor->AggregateSuffixPrediction(
      DictionaryPredictor::SUFFIX | DictionaryPredictor::BIGRAM, *convreq_,
      segments, &results);
  EXPECT_FALSE(results.empty());
  for (const DictionaryPredictor::Result &entry : results) {
    EXPECT_EQ(DictionaryPredictor::SUFFIX, entry.types);
    // The key is non-empty, so this is not a zero query.
    EXPECT_FALSE(Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX &
                 entry.source_info);
  }
}
1767
// With a mobile request and an empty key (zero query), suffix candidates must
// be typed SUFFIX and flagged as zero-query suffix results.
TEST_F(DictionaryPredictorTest, AggregateZeroQuerySuffixPrediction) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(new MockDataAndPredictor);
  data_and_predictor->Init(NULL, new TestSuffixDictionary());

  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  // Zero query: the conversion segment has an empty key.
  Segments segments;
  MakeSegmentsForSuggestion("", &segments);

  // The history segment is "グーグル".
  const char kPreviousKey[] = "ぐーぐる";
  const char kPreviousValue[] = "グーグル";
  PrependHistorySegments(kPreviousKey, kPreviousValue, &segments);

  std::vector<DictionaryPredictor::Result> results;
  predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
                                       segments, &results);
  EXPECT_FALSE(results.empty());

  for (const DictionaryPredictor::Result &entry : results) {
    EXPECT_EQ(DictionaryPredictor::SUFFIX, entry.types);
    // Zero query.
    EXPECT_TRUE(Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX &
                entry.source_info);
  }
}
1800
// Checks English prediction for every combination of input mode (half-width
// or full-width ASCII) and key capitalization (lower, upper, capitalized).
//
// The FULL_ASCII cases expect full-width output.  Their expected strings were
// half-width, which made them byte-identical to the HALF_ASCII cases and
// contradicted the documented expectation; they are full-width now.
// NOTE(review): the third helper argument is assumed to be the prefix every
// predicted value must start with, so it is full-width as well in the
// FULL_ASCII cases -- confirm against AggregateEnglishPredictionTestHelper.
TEST_F(DictionaryPredictorTest, AggregateEnglishPrediction) {
  // Input mode: HALF_ASCII, Key: lower case
  //   => Prediction should be in half-width lower case.
  {
    const char *kExpectedValues[] = {
      "converge",
      "converged",
      "convergent",
    };
    AggregateEnglishPredictionTestHelper(transliteration::HALF_ASCII, "conv",
                                         "conv", kExpectedValues,
                                         arraysize(kExpectedValues));
  }
  // Input mode: HALF_ASCII, Key: upper case
  //   => Prediction should be in half-width upper case.
  {
    const char *kExpectedValues[] = {
      "CONVERGE",
      "CONVERGED",
      "CONVERGENT",
    };
    AggregateEnglishPredictionTestHelper(transliteration::HALF_ASCII, "CONV",
                                         "CONV", kExpectedValues,
                                         arraysize(kExpectedValues));
  }
  // Input mode: HALF_ASCII, Key: capitalized
  //   => Prediction should be half-width and capitalized.
  {
    const char *kExpectedValues[] = {
      "Converge",
      "Converged",
      "Convergent",
    };
    AggregateEnglishPredictionTestHelper(transliteration::HALF_ASCII, "Conv",
                                         "Conv", kExpectedValues,
                                         arraysize(kExpectedValues));
  }
  // Input mode: FULL_ASCII, Key: lower case
  //   => Prediction should be in full-width lower case.
  {
    const char *kExpectedValues[] = {
      "ｃｏｎｖｅｒｇｅ",
      "ｃｏｎｖｅｒｇｅｄ",
      "ｃｏｎｖｅｒｇｅｎｔ",
    };
    AggregateEnglishPredictionTestHelper(transliteration::FULL_ASCII, "conv",
                                         "ｃｏｎｖ",
                                         kExpectedValues,
                                         arraysize(kExpectedValues));
  }
  // Input mode: FULL_ASCII, Key: upper case
  //   => Prediction should be in full-width upper case.
  {
    const char *kExpectedValues[] = {
      "ＣＯＮＶＥＲＧＥ",
      "ＣＯＮＶＥＲＧＥＤ",
      "ＣＯＮＶＥＲＧＥＮＴ",
    };
    AggregateEnglishPredictionTestHelper(transliteration::FULL_ASCII, "CONV",
                                         "ＣＯＮＶ",
                                         kExpectedValues,
                                         arraysize(kExpectedValues));
  }
  // Input mode: FULL_ASCII, Key: capitalized
  //   => Prediction should be full-width and capitalized.
  {
    const char *kExpectedValues[] = {
      "Ｃｏｎｖｅｒｇｅ",
      "Ｃｏｎｖｅｒｇｅｄ",
      "Ｃｏｎｖｅｒｇｅｎｔ",
    };
    AggregateEnglishPredictionTestHelper(transliteration::FULL_ASCII, "Conv",
                                         "Ｃｏｎｖ",
                                         kExpectedValues,
                                         arraysize(kExpectedValues));
  }
}
1878
// With typing correction enabled, the input "gu-huru" together with the
// corrected key codes for "gu-guru" is expected to yield the listed values.
TEST_F(DictionaryPredictorTest, AggregateTypeCorrectingPrediction) {
  config_->set_use_typing_correction(true);

  const char kTypedText[] = "gu-huru";
  const uint32 kCorrectedCodes[] = {'g', 'u', '-', 'g', 'u', 'r', 'u'};
  const char *kWantedValues[] = {
    "グーグルアドセンス",
    "グーグルアドワーズ",
  };

  AggregateTypeCorrectingTestHelper(kTypedText, kCorrectedCodes, kWantedValues,
                                    arraysize(kWantedValues));
}
1892
// Verifies zero-query suffix suggestion when the history segment is a number:
// number suffixes such as "月" / "個" must be offered for an empty key and
// must not be offered once the user has typed a key.
TEST_F(DictionaryPredictorTest, ZeroQuerySuggestionAfterNumbers) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();
  const POSMatcher &pos_matcher = data_and_predictor->pos_matcher();
  Segments segments;

  {
    MakeSegmentsForSuggestion("", &segments);

    const char kHistoryKey[] = "12";
    const char kHistoryValue[] = "12";
    const char kExpectedValue[] = "月";
    PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
    std::vector<DictionaryPredictor::Result> results;
    predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
                                         segments, &results);
    EXPECT_FALSE(results.empty());

    // Scan until the expected suffix is found; every result visited on the way
    // must be a zero-query number suffix of type SUFFIX.
    std::vector<DictionaryPredictor::Result>::const_iterator target =
        results.end();
    for (std::vector<DictionaryPredictor::Result>::const_iterator it =
             results.begin();
         it != results.end(); ++it) {
      EXPECT_EQ(it->types, DictionaryPredictor::SUFFIX);

      EXPECT_TRUE(
          Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NUMBER_SUFFIX &
          it->source_info);

      if (it->value == kExpectedValue) {
        target = it;
        break;
      }
    }
    // Must be fatal: |target| is dereferenced below, and dereferencing
    // results.end() is undefined behavior.
    ASSERT_NE(results.end(), target);
    EXPECT_EQ(target->value, kExpectedValue);
    EXPECT_EQ(target->lid, pos_matcher.GetCounterSuffixWordId());
    EXPECT_EQ(target->rid, pos_matcher.GetCounterSuffixWordId());

    // Make sure number suffixes are not suggested when there is a key.
    results.clear();
    MakeSegmentsForSuggestion("あ", &segments);
    PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
    predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
                                         segments, &results);
    target = results.end();
    for (std::vector<DictionaryPredictor::Result>::const_iterator it =
             results.begin();
         it != results.end(); ++it) {
      EXPECT_EQ(it->types, DictionaryPredictor::SUFFIX);
      if (it->value == kExpectedValue) {
        target = it;
        break;
      }
    }
    EXPECT_EQ(results.end(), target);
  }

  {
    MakeSegmentsForSuggestion("", &segments);

    const char kHistoryKey[] = "66050713";  // A random number
    const char kHistoryValue[] = "66050713";
    const char kExpectedValue[] = "個";
    PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
    std::vector<DictionaryPredictor::Result> results;
    predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
                                         segments, &results);
    EXPECT_FALSE(results.empty());

    bool found = false;
    for (std::vector<DictionaryPredictor::Result>::const_iterator it =
             results.begin();
         it != results.end(); ++it) {
      EXPECT_EQ(it->types, DictionaryPredictor::SUFFIX);
      if (it->value == kExpectedValue) {
        EXPECT_TRUE(
            Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NUMBER_SUFFIX &
            it->source_info);
        found = true;
        break;
      }
    }
    EXPECT_TRUE(found);
  }
}
1981
// Table-driven check of which history values trigger number zero-query suffix
// suggestion.  For each row the test looks for a SUFFIX result whose value is
// |find_suffix_value| and whose lid is the counter-suffix word id.
TEST_F(DictionaryPredictorTest, TriggerNumberZeroQuerySuggestion) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();
  const POSMatcher &pos_matcher = data_and_predictor->pos_matcher();

  // NOTE(review): three rows below are exact duplicates of the row above
  // them.  Presumably one row of each pair was meant to use a different
  // number representation (e.g. full-width digits) -- confirm.
  const struct TestCase {
    const char *history_key;
    const char *history_value;
    const char *find_suffix_value;
    bool expected_result;
  } kTestCases[] = {
    {"12", "12", "月", true},
    {"12", "12", "月", true},
    {"12", "壱拾弐", "月", false},
    {"12", "十二", "月", false},
    {"12", "一二", "月", false},
    {"12", "Ⅻ", "月", false},
    {"あか", "12", "月", true},  // T13N
    {"あか", "12", "月", true},  // T13N
    {"じゅう", "10", "時", true},
    {"じゅう", "10", "時", true},
    {"じゅう", "十", "時", false},
    {"じゅう", "拾", "時", false},
  };

  for (const TestCase &test_case : kTestCases) {
    Segments segments;
    MakeSegmentsForSuggestion("", &segments);
    PrependHistorySegments(
        test_case.history_key, test_case.history_value, &segments);

    std::vector<DictionaryPredictor::Result> results;
    predictor->AggregateSuffixPrediction(
        DictionaryPredictor::SUFFIX, *convreq_, segments, &results);
    EXPECT_FALSE(results.empty());

    bool found = false;
    for (const DictionaryPredictor::Result &entry : results) {
      EXPECT_EQ(entry.types, DictionaryPredictor::SUFFIX);
      const bool is_wanted_suffix =
          entry.value == test_case.find_suffix_value &&
          entry.lid == pos_matcher.GetCounterSuffixWordId();
      if (!is_wanted_suffix) {
        continue;
      }
      EXPECT_TRUE(
          Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NUMBER_SUFFIX &
          entry.source_info);
      found = true;
      break;
    }
    EXPECT_EQ(test_case.expected_result, found) << test_case.history_value;
  }
}
2039
// Table-driven check of zero-query suggestion after non-number history: for
// each row the test looks for a SUFFIX result with value |find_value| and
// lid 0.
TEST_F(DictionaryPredictorTest, TriggerZeroQuerySuggestion) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  const struct TestCase {
    const char *history_key;
    const char *history_value;
    const char *find_value;
    bool expected_result;
  } kTestCases[] = {
    {"@", "@", "gmail.com", true},
    {"!", "!", "?", false},
  };

  for (const TestCase &test_case : kTestCases) {
    Segments segments;
    MakeSegmentsForSuggestion("", &segments);
    PrependHistorySegments(
        test_case.history_key, test_case.history_value, &segments);

    std::vector<DictionaryPredictor::Result> results;
    predictor->AggregateSuffixPrediction(
        DictionaryPredictor::SUFFIX, *convreq_, segments, &results);
    EXPECT_FALSE(results.empty());

    bool found = false;
    for (const DictionaryPredictor::Result &entry : results) {
      EXPECT_EQ(entry.types, DictionaryPredictor::SUFFIX);
      if (entry.value == test_case.find_value &&
          entry.lid == 0 /* EOS */) {
        found = true;
        break;
      }
    }
    EXPECT_EQ(test_case.expected_result, found) << test_case.history_value;
  }
}
2083
// GetHistoryKeyAndValue() fails when there is no history segment and returns
// the key and value of the history segment once one has been prepended.
TEST_F(DictionaryPredictorTest, GetHistoryKeyAndValue) {
  Segments segments;
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  MakeSegmentsForSuggestion("test", &segments);

  string history_key;
  string history_value;

  // No history yet.
  EXPECT_FALSE(predictor->GetHistoryKeyAndValue(segments, &history_key,
                                                &history_value));

  PrependHistorySegments("key", "value", &segments);
  EXPECT_TRUE(predictor->GetHistoryKeyAndValue(segments, &history_key,
                                               &history_value));
  EXPECT_EQ("key", history_key);
  EXPECT_EQ("value", history_value);
}
2101
// IsZipCodeRequest() is expected to accept non-empty keys made only of ASCII
// digits and '-', and to reject everything else.
TEST_F(DictionaryPredictorTest, IsZipCodeRequest) {
  EXPECT_FALSE(DictionaryPredictor::IsZipCodeRequest(""));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("000"));
  EXPECT_FALSE(DictionaryPredictor::IsZipCodeRequest("ABC"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("---"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("0124-"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("0124-0"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("012-0"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("012-3456"));
  // Full-width digits are not a zip code request.  This case used the same
  // half-width literal as the EXPECT_TRUE case above, so the two assertions
  // contradicted each other and the test could never pass.
  EXPECT_FALSE(DictionaryPredictor::IsZipCodeRequest("０１２-０"));
}
2114
// Checks IsAggressiveSuggestion() around its inputs.  Argument order is
// (query_len, key_len, cost, is_suggestion, total_candidates_size).
TEST_F(DictionaryPredictorTest, IsAggressiveSuggestion) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  // Query "ただしい" (4 chars) against key "ただしいけめんにかぎる" (11 chars).
  EXPECT_TRUE(predictor->IsAggressiveSuggestion(4,      // query_len
                                                11,     // key_len
                                                6000,   // cost
                                                true,   // is_suggestion
                                                20));   // total_candidates_size

  // cost <= 4000
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(4, 11, 4000, true, 20));

  // Not a suggestion.
  // NOTE(review): cost is also 4000 here, and the case above already expects
  // false for that cost, so this case does not isolate is_suggestion.
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(4, 11, 4000, false, 20));

  // total_candidates_size is small.
  // NOTE(review): same remark -- cost is 4000, so the small candidate count
  // is not the only reason this can be false.
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(4, 11, 4000, true, 5));

  // query_len = 5
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(5, 11, 6000, true, 20));

  // Query "それでも" (4 chars) against key "それでもぼくはやっていない"
  // (13 chars).
  EXPECT_TRUE(predictor->IsAggressiveSuggestion(4, 13, 6000, true, 20));

  // cost <= 4000
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(4, 13, 4000, true, 20));
}
2179
// Realtime conversion of a key that starts with alphabets ("PCてすと").  The
// actual converter is disabled for the request, so only the plain realtime
// result is expected and not the mock converter's top result.
TEST_F(DictionaryPredictorTest, RealtimeConversionStartingWithAlphabets) {
  Segments segments;
  // Turn on real-time conversion.
  config_->set_use_dictionary_suggest(false);
  config_->set_use_realtime_conversion(true);

  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  const char kKey[] = "PCてすと";
  const char *kExpectedSuggestionValues[] = {
    "Realtime top result",
    "PCテスト",
  };

  // Prime the mock converter with the realtime top result.
  {
    Segments mock_segments;
    Segment *mock_segment = mock_segments.add_segment();
    mock_segment->set_key(kKey);
    Segment::Candidate *top_candidate = mock_segment->add_candidate();
    top_candidate->value = kExpectedSuggestionValues[0];
    ConverterMock *converter = data_and_predictor->mutable_converter_mock();
    converter->SetStartConversionForRequest(&mock_segments, true);
  }

  MakeSegmentsForSuggestion(kKey, &segments);

  std::vector<DictionaryPredictor::Result> results;

  convreq_->set_use_actual_converter_for_realtime_conversion(false);
  predictor->AggregateRealtimeConversion(
      DictionaryPredictor::REALTIME, *convreq_, &segments, &results);
  ASSERT_EQ(1, results.size());

  const DictionaryPredictor::Result &only_result = results[0];
  EXPECT_EQ(DictionaryPredictor::REALTIME, only_result.types);
  EXPECT_EQ(kExpectedSuggestionValues[1], only_result.value);
  EXPECT_EQ(1, segments.conversion_segments_size());
}
2221
// Checks that the SPELLING_CORRECTION attribute shows up both on the unigram
// result for "かぷりちょうざ" and on the realtime conversion result for the
// same key followed by "で".
TEST_F(DictionaryPredictorTest, RealtimeConversionWithSpellingCorrection) {
  Segments segments;
  // Turn on real-time conversion.
  config_->set_use_dictionary_suggest(false);
  config_->set_use_realtime_conversion(true);

  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  const char kCapriHiragana[] = "かぷりちょうざ";

  // Set up mock converter for realtime top result.
  {
    Segments segments;
    Segment *segment = segments.add_segment();
    segment->set_key(kCapriHiragana);
    Segment::Candidate *candidate = segment->add_candidate();
    candidate->value = "Dummy";
    ConverterMock *converter = data_and_predictor->mutable_converter_mock();
    converter->SetStartConversionForRequest(&segments, true);
  }

  MakeSegmentsForSuggestion(kCapriHiragana, &segments);

  std::vector<DictionaryPredictor::Result> results;

  convreq_->set_use_actual_converter_for_realtime_conversion(false);
  predictor->AggregateUnigramPrediction(
      DictionaryPredictor::UNIGRAM,
      *convreq_, segments, &results);
  ASSERT_FALSE(results.empty());
  EXPECT_NE(0, (results[0].candidate_attributes &
                Segment::Candidate::SPELLING_CORRECTION));

  results.clear();

  const char kKeyWithDe[] = "かぷりちょうざで";
  const char kExpectedSuggestionValueWithDe[] = "カプリチョーザで";

  MakeSegmentsForSuggestion(kKeyWithDe, &segments);
  predictor->AggregateRealtimeConversion(
      DictionaryPredictor::REALTIME, *convreq_, &segments, &results);
  // Must be fatal: results[0] is read below, which would be out of bounds if
  // no result was produced.
  ASSERT_EQ(1, results.size());

  EXPECT_EQ(results[0].types, DictionaryPredictor::REALTIME);
  EXPECT_NE(0, (results[0].candidate_attributes &
                Segment::Candidate::SPELLING_CORRECTION));
  EXPECT_EQ(kExpectedSuggestionValueWithDe, results[0].value);
  EXPECT_EQ(1, segments.conversion_segments_size());
}
2274
// Each case pairs a hiragana key with a value and checks the position that
// GetMissSpelledPosition() reports for the pair.
TEST_F(DictionaryPredictorTest, GetMissSpelledPosition) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  // Empty input.
  EXPECT_EQ(0, predictor->GetMissSpelledPosition("", ""));

  // Katakana values.
  EXPECT_EQ(
      3, predictor->GetMissSpelledPosition("れみおめろん", "レミオロメン"));
  EXPECT_EQ(
      5, predictor->GetMissSpelledPosition("とーとばっく", "トートバッグ"));
  EXPECT_EQ(
      4, predictor->GetMissSpelledPosition("おーすとりらあ", "オーストラリア"));

  // Kanji value.
  EXPECT_EQ(
      7, predictor->GetMissSpelledPosition("じきそうしょう", "時期尚早"));
}
2290
// Exercises RemoveMissSpelledCandidates() on small hand-built result lists.
// The call does not erase entries; the checks below look at which results have
// had their type reset to NO_PREDICTION.
//
// All size preconditions use ASSERT_EQ.  Three sub-cases previously used
// CHECK_EQ, which aborts the whole test binary instead of failing only this
// test.
TEST_F(DictionaryPredictorTest, RemoveMissSpelledCandidates) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  // A spelling correction together with a result sharing its value and a
  // result sharing its key.
  {
    std::vector<DictionaryPredictor::Result> results;
    DictionaryPredictor::Result *result;

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "ばっく";
    result->value = "バッグ";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::SPELLING_CORRECTION);

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "ばっぐ";
    result->value = "バッグ";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::NONE);

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "ばっく";
    result->value = "バック";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::NONE);

    predictor->RemoveMissSpelledCandidates(1, &results);
    ASSERT_EQ(3, results.size());

    EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[0].types);
    EXPECT_EQ(DictionaryPredictor::UNIGRAM, results[1].types);
    EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[2].types);
  }

  // A spelling correction next to an unrelated result: both are kept.
  {
    std::vector<DictionaryPredictor::Result> results;
    DictionaryPredictor::Result *result;

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "ばっく";
    result->value = "バッグ";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::SPELLING_CORRECTION);

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "てすと";
    result->value = "テスト";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::NONE);

    predictor->RemoveMissSpelledCandidates(1, &results);
    ASSERT_EQ(2, results.size());

    EXPECT_EQ(DictionaryPredictor::UNIGRAM, results[0].types);
    EXPECT_EQ(DictionaryPredictor::UNIGRAM, results[1].types);
  }

  // A spelling correction and a result with the same key, request_key_len 1:
  // both are reset.
  {
    std::vector<DictionaryPredictor::Result> results;
    DictionaryPredictor::Result *result;

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "ばっく";
    result->value = "バッグ";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::SPELLING_CORRECTION);

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "ばっく";
    result->value = "バック";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::NONE);

    predictor->RemoveMissSpelledCandidates(1, &results);
    ASSERT_EQ(2, results.size());

    EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[0].types);
    EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[1].types);
  }

  // Same input as above but with the first argument set to 3: the spelling
  // correction itself survives.
  {
    std::vector<DictionaryPredictor::Result> results;
    DictionaryPredictor::Result *result;

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "ばっく";
    result->value = "バッグ";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::SPELLING_CORRECTION);

    results.push_back(DictionaryPredictor::Result());
    result = &results.back();
    result->key = "ばっく";
    result->value = "バック";
    result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
                                       Token::NONE);

    predictor->RemoveMissSpelledCandidates(3, &results);
    ASSERT_EQ(2, results.size());

    EXPECT_EQ(DictionaryPredictor::UNIGRAM, results[0].types);
    EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[1].types);
  }
}
2405
// Unigram helper run with the key-expansion flag switched on.
TEST_F(DictionaryPredictorTest, UseExpansionForUnigramTest) {
  const bool kUseExpansion = true;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForUnigramTestHelper(kUseExpansion);
}
2410
// Unigram helper run with the key-expansion flag switched off.
TEST_F(DictionaryPredictorTest, UnuseExpansionForUnigramTest) {
  const bool kUseExpansion = false;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForUnigramTestHelper(kUseExpansion);
}
2415
// Bigram helper run with the key-expansion flag switched on.
TEST_F(DictionaryPredictorTest, UseExpansionForBigramTest) {
  const bool kUseExpansion = true;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForBigramTestHelper(kUseExpansion);
}
2420
// Bigram helper run with the key-expansion flag switched off.
TEST_F(DictionaryPredictorTest, UnuseExpansionForBigramTest) {
  const bool kUseExpansion = false;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForBigramTestHelper(kUseExpansion);
}
2425
// Suffix helper run with the key-expansion flag switched on.
TEST_F(DictionaryPredictorTest, UseExpansionForSuffixTest) {
  const bool kUseExpansion = true;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForSuffixTestHelper(kUseExpansion);
}
2430
// Suffix helper run with the key-expansion flag switched off.
TEST_F(DictionaryPredictorTest, UnuseExpansionForSuffixTest) {
  const bool kUseExpansion = false;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForSuffixTestHelper(kUseExpansion);
}
2435
// With the romaji table, typing "ak" gives the prediction query "あ" with more
// than five expanded queries.  ApplyPenaltyForKeyExpansion() is expected to
// leave the cost of every result below at 0.
TEST_F(DictionaryPredictorTest, ExpansionPenaltyForRomanTest) {
  FLAGS_enable_expansion_for_dictionary_predictor = true;
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(false);

  table_->LoadFromFile("system://romanji-hiragana.tsv");
  composer_->SetTable(table_.get());
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  segments.set_request_type(Segments::PREDICTION);
  InsertInputSequence("ak", composer_.get());
  Segment *segment = segments.add_segment();
  CHECK(segment);
  {
    string prediction_query;
    composer_->GetQueryForPrediction(&prediction_query);
    segment->set_key(prediction_query);
    EXPECT_EQ("あ", prediction_query);
  }
  {
    string base_query;
    std::set<string> expanded_queries;
    composer_->GetQueriesForPrediction(&base_query, &expanded_queries);
    EXPECT_EQ("あ", base_query);
    EXPECT_GT(expanded_queries.size(), 5);
  }

  // Unigram results to which the penalty pass is applied.
  const struct {
    const char *key;
    const char *value;
  } kEntries[] = {
    {"あか", "赤"},
    {"あき", "秋"},
    {"あかぎ", "アカギ"},
  };

  std::vector<TestableDictionaryPredictor::Result> results;
  for (const auto &entry : kEntries) {
    results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
    TestableDictionaryPredictor::Result &added = results.back();
    added.key = entry.key;
    added.value = entry.value;
    added.SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
                                     Token::NONE);
  }

  // Every cost starts at 0.
  EXPECT_EQ(3, results.size());
  EXPECT_EQ(0, results[0].cost);
  EXPECT_EQ(0, results[1].cost);
  EXPECT_EQ(0, results[2].cost);

  predictor->ApplyPenaltyForKeyExpansion(segments, &results);

  // No penalties.
  EXPECT_EQ(0, results[0].cost);
  EXPECT_EQ(0, results[1].cost);
  EXPECT_EQ(0, results[2].cost);
}
2503
// With the kana table and the input "あし", ApplyPenaltyForKeyExpansion() is
// expected to leave the results keyed "あし..." at cost 0 and to give the
// results keyed "あじ..." a positive cost.
TEST_F(DictionaryPredictorTest, ExpansionPenaltyForKanaTest) {
  FLAGS_enable_expansion_for_dictionary_predictor = true;
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(false);

  table_->LoadFromFile("system://kana.tsv");
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  segments.set_request_type(Segments::PREDICTION);
  InsertInputSequence("あし", composer_.get());

  Segment *segment = segments.add_segment();
  CHECK(segment);
  {
    string prediction_query;
    composer_->GetQueryForPrediction(&prediction_query);
    segment->set_key(prediction_query);
    EXPECT_EQ("あし", prediction_query);
  }
  {
    string base_query;
    std::set<string> expanded_queries;
    composer_->GetQueriesForPrediction(&base_query, &expanded_queries);
    EXPECT_EQ("あ", base_query);
    EXPECT_EQ(2, expanded_queries.size());
  }

  // Unigram results to which the penalty pass is applied.
  const struct {
    const char *key;
    const char *value;
  } kEntries[] = {
    {"あし", "足"},
    {"あじ", "味"},
    {"あした", "明日"},
    {"あじあ", "アジア"},
  };

  std::vector<TestableDictionaryPredictor::Result> results;
  for (const auto &entry : kEntries) {
    results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
    TestableDictionaryPredictor::Result &added = results.back();
    added.key = entry.key;
    added.value = entry.value;
    added.SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
                                     Token::NONE);
  }

  // Every cost starts at 0.
  EXPECT_EQ(4, results.size());
  EXPECT_EQ(0, results[0].cost);
  EXPECT_EQ(0, results[1].cost);
  EXPECT_EQ(0, results[2].cost);
  EXPECT_EQ(0, results[3].cost);

  predictor->ApplyPenaltyForKeyExpansion(segments, &results);

  // Only the "あじ" / "あじあ" results are penalized.
  EXPECT_EQ(0, results[0].cost);
  EXPECT_LT(0, results[1].cost);
  EXPECT_EQ(0, results[2].cost);
  EXPECT_LT(0, results[3].cost);
}
2579
// Checks that SetLMCost() keeps the number and order of results and assigns
// the longer entry ("テストテスト") a higher cost than both exact-key entries.
TEST_F(DictionaryPredictorTest, SetLMCost) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  segments.set_request_type(Segments::PREDICTION);
  Segment *segment = segments.add_segment();
  CHECK(segment);
  segment->set_key("てすと");

  // Key/value pairs of the unigram results whose cost is computed.
  const struct {
    const char *key;
    const char *value;
  } kEntries[] = {
      {"てすと", "てすと"},
      {"てすと", "テスト"},
      {"てすとてすと", "テストテスト"},
  };

  std::vector<TestableDictionaryPredictor::Result> results;
  for (const auto &entry : kEntries) {
    results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
    TestableDictionaryPredictor::Result &added = results.back();
    added.key = entry.key;
    added.value = entry.value;
    added.SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
                                     Token::NONE);
  }

  predictor->SetLMCost(segments, &results);

  EXPECT_EQ(3, results.size());
  EXPECT_EQ("てすと", results[0].value);
  EXPECT_EQ("テスト", results[1].value);
  EXPECT_EQ("テストテスト", results[2].value);
  EXPECT_GT(results[2].cost, results[0].cost);
  EXPECT_GT(results[2].cost, results[1].cost);
}
2625
2626 namespace {
2627
AddTestableDictionaryPredictorResult(const char * key,const char * value,int wcost,TestableDictionaryPredictor::PredictionTypes prediction_types,Token::AttributesBitfield attributes,std::vector<TestableDictionaryPredictor::Result> * results)2628 void AddTestableDictionaryPredictorResult(
2629 const char *key, const char *value, int wcost,
2630 TestableDictionaryPredictor::PredictionTypes prediction_types,
2631 Token::AttributesBitfield attributes,
2632 std::vector<TestableDictionaryPredictor::Result> *results) {
2633 results->push_back(TestableDictionaryPredictor::MakeEmptyResult());
2634 TestableDictionaryPredictor::Result *result = &results->back();
2635 result->key = key;
2636 result->value = value;
2637 result->wcost = wcost;
2638 result->SetTypesAndTokenAttributes(prediction_types, attributes);
2639 }
2640
2641 } // namespace
2642
// Exercises SetLMCost() for a single result under four conditions: a user
// dictionary word with a large word cost, one with a small word cost, a user
// dictionary word tagged as a general symbol, and a non-user-dictionary word.
TEST_F(DictionaryPredictorTest, SetLMCostForUserDictionaryWord) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  const char *kAikaHiragana = "あいか";
  const char *kAikaKanji = "愛佳";

  Segments segments;
  segments.set_request_type(Segments::PREDICTION);
  Segment *segment = segments.add_segment();
  ASSERT_NE(nullptr, segment);
  segment->set_key(kAikaHiragana);

  {
    // Cost of words in user dictionary should be decreased.
    const int kOriginalWordCost = 10000;
    std::vector<TestableDictionaryPredictor::Result> results;
    AddTestableDictionaryPredictorResult(
        kAikaHiragana, kAikaKanji, kOriginalWordCost,
        TestableDictionaryPredictor::UNIGRAM, Token::USER_DICTIONARY,
        &results);

    predictor->SetLMCost(segments, &results);

    EXPECT_EQ(1, results.size());
    EXPECT_EQ(kAikaKanji, results[0].value);
    EXPECT_GT(kOriginalWordCost, results[0].cost);
    EXPECT_LE(1, results[0].cost);
  }

  {
    // Cost of words in user dictionary should not be decreased to below 1.
    const int kOriginalWordCost = 10;
    std::vector<TestableDictionaryPredictor::Result> results;
    AddTestableDictionaryPredictorResult(
        kAikaHiragana, kAikaKanji, kOriginalWordCost,
        TestableDictionaryPredictor::UNIGRAM, Token::USER_DICTIONARY,
        &results);

    predictor->SetLMCost(segments, &results);

    EXPECT_EQ(1, results.size());
    EXPECT_EQ(kAikaKanji, results[0].value);
    EXPECT_GT(kOriginalWordCost, results[0].cost);
    EXPECT_LE(1, results[0].cost);
  }

  {
    // Cost of general symbols should not be decreased.
    const int kOriginalWordCost = 10000;
    std::vector<TestableDictionaryPredictor::Result> results;
    AddTestableDictionaryPredictorResult(
        kAikaHiragana, kAikaKanji, kOriginalWordCost,
        TestableDictionaryPredictor::UNIGRAM, Token::USER_DICTIONARY,
        &results);
    ASSERT_EQ(1, results.size());

    // Mark both sides of the result with the general symbol POS id.
    TestableDictionaryPredictor::Result &symbol_result = results[0];
    symbol_result.lid =
        data_and_predictor->pos_matcher().GetGeneralSymbolId();
    symbol_result.rid = symbol_result.lid;

    predictor->SetLMCost(segments, &results);

    EXPECT_EQ(1, results.size());
    EXPECT_EQ(kAikaKanji, results[0].value);
    EXPECT_LE(kOriginalWordCost, results[0].cost);
  }

  {
    // Cost of words not in user dictionary should not be decreased.
    const int kOriginalWordCost = 10000;
    std::vector<TestableDictionaryPredictor::Result> results;
    AddTestableDictionaryPredictorResult(
        kAikaHiragana, kAikaKanji, kOriginalWordCost,
        TestableDictionaryPredictor::UNIGRAM, Token::NONE, &results);

    predictor->SetLMCost(segments, &results);

    EXPECT_EQ(1, results.size());
    EXPECT_EQ(kAikaKanji, results[0].value);
    EXPECT_EQ(kOriginalWordCost, results[0].cost);
  }
}
2726
// Predicting for the key "あぼがど" with the dictionaries built from the mock
// data manager must yield the candidate "アボカド".
TEST_F(DictionaryPredictorTest, SuggestSpellingCorrection) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> mock(new MockDataAndPredictor());
  mock->Init(CreateSystemDictionaryFromDataManager(data_manager),
             CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor = mock->dictionary_predictor();

  Segments segments;
  MakeSegmentsForPrediction("あぼがど", &segments);
  predictor->PredictForRequest(*convreq_, &segments);

  const Segment &predicted = segments.conversion_segment(0);
  EXPECT_TRUE(FindCandidateByValue(predicted, "アボカド"));
}
2745
// Predicting for the shorter key "あぼが" must not yet yield the candidate
// "アボカド".
TEST_F(DictionaryPredictorTest, DoNotSuggestSpellingCorrectionBeforeMismatch) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> mock(new MockDataAndPredictor());
  mock->Init(CreateSystemDictionaryFromDataManager(data_manager),
             CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor = mock->dictionary_predictor();

  Segments segments;
  MakeSegmentsForPrediction("あぼが", &segments);
  predictor->PredictForRequest(*convreq_, &segments);

  const Segment &predicted = segments.conversion_segment(0);
  EXPECT_FALSE(FindCandidateByValue(predicted, "アボカド"));
}
2765
// With a mobile request, unigram aggregation for "とうきょう" must contain
// "東京" and at most six results whose value starts with "東京".
TEST_F(DictionaryPredictorTest, MobileUnigramSuggestion) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> mock(new MockDataAndPredictor());
  mock->Init(CreateSystemDictionaryFromDataManager(data_manager),
             CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor = mock->dictionary_predictor();

  Segments segments;
  const char kKey[] = "とうきょう";
  MakeSegmentsForSuggestion(kKey, &segments);

  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  std::vector<TestableDictionaryPredictor::Result> results;
  predictor->AggregateUnigramPrediction(TestableDictionaryPredictor::UNIGRAM,
                                        *convreq_, segments, &results);

  EXPECT_TRUE(FindResultByValue(results, "東京"));

  // Count the results sharing the "東京" prefix.
  const int prefix_count = static_cast<int>(std::count_if(
      results.begin(), results.end(),
      [](const TestableDictionaryPredictor::Result &result) {
        return Util::StartsWith(result.value, "東京");
      }));
  // Should not have same prefix candidates a lot.
  EXPECT_LE(prefix_count, 6);
}
2799
// With a mobile request, an empty key and the history "だいがく" / "大学",
// prediction must offer "入試" and "入試センター".
TEST_F(DictionaryPredictorTest, MobileZeroQuerySuggestion) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> mock(new MockDataAndPredictor());
  mock->Init(CreateSystemDictionaryFromDataManager(data_manager),
             CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor = mock->dictionary_predictor();

  Segments segments;
  MakeSegmentsForPrediction("", &segments);
  PrependHistorySegments("だいがく", "大学", &segments);

  commands::RequestForUnitTest::FillMobileRequest(request_.get());
  predictor->PredictForRequest(*convreq_, &segments);

  const Segment &predicted = segments.conversion_segment(0);
  EXPECT_TRUE(FindCandidateByValue(predicted, "入試"));
  EXPECT_TRUE(FindCandidateByValue(predicted, "入試センター"));
}
2823
// We are not yet sure what we should suggest after the end of a sentence.
// However, we decided to show zero query suggestions rather than stopping
// zero query completely, because users may be confused if the suggestion
// window fails to appear only under certain conditions.
// TODO(toshiyuki): Show useful zero query suggestions after EOS.
// Disabled test. For each history candidate (key, value, rid) it runs a
// zero query prediction with a mobile request and compares whether any
// candidate was inserted against the expectation in the table.
TEST_F(DictionaryPredictorTest, DISABLED_MobileZeroQuerySuggestionAfterEOS) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> mock(new MockDataAndPredictor());
  mock->Init(CreateSystemDictionaryFromDataManager(data_manager),
             CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor = mock->dictionary_predictor();

  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  const POSMatcher &pos_matcher = mock->pos_matcher();

  const struct TestCase {
    const char *key;
    const char *value;
    int rid;
    bool expected_result;
  } kTestcases[] = {
      {"ですよね。", "ですよね。", pos_matcher.GetEOSSymbolId(), false},
      {"。", "。", pos_matcher.GetEOSSymbolId(), false},
      {"まるいち", "①", pos_matcher.GetEOSSymbolId(), false},
      {"そう", "そう", pos_matcher.GetGeneralNounId(), true},
      {"そう!", "そう!", pos_matcher.GetGeneralNounId(), false},
      {"むすめ。", "娘。", pos_matcher.GetUniqueNounId(), true},
  };

  for (const TestCase &test_case : kTestcases) {
    Segments segments;
    MakeSegmentsForPrediction("", &segments);

    // Put the test case in front as the committed history segment.
    Segment *history = segments.push_front_segment();
    history->set_segment_type(Segment::HISTORY);
    history->set_key(test_case.key);
    Segment::Candidate *committed = history->add_candidate();
    committed->key = test_case.key;
    committed->content_key = test_case.key;
    committed->value = test_case.value;
    committed->content_value = test_case.value;
    committed->rid = test_case.rid;

    predictor->PredictForRequest(*convreq_, &segments);
    const bool candidates_inserted =
        segments.conversion_segment(0).candidates_size() > 0;
    EXPECT_EQ(test_case.expected_result, candidates_inserted);
  }
}
2880
// For the suggestion keys "ゆーざー" and "ゆーざーの", checks that prediction
// succeeds and that a candidate with the expected value carries
// NO_VARIANTS_EXPANSION and/or USER_DICTIONARY in its attributes.
TEST_F(DictionaryPredictorTest, PropagateUserDictionaryAttribute) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(true);

  const struct {
    const char *key;
    const char *expected_value;
  } kTestCases[] = {
      {"ゆーざー", "ユーザー"},
      {"ゆーざーの", "ユーザーの"},
  };

  for (const auto &test_case : kTestCases) {
    SCOPED_TRACE(test_case.key);

    segments.Clear();
    segments.set_max_prediction_candidates_size(10);
    segments.set_request_type(Segments::SUGGESTION);
    Segment *seg = segments.add_segment();
    seg->set_key(test_case.key);
    seg->set_segment_type(Segment::FREE);

    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ(1, segments.conversion_segments_size());

    // NOTE(review): the mask test succeeds when either flag is set, not
    // necessarily both -- confirm that this is the intended check.
    bool found_candidate = false;
    const Segment &predicted = segments.conversion_segment(0);
    for (size_t i = 0; i < predicted.candidates_size(); ++i) {
      const Segment::Candidate &cand = predicted.candidate(i);
      if (cand.value == test_case.expected_value &&
          (cand.attributes & (Segment::Candidate::NO_VARIANTS_EXPANSION |
                              Segment::Candidate::USER_DICTIONARY))) {
        found_candidate = true;
        break;
      }
    }
    EXPECT_TRUE(found_candidate);
  }
}
2941
// SetDescription() must produce "補正" for the TYPING_CORRECTION prediction
// type and "部分" for the AUTO_PARTIAL_SUGGESTION candidate attribute.
TEST_F(DictionaryPredictorTest, SetDescription) {
  string typing_correction_description;
  DictionaryPredictor::SetDescription(
      TestableDictionaryPredictor::TYPING_CORRECTION, 0,
      &typing_correction_description);
  EXPECT_EQ("補正", typing_correction_description);

  string partial_suggestion_description;
  DictionaryPredictor::SetDescription(
      0, Segment::Candidate::AUTO_PARTIAL_SUGGESTION,
      &partial_suggestion_description);
  EXPECT_EQ("部分", partial_suggestion_description);
}
2955
// SetDebugDescription() must encode the prediction types as letters ("UE",
// "BR", "BRS") and append them after a space when the description is
// already non-empty.
TEST_F(DictionaryPredictorTest, SetDebugDescription) {
  // UNIGRAM | ENGLISH on an empty description.
  string unigram_english;
  DictionaryPredictor::SetDebugDescription(
      TestableDictionaryPredictor::UNIGRAM |
          TestableDictionaryPredictor::ENGLISH,
      &unigram_english);
  EXPECT_EQ("UE", unigram_english);

  // REALTIME | BIGRAM on a description that already has content.
  string prefilled = "description";
  DictionaryPredictor::SetDebugDescription(
      TestableDictionaryPredictor::REALTIME |
          TestableDictionaryPredictor::BIGRAM,
      &prefilled);
  EXPECT_EQ("description BR", prefilled);

  // BIGRAM | REALTIME | SUFFIX on an empty description.
  string bigram_realtime_suffix;
  DictionaryPredictor::SetDebugDescription(
      TestableDictionaryPredictor::BIGRAM |
          TestableDictionaryPredictor::REALTIME |
          TestableDictionaryPredictor::SUFFIX,
      &bigram_realtime_suffix);
  EXPECT_EQ("BRS", bigram_realtime_suffix);
}
2983
// Builds a predictor on top of mock dictionary/converter objects, aggregates
// the realtime conversion result for a whole sentence and checks that the
// resulting candidate keeps three inner segment boundaries.
TEST_F(DictionaryPredictorTest, PropagateRealtimeConversionBoundary) {
  testing::MockDataManager data_manager;
  unique_ptr<const DictionaryInterface> mock_dictionary(new DictionaryMock);
  unique_ptr<ConverterInterface> mock_converter(new ConverterMock);
  unique_ptr<ImmutableConverterInterface> mock_immutable_converter(
      new ImmutableConverterMock);
  unique_ptr<const DictionaryInterface> suffix_dictionary(
      CreateSuffixDictionaryFromDataManager(data_manager));
  unique_ptr<const Connector> connector(
      Connector::CreateFromDataManager(data_manager));
  unique_ptr<const Segmenter> segmenter(
      Segmenter::CreateFromDataManager(data_manager));
  unique_ptr<const SuggestionFilter> suggestion_filter(
      CreateSuggestionFilter(data_manager));
  const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData());
  unique_ptr<TestableDictionaryPredictor> predictor(
      new TestableDictionaryPredictor(
          data_manager, mock_converter.get(), mock_immutable_converter.get(),
          mock_dictionary.get(), suffix_dictionary.get(), connector.get(),
          segmenter.get(), &pos_matcher, suggestion_filter.get()));

  Segments segments;
  const char kKey[] = "わたしのなまえはなかのです";
  MakeSegmentsForSuggestion(kKey, &segments);

  std::vector<TestableDictionaryPredictor::Result> results;
  predictor->AggregateRealtimeConversion(TestableDictionaryPredictor::REALTIME,
                                         *convreq_, &segments, &results);

  // mock results
  EXPECT_EQ(1, results.size());
  predictor->AddPredictionToCandidates(*convreq_, &segments, &results);
  EXPECT_EQ(1, segments.conversion_segments_size());
  EXPECT_EQ(1, segments.conversion_segment(0).candidates_size());

  const Segment::Candidate &top = segments.conversion_segment(0).candidate(0);
  EXPECT_EQ("わたしのなまえはなかのです", top.key);
  EXPECT_EQ("私の名前は中野です", top.value);
  EXPECT_EQ(3, top.inner_segment_boundary.size());
}
3030
// Feeds 20 shuffled results with costs 1000..1019 to
// AddPredictionToCandidates() and expects the i-th candidate to carry the
// cost i + 1000.
TEST_F(DictionaryPredictorTest, PropagateResultCosts) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  const int kTestSize = 20;
  std::vector<TestableDictionaryPredictor::Result> results;
  for (size_t i = 0; i < kTestSize; ++i) {
    results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
    TestableDictionaryPredictor::Result &entry = results.back();
    // One-letter keys/values: 'a', 'b', ... and 'A', 'B', ...
    entry.key = string(1, static_cast<char>('a' + i));
    entry.value = string(1, static_cast<char>('A' + i));
    entry.wcost = i;
    entry.cost = i + 1000;
    entry.SetTypesAndTokenAttributes(TestableDictionaryPredictor::REALTIME,
                                     Token::NONE);
  }

  // Randomize the input order before handing it to the predictor.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::shuffle(results.begin(), results.end(), generator);

  Segments segments;
  MakeSegmentsForSuggestion("test", &segments);
  segments.set_max_prediction_candidates_size(kTestSize);

  predictor->AddPredictionToCandidates(*convreq_, &segments, &results);

  EXPECT_EQ(1, segments.conversion_segments_size());
  ASSERT_EQ(kTestSize, segments.conversion_segment(0).candidates_size());
  const Segment &predicted = segments.conversion_segment(0);
  for (size_t i = 0; i < predicted.candidates_size(); ++i) {
    EXPECT_EQ(i + 1000, predicted.candidate(i).cost);
  }
}
3067
// Feeds 100 shuffled results to AddPredictionToCandidates(), of which only the
// first kLowCostCandidateSize have a cost below kInfinity, and expects exactly
// those low-cost results to become candidates, with the i-th candidate
// carrying the cost i + 1000, even though one more candidate is allowed.
TEST_F(DictionaryPredictorTest, PredictNCandidates) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  std::vector<TestableDictionaryPredictor::Result> results;
  const int kTotalCandidateSize = 100;
  const int kLowCostCandidateSize = 5;
  for (size_t i = 0; i < kTotalCandidateSize; ++i) {
    results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
    TestableDictionaryPredictor::Result *result = &results.back();
    result->key = string(1, 'a' + i);
    result->value = string(1, 'A' + i);
    result->wcost = i;
    result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::REALTIME,
                                       Token::NONE);
    if (i < kLowCostCandidateSize) {
      result->cost = i + 1000;
    } else {
      result->cost = i + kInfinity;
    }
  }
  // std::random_shuffle is deprecated in C++14 and removed in C++17; use
  // std::shuffle with an explicit engine, as PropagateResultCosts does.
  std::random_device rd;
  std::mt19937 urbg(rd());
  std::shuffle(results.begin(), results.end(), urbg);

  Segments segments;
  MakeSegmentsForSuggestion("test", &segments);
  // Allow one more candidate than there are low-cost results.
  segments.set_max_prediction_candidates_size(kLowCostCandidateSize + 1);

  predictor->AddPredictionToCandidates(*convreq_, &segments, &results);

  ASSERT_EQ(1, segments.conversion_segments_size());
  ASSERT_EQ(kLowCostCandidateSize,
            segments.conversion_segment(0).candidates_size());
  const Segment &segment = segments.conversion_segment(0);
  for (size_t i = 0; i < segment.candidates_size(); ++i) {
    EXPECT_EQ(i + 1000, segment.candidate(i).cost);
  }
}
3108
// In mobile mode the filtered word "フィルター対象" must be suggested for an
// exactly matching key (though not as the top candidate) and must not be
// suggested for a shorter, non-exact key.
TEST_F(DictionaryPredictorTest, SuggestFilteredwordForExactMatchOnMobile) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  // turn on mobile mode
  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  Segments segments;
  // Note: The suggestion filter entry "フィルター" for test is not
  // appropriate here, as Katakana entry will be added by realtime conversion.
  // Here, we want to confirm the behavior including unigram prediction.
  MakeSegmentsForSuggestion("ふぃるたーたいしょう", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

  {
    const Segment &exact_match = segments.conversion_segment(0);
    EXPECT_TRUE(FindCandidateByValue(exact_match, "フィルター対象"));
    EXPECT_TRUE(FindCandidateByValue(exact_match, "フィルター大将"));

    // However, filtered word should not be the top.
    EXPECT_EQ("フィルター大将", exact_match.candidate(0).value);
  }

  // Should not be there for non-exact suggestion.
  MakeSegmentsForSuggestion("ふぃるたーたいし", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  EXPECT_FALSE(
      FindCandidateByValue(segments.conversion_segment(0), "フィルター対象"));
}
3140
// Without the mobile request, the filtered word "フィルター対象" must not be
// suggested even when the key matches it exactly.
TEST_F(DictionaryPredictorTest, SuppressFilteredwordForExactMatch) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  // Note: The suggestion filter entry "フィルター" for test is not
  // appropriate here, as Katakana entry will be added by realtime conversion.
  // Here, we want to confirm the behavior including unigram prediction.
  MakeSegmentsForSuggestion("ふぃるたーたいしょう", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

  const Segment &predicted = segments.conversion_segment(0);
  EXPECT_FALSE(FindCandidateByValue(predicted, "フィルター対象"));
}
3157
3158 namespace {
3159
3160 const char kTestTokenArray[] =
3161 // {"あ", "", ZERO_QUERY_EMOJI, EMOJI_DOCOMO | EMOJI_SOFTBANK, 0xfeb04}
3162 "\x04\x00\x00\x00"
3163 "\x00\x00\x00\x00"
3164 "\x03\x00"
3165 "\x06\x00"
3166 "\x04\xeb\x0f\x00"
3167 // {"あ", "❕", ZERO_QUERY_EMOJI, EMOJI_UNICODE, 0xfeb0b},
3168 "\x04\x00\x00\x00"
3169 "\x02\x00\x00\x00"
3170 "\x03\x00"
3171 "\x01\x00"
3172 "\x0b\xeb\x0f\x00"
3173 // {"あ", "❣", ZERO_QUERY_NONE, EMOJI_NONE, 0x00},
3174 "\x04\x00\x00\x00"
3175 "\x03\x00\x00\x00"
3176 "\x00\x00"
3177 "\x00\x00"
3178 "\x00\x00\x00\x00"
3179 // {"ああ", "( •̀ㅁ•́;)", ZERO_QUERY_EMOTICON, EMOJI_NONE, 0x00}
3180 "\x05\x00\x00\x00"
3181 "\x01\x00\x00\x00"
3182 "\x02\x00"
3183 "\x00\x00"
3184 "\x00\x00\x00\x00";
3185
3186 const char *kTestStrings[] = {
3187 "", "( •̀ㅁ•́;)", "❕", "❣", "あ", "ああ",
3188 };
3189
3190 struct TestEntry {
3191 int32 available_emoji_carrier;
3192 string key;
3193 bool expected_result;
3194 // candidate value and ZeroQueryType.
3195 std::vector<string> expected_candidates;
3196 std::vector<int32> expected_types;
3197
DebugStringmozc::__anon8d7eb5c60411::TestEntry3198 string DebugString() const {
3199 string candidates;
3200 Util::JoinStrings(expected_candidates, ", ", &candidates);
3201 string types;
3202 for (size_t i = 0; i < expected_types.size(); ++i) {
3203 if (i != 0) {
3204 types.append(", ");
3205 }
3206 types.append(Util::StringPrintf("%d", types[i]));
3207 }
3208 return Util::StringPrintf(
3209 "available_emoji_carrier: %d\n"
3210 "key: %s\n"
3211 "expected_result: %d\n"
3212 "expected_candidates: %s\n"
3213 "expected_types: %s",
3214 available_emoji_carrier,
3215 key.c_str(),
3216 expected_result,
3217 candidates.c_str(),
3218 types.c_str());
3219 }
3220 };
3221
3222 } // namespace
3223
// Runs GetZeroQueryCandidatesForKey() against a zero query dictionary built
// from kTestTokenArray / kTestStrings for several keys and emoji carrier
// settings, and compares the returned candidates and types with the
// expectations listed in |test_entries|.
TEST_F(DictionaryPredictorTest, GetZeroQueryCandidates) {
  // Create test zero query data.
  std::unique_ptr<uint32[]> string_data_buffer;
  ZeroQueryDict zero_query_dict;
  {
    // kTestTokenArray contains a trailing '\0', so create a StringPiece that
    // excludes it by subtracting 1.
    const StringPiece token_array_data(kTestTokenArray,
                                       arraysize(kTestTokenArray) - 1);
    std::vector<StringPiece> strs;
    for (const char *str : kTestStrings) {
      strs.push_back(str);
    }
    const StringPiece string_array_data =
        SerializedStringArray::SerializeToBuffer(strs, &string_data_buffer);
    zero_query_dict.Init(token_array_data, string_array_data);
  }

  std::vector<TestEntry> test_entries;
  {
    // Key that is not in the dictionary.
    TestEntry entry;
    entry.available_emoji_carrier = 0;
    entry.key = "a";
    entry.expected_result = false;
    test_entries.push_back(entry);
  }
  {
    // Another key that is not in the dictionary.
    TestEntry entry;
    entry.available_emoji_carrier = 0;
    entry.key = "ん";
    entry.expected_result = false;
    test_entries.push_back(entry);
  }
  {
    // Emoticon entry, no emoji carrier.
    TestEntry entry;
    entry.available_emoji_carrier = 0;
    entry.key = "ああ";
    entry.expected_result = true;
    entry.expected_candidates.push_back("( •̀ㅁ•́;)");
    entry.expected_types.push_back(ZERO_QUERY_EMOTICON);
    test_entries.push_back(entry);
  }
  {
    // No emoji carrier: only the non-emoji entry is expected.
    TestEntry entry;
    entry.available_emoji_carrier = 0;
    entry.key = "あ";
    entry.expected_result = true;
    entry.expected_candidates.push_back("❣");
    entry.expected_types.push_back(ZERO_QUERY_NONE);
    test_entries.push_back(entry);
  }
  {
    // Unicode emoji carrier.
    TestEntry entry;
    entry.available_emoji_carrier = commands::Request::UNICODE_EMOJI;
    entry.key = "あ";
    entry.expected_result = true;
    entry.expected_candidates.push_back("❕");
    entry.expected_types.push_back(ZERO_QUERY_EMOJI);

    entry.expected_candidates.push_back("❣");
    entry.expected_types.push_back(ZERO_QUERY_NONE);
    test_entries.push_back(entry);
  }
  {
    // Docomo emoji carrier: the emoji is expected as code point 0xfeb04.
    TestEntry entry;
    entry.available_emoji_carrier = commands::Request::DOCOMO_EMOJI;
    entry.key = "あ";
    entry.expected_result = true;
    string candidate;
    Util::UCS4ToUTF8(0xfeb04, &candidate);  // exclamation
    entry.expected_candidates.push_back(candidate);
    entry.expected_types.push_back(ZERO_QUERY_EMOJI);

    entry.expected_candidates.push_back("❣");
    entry.expected_types.push_back(ZERO_QUERY_NONE);
    test_entries.push_back(entry);
  }
  {
    // KDDI emoji carrier: only the non-emoji entry is expected.
    TestEntry entry;
    entry.available_emoji_carrier = commands::Request::KDDI_EMOJI;
    entry.key = "あ";
    entry.expected_result = true;
    entry.expected_candidates.push_back("❣");
    entry.expected_types.push_back(ZERO_QUERY_NONE);
    test_entries.push_back(entry);
  }
  {
    // Several carriers at once: both emoji entries plus the non-emoji one.
    TestEntry entry;
    entry.available_emoji_carrier =
        (commands::Request::DOCOMO_EMOJI | commands::Request::SOFTBANK_EMOJI |
         commands::Request::UNICODE_EMOJI);
    entry.key = "あ";
    entry.expected_result = true;
    string candidate;
    Util::UCS4ToUTF8(0xfeb04, &candidate);  // exclamation
    entry.expected_candidates.push_back(candidate);
    entry.expected_types.push_back(ZERO_QUERY_EMOJI);

    entry.expected_candidates.push_back("❕");
    entry.expected_types.push_back(ZERO_QUERY_EMOJI);

    entry.expected_candidates.push_back("❣");
    entry.expected_types.push_back(ZERO_QUERY_NONE);
    test_entries.push_back(entry);
  }

  for (size_t i = 0; i < test_entries.size(); ++i) {
    const TestEntry &test_entry = test_entries[i];
    ASSERT_EQ(test_entry.expected_candidates.size(),
              test_entry.expected_types.size());

    commands::Request client_request;
    client_request.set_available_emoji_carrier(
        test_entry.available_emoji_carrier);
    composer::Table table;
    const config::Config &config = config::ConfigHandler::DefaultConfig();
    composer::Composer composer(&table, &client_request, &config);
    const ConversionRequest request(&composer, &client_request, &config);

    std::vector<DictionaryPredictor::ZeroQueryResult> actual_candidates;
    const bool actual_result =
        DictionaryPredictor::GetZeroQueryCandidatesForKey(
            request, test_entry.key, zero_query_dict, &actual_candidates);
    EXPECT_EQ(test_entry.expected_result, actual_result)
        << test_entry.DebugString();
    // Guard the indexed accesses below: without this, a lookup returning
    // fewer candidates than expected would read past the end of
    // |actual_candidates|.
    ASSERT_LE(test_entry.expected_candidates.size(), actual_candidates.size())
        << test_entry.DebugString();
    for (size_t j = 0; j < test_entry.expected_candidates.size(); ++j) {
      EXPECT_EQ(test_entry.expected_candidates[j], actual_candidates[j].first)
          << "Failed at " << j << " : " << test_entry.DebugString();
      EXPECT_EQ(test_entry.expected_types[j], actual_candidates[j].second)
          << "Failed at " << j << " : " << test_entry.DebugString();
    }
  }
}
3361
3362 namespace {
SetSegmentForCommit(const string & candidate_value,int candidate_source_info,Segments * segments)3363 void SetSegmentForCommit(const string &candidate_value,
3364 int candidate_source_info, Segments *segments) {
3365 segments->Clear();
3366 Segment *segment = segments->add_segment();
3367 segment->set_key("");
3368 segment->set_segment_type(Segment::FIXED_VALUE);
3369 Segment::Candidate *candidate = segment->add_candidate();
3370 candidate->key = candidate_value;
3371 candidate->content_key = candidate_value;
3372 candidate->value = candidate_value;
3373 candidate->content_value = candidate_value;
3374 candidate->source_info = candidate_source_info;
3375 }
3376 } // namespace
3377
// For each zero query source_info value, commits a candidate tagged with it
// through Finish() and checks that the matching
// "CommitDictionaryPredictorZeroQueryType*" counter goes from 0 to 1.
TEST_F(DictionaryPredictorTest, UsageStats) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  DictionaryPredictor *predictor =
      data_and_predictor->mutable_dictionary_predictor();

  Segments segments;
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeNone", 0);
  SetSegmentForCommit(
      "★", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NONE, &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeNone", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeNumberSuffix", 0);
  SetSegmentForCommit(
      "個", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NUMBER_SUFFIX,
      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeNumberSuffix", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeEmoticon", 0);
  // The backslash is escaped explicitly: "\(" is not a valid escape sequence
  // and would otherwise be diagnosed or silently dropped by the compiler.
  SetSegmentForCommit(
      "\\(^o^)/", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_EMOTICON,
      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeEmoticon", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeEmoji", 0);
  SetSegmentForCommit("❕",
                      Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_EMOJI,
                      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeEmoji", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeBigram", 0);
  SetSegmentForCommit(
      "ヒルズ", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_BIGRAM,
      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeBigram", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeSuffix", 0);
  SetSegmentForCommit(
      "が", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX,
      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeSuffix", 1);
}
3426
3427 } // namespace mozc
3428