1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "prediction/dictionary_predictor.h"
31 
32 #include <algorithm>
33 #include <memory>
34 #include <random>
35 #include <set>
36 #include <string>
37 #include <utility>
38 #include <vector>
39 
40 #include "base/flags.h"
41 #include "base/logging.h"
42 #include "base/port.h"
43 #include "base/serialized_string_array.h"
44 #include "base/system_util.h"
45 #include "base/util.h"
46 #include "composer/composer.h"
47 #include "composer/internal/typing_model.h"
48 #include "composer/table.h"
49 #include "config/config_handler.h"
50 #include "converter/connector.h"
51 #include "converter/converter_interface.h"
52 #include "converter/converter_mock.h"
53 #include "converter/immutable_converter.h"
54 #include "converter/immutable_converter_interface.h"
55 #include "converter/node_allocator.h"
56 #include "converter/segmenter.h"
57 #include "converter/segments.h"
58 #include "data_manager/data_manager_interface.h"
59 #include "data_manager/testing/mock_data_manager.h"
60 #include "dictionary/dictionary_interface.h"
61 #include "dictionary/dictionary_mock.h"
62 #include "dictionary/pos_group.h"
63 #include "dictionary/pos_matcher.h"
64 #include "dictionary/suffix_dictionary.h"
65 #include "dictionary/suppression_dictionary.h"
66 #include "dictionary/system/system_dictionary.h"
67 #include "prediction/suggestion_filter.h"
68 #include "prediction/zero_query_dict.h"
69 #include "protocol/commands.pb.h"
70 #include "protocol/config.pb.h"
71 #include "request/conversion_request.h"
72 #include "session/request_test_util.h"
73 #include "testing/base/public/gmock.h"
74 #include "testing/base/public/googletest.h"
75 #include "testing/base/public/gunit.h"
76 #include "transliteration/transliteration.h"
77 #include "usage_stats/usage_stats.h"
78 #include "usage_stats/usage_stats_testing_util.h"
79 
80 DECLARE_bool(enable_expansion_for_dictionary_predictor);
81 
82 namespace mozc {
83 namespace {
84 
85 using std::unique_ptr;
86 
87 using dictionary::DictionaryInterface;
88 using dictionary::DictionaryMock;
89 using dictionary::POSMatcher;
90 using dictionary::PosGroup;
91 using dictionary::SuffixDictionary;
92 using dictionary::SuppressionDictionary;
93 using dictionary::Token;
94 using ::testing::_;
95 
// Large integer constant: 1 << 21 == 2 << 20 == 2097152.
const int kInfinity = 1 << 21;
97 
CreateSystemDictionaryFromDataManager(const DataManagerInterface & data_manager)98 DictionaryInterface *CreateSystemDictionaryFromDataManager(
99     const DataManagerInterface &data_manager) {
100   const char *data = NULL;
101   int size = 0;
102   data_manager.GetSystemDictionaryData(&data, &size);
103   using mozc::dictionary::SystemDictionary;
104   return SystemDictionary::Builder(data, size).Build();
105 }
106 
CreateSuffixDictionaryFromDataManager(const DataManagerInterface & data_manager)107 DictionaryInterface *CreateSuffixDictionaryFromDataManager(
108     const DataManagerInterface &data_manager) {
109   StringPiece suffix_key_array_data, suffix_value_array_data;
110   const uint32 *token_array;
111   data_manager.GetSuffixDictionaryData(&suffix_key_array_data,
112                                        &suffix_value_array_data,
113                                        &token_array);
114   return new SuffixDictionary(suffix_key_array_data,
115                               suffix_value_array_data,
116                               token_array);
117 }
118 
CreateSuggestionFilter(const DataManagerInterface & data_manager)119 SuggestionFilter *CreateSuggestionFilter(
120     const DataManagerInterface &data_manager) {
121   const char *data = NULL;
122   size_t size = 0;
123   data_manager.GetSuggestionFilterData(&data, &size);
124   return new SuggestionFilter(data, size);
125 }
126 
127 // Simple immutable converter mock for the realtime conversion test
128 class ImmutableConverterMock : public ImmutableConverterInterface {
129  public:
ImmutableConverterMock()130   ImmutableConverterMock() {
131     Segment *segment = segments_.add_segment();
132     segment->set_key("わたしのなまえはなかのです");
133     Segment::Candidate *candidate = segment->add_candidate();
134     candidate->value = "私の名前は中野です";
135     candidate->key = ("わたしのなまえはなかのです");
136     // "わたしの, 私の", "わたし, 私"
137     candidate->PushBackInnerSegmentBoundary(12, 6, 9, 3);
138     // "なまえは, 名前は", "なまえ, 名前"
139     candidate->PushBackInnerSegmentBoundary(12, 9, 9, 6);
140     // "なかのです, 中野です", "なかの, 中野"
141     candidate->PushBackInnerSegmentBoundary(15, 12, 9, 6);
142   }
143 
ConvertForRequest(const ConversionRequest & request,Segments * segments) const144   bool ConvertForRequest(
145       const ConversionRequest &request, Segments *segments) const override {
146     segments->CopyFrom(segments_);
147     return true;
148   }
149 
150  private:
151   Segments segments_;
152 };
153 
154 class TestableDictionaryPredictor : public DictionaryPredictor {
155   // Test-only subclass: Just changing access levels
156  public:
TestableDictionaryPredictor(const DataManagerInterface & data_manager,const ConverterInterface * converter,const ImmutableConverterInterface * immutable_converter,const DictionaryInterface * dictionary,const DictionaryInterface * suffix_dictionary,const Connector * connector,const Segmenter * segmenter,const POSMatcher * pos_matcher,const SuggestionFilter * suggestion_filter)157   TestableDictionaryPredictor(
158       const DataManagerInterface &data_manager,
159       const ConverterInterface *converter,
160       const ImmutableConverterInterface *immutable_converter,
161       const DictionaryInterface *dictionary,
162       const DictionaryInterface *suffix_dictionary,
163       const Connector *connector,
164       const Segmenter *segmenter,
165       const POSMatcher *pos_matcher,
166       const SuggestionFilter *suggestion_filter)
167       : DictionaryPredictor(data_manager,
168                             converter,
169                             immutable_converter,
170                             dictionary,
171                             suffix_dictionary,
172                             connector,
173                             segmenter,
174                             pos_matcher,
175                             suggestion_filter) {}
176 
177   using DictionaryPredictor::PredictionTypes;
178   using DictionaryPredictor::NO_PREDICTION;
179   using DictionaryPredictor::UNIGRAM;
180   using DictionaryPredictor::BIGRAM;
181   using DictionaryPredictor::REALTIME;
182   using DictionaryPredictor::REALTIME_TOP;
183   using DictionaryPredictor::SUFFIX;
184   using DictionaryPredictor::ENGLISH;
185   using DictionaryPredictor::Result;
186   using DictionaryPredictor::MakeEmptyResult;
187   using DictionaryPredictor::AddPredictionToCandidates;
188   using DictionaryPredictor::AggregateRealtimeConversion;
189   using DictionaryPredictor::AggregateUnigramPrediction;
190   using DictionaryPredictor::AggregateBigramPrediction;
191   using DictionaryPredictor::AggregateSuffixPrediction;
192   using DictionaryPredictor::AggregateEnglishPrediction;
193   using DictionaryPredictor::ApplyPenaltyForKeyExpansion;
194   using DictionaryPredictor::TYPING_CORRECTION;
195   using DictionaryPredictor::AggregateTypeCorrectingPrediction;
196 };
197 
198 // Helper class to hold dictionary data and predictor objects.
199 class MockDataAndPredictor {
200  public:
201   // Initializes predictor with given dictionary and suffix_dictionary.  When
202   // NULL is passed to the first argument |dictionary|, the default
203   // DictionaryMock is used. For the second, the default is MockDataManager's
204   // suffix dictionary. Note that |dictionary| is owned by this class but
205   // |suffix_dictionary| is NOT owned because the current design assumes that
206   // suffix dictionary is singleton.
Init(const DictionaryInterface * dictionary=NULL,const DictionaryInterface * suffix_dictionary=NULL)207   void Init(const DictionaryInterface *dictionary = NULL,
208             const DictionaryInterface *suffix_dictionary = NULL) {
209     pos_matcher_.Set(data_manager_.GetPOSMatcherData());
210     suppression_dictionary_.reset(new SuppressionDictionary);
211     if (!dictionary) {
212       dictionary_mock_ = new DictionaryMock;
213       dictionary_.reset(dictionary_mock_);
214     } else {
215       dictionary_mock_ = NULL;
216       dictionary_.reset(dictionary);
217     }
218     if (!suffix_dictionary) {
219       suffix_dictionary_.reset(
220           CreateSuffixDictionaryFromDataManager(data_manager_));
221     } else {
222       suffix_dictionary_.reset(suffix_dictionary);
223     }
224     CHECK(suffix_dictionary_.get());
225 
226     connector_.reset(Connector::CreateFromDataManager(data_manager_));
227     CHECK(connector_.get());
228 
229     segmenter_.reset(Segmenter::CreateFromDataManager(data_manager_));
230     CHECK(segmenter_.get());
231 
232     pos_group_.reset(new PosGroup(data_manager_.GetPosGroupData()));
233     suggestion_filter_.reset(CreateSuggestionFilter(data_manager_));
234     immutable_converter_.reset(
235         new ImmutableConverterImpl(dictionary_.get(),
236                                    suffix_dictionary_.get(),
237                                    suppression_dictionary_.get(),
238                                    connector_.get(),
239                                    segmenter_.get(),
240                                    &pos_matcher_,
241                                    pos_group_.get(),
242                                    suggestion_filter_.get()));
243     converter_.reset(new ConverterMock());
244     dictionary_predictor_.reset(
245         new TestableDictionaryPredictor(data_manager_,
246                                         converter_.get(),
247                                         immutable_converter_.get(),
248                                         dictionary_.get(),
249                                         suffix_dictionary_.get(),
250                                         connector_.get(),
251                                         segmenter_.get(),
252                                         &pos_matcher_,
253                                         suggestion_filter_.get()));
254   }
255 
pos_matcher() const256   const POSMatcher &pos_matcher() const {
257     return pos_matcher_;
258   }
259 
mutable_dictionary()260   DictionaryMock *mutable_dictionary() {
261     return dictionary_mock_;
262   }
263 
mutable_converter_mock()264   ConverterMock *mutable_converter_mock() {
265     return converter_.get();
266   }
267 
dictionary_predictor()268   const TestableDictionaryPredictor *dictionary_predictor() {
269     return dictionary_predictor_.get();
270   }
271 
mutable_dictionary_predictor()272   TestableDictionaryPredictor *mutable_dictionary_predictor() {
273     return dictionary_predictor_.get();
274   }
275 
276  private:
277   const testing::MockDataManager data_manager_;
278   POSMatcher pos_matcher_;
279   unique_ptr<SuppressionDictionary> suppression_dictionary_;
280   unique_ptr<const Connector> connector_;
281   unique_ptr<const Segmenter> segmenter_;
282   unique_ptr<const DictionaryInterface> suffix_dictionary_;
283   unique_ptr<const DictionaryInterface> dictionary_;
284   DictionaryMock *dictionary_mock_;
285   unique_ptr<const PosGroup> pos_group_;
286   unique_ptr<ImmutableConverterInterface> immutable_converter_;
287   unique_ptr<ConverterMock> converter_;
288   unique_ptr<const SuggestionFilter> suggestion_filter_;
289   unique_ptr<TestableDictionaryPredictor> dictionary_predictor_;
290 };
291 
292 class CallCheckDictionary : public DictionaryInterface {
293  public:
294   CallCheckDictionary() = default;
295   ~CallCheckDictionary() override = default;
296 
297   MOCK_CONST_METHOD1(HasKey,
298                      bool(StringPiece));
299   MOCK_CONST_METHOD1(HasValue,
300                      bool(StringPiece));
301   MOCK_CONST_METHOD3(LookupPredictive,
302                      void(StringPiece key,
303                           const ConversionRequest& convreq,
304                           Callback *callback));
305   MOCK_CONST_METHOD3(LookupPrefix,
306                      void(StringPiece key,
307                           const ConversionRequest& convreq,
308                           Callback *callback));
309   MOCK_CONST_METHOD3(LookupExact,
310                      void(StringPiece key,
311                           const ConversionRequest& convreq,
312                           Callback *callback));
313   MOCK_CONST_METHOD3(LookupReverse,
314                      void(StringPiece str,
315                           const ConversionRequest& convreq,
316                           Callback *callback));
317 };
318 
319 // Action to call the third argument of LookupPrefix with the token
320 // <key, value>.
ACTION_P4(LookupPrefixOneToken,key,value,lid,rid)321 ACTION_P4(LookupPrefixOneToken, key, value, lid, rid) {
322   Token token;
323   token.key = key;
324   token.value = value;
325   token.lid = lid;
326   token.rid = rid;
327   arg2->OnToken(key, key, token);
328 }
329 
MakeSegmentsForSuggestion(const string key,Segments * segments)330 void MakeSegmentsForSuggestion(const string key, Segments *segments) {
331   segments->Clear();
332   segments->set_max_prediction_candidates_size(10);
333   segments->set_request_type(Segments::SUGGESTION);
334   Segment *seg = segments->add_segment();
335   seg->set_key(key);
336   seg->set_segment_type(Segment::FREE);
337 }
338 
MakeSegmentsForPrediction(const string key,Segments * segments)339 void MakeSegmentsForPrediction(const string key, Segments *segments) {
340   segments->Clear();
341   segments->set_max_prediction_candidates_size(50);
342   segments->set_request_type(Segments::PREDICTION);
343   Segment *seg = segments->add_segment();
344   seg->set_key(key);
345   seg->set_segment_type(Segment::FREE);
346 }
347 
PrependHistorySegments(const string & key,const string & value,Segments * segments)348 void PrependHistorySegments(const string &key,
349                             const string &value,
350                             Segments *segments) {
351   Segment *seg = segments->push_front_segment();
352   seg->set_segment_type(Segment::HISTORY);
353   seg->set_key(key);
354   Segment::Candidate *c = seg->add_candidate();
355   c->key = key;
356   c->content_key = key;
357   c->value = value;
358   c->content_value = value;
359 }
360 
361 class MockTypingModel : public mozc::composer::TypingModel {
362  public:
MockTypingModel()363   MockTypingModel() : TypingModel(nullptr, 0, nullptr, 0, nullptr) {}
364   ~MockTypingModel() override = default;
GetCost(StringPiece key) const365   int GetCost(StringPiece key) const override {
366     return 10;
367   }
368 };
369 
370 }  // namespace
371 
372 class DictionaryPredictorTest : public ::testing::Test {
373  public:
DictionaryPredictorTest()374   DictionaryPredictorTest() :
375       default_expansion_flag_(
376           FLAGS_enable_expansion_for_dictionary_predictor) {
377   }
378 
// Restores the key-expansion flag to the value captured by the constructor.
~DictionaryPredictorTest() override {
  FLAGS_enable_expansion_for_dictionary_predictor = default_expansion_flag_;
}
382 
383  protected:
SetUp()384   void SetUp() override {
385     FLAGS_enable_expansion_for_dictionary_predictor = false;
386     SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
387     request_.reset(new commands::Request);
388     config_.reset(new config::Config);
389     config::ConfigHandler::GetDefaultConfig(config_.get());
390     table_.reset(new composer::Table);
391     composer_.reset(
392         new composer::Composer(table_.get(), request_.get(), config_.get()));
393     convreq_.reset(
394         new ConversionRequest(composer_.get(), request_.get(), config_.get()));
395 
396     mozc::usage_stats::UsageStats::ClearAllStatsForTest();
397   }
398 
TearDown()399   void TearDown() override {
400     FLAGS_enable_expansion_for_dictionary_predictor = false;
401     mozc::usage_stats::UsageStats::ClearAllStatsForTest();
402   }
403 
AddWordsToMockDic(DictionaryMock * mock)404   static void AddWordsToMockDic(DictionaryMock *mock) {
405     const char kGoogleA[] = "ぐーぐるあ";
406 
407     const char kGoogleAdsenseHiragana[] = "ぐーぐるあどせんす";
408     const char kGoogleAdsenseKatakana[] = "グーグルアドセンス";
409     mock->AddLookupPredictive(kGoogleA, kGoogleAdsenseHiragana,
410                               kGoogleAdsenseKatakana, Token::NONE);
411 
412     const char kGoogleAdwordsHiragana[] = "ぐーぐるあどわーず";
413     const char kGoogleAdwordsKatakana[] = "グーグルアドワーズ";
414     mock->AddLookupPredictive(kGoogleA, kGoogleAdwordsHiragana,
415                               kGoogleAdwordsKatakana, Token::NONE);
416 
417     const char kGoogle[] = "ぐーぐる";
418     mock->AddLookupPredictive(kGoogle, kGoogleAdsenseHiragana,
419                               kGoogleAdsenseKatakana, Token::NONE);
420     mock->AddLookupPredictive(kGoogle, kGoogleAdwordsHiragana,
421                               kGoogleAdwordsKatakana, Token::NONE);
422 
423     const char kGoogleKatakana[] = "グーグル";
424     mock->AddLookupPrefix(kGoogle, kGoogleKatakana, kGoogleKatakana,
425                           Token::NONE);
426 
427     const char kAdsense[] = "あどせんす";
428     const char kAdsenseKatakana[] = "アドセンス";
429     mock->AddLookupPrefix(kAdsense, kAdsenseKatakana, kAdsenseKatakana,
430                           Token::NONE);
431 
432     const char kTestHiragana[] = "てすと";
433     const char kTestKatakana[] = "テスト";
434     mock->AddLookupPrefix(kTestHiragana, kTestHiragana, kTestKatakana,
435                           Token::NONE);
436 
437     const char kFilterHiragana[] = "ふぃるたーたいしょう";
438     const char kFilterPrefixHiragana[] = "ふぃるたーたいし";
439 
440     // Note: This is in the filter
441     const char kFilterWord[] = "フィルター対象";
442 
443     // Note: This is NOT in the filter
444     const char kNonFilterWord[] = "フィルター大将";
445 
446     mock->AddLookupPrefix(kFilterHiragana, kFilterHiragana, kFilterWord,
447                           Token::NONE);
448 
449     mock->AddLookupPrefix(kFilterHiragana, kFilterHiragana, kNonFilterWord,
450                           Token::NONE);
451 
452     mock->AddLookupPredictive(kFilterHiragana, kFilterHiragana, kFilterWord,
453                               Token::NONE);
454 
455     mock->AddLookupPredictive(kFilterHiragana, kFilterPrefixHiragana,
456                               kFilterWord, Token::NONE);
457 
458     const char kWrongCapriHiragana[] = "かぷりちょうざ";
459     const char kRightCapriHiragana[] = "かぷりちょーざ";
460     const char kCapriKatakana[] = "カプリチョーザ";
461 
462     mock->AddLookupPrefix(kWrongCapriHiragana, kRightCapriHiragana,
463                           kCapriKatakana, Token::SPELLING_CORRECTION);
464 
465     mock->AddLookupPredictive(kWrongCapriHiragana, kRightCapriHiragana,
466                               kCapriKatakana, Token::SPELLING_CORRECTION);
467 
468     const char kDe[] = "で";
469 
470     mock->AddLookupPrefix(kDe, kDe, kDe, Token::NONE);
471 
472     const char kHirosueHiragana[] = "ひろすえ";
473     const char kHirosue[] = "広末";
474 
475     mock->AddLookupPrefix(kHirosueHiragana, kHirosueHiragana, kHirosue,
476                           Token::NONE);
477 
478     const char kYuzaHiragana[] = "ゆーざー";
479     const char kYuza[] = "ユーザー";
480     // For dictionary suggestion
481     mock->AddLookupPredictive(kYuzaHiragana, kYuzaHiragana, kYuza,
482                               Token::USER_DICTIONARY);
483     // For realtime conversion
484     mock->AddLookupPrefix(kYuzaHiragana, kYuzaHiragana, kYuza,
485                           Token::USER_DICTIONARY);
486 
487     // Some English entries
488     mock->AddLookupPredictive("conv", "converge", "converge", Token::NONE);
489     mock->AddLookupPredictive("conv", "converged", "converged", Token::NONE);
490     mock->AddLookupPredictive("conv", "convergent", "convergent", Token::NONE);
491     mock->AddLookupPredictive("con", "contraction", "contraction", Token::NONE);
492     mock->AddLookupPredictive("con", "control", "control", Token::NONE);
493   }
494 
CreateDictionaryPredictorWithMockData()495   MockDataAndPredictor *CreateDictionaryPredictorWithMockData() {
496     MockDataAndPredictor *ret = new MockDataAndPredictor;
497     ret->Init();
498     AddWordsToMockDic(ret->mutable_dictionary());
499     return ret;
500   }
501 
GenerateKeyEvents(const string & text,std::vector<commands::KeyEvent> * keys)502   void GenerateKeyEvents(const string &text,
503                          std::vector<commands::KeyEvent> *keys) {
504     keys->clear();
505 
506     const char *begin = text.data();
507     const char *end = text.data() + text.size();
508     size_t mblen = 0;
509 
510     while (begin < end) {
511       commands::KeyEvent key;
512       const char32 w = Util::UTF8ToUCS4(begin, end, &mblen);
513       if (Util::GetCharacterSet(w) == Util::ASCII) {
514         key.set_key_code(*begin);
515       } else {
516         key.set_key_code('?');
517         key.set_key_string(string(begin, mblen));
518       }
519       begin += mblen;
520       keys->push_back(key);
521     }
522   }
523 
InsertInputSequence(const string & text,composer::Composer * composer)524   void InsertInputSequence(const string &text, composer::Composer *composer) {
525     std::vector<commands::KeyEvent> keys;
526     GenerateKeyEvents(text, &keys);
527 
528     for (size_t i = 0; i < keys.size(); ++i) {
529       composer->InsertCharacterKeyEvent(keys[i]);
530     }
531   }
532 
InsertInputSequenceForProbableKeyEvent(const string & text,const uint32 * corrected_key_codes,composer::Composer * composer)533   void InsertInputSequenceForProbableKeyEvent(const string &text,
534                                               const uint32 *corrected_key_codes,
535                                               composer::Composer *composer) {
536     std::vector<commands::KeyEvent> keys;
537     GenerateKeyEvents(text, &keys);
538 
539     for (size_t i = 0; i < keys.size(); ++i) {
540       if (keys[i].key_code() != corrected_key_codes[i]) {
541         commands::KeyEvent::ProbableKeyEvent *probable_key_event;
542 
543         probable_key_event = keys[i].add_probable_key_event();
544         probable_key_event->set_key_code(keys[i].key_code());
545         probable_key_event->set_probability(0.9f);
546 
547         probable_key_event = keys[i].add_probable_key_event();
548         probable_key_event->set_key_code(corrected_key_codes[i]);
549         probable_key_event->set_probability(0.1f);
550       }
551       composer->InsertCharacterKeyEvent(keys[i]);
552     }
553   }
554 
ExpansionForUnigramTestHelper(bool use_expansion)555   void ExpansionForUnigramTestHelper(bool use_expansion) {
556     config_->set_use_dictionary_suggest(true);
557     config_->set_use_realtime_conversion(false);
558     config_->set_use_kana_modifier_insensitive_conversion(use_expansion);
559 
560     table_->LoadFromFile("system://romanji-hiragana.tsv");
561     composer_->SetTable(table_.get());
562     unique_ptr<MockDataAndPredictor> data_and_predictor(
563         new MockDataAndPredictor);
564     // CallCheckDictionary is managed by data_and_predictor;
565     CallCheckDictionary *check_dictionary = new CallCheckDictionary;
566     data_and_predictor->Init(check_dictionary, NULL);
567     const TestableDictionaryPredictor *predictor =
568       data_and_predictor->dictionary_predictor();
569 
570     {
571       Segments segments;
572       segments.set_request_type(Segments::PREDICTION);
573       request_->set_kana_modifier_insensitive_conversion(use_expansion);
574       InsertInputSequence("gu-g", composer_.get());
575       Segment *segment = segments.add_segment();
576       CHECK(segment);
577       string query;
578       composer_->GetQueryForPrediction(&query);
579       segment->set_key(query);
580 
581       EXPECT_CALL(*check_dictionary,
582                   LookupPredictive(::testing::Ne(""),
583                                    ::testing::Ref(*convreq_), _))
584           .Times(::testing::AtLeast(1));
585 
586       std::vector<TestableDictionaryPredictor::Result> results;
587       predictor->AggregateUnigramPrediction(
588           TestableDictionaryPredictor::UNIGRAM,
589           *convreq_, segments, &results);
590     }
591   }
592 
ExpansionForBigramTestHelper(bool use_expansion)593   void ExpansionForBigramTestHelper(bool use_expansion) {
594     config_->set_use_dictionary_suggest(true);
595     config_->set_use_realtime_conversion(false);
596     config_->set_use_kana_modifier_insensitive_conversion(use_expansion);
597 
598     table_->LoadFromFile("system://romanji-hiragana.tsv");
599     composer_->SetTable(table_.get());
600     unique_ptr<MockDataAndPredictor> data_and_predictor(
601         new MockDataAndPredictor);
602     // CallCheckDictionary is managed by data_and_predictor;
603     CallCheckDictionary *check_dictionary = new CallCheckDictionary;
604     data_and_predictor->Init(check_dictionary, NULL);
605     const TestableDictionaryPredictor *predictor =
606       data_and_predictor->dictionary_predictor();
607 
608     {
609       Segments segments;
610       segments.set_request_type(Segments::PREDICTION);
611       // History segment's key and value should be in the dictionary
612       Segment *segment = segments.add_segment();
613       CHECK(segment);
614       segment->set_segment_type(Segment::HISTORY);
615       segment->set_key("ぐーぐる");
616       Segment::Candidate *cand = segment->add_candidate();
617       cand->key = "ぐーぐる";
618       cand->content_key = "ぐーぐる";
619       cand->value = "グーグル";
620       cand->content_value = "グーグル";
621 
622       segment = segments.add_segment();
623       CHECK(segment);
624 
625       request_->set_kana_modifier_insensitive_conversion(use_expansion);
626       InsertInputSequence("m", composer_.get());
627       string query;
628       composer_->GetQueryForPrediction(&query);
629       segment->set_key(query);
630 
631       // History key and value should be in the dictionary.
632       EXPECT_CALL(*check_dictionary,
633                   LookupPrefix(_, ::testing::Ref(*convreq_), _))
634           .WillOnce(LookupPrefixOneToken("ぐーぐる", "グーグル", 1, 1));
635       EXPECT_CALL(*check_dictionary,
636                   LookupPredictive(_, ::testing::Ref(*convreq_), _));
637 
638       std::vector<TestableDictionaryPredictor::Result> results;
639       predictor->AggregateBigramPrediction(TestableDictionaryPredictor::BIGRAM,
640                                            *convreq_, segments, &results);
641     }
642   }
643 
ExpansionForSuffixTestHelper(bool use_expansion)644   void ExpansionForSuffixTestHelper(bool use_expansion) {
645     config_->set_use_dictionary_suggest(true);
646     config_->set_use_realtime_conversion(false);
647     config_->set_use_kana_modifier_insensitive_conversion(use_expansion);
648 
649     table_->LoadFromFile("system://romanji-hiragana.tsv");
650     composer_->SetTable(table_.get());
651     unique_ptr<MockDataAndPredictor> data_and_predictor(
652         new MockDataAndPredictor);
653     // CallCheckDictionary is managed by data_and_predictor.
654     CallCheckDictionary *check_dictionary = new CallCheckDictionary;
655     data_and_predictor->Init(NULL, check_dictionary);
656     const TestableDictionaryPredictor *predictor =
657       data_and_predictor->dictionary_predictor();
658 
659     {
660       Segments segments;
661       segments.set_request_type(Segments::PREDICTION);
662       Segment *segment = segments.add_segment();
663       CHECK(segment);
664 
665       request_->set_kana_modifier_insensitive_conversion(use_expansion);
666       InsertInputSequence("des", composer_.get());
667       string query;
668       composer_->GetQueryForPrediction(&query);
669       segment->set_key(query);
670 
671       EXPECT_CALL(*check_dictionary,
672                   LookupPredictive(::testing::Ne(""),
673                                    ::testing::Ref(*convreq_), _))
674           .Times(::testing::AtLeast(1));
675 
676       std::vector<TestableDictionaryPredictor::Result> results;
677       predictor->AggregateSuffixPrediction(
678           TestableDictionaryPredictor::SUFFIX,
679           *convreq_, segments, &results);
680     }
681   }
682 
FindCandidateByValue(const Segment & segment,const string & value)683   bool FindCandidateByValue(
684       const Segment &segment,
685       const string &value) {
686     for (size_t i = 0; i < segment.candidates_size(); ++i) {
687       const Segment::Candidate &c = segment.candidate(i);
688       if (c.value == value) {
689         return true;
690       }
691     }
692     return false;
693   }
694 
FindResultByValue(const std::vector<TestableDictionaryPredictor::Result> & results,const string & value)695   bool FindResultByValue(
696       const std::vector<TestableDictionaryPredictor::Result> &results,
697       const string &value) {
698     for (size_t i = 0; i < results.size(); ++i) {
699       if (results[i].value == value) {
700         return true;
701       }
702     }
703     return false;
704   }
705 
AggregateEnglishPredictionTestHelper(transliteration::TransliterationType input_mode,const char * key,const char * expected_prefix,const char * expected_values[],size_t expected_values_size)706   void AggregateEnglishPredictionTestHelper(
707       transliteration::TransliterationType input_mode,
708       const char *key, const char *expected_prefix,
709       const char *expected_values[], size_t expected_values_size) {
710     unique_ptr<MockDataAndPredictor> data_and_predictor(
711         CreateDictionaryPredictorWithMockData());
712     const TestableDictionaryPredictor *predictor =
713         data_and_predictor->dictionary_predictor();
714 
715     table_->LoadFromFile("system://romanji-hiragana.tsv");
716     composer_->Reset();
717     composer_->SetTable(table_.get());
718     composer_->SetInputMode(input_mode);
719     InsertInputSequence(key, composer_.get());
720 
721     Segments segments;
722     MakeSegmentsForPrediction(key, &segments);
723 
724     std::vector<TestableDictionaryPredictor::Result> results;
725     predictor->AggregateEnglishPrediction(
726         TestableDictionaryPredictor::ENGLISH,
727         *convreq_, segments, &results);
728 
729     std::set<string> values;
730     for (size_t i = 0; i < results.size(); ++i) {
731       EXPECT_EQ(TestableDictionaryPredictor::ENGLISH, results[i].types);
732       EXPECT_TRUE(Util::StartsWith(results[i].value, expected_prefix))
733           << results[i].value
734           << " doesn't start with " << expected_prefix;
735       values.insert(results[i].value);
736     }
737     for (size_t i = 0; i < expected_values_size; ++i) {
738       EXPECT_TRUE(values.find(expected_values[i]) != values.end())
739           << expected_values[i] << " isn't in the results";
740     }
741   }
742 
AggregateTypeCorrectingTestHelper(const char * key,const uint32 * corrected_key_codes,const char * expected_values[],size_t expected_values_size)743   void AggregateTypeCorrectingTestHelper(
744       const char *key,
745       const uint32 *corrected_key_codes,
746       const char *expected_values[],
747       size_t expected_values_size) {
748     request_->set_special_romanji_table(
749         commands::Request::QWERTY_MOBILE_TO_HIRAGANA);
750 
751     unique_ptr<MockDataAndPredictor> data_and_predictor(
752         CreateDictionaryPredictorWithMockData());
753     const TestableDictionaryPredictor *predictor =
754         data_and_predictor->dictionary_predictor();
755 
756     table_->LoadFromFile("system://qwerty_mobile-hiragana.tsv");
757     table_->typing_model_.reset(new MockTypingModel());
758     InsertInputSequenceForProbableKeyEvent(
759         key, corrected_key_codes, composer_.get());
760 
761     Segments segments;
762     MakeSegmentsForPrediction(key, &segments);
763 
764     std::vector<TestableDictionaryPredictor::Result> results;
765     predictor->AggregateTypeCorrectingPrediction(
766         TestableDictionaryPredictor::TYPING_CORRECTION,
767         *convreq_, segments, &results);
768 
769     std::set<string> values;
770     for (size_t i = 0; i < results.size(); ++i) {
771       EXPECT_EQ(TestableDictionaryPredictor::TYPING_CORRECTION,
772                 results[i].types);
773       values.insert(results[i].value);
774     }
775     for (size_t i = 0; i < expected_values_size; ++i) {
776       EXPECT_TRUE(values.find(expected_values[i]) != values.end())
777           << expected_values[i] << " isn't in the results";
778     }
779   }
780 
781   unique_ptr<composer::Composer> composer_;
782   unique_ptr<composer::Table> table_;
783   unique_ptr<ConversionRequest> convreq_;
784   unique_ptr<config::Config> config_;
785   unique_ptr<commands::Request> request_;
786 
787  private:
788   const bool default_expansion_flag_;
789   unique_ptr<ImmutableConverterInterface> immutable_converter_;
790   mozc::usage_stats::scoped_usage_stats_enabler usage_stats_enabler_;
791 };
792 
// Verifies that PredictForRequest() fails while both dictionary suggestion and
// realtime conversion are disabled, succeeds once dictionary suggestion is
// enabled, and fails again for an empty query.
TEST_F(DictionaryPredictorTest, OnOffTest) {
  const unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const predictor = mock->dictionary_predictor();
  const char kQuery[] = "ぐーぐるあ";

  Segments segments;

  // Both features off: prediction must fail.
  config_->set_use_dictionary_suggest(false);
  config_->set_use_realtime_conversion(false);
  MakeSegmentsForSuggestion(kQuery, &segments);
  EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));

  // Dictionary suggestion on: prediction must succeed.
  config_->set_use_dictionary_suggest(true);
  MakeSegmentsForSuggestion(kQuery, &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

  // Empty query: prediction must fail even though suggestion is on.
  MakeSegmentsForSuggestion("", &segments);
  EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
}
816 
// Verifies that PredictForRequest() succeeds for a PARTIAL_SUGGESTION request
// when mixed conversion is enabled and the mock converter is primed with a
// realtime result.
TEST_F(DictionaryPredictorTest, PartialSuggestion) {
  const unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  {
    // Prime the mock converter with a single-candidate conversion result.
    Segments converter_result;
    Segment::Candidate *top =
        converter_result.add_segment()->add_candidate();
    top->value = "Realtime top result";
    mock->mutable_converter_mock()->SetStartConversionForRequest(
        &converter_result, true);
  }
  const DictionaryPredictor *const predictor = mock->dictionary_predictor();

  Segments segments;
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(true);
  // Mixed conversion turns on mobile mode.
  request_->set_mixed_conversion(true);

  // Build a partial-suggestion request with a single free segment.
  segments.Clear();
  segments.set_max_prediction_candidates_size(10);
  segments.set_request_type(Segments::PARTIAL_SUGGESTION);
  Segment *current = segments.add_segment();
  current->set_key("ぐーぐるあ");
  current->set_segment_type(Segment::FREE);

  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
}
846 
// Verifies that PredictForRequest() succeeds for the query "あ" preceded by
// the history segment "ぐーぐる" / "グーグル".
TEST_F(DictionaryPredictorTest, BigramTest) {
  config_->set_use_dictionary_suggest(true);

  Segments segments;
  MakeSegmentsForSuggestion("あ", &segments);
  // The history value is "グーグル".
  PrependHistorySegments("ぐーぐる", "グーグル", &segments);

  const unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const predictor = mock->dictionary_predictor();

  // "グーグルアドセンス" will be returned.
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
}
863 
// Verifies that PredictForRequest() succeeds for an empty query when zero
// query suggestion is enabled and the history is "ぐーぐる" / "グーグル".
TEST_F(DictionaryPredictorTest, BigramTestWithZeroQuery) {
  config_->set_use_dictionary_suggest(true);
  request_->set_zero_query_suggestion(true);

  Segments segments;
  // The current query is empty; only the history carries information.
  MakeSegmentsForSuggestion("", &segments);
  // The history value is "グーグル".
  PrependHistorySegments("ぐーぐる", "グーグル", &segments);

  const unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const predictor = mock->dictionary_predictor();

  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
}
881 
882 // Check that previous candidate never be shown at the current candidate.
TEST_F(DictionaryPredictorTest,Regression3042706)883 TEST_F(DictionaryPredictorTest, Regression3042706) {
884   Segments segments;
885   config_->set_use_dictionary_suggest(true);
886 
887   MakeSegmentsForSuggestion("だい", &segments);
888 
889   // history is "きょうと/京都"
890   PrependHistorySegments("きょうと", "京都", &segments);
891 
892   unique_ptr<MockDataAndPredictor> data_and_predictor(
893       CreateDictionaryPredictorWithMockData());
894   const DictionaryPredictor *predictor =
895       data_and_predictor->dictionary_predictor();
896   EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
897   EXPECT_EQ(2, segments.segments_size());  // history + current
898   for (int i = 0; i < segments.segment(1).candidates_size(); ++i) {
899     const Segment::Candidate &candidate = segments.segment(1).candidate(i);
900     EXPECT_FALSE(Util::StartsWith(candidate.content_value, "京都"));
901     EXPECT_TRUE(Util::StartsWith(candidate.content_key, "だい"));
902   }
903 }
904 
TEST_F(DictionaryPredictorTest,GetPredictionTypes)905 TEST_F(DictionaryPredictorTest, GetPredictionTypes) {
906   Segments segments;
907   config_->set_use_dictionary_suggest(true);
908   config_->set_use_realtime_conversion(false);
909 
910   // empty segments
911   {
912     EXPECT_EQ(
913         DictionaryPredictor::NO_PREDICTION,
914         DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
915   }
916 
917   // normal segments
918   {
919     MakeSegmentsForSuggestion("てすとだよ", &segments);
920     EXPECT_EQ(DictionaryPredictor::UNIGRAM,
921               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
922 
923     segments.set_request_type(Segments::PREDICTION);
924     EXPECT_EQ(DictionaryPredictor::UNIGRAM,
925               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
926 
927     segments.set_request_type(Segments::CONVERSION);
928     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION,
929               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
930   }
931 
932   // short key
933   {
934     MakeSegmentsForSuggestion("てす", &segments);
935     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION,
936               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
937 
938     // on prediction mode, return UNIGRAM
939     segments.set_request_type(Segments::PREDICTION);
940     EXPECT_EQ(DictionaryPredictor::UNIGRAM,
941               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
942   }
943 
944   // zipcode-like key
945   {
946     MakeSegmentsForSuggestion("0123", &segments);
947     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION,
948               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
949   }
950 
951   // History is short => UNIGRAM
952   {
953     MakeSegmentsForSuggestion("てすとだよ", &segments);
954     PrependHistorySegments("A", "A", &segments);
955     EXPECT_EQ(DictionaryPredictor::UNIGRAM,
956               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
957   }
958 
959   // both History and current segment are long => UNIGRAM|BIGRAM
960   {
961     MakeSegmentsForSuggestion("てすとだよ", &segments);
962     PrependHistorySegments("てすとだよ", "abc", &segments);
963     EXPECT_EQ(DictionaryPredictor::UNIGRAM | DictionaryPredictor::BIGRAM,
964               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
965   }
966 
967   // Current segment is short => BIGRAM
968   {
969     MakeSegmentsForSuggestion("A", &segments);
970     PrependHistorySegments("てすとだよ", "abc", &segments);
971     EXPECT_EQ(DictionaryPredictor::BIGRAM,
972               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
973   }
974 
975   // Typing correction type shouldn't be appended.
976   {
977     MakeSegmentsForSuggestion("pはよう", &segments);
978     EXPECT_FALSE(DictionaryPredictor::TYPING_CORRECTION &
979                  DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
980   }
981 
982   // Input mode is HALF_ASCII or FULL_ASCII => ENGLISH
983   {
984     config_->set_use_dictionary_suggest(true);
985 
986     MakeSegmentsForSuggestion("hel", &segments);
987 
988     composer_->SetInputMode(transliteration::HALF_ASCII);
989     EXPECT_EQ(DictionaryPredictor::ENGLISH,
990               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
991 
992     composer_->SetInputMode(transliteration::FULL_ASCII);
993     EXPECT_EQ(DictionaryPredictor::ENGLISH,
994               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
995 
996     // When dictionary suggest is turned off, English prediction should be
997     // disabled.
998     config_->set_use_dictionary_suggest(false);
999 
1000     composer_->SetInputMode(transliteration::HALF_ASCII);
1001     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION,
1002               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
1003 
1004     composer_->SetInputMode(transliteration::FULL_ASCII);
1005     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION,
1006               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
1007 
1008     config_->set_use_dictionary_suggest(true);
1009 
1010     segments.set_request_type(Segments::PARTIAL_SUGGESTION);
1011     composer_->SetInputMode(transliteration::HALF_ASCII);
1012     EXPECT_EQ(DictionaryPredictor::ENGLISH | DictionaryPredictor::REALTIME,
1013               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
1014 
1015     composer_->SetInputMode(transliteration::FULL_ASCII);
1016     EXPECT_EQ(DictionaryPredictor::ENGLISH | DictionaryPredictor::REALTIME,
1017               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
1018 
1019     config_->set_use_dictionary_suggest(false);
1020 
1021     composer_->SetInputMode(transliteration::HALF_ASCII);
1022     EXPECT_EQ(DictionaryPredictor::REALTIME,
1023               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
1024 
1025     composer_->SetInputMode(transliteration::FULL_ASCII);
1026     EXPECT_EQ(DictionaryPredictor::REALTIME,
1027               DictionaryPredictor::GetPredictionTypes(*convreq_, segments));
1028   }
1029 
1030   // When romaji table is qwerty mobile => ENGLISH is included depending on the
1031   // language aware input setting.
1032   {
1033     const auto orig_input_mode = composer_->GetInputMode();
1034     const auto orig_table = request_->special_romanji_table();
1035     const auto orig_lang_aware = request_->language_aware_input();
1036     const bool orig_use_dictionary_suggest = config_->use_dictionary_suggest();
1037 
1038     composer_->SetInputMode(transliteration::HIRAGANA);
1039     config_->set_use_dictionary_suggest(true);
1040 
1041     // The case where romaji table is set to qwerty.  ENGLISH is turned on if
1042     // language aware input is enabled.
1043     for (const auto table :
1044          {commands::Request::QWERTY_MOBILE_TO_HIRAGANA,
1045           commands::Request::QWERTY_MOBILE_TO_HALFWIDTHASCII}) {
1046       request_->set_special_romanji_table(table);
1047 
1048       // Language aware input is default: No English prediction.
1049       request_->set_language_aware_input(
1050           commands::Request::DEFAULT_LANGUAGE_AWARE_BEHAVIOR);
1051       auto type = DictionaryPredictor::GetPredictionTypes(*convreq_, segments);
1052       EXPECT_EQ(0, type & DictionaryPredictor::ENGLISH);
1053 
1054       // Language aware input is off: No English prediction.
1055       request_->set_language_aware_input(
1056           commands::Request::NO_LANGUAGE_AWARE_INPUT);
1057       type = DictionaryPredictor::GetPredictionTypes(*convreq_, segments);
1058       EXPECT_EQ(0, type & DictionaryPredictor::ENGLISH);
1059 
1060       // Language aware input is on: English prediction is included.
1061       request_->set_language_aware_input(
1062           commands::Request::LANGUAGE_AWARE_SUGGESTION);
1063       type = DictionaryPredictor::GetPredictionTypes(*convreq_, segments);
1064       EXPECT_EQ(DictionaryPredictor::ENGLISH,
1065                 type & DictionaryPredictor::ENGLISH);
1066     }
1067 
1068     // The case where romaji table is not qwerty.  ENGLISH is turned off
1069     // regardless of language aware input setting.
1070     for (const auto table : {
1071              commands::Request::FLICK_TO_HALFWIDTHASCII,
1072              commands::Request::FLICK_TO_HIRAGANA,
1073              commands::Request::GODAN_TO_HALFWIDTHASCII,
1074              commands::Request::GODAN_TO_HIRAGANA,
1075              commands::Request::NOTOUCH_TO_HALFWIDTHASCII,
1076              commands::Request::NOTOUCH_TO_HIRAGANA,
1077              commands::Request::TOGGLE_FLICK_TO_HALFWIDTHASCII,
1078              commands::Request::TOGGLE_FLICK_TO_HIRAGANA,
1079              commands::Request::TWELVE_KEYS_TO_HALFWIDTHASCII,
1080              commands::Request::TWELVE_KEYS_TO_HIRAGANA,
1081          }) {
1082       request_->set_special_romanji_table(table);
1083 
1084       // Language aware input is default.
1085       request_->set_language_aware_input(
1086           commands::Request::DEFAULT_LANGUAGE_AWARE_BEHAVIOR);
1087       auto type = DictionaryPredictor::GetPredictionTypes(*convreq_, segments);
1088       EXPECT_EQ(0, type & DictionaryPredictor::ENGLISH);
1089 
1090       // Language aware input is off.
1091       request_->set_language_aware_input(
1092           commands::Request::NO_LANGUAGE_AWARE_INPUT);
1093       type = DictionaryPredictor::GetPredictionTypes(*convreq_, segments);
1094       EXPECT_EQ(0, type & DictionaryPredictor::ENGLISH);
1095 
1096       // Language aware input is on.
1097       request_->set_language_aware_input(
1098           commands::Request::LANGUAGE_AWARE_SUGGESTION);
1099       type = DictionaryPredictor::GetPredictionTypes(*convreq_, segments);
1100       EXPECT_EQ(0, type & DictionaryPredictor::ENGLISH);
1101     }
1102 
1103     config_->set_use_dictionary_suggest(orig_use_dictionary_suggest);
1104     request_->set_language_aware_input(orig_lang_aware);
1105     request_->set_special_romanji_table(orig_table);
1106     composer_->SetInputMode(orig_input_mode);
1107   }
1108 }
1109 
// Verifies that TYPING_CORRECTION is reported alongside UNIGRAM when typing
// correction is enabled in the config.
TEST_F(DictionaryPredictorTest, GetPredictionTypesTestWithTypingCorrection) {
  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(false);
  config_->set_use_typing_correction(true);

  Segments segments;
  MakeSegmentsForSuggestion("pはよう", &segments);

  const auto actual_types =
      DictionaryPredictor::GetPredictionTypes(*convreq_, segments);
  EXPECT_EQ(
      DictionaryPredictor::UNIGRAM | DictionaryPredictor::TYPING_CORRECTION,
      actual_types);
}
1121 
TEST_F(DictionaryPredictorTest,GetPredictionTypesTestWithZeroQuerySuggestion)1122 TEST_F(DictionaryPredictorTest, GetPredictionTypesTestWithZeroQuerySuggestion) {
1123   Segments segments;
1124   config_->set_use_dictionary_suggest(true);
1125   config_->set_use_realtime_conversion(false);
1126   request_->set_zero_query_suggestion(true);
1127 
1128   unique_ptr<MockDataAndPredictor> data_and_predictor(
1129       CreateDictionaryPredictorWithMockData());
1130   const DictionaryPredictor *predictor =
1131       data_and_predictor->dictionary_predictor();
1132 
1133   // empty segments
1134   {
1135     EXPECT_EQ(
1136         DictionaryPredictor::NO_PREDICTION,
1137         predictor->GetPredictionTypes(*convreq_, segments));
1138   }
1139 
1140   // normal segments
1141   {
1142     MakeSegmentsForSuggestion("てすとだよ", &segments);
1143     EXPECT_EQ(DictionaryPredictor::UNIGRAM,
1144               predictor->GetPredictionTypes(*convreq_, segments));
1145 
1146     segments.set_request_type(Segments::PREDICTION);
1147     EXPECT_EQ(DictionaryPredictor::UNIGRAM,
1148               predictor->GetPredictionTypes(*convreq_, segments));
1149 
1150     segments.set_request_type(Segments::CONVERSION);
1151     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION,
1152               predictor->GetPredictionTypes(*convreq_, segments));
1153   }
1154 
1155   // short key
1156   {
1157     MakeSegmentsForSuggestion("て", &segments);
1158     EXPECT_EQ(DictionaryPredictor::UNIGRAM,
1159               predictor->GetPredictionTypes(*convreq_, segments));
1160 
1161     // on prediction mode, return UNIGRAM
1162     segments.set_request_type(Segments::PREDICTION);
1163     EXPECT_EQ(DictionaryPredictor::UNIGRAM,
1164               predictor->GetPredictionTypes(*convreq_, segments));
1165   }
1166 
1167   // History is short => UNIGRAM
1168   {
1169     MakeSegmentsForSuggestion("てすとだよ", &segments);
1170     PrependHistorySegments("A", "A", &segments);
1171     EXPECT_EQ(DictionaryPredictor::UNIGRAM | DictionaryPredictor::SUFFIX,
1172               predictor->GetPredictionTypes(*convreq_, segments));
1173   }
1174 
1175   // both History and current segment are long => UNIGRAM|BIGRAM
1176   {
1177     MakeSegmentsForSuggestion("てすとだよ", &segments);
1178     PrependHistorySegments("てすとだよ", "abc", &segments);
1179     EXPECT_EQ(DictionaryPredictor::UNIGRAM | DictionaryPredictor::BIGRAM |
1180                   DictionaryPredictor::SUFFIX,
1181               predictor->GetPredictionTypes(*convreq_, segments));
1182   }
1183 
1184   {
1185     MakeSegmentsForSuggestion("A", &segments);
1186     PrependHistorySegments("てすとだよ", "abc", &segments);
1187     EXPECT_EQ(DictionaryPredictor::BIGRAM | DictionaryPredictor::UNIGRAM |
1188                   DictionaryPredictor::SUFFIX,
1189               predictor->GetPredictionTypes(*convreq_, segments));
1190   }
1191 
1192   {
1193     MakeSegmentsForSuggestion("", &segments);
1194     PrependHistorySegments("て", "abc", &segments);
1195     EXPECT_EQ(DictionaryPredictor::SUFFIX,
1196               predictor->GetPredictionTypes(*convreq_, segments));
1197   }
1198 
1199   {
1200     MakeSegmentsForSuggestion("A", &segments);
1201     PrependHistorySegments("て", "abc", &segments);
1202     EXPECT_EQ(DictionaryPredictor::UNIGRAM | DictionaryPredictor::SUFFIX,
1203               predictor->GetPredictionTypes(*convreq_, segments));
1204   }
1205 
1206   {
1207     MakeSegmentsForSuggestion("", &segments);
1208     PrependHistorySegments("てすとだよ", "abc", &segments);
1209     EXPECT_EQ(DictionaryPredictor::BIGRAM | DictionaryPredictor::SUFFIX,
1210               predictor->GetPredictionTypes(*convreq_, segments));
1211   }
1212 }
1213 
// Verifies that AggregateUnigramPrediction() yields results only when called
// with the UNIGRAM type, that each result is typed UNIGRAM with a key starting
// with the query, and that one conversion segment remains afterwards.
TEST_F(DictionaryPredictorTest, AggregateUnigramPrediction) {
  const char kKey[] = "ぐーぐるあ";

  Segments segments;
  const unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const predictor = mock->dictionary_predictor();

  MakeSegmentsForSuggestion(kKey, &segments);

  std::vector<DictionaryPredictor::Result> results;

  // Types other than UNIGRAM must not produce anything.
  predictor->AggregateUnigramPrediction(
      DictionaryPredictor::BIGRAM, *convreq_, segments, &results);
  EXPECT_TRUE(results.empty());

  predictor->AggregateUnigramPrediction(
      DictionaryPredictor::REALTIME, *convreq_, segments, &results);
  EXPECT_TRUE(results.empty());

  // The UNIGRAM type must produce at least one result.
  predictor->AggregateUnigramPrediction(
      DictionaryPredictor::UNIGRAM, *convreq_, segments, &results);
  EXPECT_FALSE(results.empty());

  for (const auto &result : results) {
    EXPECT_EQ(DictionaryPredictor::UNIGRAM, result.types);
    EXPECT_TRUE(Util::StartsWith(result.key, kKey));
  }

  EXPECT_EQ(1, segments.conversion_segments_size());
}
1249 
// Verifies that AggregateUnigramCandidateForMixedConversion() keeps a user
// dictionary entry even though it looks redundant next to the system entries.
TEST_F(DictionaryPredictorTest, AggregateUnigramCandidateForMixedConversion) {
  const char kHiraganaA[] = "あ";

  DictionaryMock mock_dict;
  // A system dictionary entry "a".
  mock_dict.AddLookupPredictive(kHiraganaA, kHiraganaA, "a", Token::NONE);
  // System dictionary entries "a0", ..., "a9", which are detected as redundant
  // by MaybeRedundant(); see dictionary_predictor.cc.
  for (int digit = 0; digit < 10; ++digit) {
    mock_dict.AddLookupPredictive(kHiraganaA, kHiraganaA,
                                  Util::StringPrintf("a%d", digit),
                                  Token::NONE);
  }
  // A user dictionary entry "aaa".  MaybeRedundant() detects this entry as
  // redundant but it should not be filtered in prediction.
  mock_dict.AddLookupPredictive(kHiraganaA, kHiraganaA, "aaa",
                                Token::USER_DICTIONARY);

  config_->set_use_dictionary_suggest(true);
  config_->set_use_realtime_conversion(false);
  table_->LoadFromFile("system://12keys-hiragana.tsv");
  composer_->SetTable(table_.get());
  InsertInputSequence(kHiraganaA, composer_.get());

  // A prediction request holding a single segment keyed by "あ".
  Segments segments;
  segments.set_request_type(Segments::PREDICTION);
  segments.add_segment()->set_key(kHiraganaA);

  std::vector<DictionaryPredictor::Result> results;
  DictionaryPredictor::AggregateUnigramCandidateForMixedConversion(
      mock_dict, *convreq_, segments, &results);

  // Check if "aaa" is not filtered.
  bool user_entry_found = false;
  for (auto &result : results) {
    if (result.key == kHiraganaA && result.value == "aaa" &&
        result.IsUserDictionaryResult()) {
      user_entry_found = true;
      break;
    }
  }
  EXPECT_TRUE(user_entry_found);
}
1291 
TEST_F(DictionaryPredictorTest,AggregateBigramPrediction)1292 TEST_F(DictionaryPredictorTest, AggregateBigramPrediction) {
1293   unique_ptr<MockDataAndPredictor> data_and_predictor(
1294       CreateDictionaryPredictorWithMockData());
1295   const DictionaryPredictor *predictor =
1296       data_and_predictor->dictionary_predictor();
1297 
1298   {
1299     Segments segments;
1300 
1301     MakeSegmentsForSuggestion("あ", &segments);
1302 
1303     // history is "グーグル"
1304     const char kHistoryKey[] = "ぐーぐる";
1305     const char kHistoryValue[] = "グーグル";
1306 
1307     PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
1308 
1309     std::vector<DictionaryPredictor::Result> results;
1310 
1311     predictor->AggregateBigramPrediction(DictionaryPredictor::UNIGRAM,
1312                                          *convreq_, segments, &results);
1313     EXPECT_TRUE(results.empty());
1314 
1315     predictor->AggregateBigramPrediction(DictionaryPredictor::REALTIME,
1316                                          *convreq_, segments, &results);
1317     EXPECT_TRUE(results.empty());
1318 
1319     predictor->AggregateBigramPrediction(DictionaryPredictor::BIGRAM, *convreq_,
1320                                          segments, &results);
1321     EXPECT_FALSE(results.empty());
1322 
1323     for (size_t i = 0; i < results.size(); ++i) {
1324       // "グーグルアドセンス", "グーグル", "アドセンス"
1325       // are in the dictionary.
1326       if (results[i].value == "グーグルアドセンス") {
1327         EXPECT_EQ(DictionaryPredictor::BIGRAM, results[i].types);
1328       } else {
1329         EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[i].types);
1330       }
1331       EXPECT_TRUE(Util::StartsWith(results[i].key, kHistoryKey));
1332       EXPECT_TRUE(Util::StartsWith(results[i].value, kHistoryValue));
1333       // Not zero query
1334       EXPECT_FALSE(results[i].source_info &
1335                    Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX);
1336     }
1337 
1338     EXPECT_EQ(1, segments.conversion_segments_size());
1339   }
1340 
1341   {
1342     Segments segments;
1343 
1344     MakeSegmentsForSuggestion("あ", &segments);
1345 
1346     const char kHistoryKey[] = "てす";
1347     const char kHistoryValue[] = "テス";
1348 
1349     PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
1350 
1351     std::vector<DictionaryPredictor::Result> results;
1352 
1353     predictor->AggregateBigramPrediction(DictionaryPredictor::BIGRAM, *convreq_,
1354                                          segments, &results);
1355     EXPECT_TRUE(results.empty());
1356   }
1357 }
1358 
// Verifies AggregateBigramPrediction() for an empty (zero) query under a
// mobile request: results are produced only for the BIGRAM type and every
// result extends the history key and value.
TEST_F(DictionaryPredictorTest, AggregateZeroQueryBigramPrediction) {
  const unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const predictor = mock->dictionary_predictor();
  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  {
    // The history is "グーグル".
    const char kHistoryKey[] = "ぐーぐる";
    const char kHistoryValue[] = "グーグル";

    Segments segments;
    // Zero query: the current segment key is empty.
    MakeSegmentsForSuggestion("", &segments);
    PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);

    std::vector<DictionaryPredictor::Result> results;

    // Types other than BIGRAM must not produce anything.
    predictor->AggregateBigramPrediction(DictionaryPredictor::UNIGRAM,
                                         *convreq_, segments, &results);
    EXPECT_TRUE(results.empty());

    predictor->AggregateBigramPrediction(DictionaryPredictor::REALTIME,
                                         *convreq_, segments, &results);
    EXPECT_TRUE(results.empty());

    predictor->AggregateBigramPrediction(DictionaryPredictor::BIGRAM, *convreq_,
                                         segments, &results);
    EXPECT_FALSE(results.empty());

    for (const auto &result : results) {
      EXPECT_TRUE(Util::StartsWith(result.key, kHistoryKey));
      EXPECT_TRUE(Util::StartsWith(result.value, kHistoryValue));
      // Even for a zero query, bigram results must not carry the zero-query
      // *suffix* source flag.
      EXPECT_FALSE(result.source_info &
                   Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX);
    }
  }
}
1401 
TEST_F(DictionaryPredictorTest,GetRealtimeCandidateMaxSize)1402 TEST_F(DictionaryPredictorTest, GetRealtimeCandidateMaxSize) {
1403   unique_ptr<MockDataAndPredictor> data_and_predictor(
1404       CreateDictionaryPredictorWithMockData());
1405   const DictionaryPredictor *predictor =
1406       data_and_predictor->dictionary_predictor();
1407   Segments segments;
1408 
1409   // GetRealtimeCandidateMaxSize has some heuristics so here we test following
1410   // conditions.
1411   // - The result must be equal or less than kMaxSize;
1412   // - If mixed_conversion is the same, the result of SUGGESTION is
1413   //        equal or less than PREDICTION.
1414   // - If mixed_conversion is the same, the result of PARTIAL_SUGGESTION is
1415   //        equal or less than PARTIAL_PREDICTION.
1416   // - Partial version has equal or greater than non-partial version.
1417 
1418   const size_t kMaxSize = 100;
1419 
1420   // non-partial, non-mixed-conversion
1421   segments.set_request_type(Segments::PREDICTION);
1422   const size_t prediction_no_mixed =
1423       predictor->GetRealtimeCandidateMaxSize(segments, false, kMaxSize);
1424   EXPECT_GE(kMaxSize, prediction_no_mixed);
1425 
1426   segments.set_request_type(Segments::SUGGESTION);
1427   const size_t suggestion_no_mixed =
1428       predictor->GetRealtimeCandidateMaxSize(segments, false, kMaxSize);
1429   EXPECT_GE(kMaxSize, suggestion_no_mixed);
1430   EXPECT_LE(suggestion_no_mixed, prediction_no_mixed);
1431 
1432   // non-partial, mixed-conversion
1433   segments.set_request_type(Segments::PREDICTION);
1434   const size_t prediction_mixed =
1435       predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
1436   EXPECT_GE(kMaxSize, prediction_mixed);
1437 
1438   segments.set_request_type(Segments::SUGGESTION);
1439   const size_t suggestion_mixed =
1440       predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
1441   EXPECT_GE(kMaxSize, suggestion_mixed);
1442 
1443   // partial, non-mixed-conversion
1444   segments.set_request_type(Segments::PARTIAL_PREDICTION);
1445   const size_t partial_prediction_no_mixed =
1446       predictor->GetRealtimeCandidateMaxSize(segments, false, kMaxSize);
1447   EXPECT_GE(kMaxSize, partial_prediction_no_mixed);
1448 
1449   segments.set_request_type(Segments::PARTIAL_SUGGESTION);
1450   const size_t partial_suggestion_no_mixed =
1451       predictor->GetRealtimeCandidateMaxSize(segments, false, kMaxSize);
1452   EXPECT_GE(kMaxSize, partial_suggestion_no_mixed);
1453   EXPECT_LE(partial_suggestion_no_mixed, partial_prediction_no_mixed);
1454 
1455   // partial, mixed-conversion
1456   segments.set_request_type(Segments::PARTIAL_PREDICTION);
1457   const size_t partial_prediction_mixed =
1458       predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
1459   EXPECT_GE(kMaxSize, partial_prediction_mixed);
1460 
1461   segments.set_request_type(Segments::PARTIAL_SUGGESTION);
1462   const size_t partial_suggestion_mixed =
1463       predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
1464   EXPECT_GE(kMaxSize, partial_suggestion_mixed);
1465   EXPECT_LE(partial_suggestion_mixed, partial_prediction_mixed);
1466 
1467   EXPECT_GE(partial_prediction_no_mixed, prediction_no_mixed);
1468   EXPECT_GE(partial_prediction_mixed, prediction_mixed);
1469   EXPECT_GE(partial_suggestion_no_mixed, suggestion_no_mixed);
1470   EXPECT_GE(partial_suggestion_mixed, suggestion_mixed);
1471 }
1472 
// Verifies, for mixed conversion, that a long segment key gets strictly fewer
// realtime candidates than a short one, for both SUGGESTION and PREDICTION.
TEST_F(DictionaryPredictorTest, GetRealtimeCandidateMaxSizeForMixed) {
  const unique_ptr<MockDataAndPredictor> mock(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *const predictor = mock->dictionary_predictor();
  Segments segments;
  Segment *segment = segments.add_segment();

  const size_t kMaxSize = 100;

  // Switches |segments| to |request_type| and returns the realtime candidate
  // limit computed for it with mixed conversion enabled.
  const auto mixed_max_size_for =
      [&](decltype(Segments::PREDICTION) request_type) -> size_t {
    segments.set_request_type(request_type);
    return predictor->GetRealtimeCandidateMaxSize(segments, true, kMaxSize);
  };

  // For a short key, try to provide as many results as possible.
  segment->set_key("short");
  const size_t short_suggestion_mixed =
      mixed_max_size_for(Segments::SUGGESTION);
  EXPECT_GE(kMaxSize, short_suggestion_mixed);

  const size_t short_prediction_mixed =
      mixed_max_size_for(Segments::PREDICTION);
  EXPECT_GE(kMaxSize, short_prediction_mixed);

  // For a long key, provide only a few results.
  segment->set_key("long_request_key");
  const size_t long_suggestion_mixed =
      mixed_max_size_for(Segments::SUGGESTION);
  EXPECT_GE(kMaxSize, long_suggestion_mixed);
  EXPECT_GT(short_suggestion_mixed, long_suggestion_mixed);

  const size_t long_prediction_mixed =
      mixed_max_size_for(Segments::PREDICTION);
  EXPECT_GE(kMaxSize, long_prediction_mixed);
  EXPECT_GT(kMaxSize, long_prediction_mixed + long_suggestion_mixed);
  EXPECT_GT(short_prediction_mixed, long_prediction_mixed);
}
1510 
TEST_F(DictionaryPredictorTest,AggregateRealtimeConversion)1511 TEST_F(DictionaryPredictorTest, AggregateRealtimeConversion) {
1512   testing::MockDataManager data_manager;
1513   unique_ptr<const DictionaryInterface> dictionary(new DictionaryMock);
1514   unique_ptr<ConverterMock> converter(new ConverterMock);
1515   unique_ptr<ImmutableConverterInterface> immutable_converter(
1516       new ImmutableConverterMock);
1517   unique_ptr<const DictionaryInterface> suffix_dictionary(
1518       CreateSuffixDictionaryFromDataManager(data_manager));
1519   unique_ptr<const Connector> connector(
1520       Connector::CreateFromDataManager(data_manager));
1521   unique_ptr<const Segmenter> segmenter(
1522       Segmenter::CreateFromDataManager(data_manager));
1523   unique_ptr<const SuggestionFilter> suggestion_filter(
1524       CreateSuggestionFilter(data_manager));
1525   const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData());
1526   unique_ptr<TestableDictionaryPredictor> predictor(
1527       new TestableDictionaryPredictor(data_manager,
1528                                       converter.get(),
1529                                       immutable_converter.get(),
1530                                       dictionary.get(),
1531                                       suffix_dictionary.get(),
1532                                       connector.get(),
1533                                       segmenter.get(),
1534                                       &pos_matcher,
1535                                       suggestion_filter.get()));
1536 
1537   const char kKey[] = "わたしのなまえはなかのです";
1538 
1539   // Set up mock converter
1540   {
1541     // Make segments like:
1542     // "わたしの"    | "なまえは" | "なかのです"
1543     // "Watashino" | "Namaeha" | "Nakanodesu"
1544     Segments segments;
1545 
1546     Segment *segment = segments.add_segment();
1547     segment->set_key("わたしの");
1548     segment->add_candidate()->value = "Watashino";
1549 
1550     segment = segments.add_segment();
1551     segment->set_key("なまえは");
1552     segment->add_candidate()->value = "Namaeha";
1553 
1554     segment = segments.add_segment();
1555     segment->set_key("なかのです");
1556     segment->add_candidate()->value = "Nakanodesu";
1557 
1558     converter->SetStartConversionForRequest(&segments, true);
1559   }
1560 
1561   // A test case with use_actual_converter_for_realtime_conversion being false,
1562   // i.e., realtime conversion result is generated by ImmutableConverterMock.
1563   {
1564     Segments segments;
1565 
1566     MakeSegmentsForSuggestion(kKey, &segments);
1567 
1568     std::vector<TestableDictionaryPredictor::Result> results;
1569     convreq_->set_use_actual_converter_for_realtime_conversion(false);
1570 
1571     predictor->AggregateRealtimeConversion(
1572         TestableDictionaryPredictor::UNIGRAM, *convreq_, &segments, &results);
1573     EXPECT_TRUE(results.empty());
1574 
1575     predictor->AggregateRealtimeConversion(
1576         TestableDictionaryPredictor::BIGRAM, *convreq_, &segments, &results);
1577     EXPECT_TRUE(results.empty());
1578 
1579     predictor->AggregateRealtimeConversion(
1580         TestableDictionaryPredictor::REALTIME, *convreq_, &segments, &results);
1581 
1582     ASSERT_EQ(1, results.size());
1583     EXPECT_EQ(TestableDictionaryPredictor::REALTIME, results[0].types);
1584     EXPECT_EQ(kKey, results[0].key);
1585     EXPECT_EQ(3, results[0].inner_segment_boundary.size());
1586   }
1587 
1588   // A test case with use_actual_converter_for_realtime_conversion being true,
1589   // i.e., realtime conversion result is generated by ConverterMock.
1590   {
1591     Segments segments;
1592 
1593     MakeSegmentsForSuggestion(kKey, &segments);
1594 
1595     std::vector<TestableDictionaryPredictor::Result> results;
1596     convreq_->set_use_actual_converter_for_realtime_conversion(true);
1597 
1598     predictor->AggregateRealtimeConversion(
1599         TestableDictionaryPredictor::UNIGRAM, *convreq_, &segments, &results);
1600     EXPECT_TRUE(results.empty());
1601 
1602     predictor->AggregateRealtimeConversion(
1603         TestableDictionaryPredictor::BIGRAM, *convreq_, &segments, &results);
1604     EXPECT_TRUE(results.empty());
1605 
1606     predictor->AggregateRealtimeConversion(
1607         TestableDictionaryPredictor::REALTIME, *convreq_, &segments, &results);
1608 
1609     // When |request.use_actual_converter_for_realtime_conversion| is true, the
1610     // extra label REALTIME_TOP is expected to be added.
1611     ASSERT_EQ(2, results.size());
1612     bool realtime_top_found = false;
1613     for (size_t i = 0; i < results.size(); ++i) {
1614       EXPECT_EQ(TestableDictionaryPredictor::REALTIME |
1615                 TestableDictionaryPredictor::REALTIME_TOP, results[i].types);
1616       if (results[i].key == kKey &&
1617           results[i].value == "WatashinoNamaehaNakanodesu" &&
1618           results[i].inner_segment_boundary.size() == 3) {
1619         realtime_top_found = true;
1620         break;
1621       }
1622     }
1623     EXPECT_TRUE(realtime_top_found);
1624   }
1625 }
1626 
1627 namespace {
1628 
1629 struct SimpleSuffixToken {
1630   const char *key;
1631   const char *value;
1632 };
1633 
1634 const SimpleSuffixToken kSuffixTokens[] = {
1635     {"いか", "以下"}
1636 };
1637 
1638 class TestSuffixDictionary : public DictionaryInterface {
1639  public:
1640   TestSuffixDictionary() = default;
1641   ~TestSuffixDictionary() override = default;
1642 
HasKey(StringPiece value) const1643   bool HasKey(StringPiece value) const override { return false; }
1644 
HasValue(StringPiece value) const1645   bool HasValue(StringPiece value) const override { return false; }
1646 
LookupPredictive(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const1647   void LookupPredictive(StringPiece key,
1648                         const ConversionRequest &conversion_request,
1649                         Callback *callback) const override {
1650     Token token;
1651     for (size_t i = 0; i < arraysize(kSuffixTokens); ++i) {
1652       const SimpleSuffixToken &suffix_token = kSuffixTokens[i];
1653       if (!key.empty() && !Util::StartsWith(suffix_token.key, key)) {
1654         continue;
1655       }
1656       switch (callback->OnKey(suffix_token.key)) {
1657         case Callback::TRAVERSE_DONE:
1658           return;
1659         case Callback::TRAVERSE_NEXT_KEY:
1660           continue;
1661         case Callback::TRAVERSE_CULL:
1662           LOG(FATAL) << "Culling is not supported.";
1663           break;
1664         default:
1665           break;
1666       }
1667       token.key = suffix_token.key;
1668       token.value = suffix_token.value;
1669       token.cost = 1000;
1670       token.lid = token.rid = 0;
1671       if (callback->OnToken(token.key, token.key, token) ==
1672           Callback::TRAVERSE_DONE) {
1673         break;
1674       }
1675     }
1676   }
1677 
LookupPrefix(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const1678   void LookupPrefix(StringPiece key,
1679                     const ConversionRequest &conversion_request,
1680                     Callback *callback) const override {}
1681 
LookupExact(StringPiece key,const ConversionRequest & conversion_request,Callback * callback) const1682   void LookupExact(StringPiece key, const ConversionRequest &conversion_request,
1683                    Callback *callback) const override {}
1684 
LookupReverse(StringPiece str,const ConversionRequest & conversion_request,Callback * callback) const1685   void LookupReverse(StringPiece str,
1686                      const ConversionRequest &conversion_request,
1687                      Callback *callback) const override {}
1688 };
1689 
1690 }  // namespace
1691 
// The cutoff threshold for SUGGESTION must not exceed the one for PREDICTION.
TEST_F(DictionaryPredictorTest, GetCandidateCutoffThreshold) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;

  segments.set_request_type(Segments::PREDICTION);
  const size_t prediction_threshold =
      predictor->GetCandidateCutoffThreshold(segments);

  segments.set_request_type(Segments::SUGGESTION);
  const size_t suggestion_threshold =
      predictor->GetCandidateCutoffThreshold(segments);

  EXPECT_LE(suggestion_threshold, prediction_threshold);
}
1708 
TEST_F(DictionaryPredictorTest,AggregateSuffixPrediction)1709 TEST_F(DictionaryPredictorTest, AggregateSuffixPrediction) {
1710   unique_ptr<MockDataAndPredictor> data_and_predictor(new MockDataAndPredictor);
1711   data_and_predictor->Init(NULL, new TestSuffixDictionary());
1712 
1713   const DictionaryPredictor *predictor =
1714       data_and_predictor->dictionary_predictor();
1715 
1716   Segments segments;
1717 
1718   MakeSegmentsForSuggestion("あ", &segments);
1719 
1720   // history is "グーグル"
1721   const char kHistoryKey[] = "ぐーぐる";
1722   const char kHistoryValue[] = "グーグル";
1723 
1724   PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
1725 
1726   std::vector<DictionaryPredictor::Result> results;
1727 
1728   // Since SuffixDictionary only returns when key is "い".
1729   // result should be empty.
1730   predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
1731                                        segments, &results);
1732   EXPECT_TRUE(results.empty());
1733 
1734   results.clear();
1735   segments.mutable_conversion_segment(0)->set_key("");
1736   predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
1737                                        segments, &results);
1738   EXPECT_FALSE(results.empty());
1739 
1740   results.clear();
1741   predictor->AggregateSuffixPrediction(DictionaryPredictor::UNIGRAM, *convreq_,
1742                                        segments, &results);
1743   EXPECT_TRUE(results.empty());
1744 
1745   predictor->AggregateSuffixPrediction(DictionaryPredictor::REALTIME, *convreq_,
1746                                        segments, &results);
1747   EXPECT_TRUE(results.empty());
1748 
1749   predictor->AggregateSuffixPrediction(DictionaryPredictor::BIGRAM, *convreq_,
1750                                        segments, &results);
1751   EXPECT_TRUE(results.empty());
1752 
1753   // Candidates generated by AggregateSuffixPrediction should have SUFFIX type.
1754   results.clear();
1755   segments.mutable_conversion_segment(0)->set_key("い");
1756   predictor->AggregateSuffixPrediction(
1757       DictionaryPredictor::SUFFIX | DictionaryPredictor::BIGRAM, *convreq_,
1758       segments, &results);
1759   EXPECT_FALSE(results.empty());
1760   for (size_t i = 0; i < results.size(); ++i) {
1761     EXPECT_EQ(DictionaryPredictor::SUFFIX, results[i].types);
1762     // Not zero query
1763     EXPECT_FALSE(Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX &
1764                  results[i].source_info);
1765   }
1766 }
1767 
// With an empty key (zero query) and a mobile request, every suffix result is
// expected to carry the DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX source flag.
TEST_F(DictionaryPredictorTest, AggregateZeroQuerySuffixPrediction) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(new MockDataAndPredictor);
  data_and_predictor->Init(nullptr, new TestSuffixDictionary());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  // Zero query
  Segments segments;
  MakeSegmentsForSuggestion("", &segments);

  // history is "グーグル"
  PrependHistorySegments("ぐーぐる", "グーグル", &segments);

  // Candidates generated by AggregateSuffixPrediction should have SUFFIX type.
  std::vector<DictionaryPredictor::Result> results;
  predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
                                       segments, &results);
  EXPECT_FALSE(results.empty());
  for (const auto &result : results) {
    EXPECT_EQ(DictionaryPredictor::SUFFIX, result.types);
    // Zero query
    EXPECT_TRUE(Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX &
                result.source_info);
  }
}
1800 
TEST_F(DictionaryPredictorTest,AggregateEnglishPrediction)1801 TEST_F(DictionaryPredictorTest, AggregateEnglishPrediction) {
1802   // Input mode: HALF_ASCII, Key: lower case
1803   //   => Prediction should be in half-width lower case.
1804   {
1805     const char *kExpectedValues[] = {
1806         "converge",
1807         "converged",
1808         "convergent",
1809     };
1810     AggregateEnglishPredictionTestHelper(transliteration::HALF_ASCII, "conv",
1811                                          "conv", kExpectedValues,
1812                                          arraysize(kExpectedValues));
1813   }
1814   // Input mode: HALF_ASCII, Key: upper case
1815   //   => Prediction should be in half-width upper case.
1816   {
1817     const char *kExpectedValues[] = {
1818         "CONVERGE",
1819         "CONVERGED",
1820         "CONVERGENT",
1821     };
1822     AggregateEnglishPredictionTestHelper(transliteration::HALF_ASCII, "CONV",
1823                                          "CONV", kExpectedValues,
1824                                          arraysize(kExpectedValues));
1825   }
1826   // Input mode: HALF_ASCII, Key: capitalized
1827   //   => Prediction should be half-width and capitalized
1828   {
1829     const char *kExpectedValues[] = {
1830         "Converge",
1831         "Converged",
1832         "Convergent",
1833     };
1834     AggregateEnglishPredictionTestHelper(transliteration::HALF_ASCII, "Conv",
1835                                          "Conv", kExpectedValues,
1836                                          arraysize(kExpectedValues));
1837   }
1838   // Input mode: FULL_ASCII, Key: lower case
1839   //   => Prediction should be in full-wdith lower case.
1840   {
1841     const char *kExpectedValues[] = {
1842         "converge",
1843         "converged",
1844         "convergent",
1845     };
1846     AggregateEnglishPredictionTestHelper(transliteration::FULL_ASCII, "conv",
1847                                          "conv",
1848                                          kExpectedValues,
1849                                          arraysize(kExpectedValues));
1850   }
1851   // Input mode: FULL_ASCII, Key: upper case
1852   //   => Prediction should be in full-width upper case.
1853   {
1854     const char *kExpectedValues[] = {
1855         "CONVERGE",
1856         "CONVERGED",
1857         "CONVERGENT",
1858     };
1859     AggregateEnglishPredictionTestHelper(transliteration::FULL_ASCII, "CONV",
1860                                          "CONV",
1861                                          kExpectedValues,
1862                                          arraysize(kExpectedValues));
1863   }
1864   // Input mode: FULL_ASCII, Key: capitalized
1865   //   => Prediction should be full-width and capitalized
1866   {
1867     const char *kExpectedValues[] = {
1868         "Converge",
1869         "Converged",
1870         "Convergent",
1871     };
1872     AggregateEnglishPredictionTestHelper(transliteration::FULL_ASCII, "Conv",
1873                                          "Conv",
1874                                          kExpectedValues,
1875                                          arraysize(kExpectedValues));
1876   }
1877 }
1878 
// With typing correction enabled, the mistyped key "gu-huru" together with
// the corrected key codes "gu-guru" is passed to the shared helper along with
// the values expected to be predicted.
TEST_F(DictionaryPredictorTest, AggregateTypeCorrectingPrediction) {
  config_->set_use_typing_correction(true);

  const char kTypedKey[] = "gu-huru";
  const uint32 kCorrectedCodes[] = {'g', 'u', '-', 'g', 'u', 'r', 'u'};
  const char *kPredictedValues[] = {
      "グーグルアドセンス",
      "グーグルアドワーズ",
  };

  AggregateTypeCorrectingTestHelper(kTypedKey, kCorrectedCodes,
                                    kPredictedValues,
                                    arraysize(kPredictedValues));
}
1892 
TEST_F(DictionaryPredictorTest,ZeroQuerySuggestionAfterNumbers)1893 TEST_F(DictionaryPredictorTest, ZeroQuerySuggestionAfterNumbers) {
1894   unique_ptr<MockDataAndPredictor> data_and_predictor(
1895       CreateDictionaryPredictorWithMockData());
1896   const DictionaryPredictor *predictor =
1897       data_and_predictor->dictionary_predictor();
1898   const POSMatcher &pos_matcher = data_and_predictor->pos_matcher();
1899   Segments segments;
1900 
1901   {
1902     MakeSegmentsForSuggestion("", &segments);
1903 
1904     const char kHistoryKey[] = "12";
1905     const char kHistoryValue[] = "12";
1906     const char kExpectedValue[] = "月";
1907     PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
1908     std::vector<DictionaryPredictor::Result> results;
1909     predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
1910                                          segments, &results);
1911     EXPECT_FALSE(results.empty());
1912 
1913     std::vector<DictionaryPredictor::Result>::const_iterator target =
1914         results.end();
1915     for (std::vector<DictionaryPredictor::Result>::const_iterator it =
1916              results.begin();
1917          it != results.end(); ++it) {
1918       EXPECT_EQ(it->types, DictionaryPredictor::SUFFIX);
1919 
1920       EXPECT_TRUE(
1921           Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NUMBER_SUFFIX &
1922           it->source_info);
1923 
1924       if (it->value == kExpectedValue) {
1925         target = it;
1926         break;
1927       }
1928     }
1929     EXPECT_NE(results.end(), target);
1930     EXPECT_EQ(target->value, kExpectedValue);
1931     EXPECT_EQ(target->lid, pos_matcher.GetCounterSuffixWordId());
1932     EXPECT_EQ(target->rid, pos_matcher.GetCounterSuffixWordId());
1933 
1934     // Make sure number suffixes are not suggested when there is a key
1935     results.clear();
1936     MakeSegmentsForSuggestion("あ", &segments);
1937     PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
1938     predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
1939                                          segments, &results);
1940     target = results.end();
1941     for (std::vector<DictionaryPredictor::Result>::const_iterator it =
1942              results.begin();
1943          it != results.end(); ++it) {
1944       EXPECT_EQ(it->types, DictionaryPredictor::SUFFIX);
1945       if (it->value == kExpectedValue) {
1946         target = it;
1947         break;
1948       }
1949     }
1950     EXPECT_EQ(results.end(), target);
1951   }
1952 
1953   {
1954     MakeSegmentsForSuggestion("", &segments);
1955 
1956     const char kHistoryKey[] = "66050713";  // A random number
1957     const char kHistoryValue[] = "66050713";
1958     const char kExpectedValue[] = "個";
1959     PrependHistorySegments(kHistoryKey, kHistoryValue, &segments);
1960     std::vector<DictionaryPredictor::Result> results;
1961     predictor->AggregateSuffixPrediction(DictionaryPredictor::SUFFIX, *convreq_,
1962                                          segments, &results);
1963     EXPECT_FALSE(results.empty());
1964 
1965     bool found = false;
1966     for (std::vector<DictionaryPredictor::Result>::const_iterator it =
1967              results.begin();
1968          it != results.end(); ++it) {
1969       EXPECT_EQ(it->types, DictionaryPredictor::SUFFIX);
1970       if (it->value == kExpectedValue) {
1971         EXPECT_TRUE(
1972             Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NUMBER_SUFFIX &
1973             it->source_info);
1974         found = true;
1975         break;
1976       }
1977     }
1978     EXPECT_TRUE(found);
1979   }
1980 }
1981 
// Table-driven check of which history values trigger number zero-query
// suffixes: Arabic-digit values are expected to trigger them, kanji and
// Roman-numeral values are not.
//
// Each Arabic-digit case appears in a half-width and a full-width form.  With
// identical half-width literals in both rows the second row of each pair would
// be an exact duplicate and add no coverage.
// NOTE(review): the full-width rows are restored from the pairing and the
// comment alignment -- confirm against the upstream test data.
TEST_F(DictionaryPredictorTest, TriggerNumberZeroQuerySuggestion) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();
  const POSMatcher &pos_matcher = data_and_predictor->pos_matcher();

  const struct TestCase {
    const char *history_key;
    const char *history_value;
    const char *find_suffix_value;
    bool expected_result;
  } kTestCases[] = {
      {"12", "12", "月", true},
      {"12", "１２", "月", true},
      {"12", "壱拾弐", "月", false},
      {"12", "十二", "月", false},
      {"12", "一二", "月", false},
      {"12", "Ⅻ", "月", false},
      {"あか", "12", "月", true},    // T13N
      {"あか", "１２", "月", true},  // T13N
      {"じゅう", "10", "時", true},
      {"じゅう", "１０", "時", true},
      {"じゅう", "十", "時", false},
      {"じゅう", "拾", "時", false},
  };

  for (const TestCase &test_case : kTestCases) {
    Segments segments;
    MakeSegmentsForSuggestion("", &segments);

    PrependHistorySegments(
        test_case.history_key, test_case.history_value, &segments);
    std::vector<DictionaryPredictor::Result> results;
    predictor->AggregateSuffixPrediction(
        DictionaryPredictor::SUFFIX,
        *convreq_, segments, &results);
    EXPECT_FALSE(results.empty());

    // Look for the expected counter suffix among the results.
    bool found = false;
    for (const auto &result : results) {
      EXPECT_EQ(result.types, DictionaryPredictor::SUFFIX);
      if (result.value == test_case.find_suffix_value &&
          result.lid == pos_matcher.GetCounterSuffixWordId()) {
        EXPECT_TRUE(
            Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NUMBER_SUFFIX &
            result.source_info);
        found = true;
        break;
      }
    }
    EXPECT_EQ(test_case.expected_result, found) << test_case.history_value;
  }
}
2039 
// Table-driven check of symbol histories: for each case, looks for
// |find_value| with lid 0 among the zero-query suffix results and compares
// the outcome with |expected_result|.
TEST_F(DictionaryPredictorTest, TriggerZeroQuerySuggestion) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  const struct TestCase {
    const char *history_key;
    const char *history_value;
    const char *find_value;
    bool expected_result;
  } kTestCases[] = {
      {"@", "@", "gmail.com", true},
      {"!", "!", "?", false},
  };

  for (const TestCase &test_case : kTestCases) {
    Segments segments;
    MakeSegmentsForSuggestion("", &segments);
    PrependHistorySegments(
        test_case.history_key, test_case.history_value, &segments);

    std::vector<DictionaryPredictor::Result> results;
    predictor->AggregateSuffixPrediction(
        DictionaryPredictor::SUFFIX, *convreq_, segments, &results);
    EXPECT_FALSE(results.empty());

    bool found = false;
    for (const auto &result : results) {
      EXPECT_EQ(result.types, DictionaryPredictor::SUFFIX);
      const bool is_match =
          result.value == test_case.find_value && result.lid == 0 /* EOS */;
      if (is_match) {
        found = true;
        break;
      }
    }
    EXPECT_EQ(test_case.expected_result, found) << test_case.history_value;
  }
}
2083 
// GetHistoryKeyAndValue() must fail while there is no history segment and
// return the prepended key/value pair once one exists.
TEST_F(DictionaryPredictorTest, GetHistoryKeyAndValue) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  MakeSegmentsForSuggestion("test", &segments);

  string history_key;
  string history_value;

  // No history segment yet.
  EXPECT_FALSE(predictor->GetHistoryKeyAndValue(segments, &history_key,
                                                &history_value));

  PrependHistorySegments("key", "value", &segments);
  EXPECT_TRUE(predictor->GetHistoryKeyAndValue(segments, &history_key,
                                               &history_value));
  EXPECT_EQ("key", history_key);
  EXPECT_EQ("value", history_value);
}
2101 
// IsZipCodeRequest() is expected to accept non-empty keys made of half-width
// digits and '-' and to reject everything else.
TEST_F(DictionaryPredictorTest, IsZipCodeRequest) {
  EXPECT_FALSE(DictionaryPredictor::IsZipCodeRequest(""));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("000"));
  EXPECT_FALSE(DictionaryPredictor::IsZipCodeRequest("ABC"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("---"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("0124-"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("0124-0"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("012-0"));
  EXPECT_TRUE(DictionaryPredictor::IsZipCodeRequest("012-3456"));
  // Full-width digits must be rejected.  This case has to differ from the
  // half-width "012-0" above, which is expected to be accepted; asserting both
  // TRUE and FALSE for the same literal can never pass.
  // NOTE(review): full-width form assumed to be the intended input -- confirm.
  EXPECT_FALSE(DictionaryPredictor::IsZipCodeRequest("０１２-０"));
}
2114 
// Checks IsAggressiveSuggestion() for a handful of argument combinations.
// Argument order in every call below:
//   (query_len, key_len, cost, is_suggestion, total_candidates_size)
TEST_F(DictionaryPredictorTest, IsAggressiveSuggestion) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  // "ただしい",
  // "ただしいけめんにかぎる",
  EXPECT_TRUE(predictor->IsAggressiveSuggestion(4, 11, 6000, true, 20));

  // cost <= 4000
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(4, 11, 4000, true, 20));

  // not suggestion
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(4, 11, 4000, false, 20));

  // total_candidates_size is small
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(4, 11, 4000, true, 5));

  // query_length = 5
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(5, 11, 6000, true, 20));

  // "それでも",
  // "それでもぼくはやっていない",
  EXPECT_TRUE(predictor->IsAggressiveSuggestion(4, 13, 6000, true, 20));

  // cost <= 4000
  EXPECT_FALSE(predictor->IsAggressiveSuggestion(4, 13, 4000, true, 20));
}
2179 
// Realtime conversion of a key starting with alphabets ("PCてすと"), with
// use_actual_converter_for_realtime_conversion disabled: exactly one REALTIME
// result with the value "PCテスト" is expected.
TEST_F(DictionaryPredictorTest, RealtimeConversionStartingWithAlphabets) {
  // turn on real-time conversion
  config_->set_use_dictionary_suggest(false);
  config_->set_use_realtime_conversion(true);

  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  const char kKey[] = "PCてすと";
  const char kRealtimeTopValue[] = "Realtime top result";
  const char kExpectedValue[] = "PCテスト";

  // Set up mock converter for realtime top result.
  {
    Segments mock_segments;
    Segment *mock_segment = mock_segments.add_segment();
    mock_segment->set_key(kKey);
    mock_segment->add_candidate()->value = kRealtimeTopValue;
    data_and_predictor->mutable_converter_mock()
        ->SetStartConversionForRequest(&mock_segments, true);
  }

  Segments segments;
  MakeSegmentsForSuggestion(kKey, &segments);

  std::vector<DictionaryPredictor::Result> results;
  convreq_->set_use_actual_converter_for_realtime_conversion(false);
  predictor->AggregateRealtimeConversion(
      DictionaryPredictor::REALTIME, *convreq_, &segments, &results);
  ASSERT_EQ(1, results.size());

  const DictionaryPredictor::Result &only_result = results[0];
  EXPECT_EQ(DictionaryPredictor::REALTIME, only_result.types);
  EXPECT_EQ(kExpectedValue, only_result.value);
  EXPECT_EQ(1, segments.conversion_segments_size());
}
2221 
// Checks that the SPELLING_CORRECTION attribute found on the unigram result
// for "かぷりちょうざ" is also set on the realtime conversion result for the
// longer key "かぷりちょうざで".
TEST_F(DictionaryPredictorTest, RealtimeConversionWithSpellingCorrection) {
  Segments segments;
  // turn on real-time conversion
  config_->set_use_dictionary_suggest(false);
  config_->set_use_realtime_conversion(true);

  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  const char kCapriHiragana[] = "かぷりちょうざ";

  // Set up mock converter for realtime top result.
  {
    // Named differently from the outer |segments| to avoid shadowing it.
    Segments mock_segments;
    Segment *segment = mock_segments.add_segment();
    segment->set_key(kCapriHiragana);
    Segment::Candidate *candidate = segment->add_candidate();
    candidate->value = "Dummy";
    ConverterMock *converter = data_and_predictor->mutable_converter_mock();
    converter->SetStartConversionForRequest(&mock_segments, true);
  }

  MakeSegmentsForSuggestion(kCapriHiragana, &segments);

  std::vector<DictionaryPredictor::Result> results;

  convreq_->set_use_actual_converter_for_realtime_conversion(false);
  predictor->AggregateUnigramPrediction(
      DictionaryPredictor::UNIGRAM,
      *convreq_, segments, &results);
  ASSERT_FALSE(results.empty());
  EXPECT_NE(0, (results[0].candidate_attributes &
                Segment::Candidate::SPELLING_CORRECTION));

  results.clear();

  const char kKeyWithDe[] = "かぷりちょうざで";
  const char kExpectedSuggestionValueWithDe[] = "カプリチョーザで";

  MakeSegmentsForSuggestion(kKeyWithDe, &segments);
  predictor->AggregateRealtimeConversion(
      DictionaryPredictor::REALTIME, *convreq_, &segments, &results);
  // Fatal assertion: results[0] is read below, which would be an
  // out-of-bounds access if no result was produced.
  ASSERT_EQ(1, results.size());

  EXPECT_EQ(results[0].types, DictionaryPredictor::REALTIME);
  EXPECT_NE(0, (results[0].candidate_attributes &
                Segment::Candidate::SPELLING_CORRECTION));
  EXPECT_EQ(kExpectedSuggestionValueWithDe, results[0].value);
  EXPECT_EQ(1, segments.conversion_segments_size());
}
2274 
// Pins the positions returned by GetMissSpelledPosition() for a handful of
// (hiragana key, value) pairs.
TEST_F(DictionaryPredictorTest, GetMissSpelledPosition) {
  unique_ptr<MockDataAndPredictor> fixture(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor &predictor = *fixture->dictionary_predictor();

  // Empty key and value.
  EXPECT_EQ(0, predictor.GetMissSpelledPosition("", ""));

  // Katakana values whose reading differs from the key somewhere in the
  // middle or at the end.
  EXPECT_EQ(3,
            predictor.GetMissSpelledPosition("れみおめろん", "レミオロメン"));
  EXPECT_EQ(5,
            predictor.GetMissSpelledPosition("とーとばっく", "トートバッグ"));
  EXPECT_EQ(
      4, predictor.GetMissSpelledPosition("おーすとりらあ", "オーストラリア"));

  // Kanji value: the expected position equals the number of characters in
  // the key.
  EXPECT_EQ(7, predictor.GetMissSpelledPosition("じきそうしょう", "時期尚早"));
}
2290 
TEST_F(DictionaryPredictorTest,RemoveMissSpelledCandidates)2291 TEST_F(DictionaryPredictorTest, RemoveMissSpelledCandidates) {
2292   unique_ptr<MockDataAndPredictor> data_and_predictor(
2293       CreateDictionaryPredictorWithMockData());
2294   const DictionaryPredictor *predictor =
2295       data_and_predictor->dictionary_predictor();
2296 
2297   {
2298     std::vector<DictionaryPredictor::Result> results;
2299     DictionaryPredictor::Result *result;
2300 
2301     results.push_back(DictionaryPredictor::Result());
2302     result = &results.back();
2303     result->key = "ばっく";
2304     result->value = "バッグ";
2305     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2306                                        Token::SPELLING_CORRECTION);
2307 
2308     results.push_back(DictionaryPredictor::Result());
2309     result = &results.back();
2310     result->key = "ばっぐ";
2311     result->value = "バッグ";
2312     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2313                                        Token::NONE);
2314 
2315     results.push_back(DictionaryPredictor::Result());
2316     result = &results.back();
2317     result->key = "ばっく";
2318     result->value = "バック";
2319     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2320                                        Token::NONE);
2321 
2322     predictor->RemoveMissSpelledCandidates(1, &results);
2323     ASSERT_EQ(3, results.size());
2324 
2325     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[0].types);
2326     EXPECT_EQ(DictionaryPredictor::UNIGRAM, results[1].types);
2327     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[2].types);
2328   }
2329 
2330   {
2331     std::vector<DictionaryPredictor::Result> results;
2332     DictionaryPredictor::Result *result;
2333 
2334     results.push_back(DictionaryPredictor::Result());
2335     result = &results.back();
2336     result->key = "ばっく";
2337     result->value = "バッグ";
2338     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2339                                        Token::SPELLING_CORRECTION);
2340 
2341     results.push_back(DictionaryPredictor::Result());
2342     result = &results.back();
2343     result->key = "てすと";
2344     result->value = "テスト";
2345     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2346                                        Token::NONE);
2347 
2348     predictor->RemoveMissSpelledCandidates(1, &results);
2349     CHECK_EQ(2, results.size());
2350 
2351     EXPECT_EQ(DictionaryPredictor::UNIGRAM, results[0].types);
2352     EXPECT_EQ(DictionaryPredictor::UNIGRAM, results[1].types);
2353   }
2354 
2355   {
2356     std::vector<DictionaryPredictor::Result> results;
2357     DictionaryPredictor::Result *result;
2358 
2359     results.push_back(DictionaryPredictor::Result());
2360     result = &results.back();
2361     result->key = "ばっく";
2362     result->value = "バッグ";
2363     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2364                                        Token::SPELLING_CORRECTION);
2365 
2366     results.push_back(DictionaryPredictor::Result());
2367     result = &results.back();
2368     result->key = "ばっく";
2369     result->value = "バック";
2370     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2371                                        Token::NONE);
2372 
2373     predictor->RemoveMissSpelledCandidates(1, &results);
2374     CHECK_EQ(2, results.size());
2375 
2376     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[0].types);
2377     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[1].types);
2378   }
2379 
2380   {
2381     std::vector<DictionaryPredictor::Result> results;
2382     DictionaryPredictor::Result *result;
2383 
2384     results.push_back(DictionaryPredictor::Result());
2385     result = &results.back();
2386     result->key = "ばっく";
2387     result->value = "バッグ";
2388     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2389                                        Token::SPELLING_CORRECTION);
2390 
2391     results.push_back(DictionaryPredictor::Result());
2392     result = &results.back();
2393     result->key = "ばっく";
2394     result->value = "バック";
2395     result->SetTypesAndTokenAttributes(DictionaryPredictor::UNIGRAM,
2396                                        Token::NONE);
2397 
2398     predictor->RemoveMissSpelledCandidates(3, &results);
2399     CHECK_EQ(2, results.size());
2400 
2401     EXPECT_EQ(DictionaryPredictor::UNIGRAM, results[0].types);
2402     EXPECT_EQ(DictionaryPredictor::NO_PREDICTION, results[1].types);
2403   }
2404 }
2405 
// Runs the unigram expansion helper with the expansion flag turned on.
TEST_F(DictionaryPredictorTest, UseExpansionForUnigramTest) {
  const bool kUseExpansion = true;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForUnigramTestHelper(kUseExpansion);
}
2410 
// Runs the unigram expansion helper with the expansion flag turned off.
TEST_F(DictionaryPredictorTest, UnuseExpansionForUnigramTest) {
  const bool kUseExpansion = false;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForUnigramTestHelper(kUseExpansion);
}
2415 
// Runs the bigram expansion helper with the expansion flag turned on.
TEST_F(DictionaryPredictorTest, UseExpansionForBigramTest) {
  const bool kUseExpansion = true;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForBigramTestHelper(kUseExpansion);
}
2420 
// Runs the bigram expansion helper with the expansion flag turned off.
TEST_F(DictionaryPredictorTest, UnuseExpansionForBigramTest) {
  const bool kUseExpansion = false;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForBigramTestHelper(kUseExpansion);
}
2425 
// Runs the suffix expansion helper with the expansion flag turned on.
TEST_F(DictionaryPredictorTest, UseExpansionForSuffixTest) {
  const bool kUseExpansion = true;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForSuffixTestHelper(kUseExpansion);
}
2430 
// Runs the suffix expansion helper with the expansion flag turned off.
TEST_F(DictionaryPredictorTest, UnuseExpansionForSuffixTest) {
  const bool kUseExpansion = false;
  FLAGS_enable_expansion_for_dictionary_predictor = kUseExpansion;
  ExpansionForSuffixTestHelper(kUseExpansion);
}
2435 
TEST_F(DictionaryPredictorTest,ExpansionPenaltyForRomanTest)2436 TEST_F(DictionaryPredictorTest, ExpansionPenaltyForRomanTest) {
2437   FLAGS_enable_expansion_for_dictionary_predictor = true;
2438   config_->set_use_dictionary_suggest(true);
2439   config_->set_use_realtime_conversion(false);
2440 
2441   table_->LoadFromFile("system://romanji-hiragana.tsv");
2442   composer_->SetTable(table_.get());
2443   unique_ptr<MockDataAndPredictor> data_and_predictor(
2444       CreateDictionaryPredictorWithMockData());
2445   const TestableDictionaryPredictor *predictor =
2446       data_and_predictor->dictionary_predictor();
2447 
2448   Segments segments;
2449   segments.set_request_type(Segments::PREDICTION);
2450   InsertInputSequence("ak", composer_.get());
2451   Segment *segment = segments.add_segment();
2452   CHECK(segment);
2453   {
2454     string query;
2455     composer_->GetQueryForPrediction(&query);
2456     segment->set_key(query);
2457     EXPECT_EQ("あ", query);
2458   }
2459   {
2460     string base;
2461     std::set<string> expanded;
2462     composer_->GetQueriesForPrediction(&base, &expanded);
2463     EXPECT_EQ("あ", base);
2464     EXPECT_GT(expanded.size(), 5);
2465   }
2466 
2467   std::vector<TestableDictionaryPredictor::Result> results;
2468   TestableDictionaryPredictor::Result *result;
2469 
2470   results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
2471   result = &results.back();
2472   result->key = "あか";
2473   result->value = "赤";
2474   result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
2475                                      Token::NONE);
2476 
2477   results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
2478   result = &results.back();
2479   result->key = "あき";
2480   result->value = "秋";
2481   result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
2482                                      Token::NONE);
2483 
2484   results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
2485   result = &results.back();
2486   result->key = "あかぎ";
2487   result->value = "アカギ";
2488   result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
2489                                      Token::NONE);
2490 
2491   EXPECT_EQ(3, results.size());
2492   EXPECT_EQ(0, results[0].cost);
2493   EXPECT_EQ(0, results[1].cost);
2494   EXPECT_EQ(0, results[2].cost);
2495 
2496   predictor->ApplyPenaltyForKeyExpansion(segments, &results);
2497 
2498   // no penalties
2499   EXPECT_EQ(0, results[0].cost);
2500   EXPECT_EQ(0, results[1].cost);
2501   EXPECT_EQ(0, results[2].cost);
2502 }
2503 
TEST_F(DictionaryPredictorTest,ExpansionPenaltyForKanaTest)2504 TEST_F(DictionaryPredictorTest, ExpansionPenaltyForKanaTest) {
2505   FLAGS_enable_expansion_for_dictionary_predictor = true;
2506   config_->set_use_dictionary_suggest(true);
2507   config_->set_use_realtime_conversion(false);
2508 
2509   table_->LoadFromFile("system://kana.tsv");
2510   unique_ptr<MockDataAndPredictor> data_and_predictor(
2511       CreateDictionaryPredictorWithMockData());
2512   const TestableDictionaryPredictor *predictor =
2513       data_and_predictor->dictionary_predictor();
2514 
2515   Segments segments;
2516   segments.set_request_type(Segments::PREDICTION);
2517   InsertInputSequence("あし", composer_.get());
2518 
2519   Segment *segment = segments.add_segment();
2520   CHECK(segment);
2521   {
2522     string query;
2523     composer_->GetQueryForPrediction(&query);
2524     segment->set_key(query);
2525     EXPECT_EQ("あし", query);
2526   }
2527   {
2528     string base;
2529     std::set<string> expanded;
2530     composer_->GetQueriesForPrediction(&base, &expanded);
2531     EXPECT_EQ("あ", base);
2532     EXPECT_EQ(2, expanded.size());
2533   }
2534 
2535   std::vector<TestableDictionaryPredictor::Result> results;
2536   TestableDictionaryPredictor::Result *result;
2537 
2538   results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
2539   result = &results.back();
2540   result->key = "あし";
2541   result->value = "足";
2542   result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
2543                                      Token::NONE);
2544 
2545   results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
2546   result = &results.back();
2547   result->key = "あじ";
2548   result->value = "味";
2549   result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
2550                                      Token::NONE);
2551 
2552   results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
2553   result = &results.back();
2554   result->key = "あした";
2555   result->value = "明日";
2556   result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
2557                                      Token::NONE);
2558 
2559   results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
2560   result = &results.back();
2561   result->key = "あじあ";
2562   result->value = "アジア";
2563   result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
2564                                      Token::NONE);
2565 
2566   EXPECT_EQ(4, results.size());
2567   EXPECT_EQ(0, results[0].cost);
2568   EXPECT_EQ(0, results[1].cost);
2569   EXPECT_EQ(0, results[2].cost);
2570   EXPECT_EQ(0, results[3].cost);
2571 
2572   predictor->ApplyPenaltyForKeyExpansion(segments, &results);
2573 
2574   EXPECT_EQ(0, results[0].cost);
2575   EXPECT_LT(0, results[1].cost);
2576   EXPECT_EQ(0, results[2].cost);
2577   EXPECT_LT(0, results[3].cost);
2578 }
2579 
// Checks that SetLMCost() keeps the results in place and gives the result
// with the longer key ("てすとてすと") a higher cost than the two results
// whose key equals the segment key ("てすと").
TEST_F(DictionaryPredictorTest, SetLMCost) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  Segments segments;
  segments.set_request_type(Segments::PREDICTION);
  Segment *segment = segments.add_segment();
  // A gtest fatal assertion (rather than CHECK) fails only this test instead
  // of aborting the whole test binary.
  ASSERT_NE(nullptr, segment);
  segment->set_key("てすと");

  std::vector<TestableDictionaryPredictor::Result> results;
  TestableDictionaryPredictor::Result *result;

  results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
  result = &results.back();
  result->key = "てすと";
  result->value = "てすと";
  result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
                                     Token::NONE);

  results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
  result = &results.back();
  result->key = "てすと";
  result->value = "テスト";
  result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
                                     Token::NONE);

  results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
  result = &results.back();
  result->key = "てすとてすと";
  result->value = "テストテスト";
  result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::UNIGRAM,
                                     Token::NONE);

  predictor->SetLMCost(segments, &results);

  // Fatal: |results| was handed to the predictor and is indexed below.
  ASSERT_EQ(3, results.size());
  EXPECT_EQ("てすと", results[0].value);
  EXPECT_EQ("テスト", results[1].value);
  EXPECT_EQ("テストテスト", results[2].value);
  EXPECT_GT(results[2].cost, results[0].cost);
  EXPECT_GT(results[2].cost, results[1].cost);
}
2625 
2626 namespace {
2627 
AddTestableDictionaryPredictorResult(const char * key,const char * value,int wcost,TestableDictionaryPredictor::PredictionTypes prediction_types,Token::AttributesBitfield attributes,std::vector<TestableDictionaryPredictor::Result> * results)2628 void AddTestableDictionaryPredictorResult(
2629     const char *key, const char *value, int wcost,
2630     TestableDictionaryPredictor::PredictionTypes prediction_types,
2631     Token::AttributesBitfield attributes,
2632     std::vector<TestableDictionaryPredictor::Result> *results) {
2633   results->push_back(TestableDictionaryPredictor::MakeEmptyResult());
2634   TestableDictionaryPredictor::Result *result = &results->back();
2635   result->key = key;
2636   result->value = value;
2637   result->wcost = wcost;
2638   result->SetTypesAndTokenAttributes(prediction_types, attributes);
2639 }
2640 
2641 }  // namespace
2642 
TEST_F(DictionaryPredictorTest,SetLMCostForUserDictionaryWord)2643 TEST_F(DictionaryPredictorTest, SetLMCostForUserDictionaryWord) {
2644   unique_ptr<MockDataAndPredictor> data_and_predictor(
2645       CreateDictionaryPredictorWithMockData());
2646   const TestableDictionaryPredictor *predictor =
2647       data_and_predictor->dictionary_predictor();
2648 
2649   const char *kAikaHiragana = "あいか";
2650   const char *kAikaKanji = "愛佳";
2651 
2652   Segments segments;
2653   segments.set_request_type(Segments::PREDICTION);
2654   Segment *segment = segments.add_segment();
2655   ASSERT_NE(nullptr, segment);
2656   segment->set_key(kAikaHiragana);
2657 
2658   {
2659     // Cost of words in user dictionary should be decreased.
2660     const int kOrigianlWordCost = 10000;
2661     std::vector<TestableDictionaryPredictor::Result> results;
2662     AddTestableDictionaryPredictorResult(
2663         kAikaHiragana, kAikaKanji, kOrigianlWordCost,
2664         TestableDictionaryPredictor::UNIGRAM, Token::USER_DICTIONARY,
2665         &results);
2666 
2667     predictor->SetLMCost(segments, &results);
2668 
2669     EXPECT_EQ(1, results.size());
2670     EXPECT_EQ(kAikaKanji, results[0].value);
2671     EXPECT_GT(kOrigianlWordCost, results[0].cost);
2672     EXPECT_LE(1, results[0].cost);
2673   }
2674 
2675   {
2676     // Cost of words in user dictionary should not be decreased to below 1.
2677     const int kOrigianlWordCost = 10;
2678     std::vector<TestableDictionaryPredictor::Result> results;
2679     AddTestableDictionaryPredictorResult(
2680         kAikaHiragana, kAikaKanji, kOrigianlWordCost,
2681         TestableDictionaryPredictor::UNIGRAM, Token::USER_DICTIONARY,
2682         &results);
2683 
2684     predictor->SetLMCost(segments, &results);
2685 
2686     EXPECT_EQ(1, results.size());
2687     EXPECT_EQ(kAikaKanji, results[0].value);
2688     EXPECT_GT(kOrigianlWordCost, results[0].cost);
2689     EXPECT_LE(1, results[0].cost);
2690   }
2691 
2692   {
2693     // Cost of general symbols should not be decreased.
2694     const int kOrigianlWordCost = 10000;
2695     std::vector<TestableDictionaryPredictor::Result> results;
2696     AddTestableDictionaryPredictorResult(
2697         kAikaHiragana, kAikaKanji, kOrigianlWordCost,
2698         TestableDictionaryPredictor::UNIGRAM, Token::USER_DICTIONARY,
2699         &results);
2700     ASSERT_EQ(1, results.size());
2701     results[0].lid = data_and_predictor->pos_matcher().GetGeneralSymbolId();
2702     results[0].rid = results[0].lid;
2703     predictor->SetLMCost(segments, &results);
2704 
2705     EXPECT_EQ(1, results.size());
2706     EXPECT_EQ(kAikaKanji, results[0].value);
2707     EXPECT_LE(kOrigianlWordCost, results[0].cost);
2708   }
2709 
2710   {
2711     // Cost of words not in user dictionary should not be decreased.
2712     const int kOrigianlWordCost = 10000;
2713     std::vector<TestableDictionaryPredictor::Result> results;
2714     AddTestableDictionaryPredictorResult(
2715         kAikaHiragana, kAikaKanji, kOrigianlWordCost,
2716         TestableDictionaryPredictor::UNIGRAM, Token::NONE,
2717         &results);
2718 
2719     predictor->SetLMCost(segments, &results);
2720 
2721     EXPECT_EQ(1, results.size());
2722     EXPECT_EQ(kAikaKanji, results[0].value);
2723     EXPECT_EQ(kOrigianlWordCost, results[0].cost);
2724   }
2725 }
2726 
// With dictionaries built from the mock data manager, predicting for the key
// "あぼがど" is expected to produce the candidate "アボカド".
TEST_F(DictionaryPredictorTest, SuggestSpellingCorrection) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> fixture(new MockDataAndPredictor());
  fixture->Init(CreateSystemDictionaryFromDataManager(data_manager),
                CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor =
      fixture->dictionary_predictor();

  Segments prediction_segments;
  MakeSegmentsForPrediction("あぼがど", &prediction_segments);
  predictor->PredictForRequest(*convreq_, &prediction_segments);

  EXPECT_TRUE(FindCandidateByValue(prediction_segments.conversion_segment(0),
                                   "アボカド"));
}
2745 
// With dictionaries built from the mock data manager, predicting for the
// shorter key "あぼが" must not produce the candidate "アボカド".
TEST_F(DictionaryPredictorTest, DoNotSuggestSpellingCorrectionBeforeMismatch) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> fixture(new MockDataAndPredictor());
  fixture->Init(CreateSystemDictionaryFromDataManager(data_manager),
                CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor =
      fixture->dictionary_predictor();

  Segments prediction_segments;
  MakeSegmentsForPrediction("あぼが", &prediction_segments);
  predictor->PredictForRequest(*convreq_, &prediction_segments);

  EXPECT_FALSE(FindCandidateByValue(prediction_segments.conversion_segment(0),
                                    "アボカド"));
}
2765 
// Under a mobile request, unigram prediction for "とうきょう" must contain
// "東京" while keeping the number of results starting with "東京" small.
TEST_F(DictionaryPredictorTest, MobileUnigramSuggestion) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> fixture(new MockDataAndPredictor());
  fixture->Init(CreateSystemDictionaryFromDataManager(data_manager),
                CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor =
      fixture->dictionary_predictor();

  const char kKey[] = "とうきょう";
  Segments segments;
  MakeSegmentsForSuggestion(kKey, &segments);

  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  std::vector<TestableDictionaryPredictor::Result> results;
  predictor->AggregateUnigramPrediction(TestableDictionaryPredictor::UNIGRAM,
                                        *convreq_, segments, &results);

  EXPECT_TRUE(FindResultByValue(results, "東京"));

  // Count how many results have a value beginning with "東京".
  int prefix_count = 0;
  for (const TestableDictionaryPredictor::Result &entry : results) {
    if (!Util::StartsWith(entry.value, "東京")) {
      continue;
    }
    ++prefix_count;
  }
  // Should not have same prefix candidates a lot.
  EXPECT_LE(prefix_count, 6);
}
2799 
// Under a mobile request, an empty key preceded by the history "大学" is
// expected to yield the candidates "入試" and "入試センター".
TEST_F(DictionaryPredictorTest, MobileZeroQuerySuggestion) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> fixture(new MockDataAndPredictor());
  fixture->Init(CreateSystemDictionaryFromDataManager(data_manager),
                CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor =
      fixture->dictionary_predictor();

  // Empty conversion key with one history segment in front of it.
  Segments segments;
  MakeSegmentsForPrediction("", &segments);
  PrependHistorySegments("だいがく", "大学", &segments);

  commands::RequestForUnitTest::FillMobileRequest(request_.get());
  predictor->PredictForRequest(*convreq_, &segments);

  EXPECT_TRUE(FindCandidateByValue(segments.conversion_segment(0), "入試"));
  EXPECT_TRUE(
      FindCandidateByValue(segments.conversion_segment(0), "入試センター"));
}
2823 
// It is not yet clear what should be suggested after the end of a sentence.
// For now we show zero query suggestions there rather than suppressing them
// completely, because users may be confused if the suggestion window fails
// to appear only under certain conditions.
// TODO(toshiyuki): Show useful zero query suggestions after EOS.
TEST_F(DictionaryPredictorTest, DISABLED_MobileZeroQuerySuggestionAfterEOS) {
  testing::MockDataManager data_manager;

  unique_ptr<MockDataAndPredictor> fixture(new MockDataAndPredictor());
  fixture->Init(CreateSystemDictionaryFromDataManager(data_manager),
                CreateSuffixDictionaryFromDataManager(data_manager));
  const TestableDictionaryPredictor *predictor =
      fixture->dictionary_predictor();

  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  const POSMatcher &pos_matcher = fixture->pos_matcher();

  // Each case describes the single history candidate (key, value, right POS
  // id) and whether any candidate is expected for the empty conversion key.
  const struct TestCase {
    const char *key;
    const char *value;
    int rid;
    bool expected_result;
  } kTestcases[] = {
      {"ですよね。", "ですよね。", pos_matcher.GetEOSSymbolId(), false},
      {"。", "。", pos_matcher.GetEOSSymbolId(), false},
      {"まるいち", "①", pos_matcher.GetEOSSymbolId(), false},
      {"そう", "そう", pos_matcher.GetGeneralNounId(), true},
      {"そう!", "そう!", pos_matcher.GetGeneralNounId(), false},
      {"むすめ。", "娘。", pos_matcher.GetUniqueNounId(), true},
  };

  for (const TestCase &test_case : kTestcases) {
    Segments segments;
    MakeSegmentsForPrediction("", &segments);

    // Put a history segment holding the test candidate in front.
    Segment *history = segments.push_front_segment();
    history->set_segment_type(Segment::HISTORY);
    history->set_key(test_case.key);

    Segment::Candidate *committed = history->add_candidate();
    committed->key = test_case.key;
    committed->content_key = test_case.key;
    committed->value = test_case.value;
    committed->content_value = test_case.value;
    committed->rid = test_case.rid;

    predictor->PredictForRequest(*convreq_, &segments);

    EXPECT_EQ(test_case.expected_result,
              segments.conversion_segment(0).candidates_size() > 0);
  }
}
2880 
TEST_F(DictionaryPredictorTest,PropagateUserDictionaryAttribute)2881 TEST_F(DictionaryPredictorTest, PropagateUserDictionaryAttribute) {
2882   unique_ptr<MockDataAndPredictor> data_and_predictor(
2883       CreateDictionaryPredictorWithMockData());
2884   const DictionaryPredictor *predictor =
2885       data_and_predictor->dictionary_predictor();
2886 
2887   Segments segments;
2888   config_->set_use_dictionary_suggest(true);
2889   config_->set_use_realtime_conversion(true);
2890 
2891   {
2892     segments.Clear();
2893     segments.set_max_prediction_candidates_size(10);
2894     segments.set_request_type(Segments::SUGGESTION);
2895     Segment *seg = segments.add_segment();
2896     seg->set_key("ゆーざー");
2897     seg->set_segment_type(Segment::FREE);
2898     EXPECT_TRUE(predictor->PredictForRequest(*convreq_,
2899                                              &segments));
2900     EXPECT_EQ(1, segments.conversion_segments_size());
2901     bool find_yuza_candidate = false;
2902     for (size_t i = 0;
2903          i < segments.conversion_segment(0).candidates_size();
2904          ++i) {
2905       const Segment::Candidate &cand =
2906           segments.conversion_segment(0).candidate(i);
2907       if (cand.value == "ユーザー" &&
2908           (cand.attributes & (Segment::Candidate::NO_VARIANTS_EXPANSION |
2909                               Segment::Candidate::USER_DICTIONARY))) {
2910         find_yuza_candidate = true;
2911       }
2912     }
2913     EXPECT_TRUE(find_yuza_candidate);
2914   }
2915 
2916   {
2917     segments.Clear();
2918     segments.set_max_prediction_candidates_size(10);
2919     segments.set_request_type(Segments::SUGGESTION);
2920     Segment *seg = segments.add_segment();
2921     seg->set_key("ゆーざーの");
2922     seg->set_segment_type(Segment::FREE);
2923     EXPECT_TRUE(predictor->PredictForRequest(*convreq_,
2924                                              &segments));
2925     EXPECT_EQ(1, segments.conversion_segments_size());
2926     bool find_yuza_candidate = false;
2927     for (size_t i = 0;
2928          i < segments.conversion_segment(0).candidates_size();
2929          ++i) {
2930       const Segment::Candidate &cand =
2931           segments.conversion_segment(0).candidate(i);
2932       if ((cand.value == "ユーザーの") &&
2933           (cand.attributes & (Segment::Candidate::NO_VARIANTS_EXPANSION |
2934                               Segment::Candidate::USER_DICTIONARY))) {
2935         find_yuza_candidate = true;
2936       }
2937     }
2938     EXPECT_TRUE(find_yuza_candidate);
2939   }
2940 }
2941 
// Pins the description text produced by SetDescription() for the
// TYPING_CORRECTION prediction type and for the AUTO_PARTIAL_SUGGESTION
// candidate attribute, each starting from an empty string.
TEST_F(DictionaryPredictorTest, SetDescription) {
  string typing_correction_description;
  DictionaryPredictor::SetDescription(
      TestableDictionaryPredictor::TYPING_CORRECTION, 0,
      &typing_correction_description);
  EXPECT_EQ("補正", typing_correction_description);

  string partial_suggestion_description;
  DictionaryPredictor::SetDescription(
      0, Segment::Candidate::AUTO_PARTIAL_SUGGESTION,
      &partial_suggestion_description);
  EXPECT_EQ("部分", partial_suggestion_description);
}
2955 
// Pins the letter codes that SetDebugDescription() writes for combinations of
// prediction types, for both an empty and a pre-filled description.
TEST_F(DictionaryPredictorTest, SetDebugDescription) {
  // UNIGRAM | ENGLISH written into an empty description.
  const TestableDictionaryPredictor::PredictionTypes kUnigramEnglish =
      TestableDictionaryPredictor::UNIGRAM |
      TestableDictionaryPredictor::ENGLISH;
  string unigram_english;
  DictionaryPredictor::SetDebugDescription(kUnigramEnglish, &unigram_english);
  EXPECT_EQ("UE", unigram_english);

  // REALTIME | BIGRAM written into a description that already has text: the
  // codes follow the existing text after a space.
  const TestableDictionaryPredictor::PredictionTypes kRealtimeBigram =
      TestableDictionaryPredictor::REALTIME |
      TestableDictionaryPredictor::BIGRAM;
  string prefilled = "description";
  DictionaryPredictor::SetDebugDescription(kRealtimeBigram, &prefilled);
  EXPECT_EQ("description BR", prefilled);

  // BIGRAM | REALTIME | SUFFIX written into an empty description.
  const TestableDictionaryPredictor::PredictionTypes kBigramRealtimeSuffix =
      TestableDictionaryPredictor::BIGRAM |
      TestableDictionaryPredictor::REALTIME |
      TestableDictionaryPredictor::SUFFIX;
  string bigram_realtime_suffix;
  DictionaryPredictor::SetDebugDescription(kBigramRealtimeSuffix,
                                           &bigram_realtime_suffix);
  EXPECT_EQ("BRS", bigram_realtime_suffix);
}
2983 
// Builds a predictor on top of mock converters and checks that the single
// realtime-conversion result is added as a candidate whose
// inner_segment_boundary has three entries.
TEST_F(DictionaryPredictorTest, PropagateRealtimeConversionBoundary) {
  testing::MockDataManager data_manager;
  unique_ptr<const DictionaryInterface> dictionary(new DictionaryMock);
  unique_ptr<ConverterInterface> converter(new ConverterMock);
  unique_ptr<ImmutableConverterInterface> immutable_converter(
      new ImmutableConverterMock);
  unique_ptr<const DictionaryInterface> suffix_dictionary(
      CreateSuffixDictionaryFromDataManager(data_manager));
  unique_ptr<const Connector> connector(
      Connector::CreateFromDataManager(data_manager));
  unique_ptr<const Segmenter> segmenter(
      Segmenter::CreateFromDataManager(data_manager));
  unique_ptr<const SuggestionFilter> suggestion_filter(
      CreateSuggestionFilter(data_manager));
  const dictionary::POSMatcher pos_matcher(data_manager.GetPOSMatcherData());
  unique_ptr<TestableDictionaryPredictor> predictor(
      new TestableDictionaryPredictor(data_manager,
                                      converter.get(),
                                      immutable_converter.get(),
                                      dictionary.get(),
                                      suffix_dictionary.get(),
                                      connector.get(),
                                      segmenter.get(),
                                      &pos_matcher,
                                      suggestion_filter.get()));
  Segments segments;
  const char kKey[] =
      "わたしのなまえはなかのです";
  MakeSegmentsForSuggestion(kKey, &segments);

  std::vector<TestableDictionaryPredictor::Result> results;
  predictor->AggregateRealtimeConversion(
      TestableDictionaryPredictor::REALTIME, *convreq_,
      &segments, &results);

  // mock results
  EXPECT_EQ(1, results.size());
  predictor->AddPredictionToCandidates(*convreq_,
                                       &segments, &results);
  // Fatal: conversion_segment(0).candidate(0) is accessed below, so a
  // missing segment or candidate must stop the test here.
  ASSERT_EQ(1, segments.conversion_segments_size());
  ASSERT_EQ(1, segments.conversion_segment(0).candidates_size());
  const Segment::Candidate &cand = segments.conversion_segment(0).candidate(0);
  EXPECT_EQ("わたしのなまえはなかのです", cand.key);
  EXPECT_EQ("私の名前は中野です", cand.value);
  EXPECT_EQ(3, cand.inner_segment_boundary.size());
}
3030 
// Checks that AddPredictionToCandidates() copies each result's cost to the
// corresponding candidate and emits the candidates in ascending cost order,
// regardless of the order in which the results were supplied.
TEST_F(DictionaryPredictorTest, PropagateResultCosts) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  // Result i gets cost (i + 1000), so the expected candidate order is known.
  std::vector<TestableDictionaryPredictor::Result> results;
  const int kTestSize = 20;
  for (size_t i = 0; i < kTestSize; ++i) {
    results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
    TestableDictionaryPredictor::Result *result = &results.back();
    result->key = string(1, 'a' + i);
    result->value = string(1, 'A' + i);
    result->wcost = i;
    result->cost = i + 1000;
    result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::REALTIME,
                                       Token::NONE);
  }
  // Scramble the input so that the output order cannot come from the input.
  std::random_device rd;
  std::mt19937 urbg(rd());
  std::shuffle(results.begin(), results.end(), urbg);

  Segments segments;
  MakeSegmentsForSuggestion("test", &segments);
  segments.set_max_prediction_candidates_size(kTestSize);

  predictor->AddPredictionToCandidates(*convreq_,
                                       &segments, &results);

  // ASSERT rather than EXPECT: conversion_segment(0) is accessed right below.
  ASSERT_EQ(1, segments.conversion_segments_size());
  ASSERT_EQ(kTestSize, segments.conversion_segment(0).candidates_size());
  const Segment &segment = segments.conversion_segment(0);
  for (size_t i = 0; i < segment.candidates_size(); ++i) {
    EXPECT_EQ(i + 1000, segment.candidate(i).cost);
  }
}
3067 
// Checks that AddPredictionToCandidates() only emits the results whose cost
// is below kInfinity, even when the candidate size limit would allow one
// more, and that they come out in ascending cost order.
TEST_F(DictionaryPredictorTest, PredictNCandidates) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();

  // The first kLowCostCandidateSize results get cost (i + 1000); all the
  // others get a cost of at least kInfinity.
  std::vector<TestableDictionaryPredictor::Result> results;
  const int kTotalCandidateSize = 100;
  const int kLowCostCandidateSize = 5;
  for (size_t i = 0; i < kTotalCandidateSize; ++i) {
    results.push_back(TestableDictionaryPredictor::MakeEmptyResult());
    TestableDictionaryPredictor::Result *result = &results.back();
    result->key = string(1, 'a' + i);
    result->value = string(1, 'A' + i);
    result->wcost = i;
    result->SetTypesAndTokenAttributes(TestableDictionaryPredictor::REALTIME,
                                       Token::NONE);
    if (i < kLowCostCandidateSize) {
      result->cost = i + 1000;
    } else {
      result->cost = i + kInfinity;
    }
  }
  // std::random_shuffle was removed in C++17; use std::shuffle with an
  // explicit engine, as PropagateResultCosts does.
  std::random_device rd;
  std::mt19937 urbg(rd());
  std::shuffle(results.begin(), results.end(), urbg);

  Segments segments;
  MakeSegmentsForSuggestion("test", &segments);
  // Allow one more candidate than there are low cost results.
  segments.set_max_prediction_candidates_size(kLowCostCandidateSize + 1);

  predictor->AddPredictionToCandidates(*convreq_,
                                       &segments, &results);

  ASSERT_EQ(1, segments.conversion_segments_size());
  ASSERT_EQ(kLowCostCandidateSize,
            segments.conversion_segment(0).candidates_size());
  const Segment &segment = segments.conversion_segment(0);
  for (size_t i = 0; i < segment.candidates_size(); ++i) {
    EXPECT_EQ(i + 1000, segment.candidate(i).cost);
  }
}
3108 
// With a mobile request, the filtered word is still suggested when the key
// matches it exactly, but it must not be the top candidate.  For a key that
// is only a prefix, the filtered word must not be suggested at all.
TEST_F(DictionaryPredictorTest, SuggestFilteredwordForExactMatchOnMobile) {
  unique_ptr<MockDataAndPredictor> mock_data(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *const predictor =
      mock_data->dictionary_predictor();

  // Switch the shared request to mobile mode.
  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  Segments segments;

  // Exact match.
  // Note: The suggestion filter entry "フィルター" for test is not
  // appropriate here, as Katakana entry will be added by realtime conversion.
  // Here, we want to confirm the behavior including unigram prediction.
  MakeSegmentsForSuggestion("ふぃるたーたいしょう", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  {
    const Segment &exact_segment = segments.conversion_segment(0);
    EXPECT_TRUE(FindCandidateByValue(exact_segment, "フィルター対象"));
    EXPECT_TRUE(FindCandidateByValue(exact_segment, "フィルター大将"));

    // The filtered word is present, but another word has to be ranked first.
    EXPECT_EQ("フィルター大将", exact_segment.candidate(0).value);
  }

  // Non-exact match: the filtered word has to be dropped.
  MakeSegmentsForSuggestion("ふぃるたーたいし", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  {
    const Segment &prefix_segment = segments.conversion_segment(0);
    EXPECT_FALSE(FindCandidateByValue(prefix_segment, "フィルター対象"));
  }
}
3140 
// With the fixture's request left as is (no mobile request is filled in
// here), the filtered word must not be suggested even for an exact key match.
TEST_F(DictionaryPredictorTest, SuppressFilteredwordForExactMatch) {
  unique_ptr<MockDataAndPredictor> mock_data(
      CreateDictionaryPredictorWithMockData());
  const TestableDictionaryPredictor *const predictor =
      mock_data->dictionary_predictor();

  // Note: The suggestion filter entry "フィルター" for test is not
  // appropriate here, as Katakana entry will be added by realtime conversion.
  // Here, we want to confirm the behavior including unigram prediction.
  Segments segments;
  MakeSegmentsForSuggestion("ふぃるたーたいしょう", &segments);

  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

  const Segment &first_segment = segments.conversion_segment(0);
  EXPECT_FALSE(FindCandidateByValue(first_segment, "フィルター対象"));
}
3157 
3158 namespace {
3159 
// Serialized zero query token records that the GetZeroQueryCandidates test
// passes to ZeroQueryDict::Init().  Judging from the bytes and the per-record
// comments below, every record is 16 bytes, stored little-endian:
//   4 bytes: index of the key in kTestStrings
//   4 bytes: index of the value in kTestStrings
//   2 bytes: zero query type
//   2 bytes: emoji type bit mask
//   4 bytes: emoji code point (0 when unused)
// NOTE(review): this layout is inferred from the data in this file; confirm
// it against the ZeroQueryDict implementation before relying on it.
// The array also ends with the implicit '\0' of the string literal, which the
// test strips before use.
const char kTestTokenArray[] =
    // {"あ", "", ZERO_QUERY_EMOJI, EMOJI_DOCOMO | EMOJI_SOFTBANK, 0xfeb04}
    "\x04\x00\x00\x00"
    "\x00\x00\x00\x00"
    "\x03\x00"
    "\x06\x00"
    "\x04\xeb\x0f\x00"
    // {"あ", "❕", ZERO_QUERY_EMOJI, EMOJI_UNICODE, 0xfeb0b},
    "\x04\x00\x00\x00"
    "\x02\x00\x00\x00"
    "\x03\x00"
    "\x01\x00"
    "\x0b\xeb\x0f\x00"
    // {"あ", "❣", ZERO_QUERY_NONE, EMOJI_NONE, 0x00},
    "\x04\x00\x00\x00"
    "\x03\x00\x00\x00"
    "\x00\x00"
    "\x00\x00"
    "\x00\x00\x00\x00"
    // {"ああ", "( •̀ㅁ•́;)", ZERO_QUERY_EMOTICON, EMOJI_NONE, 0x00}
    "\x05\x00\x00\x00"
    "\x01\x00\x00\x00"
    "\x02\x00"
    "\x00\x00"
    "\x00\x00\x00\x00";
3185 
// String table for the zero query test data.  The key and value fields of
// kTestTokenArray hold indices into this array (e.g. 4 is "あ", 5 is "ああ").
// The GetZeroQueryCandidates test serializes it with
// SerializedStringArray::SerializeToBuffer().
// NOTE(review): the entries appear in ascending byte order; presumably the
// lookup depends on that, so keep them sorted -- confirm before reordering.
const char *kTestStrings[] = {
    "", "( •̀ㅁ•́;)", "❕", "❣", "あ", "ああ",
};
3189 
3190 struct TestEntry {
3191   int32 available_emoji_carrier;
3192   string key;
3193   bool expected_result;
3194   // candidate value and ZeroQueryType.
3195   std::vector<string> expected_candidates;
3196   std::vector<int32> expected_types;
3197 
DebugStringmozc::__anon8d7eb5c60411::TestEntry3198   string DebugString() const {
3199     string candidates;
3200     Util::JoinStrings(expected_candidates, ", ", &candidates);
3201     string types;
3202     for (size_t i = 0; i < expected_types.size(); ++i) {
3203       if (i != 0) {
3204         types.append(", ");
3205       }
3206       types.append(Util::StringPrintf("%d", types[i]));
3207     }
3208     return Util::StringPrintf(
3209         "available_emoji_carrier: %d\n"
3210         "key: %s\n"
3211         "expected_result: %d\n"
3212         "expected_candidates: %s\n"
3213         "expected_types: %s",
3214         available_emoji_carrier,
3215         key.c_str(),
3216         expected_result,
3217         candidates.c_str(),
3218         types.c_str());
3219   }
3220 };
3221 
3222 }  // namespace
3223 
TEST_F(DictionaryPredictorTest,GetZeroQueryCandidates)3224 TEST_F(DictionaryPredictorTest, GetZeroQueryCandidates) {
3225   // Create test zero query data.
3226   std::unique_ptr<uint32[]> string_data_buffer;
3227   ZeroQueryDict zero_query_dict;
3228   {
3229     // kTestTokenArray contains a trailing '\0', so create a StringPiece that
3230     // excludes it by subtracting 1.
3231     const StringPiece token_array_data(kTestTokenArray,
3232                                        arraysize(kTestTokenArray) - 1);
3233     std::vector<StringPiece> strs;
3234     for (const char *str : kTestStrings) {
3235       strs.push_back(str);
3236     }
3237     const StringPiece string_array_data =
3238         SerializedStringArray::SerializeToBuffer(strs, &string_data_buffer);
3239     zero_query_dict.Init(token_array_data, string_array_data);
3240   }
3241 
3242   std::vector<TestEntry> test_entries;
3243   {
3244     TestEntry entry;
3245     entry.available_emoji_carrier = 0;
3246     entry.key = "a";
3247     entry.expected_result = false;
3248     entry.expected_candidates.clear();
3249     entry.expected_types.clear();
3250     test_entries.push_back(entry);
3251   }
3252   {
3253     TestEntry entry;
3254     entry.available_emoji_carrier = 0;
3255     entry.key = "ん";
3256     entry.expected_result = false;
3257     entry.expected_candidates.clear();
3258     entry.expected_types.clear();
3259     test_entries.push_back(entry);
3260   }
3261   {
3262     TestEntry entry;
3263     entry.available_emoji_carrier = 0;
3264     entry.key = "ああ";
3265     entry.expected_result = true;
3266     entry.expected_candidates.push_back("( •̀ㅁ•́;)");
3267     entry.expected_types.push_back(ZERO_QUERY_EMOTICON);
3268     test_entries.push_back(entry);
3269   }
3270   {
3271     TestEntry entry;
3272     entry.available_emoji_carrier = 0;
3273     entry.key = "あ";
3274     entry.expected_result = true;
3275     entry.expected_candidates.push_back("❣");
3276     entry.expected_types.push_back(ZERO_QUERY_NONE);
3277     test_entries.push_back(entry);
3278   }
3279   {
3280     TestEntry entry;
3281     entry.available_emoji_carrier = commands::Request::UNICODE_EMOJI;
3282     entry.key = "あ";
3283     entry.expected_result = true;
3284     entry.expected_candidates.push_back("❕");
3285     entry.expected_types.push_back(ZERO_QUERY_EMOJI);
3286 
3287     entry.expected_candidates.push_back("❣");
3288     entry.expected_types.push_back(ZERO_QUERY_NONE);
3289     test_entries.push_back(entry);
3290   }
3291   {
3292     TestEntry entry;
3293     entry.available_emoji_carrier = commands::Request::DOCOMO_EMOJI;
3294     entry.key = "あ";
3295     entry.expected_result = true;
3296     string candidate;
3297     Util::UCS4ToUTF8(0xfeb04, &candidate);  // exclamation
3298     entry.expected_candidates.push_back(candidate);
3299     entry.expected_types.push_back(ZERO_QUERY_EMOJI);
3300 
3301     entry.expected_candidates.push_back("❣");
3302     entry.expected_types.push_back(ZERO_QUERY_NONE);
3303     test_entries.push_back(entry);
3304   }
3305   {
3306     TestEntry entry;
3307     entry.available_emoji_carrier = commands::Request::KDDI_EMOJI;
3308     entry.key = "あ";
3309     entry.expected_result = true;
3310     entry.expected_candidates.push_back("❣");
3311     entry.expected_types.push_back(ZERO_QUERY_NONE);
3312     test_entries.push_back(entry);
3313   }
3314   {
3315     TestEntry entry;
3316     entry.available_emoji_carrier =
3317         (commands::Request::DOCOMO_EMOJI | commands::Request::SOFTBANK_EMOJI |
3318          commands::Request::UNICODE_EMOJI);
3319     entry.key = "あ";
3320     entry.expected_result = true;
3321     string candidate;
3322     Util::UCS4ToUTF8(0xfeb04, &candidate);  // exclamation
3323     entry.expected_candidates.push_back(candidate);
3324     entry.expected_types.push_back(ZERO_QUERY_EMOJI);
3325 
3326     entry.expected_candidates.push_back("❕");
3327     entry.expected_types.push_back(ZERO_QUERY_EMOJI);
3328 
3329     entry.expected_candidates.push_back("❣");
3330     entry.expected_types.push_back(ZERO_QUERY_NONE);
3331     test_entries.push_back(entry);
3332   }
3333 
3334   for (size_t i = 0; i < test_entries.size(); ++i) {
3335     const TestEntry &test_entry = test_entries[i];
3336     ASSERT_EQ(test_entry.expected_candidates.size(),
3337               test_entry.expected_types.size());
3338 
3339     commands::Request client_request;
3340     client_request.set_available_emoji_carrier(
3341         test_entry.available_emoji_carrier);
3342     composer::Table table;
3343     const config::Config &config = config::ConfigHandler::DefaultConfig();
3344     composer::Composer composer(&table, &client_request, &config);
3345     const ConversionRequest request(&composer, &client_request, &config);
3346 
3347     std::vector<DictionaryPredictor::ZeroQueryResult> actual_candidates;
3348     const bool actual_result =
3349         DictionaryPredictor::GetZeroQueryCandidatesForKey(
3350             request, test_entry.key, zero_query_dict, &actual_candidates);
3351     EXPECT_EQ(test_entry.expected_result, actual_result)
3352         << test_entry.DebugString();
3353     for (size_t j = 0; j < test_entry.expected_candidates.size(); ++j) {
3354       EXPECT_EQ(test_entry.expected_candidates[j], actual_candidates[j].first)
3355           << "Failed at " << j << " : " << test_entry.DebugString();
3356       EXPECT_EQ(test_entry.expected_types[j], actual_candidates[j].second)
3357           << "Failed at " << j << " : " << test_entry.DebugString();
3358     }
3359   }
3360 }
3361 
3362 namespace {
SetSegmentForCommit(const string & candidate_value,int candidate_source_info,Segments * segments)3363 void SetSegmentForCommit(const string &candidate_value,
3364                          int candidate_source_info, Segments *segments) {
3365   segments->Clear();
3366   Segment *segment = segments->add_segment();
3367   segment->set_key("");
3368   segment->set_segment_type(Segment::FIXED_VALUE);
3369   Segment::Candidate *candidate = segment->add_candidate();
3370   candidate->key = candidate_value;
3371   candidate->content_key = candidate_value;
3372   candidate->value = candidate_value;
3373   candidate->content_value = candidate_value;
3374   candidate->source_info = candidate_source_info;
3375 }
3376 }  // namespace
3377 
// Checks that Finish() increments the usage stats counter that corresponds to
// the source info of the committed candidate, once per commit.
TEST_F(DictionaryPredictorTest, UsageStats) {
  unique_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  DictionaryPredictor *predictor =
      data_and_predictor->mutable_dictionary_predictor();

  // Each section below checks that the counter is 0, commits one candidate
  // with the matching source info, and checks that the counter became 1.
  Segments segments;
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeNone", 0);
  SetSegmentForCommit(
      "★", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NONE, &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeNone", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeNumberSuffix", 0);
  SetSegmentForCommit(
      "個", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_NUMBER_SUFFIX,
      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeNumberSuffix", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeEmoticon", 0);
  // The backslash is escaped: "\(" is not a valid escape sequence.
  SetSegmentForCommit(
      "\\(^o^)/", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_EMOTICON,
      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeEmoticon", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeEmoji", 0);
  SetSegmentForCommit("❕",
                      Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_EMOJI,
                      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeEmoji", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeBigram", 0);
  SetSegmentForCommit(
      "ヒルズ", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_BIGRAM,
      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeBigram", 1);

  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeSuffix", 0);
  SetSegmentForCommit(
      "が", Segment::Candidate::DICTIONARY_PREDICTOR_ZERO_QUERY_SUFFIX,
      &segments);
  predictor->Finish(*convreq_, &segments);
  EXPECT_COUNT_STATS("CommitDictionaryPredictorZeroQueryTypeSuffix", 1);
}
3426 
3427 }  // namespace mozc
3428