1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "prediction/user_history_predictor.h"
31 
32 #include <memory>
33 #include <set>
34 #include <string>
35 
36 #include "base/file_util.h"
37 #include "base/logging.h"
38 #include "base/password_manager.h"
39 #include "base/port.h"
40 #include "base/system_util.h"
41 #include "base/util.h"
42 #include "composer/composer.h"
43 #include "composer/table.h"
44 #include "config/config_handler.h"
45 #include "converter/segments.h"
46 #include "data_manager/testing/mock_data_manager.h"
47 #include "dictionary/dictionary_mock.h"
48 #include "dictionary/suppression_dictionary.h"
49 #include "protocol/commands.pb.h"
50 #include "protocol/config.pb.h"
51 #include "request/conversion_request.h"
52 #include "session/request_test_util.h"
53 #include "testing/base/public/googletest.h"
54 #include "testing/base/public/gunit.h"
55 #include "usage_stats/usage_stats.h"
56 #include "usage_stats/usage_stats_testing_util.h"
57 
58 DECLARE_bool(enable_expansion_for_user_history_predictor);
59 
60 namespace mozc {
61 namespace {
62 
63 using std::unique_ptr;
64 
65 using commands::Request;
66 using config::Config;
67 using dictionary::DictionaryMock;
68 using dictionary::SuppressionDictionary;
69 using dictionary::Token;
70 
AddSegmentForSuggestion(const string & key,Segments * segments)71 void AddSegmentForSuggestion(const string &key, Segments *segments) {
72   segments->set_max_prediction_candidates_size(10);
73   segments->set_request_type(Segments::SUGGESTION);
74   Segment *seg = segments->add_segment();
75   seg->set_key(key);
76   seg->set_segment_type(Segment::FIXED_VALUE);
77 }
78 
MakeSegmentsForSuggestion(const string & key,Segments * segments)79 void MakeSegmentsForSuggestion(const string &key, Segments *segments) {
80   segments->Clear();
81   AddSegmentForSuggestion(key, segments);
82 }
83 
AddSegmentForPrediction(const string & key,Segments * segments)84 void AddSegmentForPrediction(const string &key, Segments *segments) {
85   segments->set_max_prediction_candidates_size(10);
86   segments->set_request_type(Segments::PREDICTION);
87   Segment *seg = segments->add_segment();
88   seg->set_key(key);
89   seg->set_segment_type(Segment::FIXED_VALUE);
90 }
91 
MakeSegmentsForPrediction(const string & key,Segments * segments)92 void MakeSegmentsForPrediction(const string &key, Segments *segments) {
93   segments->Clear();
94   AddSegmentForPrediction(key, segments);
95 }
96 
AddSegmentForConversion(const string & key,Segments * segments)97 void AddSegmentForConversion(const string &key, Segments *segments) {
98   segments->set_request_type(Segments::CONVERSION);
99   Segment *seg = segments->add_segment();
100   seg->set_key(key);
101   seg->set_segment_type(Segment::FIXED_VALUE);
102 }
103 
MakeSegmentsForConversion(const string & key,Segments * segments)104 void MakeSegmentsForConversion(const string &key, Segments *segments) {
105   segments->Clear();
106   AddSegmentForConversion(key, segments);
107 }
108 
AddCandidate(size_t index,const string & value,Segments * segments)109 void AddCandidate(size_t index, const string &value, Segments *segments) {
110   Segment::Candidate *candidate =
111       segments->mutable_segment(index)->add_candidate();
112   CHECK(candidate);
113   candidate->Init();
114   candidate->value = value;
115   candidate->content_value = value;
116   candidate->key = segments->segment(index).key();
117   candidate->content_key = segments->segment(index).key();
118 }
119 
AddCandidateWithDescription(size_t index,const string & value,const string & desc,Segments * segments)120 void AddCandidateWithDescription(size_t index,
121                                  const string &value,
122                                  const string &desc,
123                                  Segments *segments) {
124   Segment::Candidate *candidate =
125       segments->mutable_segment(index)->add_candidate();
126   CHECK(candidate);
127   candidate->Init();
128   candidate->value = value;
129   candidate->content_value = value;
130   candidate->key = segments->segment(index).key();
131   candidate->content_key = segments->segment(index).key();
132   candidate->description = desc;
133 }
134 
AddCandidate(const string & value,Segments * segments)135 void AddCandidate(const string &value, Segments *segments) {
136   AddCandidate(0, value, segments);
137 }
138 
AddCandidateWithDescription(const string & value,const string & desc,Segments * segments)139 void AddCandidateWithDescription(const string &value,
140                                  const string &desc,
141                                  Segments *segments) {
142   AddCandidateWithDescription(0, value, desc, segments);
143 }
144 
FindCandidateByValue(const string & value,const Segments & segments)145 bool FindCandidateByValue(const string &value, const Segments &segments) {
146   for (size_t i = 0;
147        i < segments.conversion_segment(0).candidates_size(); ++i) {
148     if (segments.conversion_segment(0).candidate(i).value == value) {
149       return true;
150     }
151   }
152   return false;
153 }
154 }   // namespace
155 
156 class UserHistoryPredictorTest : public ::testing::Test {
157  public:
UserHistoryPredictorTest()158   UserHistoryPredictorTest()
159       : default_expansion_(FLAGS_enable_expansion_for_user_history_predictor) {
160   }
161 
~UserHistoryPredictorTest()162   ~UserHistoryPredictorTest() override {
163     FLAGS_enable_expansion_for_user_history_predictor = default_expansion_;
164   }
165 
166  protected:
SetUp()167   void SetUp() override {
168     SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
169     request_.reset(new Request);
170     config_.reset(new Config);
171     config::ConfigHandler::GetDefaultConfig(config_.get());
172     table_.reset(new composer::Table);
173     composer_.reset(
174         new composer::Composer(table_.get(), request_.get(), config_.get()));
175     convreq_.reset(
176         new ConversionRequest(composer_.get(), request_.get(), config_.get()));
177     data_and_predictor_.reset(CreateDataAndPredictor());
178 
179     mozc::usage_stats::UsageStats::ClearAllStatsForTest();
180   }
181 
TearDown()182   void TearDown() override {
183     FLAGS_enable_expansion_for_user_history_predictor = default_expansion_;
184 
185     mozc::usage_stats::UsageStats::ClearAllStatsForTest();
186   }
187 
GetUserHistoryPredictor()188   UserHistoryPredictor *GetUserHistoryPredictor() {
189     return data_and_predictor_->predictor.get();
190   }
191 
GetUserHistoryPredictorWithClearedHistory()192   UserHistoryPredictor *GetUserHistoryPredictorWithClearedHistory() {
193     UserHistoryPredictor *predictor = data_and_predictor_->predictor.get();
194     predictor->WaitForSyncer();
195     predictor->ClearAllHistory();
196     predictor->WaitForSyncer();
197     return predictor;
198   }
199 
GetDictionaryMock()200   DictionaryMock *GetDictionaryMock() {
201     return data_and_predictor_->dictionary.get();
202   }
203 
GetSuppressionDictionary()204   SuppressionDictionary *GetSuppressionDictionary() {
205     return data_and_predictor_->suppression_dictionary.get();
206   }
207 
IsSuggested(UserHistoryPredictor * predictor,const string & key,const string & value)208   static bool IsSuggested(UserHistoryPredictor *predictor,
209                           const string &key, const string &value) {
210     const ConversionRequest conversion_request;
211     Segments segments;
212     MakeSegmentsForSuggestion(key, &segments);
213     return predictor->PredictForRequest(conversion_request, &segments) &&
214            FindCandidateByValue(value, segments);
215   }
216 
IsPredicted(UserHistoryPredictor * predictor,const string & key,const string & value)217   static bool IsPredicted(UserHistoryPredictor *predictor,
218                           const string &key, const string &value) {
219     const ConversionRequest conversion_request;
220     Segments segments;
221     MakeSegmentsForPrediction(key, &segments);
222     return predictor->PredictForRequest(conversion_request, &segments) &&
223            FindCandidateByValue(value, segments);
224   }
225 
IsSuggestedAndPredicted(UserHistoryPredictor * predictor,const string & key,const string & value)226   static bool IsSuggestedAndPredicted(UserHistoryPredictor *predictor,
227                                       const string &key, const string &value) {
228     return IsSuggested(predictor, key, value) &&
229            IsPredicted(predictor, key, value);
230   }
231 
InsertEntry(UserHistoryPredictor * predictor,const string & key,const string & value)232   static UserHistoryPredictor::Entry *InsertEntry(
233       UserHistoryPredictor *predictor,
234       const string &key, const string &value) {
235     UserHistoryPredictor::Entry *e =
236         &predictor->dic_->Insert(predictor->Fingerprint(key, value))->value;
237     e->set_key(key);
238     e->set_value(value);
239     e->set_removed(false);
240     return e;
241   }
242 
AppendEntry(UserHistoryPredictor * predictor,const string & key,const string & value,UserHistoryPredictor::Entry * prev)243   static UserHistoryPredictor::Entry *AppendEntry(
244       UserHistoryPredictor *predictor,
245       const string &key, const string &value,
246       UserHistoryPredictor::Entry *prev) {
247     prev->add_next_entries()->set_entry_fp(
248         predictor->Fingerprint(key, value));
249     UserHistoryPredictor::Entry *e = InsertEntry(predictor, key, value);
250     return e;
251   }
252 
IsConnected(const UserHistoryPredictor::Entry & prev,const UserHistoryPredictor::Entry & next)253   static bool IsConnected(const UserHistoryPredictor::Entry &prev,
254                           const UserHistoryPredictor::Entry &next) {
255     const uint32 fp =
256         UserHistoryPredictor::Fingerprint(next.key(), next.value());
257     for (size_t i = 0; i < prev.next_entries_size(); ++i) {
258       if (prev.next_entries(i).entry_fp() == fp) {
259         return true;
260       }
261     }
262     return false;
263   }
264 
265   // Helper function to create a test case for bigram history deletion.
InitHistory_JapaneseInput(UserHistoryPredictor * predictor,UserHistoryPredictor::Entry ** japaneseinput,UserHistoryPredictor::Entry ** japanese,UserHistoryPredictor::Entry ** input)266   static void InitHistory_JapaneseInput(
267       UserHistoryPredictor *predictor,
268       UserHistoryPredictor::Entry **japaneseinput,
269       UserHistoryPredictor::Entry **japanese,
270       UserHistoryPredictor::Entry **input) {
271     // Make the history for ("japaneseinput", "JapaneseInput"). It's assumed
272     // that this sentence consists of two segments, "japanese" and "input". So,
273     // the following history entries are constructed:
274     //   ("japaneseinput", "JapaneseInput")  // Unigram
275     //   ("japanese", "Japanese") --- ("input", "Input")  // Bigram chain
276     *japaneseinput = InsertEntry(predictor, "japaneseinput", "JapaneseInput");
277     *japanese = InsertEntry(predictor, "japanese", "Japanese");
278     *input = AppendEntry(predictor, "input", "Input", *japanese);
279 
280     // Check the predictor functionality for the above history structure.
281     EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
282     EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
283     EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "input", "Input"));
284   }
285 
286   // Helper function to create a test case for trigram history deletion.
InitHistory_JapaneseInputMethod(UserHistoryPredictor * predictor,UserHistoryPredictor::Entry ** japaneseinputmethod,UserHistoryPredictor::Entry ** japanese,UserHistoryPredictor::Entry ** input,UserHistoryPredictor::Entry ** method)287   static void InitHistory_JapaneseInputMethod(
288       UserHistoryPredictor *predictor,
289       UserHistoryPredictor::Entry **japaneseinputmethod,
290       UserHistoryPredictor::Entry **japanese,
291       UserHistoryPredictor::Entry **input,
292       UserHistoryPredictor::Entry **method) {
293     // Make the history for ("japaneseinputmethod", "JapaneseInputMethod"). It's
294     // assumed that this sentence consists of three segments, "japanese",
295     // "input" and "method". So, the following history entries are constructed:
296     //   ("japaneseinputmethod", "JapaneseInputMethod")  // Unigram
297     //   ("japanese", "Japanese") -- ("input", "Input") -- ("method", "Method")
298     *japaneseinputmethod =
299         InsertEntry(predictor, "japaneseinputmethod", "JapaneseInputMethod");
300     *japanese = InsertEntry(predictor, "japanese", "Japanese");
301     *input = AppendEntry(predictor, "input", "Input", *japanese);
302     *method = AppendEntry(predictor, "method", "Method", *input);
303 
304     // Check the predictor functionality for the above history structure.
305     EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
306     EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
307     EXPECT_TRUE(IsSuggestedAndPredicted(predictor,
308                                         "japan", "JapaneseInputMethod"));
309     EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
310     EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
311     EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
312   }
313 
314   unique_ptr<composer::Composer> composer_;
315   unique_ptr<composer::Table> table_;
316   unique_ptr<ConversionRequest> convreq_;
317   unique_ptr<Config> config_;
318   unique_ptr<Request> request_;
319 
320  private:
321   struct DataAndPredictor {
322     unique_ptr<DictionaryMock> dictionary;
323     unique_ptr<SuppressionDictionary> suppression_dictionary;
324     unique_ptr<UserHistoryPredictor> predictor;
325     dictionary::POSMatcher pos_matcher;
326   };
327 
CreateDataAndPredictor() const328   DataAndPredictor *CreateDataAndPredictor() const {
329     DataAndPredictor *ret = new DataAndPredictor;
330     testing::MockDataManager data_manager;
331     ret->dictionary.reset(new DictionaryMock);
332     ret->suppression_dictionary.reset(new SuppressionDictionary);
333     ret->pos_matcher.Set(data_manager.GetPOSMatcherData());
334     ret->predictor.reset(
335         new UserHistoryPredictor(ret->dictionary.get(),
336                                  &ret->pos_matcher,
337                                  ret->suppression_dictionary.get(),
338                                  false));
339     return ret;
340   }
341 
342   const bool default_expansion_;
343   unique_ptr<DataAndPredictor> data_and_predictor_;
344   mozc::usage_stats::scoped_usage_stats_enabler usage_stats_enabler_;
345 };
346 
TEST_F(UserHistoryPredictorTest,UserHistoryPredictorTest)347 TEST_F(UserHistoryPredictorTest, UserHistoryPredictorTest) {
348   {
349     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
350     predictor->WaitForSyncer();
351 
352     // Nothing happen
353     {
354       Segments segments;
355       MakeSegmentsForSuggestion("てすと", &segments);
356       EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
357       EXPECT_EQ(0, segments.segment(0).candidates_size());
358     }
359 
360     // Nothing happen
361     {
362       Segments segments;
363       MakeSegmentsForPrediction("てすと", &segments);
364       EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
365       EXPECT_EQ(0, segments.segment(0).candidates_size());
366     }
367 
368     // Insert two items
369     {
370       Segments segments;
371       MakeSegmentsForConversion("わたしのなまえはなかのです", &segments);
372       AddCandidate("私の名前は中野です", &segments);
373       predictor->Finish(*convreq_, &segments);
374 
375       segments.Clear();
376       MakeSegmentsForSuggestion("わたしの", &segments);
377       EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
378       EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
379       EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
380                   Segment::Candidate::USER_HISTORY_PREDICTOR);
381 
382       segments.Clear();
383       MakeSegmentsForPrediction("わたしの", &segments);
384       EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
385       EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
386       EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
387                   Segment::Candidate::USER_HISTORY_PREDICTOR);
388     }
389 
390     // Insert without learning (nothing happen).
391     {
392       config::Config::HistoryLearningLevel no_learning_levels[] = {
393           config::Config::READ_ONLY, config::Config::NO_HISTORY};
394       for (config::Config::HistoryLearningLevel level : no_learning_levels) {
395         config_->set_history_learning_level(level);
396 
397         Segments segments;
398         MakeSegmentsForConversion("こんにちはさようなら", &segments);
399         AddCandidate("今日はさようなら", &segments);
400         predictor->Finish(*convreq_, &segments);
401 
402         segments.Clear();
403         MakeSegmentsForSuggestion("こんにちは", &segments);
404         EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
405         MakeSegmentsForPrediction("こんにちは", &segments);
406         EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
407       }
408       config_->set_history_learning_level(config::Config::DEFAULT_HISTORY);
409     }
410 
411     // sync
412     predictor->Sync();
413     Util::Sleep(500);
414   }
415 
416   // reload
417   {
418     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
419     predictor->WaitForSyncer();
420     Segments segments;
421 
422     // turn off
423     {
424       Segments segments;
425       config_->set_use_history_suggest(false);
426 
427       MakeSegmentsForSuggestion("わたしの", &segments);
428       EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
429 
430       config_->set_use_history_suggest(true);
431       config_->set_incognito_mode(true);
432 
433       MakeSegmentsForSuggestion("わたしの", &segments);
434       EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
435 
436       config_->set_incognito_mode(false);
437       config_->set_history_learning_level(config::Config::NO_HISTORY);
438 
439       MakeSegmentsForSuggestion("わたしの", &segments);
440       EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
441     }
442 
443     // turn on
444     { config::ConfigHandler::GetDefaultConfig(config_.get()); }
445 
446     // reproducesd
447     MakeSegmentsForSuggestion("わたしの", &segments);
448     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
449     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
450 
451     segments.Clear();
452     MakeSegmentsForPrediction("わたしの", &segments);
453     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
454     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
455 
456     // Exact Match
457     segments.Clear();
458     MakeSegmentsForSuggestion("わたしのなまえはなかのです", &segments);
459     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
460     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
461 
462     segments.Clear();
463     MakeSegmentsForPrediction("わたしのなまえはなかのです", &segments);
464     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
465     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
466 
467     segments.Clear();
468     MakeSegmentsForSuggestion("こんにちはさようなら", &segments);
469     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
470 
471     segments.Clear();
472     MakeSegmentsForPrediction("こんにちはさようなら", &segments);
473     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
474 
475     // Read only mode should show suggestion.
476     {
477       config_->set_history_learning_level(config::Config::READ_ONLY);
478       MakeSegmentsForSuggestion("わたしの", &segments);
479       EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
480       EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
481 
482       segments.Clear();
483       MakeSegmentsForPrediction("わたしの", &segments);
484       EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
485       EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
486       config_->set_history_learning_level(config::Config::DEFAULT_HISTORY);
487     }
488 
489     // clear
490     predictor->ClearAllHistory();
491     predictor->WaitForSyncer();
492   }
493 
494   // nothing happen
495   {
496     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
497     predictor->WaitForSyncer();
498     Segments segments;
499 
500     // reproducesd
501     MakeSegmentsForSuggestion("わたしの", &segments);
502     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
503 
504     MakeSegmentsForPrediction("わたしの", &segments);
505     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
506   }
507 
508   // nothing happen
509   {
510     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
511     predictor->WaitForSyncer();
512     Segments segments;
513 
514     // reproducesd
515     MakeSegmentsForSuggestion("わたしの", &segments);
516     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
517 
518     MakeSegmentsForPrediction("わたしの", &segments);
519     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
520   }
521 }
522 
// Previously, Segments that contain multiple segments and whose request type
// is not CONVERSION were not supported.
// This separate test case covers the support for such Segments.
// Commits a two-segment SUGGESTION-type Segments through Finish() and checks
// that every candidate later returned for a prefix key is one of the expected
// values.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorTest_suggestion) {
  UserHistoryPredictor *const history =
      GetUserHistoryPredictorWithClearedHistory();

  // Register input histories via Finish method.
  {
    Segments committed;
    MakeSegmentsForSuggestion("かまた", &committed);
    AddCandidate(0, "火魔汰", &committed);
    AddSegmentForSuggestion("ま", &committed);
    AddCandidate(1, "摩", &committed);
    history->Finish(*convreq_, &committed);

    // All visited items must be suggestion entries.
    // NOTE(review): the loop condition tests element->next, so the element
    // whose next pointer is null is never inspected -- confirm that excluding
    // it is intended.
    for (const UserHistoryPredictor::DicCache::Element *element =
             history->dic_->Head();
         element->next; element = element->next) {
      const user_history_predictor::UserHistory::Entry &entry = element->value;
      EXPECT_TRUE(entry.has_suggestion_freq() && entry.suggestion_freq() == 1);
      EXPECT_TRUE(!entry.has_conversion_freq() && entry.conversion_freq() == 0);
    }
  }

  // Obtain input histories via Predict method.
  {
    Segments query;
    MakeSegmentsForSuggestion("かま", &query);
    EXPECT_TRUE(history->PredictForRequest(*convreq_, &query));

    // The second value is available even though the Segments' type is not
    // CONVERSION.
    std::set<string> expected_values = {"火魔汰", "火魔汰摩"};

    const Segment &result = query.segment(0);
    for (size_t i = 0; i < result.candidates_size(); ++i) {
      const string &returned = result.candidate(i).value;
      SCOPED_TRACE(returned);
      // Each returned value must be expected, and must appear only once.
      EXPECT_EQ(1, expected_values.erase(returned));
    }
  }
}
567 
TEST_F(UserHistoryPredictorTest,DescriptionTest)568 TEST_F(UserHistoryPredictorTest, DescriptionTest) {
569 #ifdef DEBUG
570   const char kDescription[] = "テスト History";
571 #else
572   const char kDescription[] = "テスト";
573 #endif  // DEBUG
574 
575   {
576     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
577     predictor->WaitForSyncer();
578 
579     // Insert two items
580     {
581       Segments segments;
582       MakeSegmentsForConversion("わたしのなまえはなかのです", &segments);
583       AddCandidateWithDescription("私の名前は中野です", kDescription,
584                                   &segments);
585       predictor->Finish(*convreq_, &segments);
586 
587       MakeSegmentsForSuggestion("わたしの", &segments);
588       EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
589       EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
590       EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);
591 
592       segments.Clear();
593       MakeSegmentsForPrediction("わたしの", &segments);
594       EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
595       EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
596       EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);
597     }
598 
599     // sync
600     predictor->Sync();
601   }
602 
603   // reload
604   {
605     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
606     predictor->WaitForSyncer();
607     Segments segments;
608 
609     // turn off
610     {
611       Segments segments;
612       config_->set_use_history_suggest(false);
613       predictor->WaitForSyncer();
614 
615       MakeSegmentsForSuggestion("わたしの", &segments);
616       EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
617 
618       config_->set_use_history_suggest(true);
619       config_->set_incognito_mode(true);
620 
621       MakeSegmentsForSuggestion("わたしの", &segments);
622       EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
623     }
624 
625     // turn on
626     {
627       config::ConfigHandler::GetDefaultConfig(config_.get());
628       predictor->WaitForSyncer();
629     }
630 
631     // reproducesd
632     MakeSegmentsForSuggestion("わたしの", &segments);
633     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
634     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
635     EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);
636 
637     segments.Clear();
638     MakeSegmentsForPrediction("わたしの", &segments);
639     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
640     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
641     EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);
642 
643     // Exact Match
644     segments.Clear();
645     MakeSegmentsForSuggestion("わたしのなまえはなかのです", &segments);
646     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
647     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
648     EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);
649 
650     segments.Clear();
651     MakeSegmentsForSuggestion("わたしのなまえはなかのです", &segments);
652     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
653     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
654     EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);
655 
656     // clear
657     predictor->ClearAllHistory();
658     predictor->WaitForSyncer();
659   }
660 
661   // nothing happen
662   {
663     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
664     predictor->WaitForSyncer();
665     Segments segments;
666 
667     // reproducesd
668     MakeSegmentsForSuggestion("わたしの", &segments);
669     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
670 
671     MakeSegmentsForPrediction("わたしの", &segments);
672     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
673   }
674 
675   // nothing happen
676   {
677     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
678     predictor->WaitForSyncer();
679     Segments segments;
680 
681     // reproducesd
682     MakeSegmentsForSuggestion("わたしの", &segments);
683     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
684 
685     MakeSegmentsForPrediction("わたしの", &segments);
686     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
687   }
688 }
689 
TEST_F(UserHistoryPredictorTest,UserHistoryPredictorUnusedHistoryTest)690 TEST_F(UserHistoryPredictorTest, UserHistoryPredictorUnusedHistoryTest) {
691   {
692     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
693     predictor->WaitForSyncer();
694 
695     Segments segments;
696     MakeSegmentsForConversion("わたしのなまえはなかのです", &segments);
697     AddCandidate("私の名前は中野です", &segments);
698 
699     // once
700     segments.set_request_type(Segments::SUGGESTION);
701     predictor->Finish(*convreq_, &segments);
702 
703     segments.Clear();
704     MakeSegmentsForConversion("ひろすえりょうこ", &segments);
705     AddCandidate("広末涼子", &segments);
706 
707     segments.set_request_type(Segments::CONVERSION);
708 
709     // conversion
710     predictor->Finish(*convreq_, &segments);
711 
712     // sync
713     predictor->Sync();
714   }
715 
716   {
717     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
718     predictor->WaitForSyncer();
719     Segments segments;
720 
721     MakeSegmentsForSuggestion("わたしの", &segments);
722     EXPECT_TRUE(predictor->Predict(&segments));
723     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
724 
725     segments.Clear();
726     MakeSegmentsForSuggestion("ひろすえ", &segments);
727     EXPECT_TRUE(predictor->Predict(&segments));
728     EXPECT_EQ("広末涼子", segments.segment(0).candidate(0).value);
729 
730     predictor->ClearUnusedHistory();
731     predictor->WaitForSyncer();
732 
733     segments.Clear();
734     MakeSegmentsForSuggestion("わたしの", &segments);
735     EXPECT_TRUE(predictor->Predict(&segments));
736     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
737 
738     segments.Clear();
739     MakeSegmentsForSuggestion("ひろすえ", &segments);
740     EXPECT_FALSE(predictor->Predict(&segments));
741 
742     predictor->Sync();
743   }
744 
745   {
746     UserHistoryPredictor *predictor = GetUserHistoryPredictor();
747     predictor->WaitForSyncer();
748     Segments segments;
749 
750     MakeSegmentsForSuggestion("わたしの", &segments);
751     EXPECT_TRUE(predictor->Predict(&segments));
752     EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
753 
754     segments.Clear();
755     MakeSegmentsForSuggestion("ひろすえ", &segments);
756     EXPECT_FALSE(predictor->Predict(&segments));
757   }
758 }
759 
// Checks that Revert() undoes the learning done by the preceding Finish():
// the committed entry is suggested before the revert and no candidate is
// produced for the same key afterwards.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorRevertTest) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments, segments2;
  MakeSegmentsForConversion("わたしのなまえはなかのです", &segments);
  AddCandidate("私の名前は中野です", &segments);

  predictor->Finish(*convreq_, &segments);

  // Before Revert, Suggest works.  The result has to be read from |segments2|
  // (the object handed to Predict); |segments| still holds the candidate that
  // was added by hand above, so inspecting it would verify nothing.
  MakeSegmentsForSuggestion("わたしの", &segments2);
  // ASSERT so that candidate(0) is never read when nothing was predicted.
  ASSERT_TRUE(predictor->Predict(&segments2));
  EXPECT_EQ("私の名前は中野です", segments2.segment(0).candidate(0).value);

  // Call revert here
  predictor->Revert(&segments);

  segments.Clear();
  MakeSegmentsForSuggestion("わたしの", &segments);

  EXPECT_FALSE(predictor->Predict(&segments));
  EXPECT_EQ(0, segments.segment(0).candidates_size());

  // A second lookup on the same segments must stay empty as well.
  EXPECT_FALSE(predictor->Predict(&segments));
  EXPECT_EQ(0, segments.segment(0).candidates_size());
}
789 
// Commits the same entry ten times, clears all history, commits it once more
// and then checks which prefixes still produce a prediction.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorClearTest) {
  const char kKey[] = "testtest";
  const char kValue[] = "テストテスト";

  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();

  // Ten commits of "testtest" before the history is wiped.
  for (int round = 0; round != 10; ++round) {
    Segments committed;
    MakeSegmentsForConversion(kKey, &committed);
    AddCandidate(kValue, &committed);
    predictor->Finish(*convreq_, &committed);
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // A single commit of "testtest" after the wipe.
  {
    Segments committed;
    MakeSegmentsForConversion(kKey, &committed);
    AddCandidate(kValue, &committed);
    predictor->Finish(*convreq_, &committed);
  }

  // The frequency must have been cleared too: the one-character prefix is
  // expected to yield nothing, while the longer prefix still predicts.
  {
    Segments lookup;
    MakeSegmentsForSuggestion("t", &lookup);
    EXPECT_FALSE(predictor->Predict(&lookup));

    lookup.Clear();
    MakeSegmentsForSuggestion("testte", &lookup);
    EXPECT_TRUE(predictor->Predict(&lookup));
  }
}
824 
// Commits a sentence segment followed by a "。" segment and checks that both
// the bare sentence and the sentence with "。" appended are returned, for
// lookups built with MakeSegmentsForPrediction and MakeSegmentsForSuggestion.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorTrailingPunctuation) {
  const char kSentence[] = "私の名前は中野です";
  const char kSentenceWithPeriod[] = "私の名前は中野です。";

  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // Learn: segment 0 is the sentence, segment 1 the trailing punctuation.
  Segments segments;
  MakeSegmentsForConversion("わたしのなまえはなかのです", &segments);
  AddCandidate(0, kSentence, &segments);
  AddSegmentForConversion("。", &segments);
  AddCandidate(1, "。", &segments);
  predictor->Finish(*convreq_, &segments);

  // Lookup through the prediction helper.
  segments.Clear();
  MakeSegmentsForPrediction("わたしの", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_EQ(2, segments.segment(0).candidates_size());
  EXPECT_EQ(kSentence, segments.segment(0).candidate(0).value);
  EXPECT_EQ(kSentenceWithPeriod, segments.segment(0).candidate(1).value);

  // Lookup through the suggestion helper; same expectations.
  segments.Clear();
  MakeSegmentsForSuggestion("わたしの", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_EQ(2, segments.segment(0).candidates_size());
  EXPECT_EQ(kSentence, segments.segment(0).candidate(0).value);
  EXPECT_EQ(kSentenceWithPeriod, segments.segment(0).candidate(1).value);
}
857 
// With the request filled in by FillMobileRequest(), committing "です。" as a
// single segment must not make the prefix "です" predictable.
TEST_F(UserHistoryPredictorTest, TrailingPunctuation_Mobile) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();
  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  // Key and value of the committed segment are the same text.
  const char kCommitted[] = "です。";

  Segments segments;
  MakeSegmentsForConversion(kCommitted, &segments);
  AddCandidate(0, kCommitted, &segments);
  predictor->Finish(*convreq_, &segments);

  segments.Clear();
  MakeSegmentsForPrediction("です", &segments);
  EXPECT_FALSE(predictor->Predict(&segments));
}
877 
TEST_F(UserHistoryPredictorTest,HistoryToPunctuation)878 TEST_F(UserHistoryPredictorTest, HistoryToPunctuation) {
879   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
880   predictor->WaitForSyncer();
881   predictor->ClearAllHistory();
882   predictor->WaitForSyncer();
883 
884   Segments segments;
885 
886   // Scenario 1: A user have commited "亜" by prediction and then commit "。".
887   // Then, the unigram "亜" is learned but the bigram "亜。" shouldn't.
888   MakeSegmentsForPrediction("あ", &segments);
889   AddCandidate(0, "亜", &segments);
890   predictor->Finish(*convreq_, &segments);
891   segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
892 
893   AddSegmentForPrediction("。", &segments);
894   AddCandidate(1, "。", &segments);
895   predictor->Finish(*convreq_, &segments);
896 
897   segments.Clear();
898   MakeSegmentsForPrediction("あ", &segments);  // "あ"
899   ASSERT_TRUE(predictor->Predict(&segments)) << segments.DebugString();
900   EXPECT_EQ("亜", segments.segment(0).candidate(0).value);
901 
902   segments.Clear();
903 
904   // Scenario 2: the opposite case to Scenario 1, i.e., "。亜".  Nothing is
905   // suggested from symbol "。".
906   MakeSegmentsForPrediction("。", &segments);
907   AddCandidate(0, "。", &segments);
908   predictor->Finish(*convreq_, &segments);
909   segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
910 
911   AddSegmentForPrediction("あ", &segments);
912   AddCandidate(1, "亜", &segments);
913   predictor->Finish(*convreq_, &segments);
914 
915   segments.Clear();
916   MakeSegmentsForPrediction("。", &segments);  // "。"
917   EXPECT_FALSE(predictor->Predict(&segments)) << segments.DebugString();
918 
919   segments.Clear();
920 
921   // Scenario 3: If the history segment looks like a sentence and committed
922   // value is a punctuation, the concatenated entry is also learned.
923   MakeSegmentsForPrediction("おつかれさまです", &segments);
924   AddCandidate(0, "お疲れ様です", &segments);
925   predictor->Finish(*convreq_, &segments);
926   segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
927 
928   AddSegmentForPrediction("。", &segments);
929   AddCandidate(1, "。", &segments);
930   predictor->Finish(*convreq_, &segments);
931 
932   segments.Clear();
933   MakeSegmentsForPrediction("おつかれ", &segments);
934   ASSERT_TRUE(predictor->Predict(&segments)) << segments.DebugString();
935   EXPECT_EQ("お疲れ様です", segments.segment(0).candidate(0).value);
936   EXPECT_EQ("お疲れ様です。", segments.segment(0).candidate(1).value);
937 }
938 
// Commits a "。" segment followed by a sentence segment and checks that the
// sentence alone is the only candidate returned for its prefix, for lookups
// built with MakeSegmentsForPrediction and MakeSegmentsForSuggestion.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorPreceedingPunctuation) {
  const char kSentence[] = "私の名前は中野です";

  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // Learn: segment 0 is the punctuation, segment 1 the sentence.
  Segments segments;
  MakeSegmentsForConversion("。", &segments);
  AddCandidate(0, "。", &segments);
  AddSegmentForConversion("わたしのなまえはなかのです", &segments);
  AddCandidate(1, kSentence, &segments);
  predictor->Finish(*convreq_, &segments);

  // Lookup through the prediction helper.
  segments.Clear();
  MakeSegmentsForPrediction("わたしの", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ(kSentence, segments.segment(0).candidate(0).value);

  // Lookup through the suggestion helper; same expectations.
  segments.Clear();
  MakeSegmentsForSuggestion("わたしの", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ(kSentence, segments.segment(0).candidate(0).value);
}
969 
namespace {
// One row of the table driving the StartsWithPunctuations test.
struct StartsWithPunctuationsTestData {
  // Text that is learned as the leading segment / leading part of an entry
  // and later used as the lookup key.
  const char *first_character;
  // Value Predict() is expected to return when looking up |first_character|.
  bool expected_result;
};
}  // namespace
976 
// For each table row, learns an entry that begins with the row's text (once
// split into two segments, once as a single segment) and checks whether a
// lookup keyed by that text alone returns the expected result.
TEST_F(UserHistoryPredictorTest, StartsWithPunctuations) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  const StartsWithPunctuationsTestData kTestCases[] = {
      {"。", false}, {"、", false}, {"?", false}, {"!", false}, {"ぬ", true},
  };

  for (const StartsWithPunctuationsTestData &test_case : kTestCases) {
    // Every row starts from an empty history.
    predictor->WaitForSyncer();
    predictor->ClearAllHistory();
    predictor->WaitForSyncer();

    const string head = test_case.first_character;
    Segments segments;

    // Learn from two segments: the leading text, then the sentence.
    MakeSegmentsForConversion(head, &segments);
    AddCandidate(0, head, &segments);
    AddSegmentForConversion("てすとぶんしょう", &segments);
    AddCandidate(1, "テスト文章", &segments);
    predictor->Finish(*convreq_, &segments);
    segments.Clear();

    // Learn from one segment holding the concatenation.
    MakeSegmentsForConversion(head + "てすとぶんしょう", &segments);
    AddCandidate(0, head + "テスト文章", &segments);
    predictor->Finish(*convreq_, &segments);
    segments.Clear();

    // Suggestion
    MakeSegmentsForSuggestion(head, &segments);
    AddCandidate(0, head, &segments);
    EXPECT_EQ(test_case.expected_result, predictor->Predict(&segments))
        << "Suggest from " << head;
    segments.Clear();

    // Prediction
    MakeSegmentsForPrediction(head, &segments);
    EXPECT_EQ(test_case.expected_result, predictor->Predict(&segments))
        << "Predict from " << head;
  }
}
1022 
TEST_F(UserHistoryPredictorTest,ZeroQuerySuggestionTest)1023 TEST_F(UserHistoryPredictorTest, ZeroQuerySuggestionTest) {
1024   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
1025   predictor->WaitForSyncer();
1026   predictor->ClearAllHistory();
1027   predictor->WaitForSyncer();
1028 
1029   request_->set_zero_query_suggestion(true);
1030 
1031   commands::Request non_zero_query_request;
1032   non_zero_query_request.set_zero_query_suggestion(false);
1033   ConversionRequest non_zero_query_conversion_request(
1034       composer_.get(), &non_zero_query_request, config_.get());
1035 
1036   Segments segments;
1037 
1038   // No history segments
1039   segments.Clear();
1040   MakeSegmentsForSuggestion("", &segments);
1041   EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
1042 
1043   {
1044     segments.Clear();
1045 
1046     MakeSegmentsForConversion("たろうは", &segments);
1047     AddCandidate(0, "太郎は", &segments);
1048     predictor->Finish(*convreq_, &segments);
1049     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
1050 
1051     AddSegmentForConversion("はなこに", &segments);
1052     AddCandidate(1, "花子に", &segments);
1053     predictor->Finish(*convreq_, &segments);
1054     segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);
1055 
1056     segments.pop_back_segment();
1057     AddSegmentForConversion("きょうと", &segments);
1058     AddCandidate(1, "京都", &segments);
1059     Util::Sleep(2000);
1060     predictor->Finish(*convreq_, &segments);
1061     segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);
1062 
1063     segments.pop_back_segment();
1064     AddSegmentForConversion("おおさか", &segments);
1065     AddCandidate(1, "大阪", &segments);
1066     Util::Sleep(2000);
1067     predictor->Finish(*convreq_, &segments);
1068     segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);
1069 
1070     // Zero query suggestion is disabled.
1071     segments.pop_back_segment();
1072     AddSegmentForSuggestion("", &segments);  // empty request
1073     EXPECT_FALSE(predictor->PredictForRequest(non_zero_query_conversion_request,
1074                                               &segments));
1075 
1076     segments.pop_back_segment();
1077     AddSegmentForSuggestion("", &segments);  // empty request
1078     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
1079     // last-pushed segment is "大阪"
1080     EXPECT_EQ("大阪", segments.segment(1).candidate(0).value);
1081     EXPECT_EQ("おおさか", segments.segment(1).candidate(0).key);
1082     EXPECT_TRUE(segments.segment(1).candidate(0).source_info &
1083                 Segment::Candidate::USER_HISTORY_PREDICTOR);
1084 
1085     segments.pop_back_segment();
1086     AddSegmentForSuggestion("は", &segments);
1087     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
1088 
1089     segments.pop_back_segment();
1090     AddSegmentForSuggestion("た", &segments);
1091     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
1092 
1093     segments.pop_back_segment();
1094     AddSegmentForSuggestion("き", &segments);
1095     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
1096 
1097     segments.pop_back_segment();
1098     AddSegmentForSuggestion("お", &segments);
1099     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
1100   }
1101 
1102   predictor->ClearAllHistory();
1103   predictor->WaitForSyncer();
1104 
1105   {
1106     segments.Clear();
1107     MakeSegmentsForConversion("たろうは", &segments);
1108     AddCandidate(0, "太郎は", &segments);
1109 
1110     AddSegmentForConversion("はなこに", &segments);
1111     AddCandidate(1, "花子に", &segments);
1112     predictor->Finish(*convreq_, &segments);
1113 
1114     segments.Clear();
1115     MakeSegmentsForConversion("たろうは", &segments);
1116     AddCandidate(0, "太郎は", &segments);
1117     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
1118 
1119     // Zero query suggestion is disabled.
1120     AddSegmentForSuggestion("", &segments);  // empty request
1121     EXPECT_FALSE(predictor->PredictForRequest(non_zero_query_conversion_request,
1122                                               &segments));
1123 
1124     segments.pop_back_segment();
1125     AddSegmentForSuggestion("", &segments);  // empty request
1126     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
1127 
1128     segments.pop_back_segment();
1129     AddSegmentForSuggestion("は", &segments);
1130     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
1131 
1132     segments.pop_back_segment();
1133     AddSegmentForSuggestion("た", &segments);
1134     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
1135   }
1136 }
1137 
TEST_F(UserHistoryPredictorTest,MultiSegmentsMultiInput)1138 TEST_F(UserHistoryPredictorTest, MultiSegmentsMultiInput) {
1139   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
1140   predictor->WaitForSyncer();
1141   predictor->ClearAllHistory();
1142   predictor->WaitForSyncer();
1143 
1144   Segments segments;
1145 
1146   MakeSegmentsForConversion("たろうは", &segments);
1147   AddCandidate(0, "太郎は", &segments);
1148   predictor->Finish(*convreq_, &segments);
1149   segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
1150 
1151   AddSegmentForConversion("はなこに", &segments);
1152   AddCandidate(1, "花子に", &segments);
1153   predictor->Finish(*convreq_, &segments);
1154   segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);
1155 
1156   segments.clear_conversion_segments();
1157   AddSegmentForConversion("むずかしい", &segments);
1158   AddCandidate(2, "難しい", &segments);
1159   predictor->Finish(*convreq_, &segments);
1160   segments.mutable_segment(2)->set_segment_type(Segment::HISTORY);
1161 
1162   segments.clear_conversion_segments();
1163   AddSegmentForConversion("ほんを", &segments);
1164   AddCandidate(3, "本を", &segments);
1165   predictor->Finish(*convreq_, &segments);
1166   segments.mutable_segment(3)->set_segment_type(Segment::HISTORY);
1167 
1168   segments.clear_conversion_segments();
1169   AddSegmentForConversion("よませた", &segments);
1170   AddCandidate(4, "読ませた", &segments);
1171   predictor->Finish(*convreq_, &segments);
1172 
1173   segments.Clear();
1174   MakeSegmentsForSuggestion("た", &segments);
1175   EXPECT_FALSE(predictor->Predict(&segments));
1176 
1177   segments.Clear();
1178   MakeSegmentsForSuggestion("たろうは", &segments);
1179   EXPECT_TRUE(predictor->Predict(&segments));
1180 
1181   segments.Clear();
1182   MakeSegmentsForSuggestion("ろうは", &segments);
1183   EXPECT_FALSE(predictor->Predict(&segments));
1184 
1185   segments.Clear();
1186   MakeSegmentsForSuggestion("たろうははな", &segments);
1187   EXPECT_TRUE(predictor->Predict(&segments));
1188 
1189   segments.Clear();
1190   MakeSegmentsForSuggestion("はなこにむ", &segments);
1191   EXPECT_TRUE(predictor->Predict(&segments));
1192 
1193   segments.Clear();
1194   MakeSegmentsForSuggestion("むずかし", &segments);
1195   EXPECT_TRUE(predictor->Predict(&segments));
1196 
1197   segments.Clear();
1198   MakeSegmentsForSuggestion("はなこにむずかしいほ", &segments);
1199   EXPECT_TRUE(predictor->Predict(&segments));
1200 
1201   segments.Clear();
1202   MakeSegmentsForSuggestion("ほんをよま", &segments);
1203   EXPECT_TRUE(predictor->Predict(&segments));
1204 
1205   Util::Sleep(1000);
1206 
1207   // Add new entry "たろうはよしこに/太郎は良子に"
1208   segments.Clear();
1209   MakeSegmentsForConversion("たろうは", &segments);
1210   AddCandidate(0, "太郎は", &segments);
1211   predictor->Finish(*convreq_, &segments);
1212   segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
1213 
1214   AddSegmentForConversion("よしこに", &segments);
1215   AddCandidate(1, "良子に", &segments);
1216   predictor->Finish(*convreq_, &segments);
1217   segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);
1218 
1219   segments.Clear();
1220   MakeSegmentsForSuggestion("たろうは", &segments);
1221   EXPECT_TRUE(predictor->Predict(&segments));
1222   EXPECT_EQ("太郎は良子に", segments.segment(0).candidate(0).value);
1223   EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
1224               Segment::Candidate::USER_HISTORY_PREDICTOR);
1225 }
1226 
TEST_F(UserHistoryPredictorTest,MultiSegmentsSingleInput)1227 TEST_F(UserHistoryPredictorTest, MultiSegmentsSingleInput) {
1228   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
1229   predictor->WaitForSyncer();
1230   predictor->ClearAllHistory();
1231   predictor->WaitForSyncer();
1232 
1233   Segments segments;
1234 
1235   MakeSegmentsForConversion("たろうは", &segments);
1236   AddCandidate(0, "太郎は", &segments);
1237 
1238   AddSegmentForConversion("はなこに", &segments);
1239   AddCandidate(1, "花子に", &segments);
1240 
1241   AddSegmentForConversion("むずかしい", &segments);
1242   AddCandidate(2, "難しい", &segments);
1243 
1244   AddSegmentForConversion("ほんを", &segments);
1245   AddCandidate(3, "本を", &segments);
1246 
1247   AddSegmentForConversion("よませた", &segments);
1248   AddCandidate(4, "読ませた", &segments);
1249 
1250   predictor->Finish(*convreq_, &segments);
1251 
1252   segments.Clear();
1253   MakeSegmentsForSuggestion("たろうは", &segments);
1254   EXPECT_TRUE(predictor->Predict(&segments));
1255 
1256   segments.Clear();
1257   MakeSegmentsForSuggestion("た", &segments);
1258   EXPECT_FALSE(predictor->Predict(&segments));
1259 
1260   segments.Clear();
1261   MakeSegmentsForSuggestion("たろうははな", &segments);
1262   EXPECT_TRUE(predictor->Predict(&segments));
1263 
1264   segments.Clear();
1265   MakeSegmentsForSuggestion("ろうははな", &segments);
1266   EXPECT_FALSE(predictor->Predict(&segments));
1267 
1268   segments.Clear();
1269   MakeSegmentsForSuggestion("はなこにむ", &segments);
1270   EXPECT_TRUE(predictor->Predict(&segments));
1271 
1272   segments.Clear();
1273   MakeSegmentsForSuggestion("むずかし", &segments);
1274   EXPECT_TRUE(predictor->Predict(&segments));
1275 
1276   segments.Clear();
1277   MakeSegmentsForSuggestion("はなこにむずかしいほ", &segments);
1278   EXPECT_TRUE(predictor->Predict(&segments));
1279 
1280   segments.Clear();
1281   MakeSegmentsForSuggestion("ほんをよま", &segments);
1282   EXPECT_TRUE(predictor->Predict(&segments));
1283 
1284   Util::Sleep(1000);
1285 
1286   // Add new entry "たろうはよしこに/太郎は良子に"
1287   segments.Clear();
1288   MakeSegmentsForConversion("たろうは", &segments);
1289   AddCandidate(0, "太郎は", &segments);
1290   predictor->Finish(*convreq_, &segments);
1291   segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
1292 
1293   AddSegmentForConversion("よしこに", &segments);
1294   AddCandidate(1, "良子に", &segments);
1295   predictor->Finish(*convreq_, &segments);
1296   segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);
1297 
1298   segments.Clear();
1299   MakeSegmentsForSuggestion("たろうは", &segments);
1300   EXPECT_TRUE(predictor->Predict(&segments));
1301   EXPECT_EQ("太郎は良子に", segments.segment(0).candidate(0).value);
1302   EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
1303               Segment::Candidate::USER_HISTORY_PREDICTOR);
1304 }
1305 
// Commits two four-segment sentences that each contain "、" and end with
// "。", then checks the top suggestion for the prefix "とうきょうは、".
TEST_F(UserHistoryPredictorTest, Regression2843371_Case1) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // {reading, committed value} of each segment of the two sentences.
  const char *const kFirstSentence[][2] = {
      {"とうきょうは", "東京は"},
      {"、", "、"},
      {"にほんです", "日本です"},
      {"。", "。"},
  };
  const char *const kSecondSentence[][2] = {
      {"らーめんは", "ラーメンは"},
      {"、", "、"},
      {"めんるいです", "麺類です"},
      {"。", "。"},
  };

  Segments segments;

  size_t position = 0;
  for (const auto &entry : kFirstSentence) {
    if (position == 0) {
      MakeSegmentsForConversion(entry[0], &segments);
    } else {
      AddSegmentForConversion(entry[0], &segments);
    }
    AddCandidate(position, entry[1], &segments);
    ++position;
  }
  predictor->Finish(*convreq_, &segments);

  segments.Clear();

  Util::Sleep(1000);

  position = 0;
  for (const auto &entry : kSecondSentence) {
    if (position == 0) {
      MakeSegmentsForConversion(entry[0], &segments);
    } else {
      AddSegmentForConversion(entry[0], &segments);
    }
    AddCandidate(position, entry[1], &segments);
    ++position;
  }
  predictor->Finish(*convreq_, &segments);

  segments.Clear();

  MakeSegmentsForSuggestion("とうきょうは、", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));

  // The expected value stops before the trailing "。".
  EXPECT_EQ("東京は、日本です", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
}
1355 
// Commits one eleven-segment sentence containing two parenthesized parts and
// checks the top suggestion for the prefix "えど(", looking it up twice on
// the same segments.
TEST_F(UserHistoryPredictorTest, Regression2843371_Case2) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // {reading, committed value} of each segment, in order.
  const char *const kSentence[][2] = {
      {"えど", "江戸"},
      {"(", "("},
      {"とうきょう", "東京"},
      {")", ")"},
      {"は", "は"},
      {"えぞ", "蝦夷"},
      {"(", "("},
      {"ほっかいどう", "北海道"},
      {")", ")"},
      {"ではない", "ではない"},
      {"。", "。"},
  };

  Segments segments;

  size_t position = 0;
  for (const auto &entry : kSentence) {
    if (position == 0) {
      MakeSegmentsForConversion(entry[0], &segments);
    } else {
      AddSegmentForConversion(entry[0], &segments);
    }
    AddCandidate(position, entry[1], &segments);
    ++position;
  }
  predictor->Finish(*convreq_, &segments);

  segments.Clear();

  MakeSegmentsForSuggestion("えど(", &segments);

  // Predict twice without resetting |segments|; the top candidate must be the
  // same both times.
  for (int attempt = 0; attempt < 2; ++attempt) {
    EXPECT_TRUE(predictor->Predict(&segments));
    EXPECT_EQ("江戸(東京", segments.segment(0).candidate(0).value);
    EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
                Segment::Candidate::USER_HISTORY_PREDICTOR);
  }
}
1413 
// Commits two six-segment sentences that share the pattern 「X」はY。 and
// checks the top suggestion for the prefix of the first one.
TEST_F(UserHistoryPredictorTest, Regression2843371_Case3) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // {reading, committed value} of each segment of the two sentences.
  const char *const kMountainSentence[][2] = {
      {"「", "「"},
      {"やま", "山"},
      {"」", "」"},
      {"は", "は"},
      {"たかい", "高い"},
      {"。", "。"},
  };
  const char *const kSeaSentence[][2] = {
      {"「", "「"},
      {"うみ", "海"},
      {"」", "」"},
      {"は", "は"},
      {"ふかい", "深い"},
      {"。", "。"},
  };

  Segments segments;

  size_t position = 0;
  for (const auto &entry : kMountainSentence) {
    if (position == 0) {
      MakeSegmentsForConversion(entry[0], &segments);
    } else {
      AddSegmentForConversion(entry[0], &segments);
    }
    AddCandidate(position, entry[1], &segments);
    ++position;
  }
  predictor->Finish(*convreq_, &segments);

  Util::Sleep(2000);

  segments.Clear();

  position = 0;
  for (const auto &entry : kSeaSentence) {
    if (position == 0) {
      MakeSegmentsForConversion(entry[0], &segments);
    } else {
      AddSegmentForConversion(entry[0], &segments);
    }
    AddCandidate(position, entry[1], &segments);
    ++position;
  }
  predictor->Finish(*convreq_, &segments);

  segments.Clear();

  MakeSegmentsForSuggestion("「やま」は", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));

  // The expected value stops before the trailing "。".
  EXPECT_EQ("「山」は高い", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
}
1475 
// Commits "そうです" followed by a segment whose text itself begins with "。"
// and checks that the concatenation of both is the top suggestion for the
// first segment's key.
TEST_F(UserHistoryPredictorTest, Regression2843775) {
  const char kHead[] = "そうです";

  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // Learn the two segments with a single Finish() call.
  Segments segments;
  MakeSegmentsForConversion(kHead, &segments);
  AddCandidate(0, kHead, &segments);
  AddSegmentForConversion("。よろしくおねがいします", &segments);
  AddCandidate(1, "。よろしくお願いします", &segments);
  predictor->Finish(*convreq_, &segments);

  segments.Clear();

  MakeSegmentsForSuggestion(kHead, &segments);
  EXPECT_TRUE(predictor->Predict(&segments));

  EXPECT_EQ("そうです。よろしくお願いします",
            segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
}
1502 
// Commits a sentence in which the "(" segment occurs twice and checks that
// suggestions do not join parts belonging to different occurrences.
TEST_F(UserHistoryPredictorTest, DuplicateString) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // {reading, committed value} of each segment, in order.
  const char *const kSentence[][2] = {
      {"らいおん", "ライオン"},
      {"(", "("},
      {"もうじゅう", "猛獣"},
      {")と", ")と"},
      {"ぞうりむし", "ゾウリムシ"},
      {"(", "("},
      {"びせいぶつ", "微生物"},
      {")", ")"},
  };

  Segments segments;

  size_t position = 0;
  for (const auto &entry : kSentence) {
    if (position == 0) {
      MakeSegmentsForConversion(entry[0], &segments);
    } else {
      AddSegmentForConversion(entry[0], &segments);
    }
    AddCandidate(position, entry[1], &segments);
    ++position;
  }
  predictor->Finish(*convreq_, &segments);

  segments.Clear();

  MakeSegmentsForSuggestion("ぞうりむし", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));

  // "猛獣" should not be found in any candidate for "ぞうりむし".
  for (int i = 0; i < segments.segment(0).candidates_size(); ++i) {
    const string &candidate_value = segments.segment(0).candidate(i).value;
    EXPECT_EQ(string::npos, candidate_value.find("猛獣"));
  }

  segments.Clear();

  MakeSegmentsForSuggestion("らいおん", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));

  // "ライオン(微生物" should not be found in any candidate for "らいおん".
  for (int i = 0; i < segments.segment(0).candidates_size(); ++i) {
    const string &candidate_value = segments.segment(0).candidate(i).value;
    EXPECT_EQ(string::npos, candidate_value.find("ライオン(微生物"));
  }
}
1558 
// A single operation executed by SyncTest: what to do, plus the key/value
// pair used by the INSERT and LOOKUP operations.
struct Command {
  // Kinds of operation; a default-constructed Command is a LOOKUP.
  enum Type { LOOKUP, INSERT, SYNC, WAIT };

  Type type;
  string key;
  string value;

  Command() : type(LOOKUP), key(), value() {}
};
1571 
// Stress test: runs 10000 randomly chosen operations (sync, wait, insert,
// lookup) against the predictor.  No results are checked; the test only
// exercises the calls.
TEST_F(UserHistoryPredictorTest, SyncTest) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();

  // Build the script up front.  Util::Random(100) picks the operation:
  // 0 -> WAIT, 1..9 -> SYNC, 10..49 -> INSERT, everything else -> LOOKUP.
  std::vector<Command> commands(10000);
  for (size_t i = 0; i < commands.size(); ++i) {
    Command &command = commands[i];
    const string id = std::to_string(static_cast<uint32>(i));
    command.key = id + "key";
    command.value = id + "value";

    const int dice = Util::Random(100);
    Command::Type picked = Command::LOOKUP;
    if (dice == 0) {
      picked = Command::WAIT;
    } else if (dice < 10) {
      picked = Command::SYNC;
    } else if (dice < 50) {
      picked = Command::INSERT;
    }
    command.type = picked;
  }

  // Kind of stress test
  Segments segments;
  for (const Command &command : commands) {
    switch (command.type) {
      case Command::SYNC:
        predictor->Sync();
        break;
      case Command::WAIT:
        predictor->WaitForSyncer();
        break;
      case Command::INSERT:
        segments.Clear();
        MakeSegmentsForConversion(command.key, &segments);
        AddCandidate(command.value, &segments);
        predictor->Finish(*convreq_, &segments);
        break;
      case Command::LOOKUP:
        segments.Clear();
        MakeSegmentsForSuggestion(command.key, &segments);
        predictor->Predict(&segments);
        break;
      default:
        break;
    }
  }
}
1618 
// Pins the value returned by UserHistoryPredictor::GetMatchType() for each
// combination of empty, equal, prefix and unrelated arguments.
TEST_F(UserHistoryPredictorTest, GetMatchTypeTest) {
  typedef UserHistoryPredictor Predictor;

  // Empty second argument: no match, whether or not the first is empty.
  EXPECT_EQ(Predictor::NO_MATCH, Predictor::GetMatchType("test", ""));
  EXPECT_EQ(Predictor::NO_MATCH, Predictor::GetMatchType("", ""));

  // Empty first argument with a non-empty second one.
  EXPECT_EQ(Predictor::LEFT_EMPTY_MATCH, Predictor::GetMatchType("", "test"));

  // Unrelated strings.
  EXPECT_EQ(Predictor::NO_MATCH, Predictor::GetMatchType("foo", "bar"));

  // Identical strings.
  EXPECT_EQ(Predictor::EXACT_MATCH, Predictor::GetMatchType("foo", "foo"));

  // First argument is a prefix of the second.
  EXPECT_EQ(Predictor::LEFT_PREFIX_MATCH,
            Predictor::GetMatchType("foo", "foobar"));

  // Second argument is a prefix of the first.
  EXPECT_EQ(Predictor::RIGHT_PREFIX_MATCH,
            Predictor::GetMatchType("foobar", "foo"));
}
1641 
// Checks that the fingerprint helpers agree with each other for the same
// (key, value) pair and that the entry type is reflected in the fingerprint.
TEST_F(UserHistoryPredictorTest, FingerPrintTest) {
  const char kReading[] = "abc";
  const char kSurface[] = "ABC";

  // Fingerprints computed directly from (key, value), without and with an
  // explicit entry type.
  const uint32 fp_plain = UserHistoryPredictor::Fingerprint(kReading, kSurface);
  const uint32 fp_default_type = UserHistoryPredictor::Fingerprint(
      kReading, kSurface, UserHistoryPredictor::Entry::DEFAULT_ENTRY);
  const uint32 fp_clean_all = UserHistoryPredictor::Fingerprint(
      kReading, kSurface, UserHistoryPredictor::Entry::CLEAN_ALL_EVENT);
  const uint32 fp_clean_unused = UserHistoryPredictor::Fingerprint(
      kReading, kSurface, UserHistoryPredictor::Entry::CLEAN_UNUSED_EVENT);

  // Fingerprint computed from an Entry carrying the same key and value.
  UserHistoryPredictor::Entry entry;
  entry.set_key(kReading);
  entry.set_value(kSurface);
  const uint32 fp_entry = UserHistoryPredictor::EntryFingerprint(entry);

  // Gives |segment| the key |segment_key| and a single candidate whose key
  // and value (plain and content variants) are kReading / kSurface.
  const auto fill_segment = [&](const char *segment_key, Segment *segment) {
    segment->set_key(segment_key);
    Segment::Candidate *candidate = segment->add_candidate();
    candidate->key = kReading;
    candidate->content_key = kReading;
    candidate->value = kSurface;
    candidate->content_value = kSurface;
  };

  Segment matching_key_segment;
  fill_segment(kReading, &matching_key_segment);
  const uint32 fp_segment =
      UserHistoryPredictor::SegmentFingerprint(matching_key_segment);

  // Same candidate, but the segment's own key differs from the candidate key.
  Segment other_key_segment;
  fill_segment("ab", &other_key_segment);
  const uint32 fp_segment_other_key =
      UserHistoryPredictor::SegmentFingerprint(other_key_segment);

  // The default entry type yields the plain fingerprint.
  EXPECT_EQ(fp_plain, fp_entry);
  EXPECT_EQ(fp_plain, fp_default_type);

  // The clean-up event types yield fingerprints distinct from the plain one
  // and from each other.
  EXPECT_NE(fp_plain, fp_clean_all);
  EXPECT_NE(fp_plain, fp_clean_unused);
  EXPECT_NE(fp_clean_all, fp_clean_unused);

  // Both segments produce the same fingerprint as the entry, even though
  // their segment keys differ.
  EXPECT_EQ(fp_segment, fp_entry);
  EXPECT_EQ(fp_segment, fp_plain);
  EXPECT_EQ(fp_segment, fp_segment_other_key);
}
1701 
// Checks that Uint32ToString() and StringToUint32() round-trip, and that
// StringToUint32() yields 0 for the malformed inputs below.
TEST_F(UserHistoryPredictorTest, Uint32ToStringTest) {
  // Encodes |value| and immediately decodes it again.
  const auto round_trip = [](uint32 value) {
    return UserHistoryPredictor::StringToUint32(
        UserHistoryPredictor::Uint32ToString(value));
  };

  // A couple of hand-picked values.
  const uint32 kSamples[] = {123, 12141};
  for (const uint32 sample : kSamples) {
    EXPECT_EQ(sample, round_trip(sample));
  }

  // Every value in [0, 10000).
  for (uint32 value = 0; value < 10000; ++value) {
    EXPECT_EQ(value, round_trip(value));
  }

  // Empty input.
  EXPECT_EQ(0, UserHistoryPredictor::StringToUint32(""));

  // Input that is not four bytes long.
  EXPECT_EQ(0, UserHistoryPredictor::StringToUint32("abcdef"));
}
1723 
TEST_F(UserHistoryPredictorTest,GetScore)1724 TEST_F(UserHistoryPredictorTest, GetScore) {
1725   // latest value has higher score.
1726   {
1727     UserHistoryPredictor::Entry entry1, entry2;
1728 
1729     entry1.set_key("abc");
1730     entry1.set_value("ABC");
1731     entry1.set_last_access_time(10);
1732 
1733     entry2.set_key("foo");
1734     entry2.set_value("ABC");
1735     entry2.set_last_access_time(20);
1736 
1737     EXPECT_GT(UserHistoryPredictor::GetScore(entry2),
1738               UserHistoryPredictor::GetScore(entry1));
1739   }
1740 
1741   // shorter value has higher score.
1742   {
1743     UserHistoryPredictor::Entry entry1, entry2;
1744 
1745     entry1.set_key("abc");
1746     entry1.set_value("ABC");
1747     entry1.set_last_access_time(10);
1748 
1749     entry2.set_key("foo");
1750     entry2.set_value("ABCD");
1751     entry2.set_last_access_time(10);
1752 
1753     EXPECT_GT(UserHistoryPredictor::GetScore(entry1),
1754               UserHistoryPredictor::GetScore(entry2));
1755   }
1756 
1757   // bigram boost makes the entry stronger
1758   {
1759     UserHistoryPredictor::Entry entry1, entry2;
1760 
1761     entry1.set_key("abc");
1762     entry1.set_value("ABC");
1763     entry1.set_last_access_time(10);
1764 
1765     entry2.set_key("foo");
1766     entry2.set_value("ABC");
1767     entry2.set_last_access_time(10);
1768     entry2.set_bigram_boost(true);
1769 
1770     EXPECT_GT(UserHistoryPredictor::GetScore(entry2),
1771               UserHistoryPredictor::GetScore(entry1));
1772   }
1773 
1774   // bigram boost makes the entry stronger
1775   {
1776     UserHistoryPredictor::Entry entry1, entry2;
1777 
1778     entry1.set_key("abc");
1779     entry1.set_value("ABCD");
1780     entry1.set_last_access_time(10);
1781     entry1.set_bigram_boost(true);
1782 
1783     entry2.set_key("foo");
1784     entry2.set_value("ABC");
1785     entry2.set_last_access_time(50);
1786 
1787     EXPECT_GT(UserHistoryPredictor::GetScore(entry1),
1788               UserHistoryPredictor::GetScore(entry2));
1789   }
1790 }
1791 
TEST_F(UserHistoryPredictorTest,IsValidEntry)1792 TEST_F(UserHistoryPredictorTest, IsValidEntry) {
1793   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
1794 
1795   UserHistoryPredictor::Entry entry;
1796 
1797   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1798 
1799   entry.set_key("key");
1800   entry.set_value("value");
1801 
1802   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1803   EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
1804       entry, Request::UNICODE_EMOJI));
1805 
1806   entry.set_removed(true);
1807   EXPECT_FALSE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1808   EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
1809       entry, Request::UNICODE_EMOJI));
1810 
1811   entry.set_removed(false);
1812   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1813   EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
1814       entry, Request::UNICODE_EMOJI));
1815 
1816   entry.set_entry_type(UserHistoryPredictor::Entry::CLEAN_ALL_EVENT);
1817   EXPECT_FALSE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1818   EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
1819       entry, Request::UNICODE_EMOJI));
1820 
1821   entry.set_entry_type(UserHistoryPredictor::Entry::CLEAN_UNUSED_EVENT);
1822   EXPECT_FALSE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1823   EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
1824       entry, Request::UNICODE_EMOJI));
1825 
1826   entry.set_removed(true);
1827   EXPECT_FALSE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1828   EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
1829       entry, Request::UNICODE_EMOJI));
1830 
1831   entry.Clear();
1832   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1833   EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
1834       entry, Request::UNICODE_EMOJI));
1835 
1836   entry.Clear();
1837   entry.set_key("key");
1838   entry.set_value("value");
1839   entry.set_description("絵文字");
1840   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1841   EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
1842       entry, Request::UNICODE_EMOJI));
1843   EXPECT_FALSE(predictor->IsValidEntry(entry, 0));
1844   EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(entry, 0));
1845 
1846   // An android pua emoji example. (Note: 0xFE000 is in the region).
1847   Util::UCS4ToUTF8(0xFE000, entry.mutable_value());
1848   EXPECT_FALSE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1849   EXPECT_FALSE(predictor->IsValidEntry(entry, 0));
1850   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::DOCOMO_EMOJI));
1851   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::SOFTBANK_EMOJI));
1852   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::KDDI_EMOJI));
1853 
1854   EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
1855       entry, Request::UNICODE_EMOJI));
1856   EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(entry, 0));
1857   EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
1858       entry, Request::DOCOMO_EMOJI));
1859   EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
1860       entry, Request::SOFTBANK_EMOJI));
1861   EXPECT_TRUE(
1862       predictor->IsValidEntryIgnoringRemovedField(entry, Request::KDDI_EMOJI));
1863 
1864   SuppressionDictionary *d = GetSuppressionDictionary();
1865   DCHECK(d);
1866   d->Lock();
1867   d->AddEntry("foo", "bar");
1868   d->UnLock();
1869 
1870   entry.set_key("key");
1871   entry.set_value("value");
1872   EXPECT_TRUE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1873   EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
1874       entry, Request::UNICODE_EMOJI));
1875 
1876   entry.set_key("foo");
1877   entry.set_value("bar");
1878   EXPECT_FALSE(predictor->IsValidEntry(entry, Request::UNICODE_EMOJI));
1879   EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
1880       entry, Request::UNICODE_EMOJI));
1881 
1882   d->Lock();
1883   d->Clear();
1884   d->UnLock();
1885 }
1886 
// Checks which entry properties make IsValidSuggestion() accept an entry
// when it is called with 1 as its second argument.
TEST_F(UserHistoryPredictorTest, IsValidSuggestion) {
  const auto kDefault = UserHistoryPredictor::DEFAULT;
  const auto kZeroQuery = UserHistoryPredictor::ZERO_QUERY_SUGGESTION;

  UserHistoryPredictor::Entry entry;

  // A pristine entry is rejected for the DEFAULT request type.
  EXPECT_FALSE(UserHistoryPredictor::IsValidSuggestion(kDefault, 1, entry));

  // Bigram boost makes it acceptable.
  entry.set_bigram_boost(true);
  EXPECT_TRUE(UserHistoryPredictor::IsValidSuggestion(kDefault, 1, entry));

  // Without the boost, a zero-query request is still accepted.
  entry.set_bigram_boost(false);
  EXPECT_TRUE(UserHistoryPredictor::IsValidSuggestion(kZeroQuery, 1, entry));

  // Without the boost, a conversion frequency of 10 is enough for DEFAULT.
  entry.set_bigram_boost(false);
  entry.set_conversion_freq(10);
  EXPECT_TRUE(UserHistoryPredictor::IsValidSuggestion(kDefault, 1, entry));
}
1906 
// Exercises EntryPriorityQueue: entry allocation, pop ordering, and the
// handling of entries that share the same key and value.
TEST_F(UserHistoryPredictorTest, EntryPriorityQueueTest) {
  const int kSize = 10000;

  // Allocate many entries and never push them; per the original note they
  // are removed automatically along with the queue.
  {
    UserHistoryPredictor::EntryPriorityQueue queue;
    for (int i = 0; i < kSize; ++i) {
      EXPECT_NE(nullptr, queue.NewEntry());
    }
  }

  // Entries pushed with increasing access times come back in reverse order.
  {
    UserHistoryPredictor::EntryPriorityQueue queue;
    std::vector<UserHistoryPredictor::Entry *> pushed;
    for (int i = 0; i < kSize; ++i) {
      const std::string label = "test" + std::to_string(i);
      UserHistoryPredictor::Entry *entry = queue.NewEntry();
      entry->set_key(label);
      entry->set_value(label);
      entry->set_last_access_time(i + 1000);
      pushed.push_back(entry);
      EXPECT_TRUE(queue.Push(entry));
    }

    // Pop() signals exhaustion by returning nullptr.
    int next_index = kSize - 1;
    for (const UserHistoryPredictor::Entry *popped = queue.Pop();
         popped != nullptr; popped = queue.Pop()) {
      EXPECT_EQ(pushed[next_index], popped);
      --next_index;
    }
    // Exactly kSize entries must have been popped.
    EXPECT_EQ(-1, next_index);
  }

  // Pushing the same key/value repeatedly grows the queue only once per
  // distinct pair.
  {
    UserHistoryPredictor::EntryPriorityQueue queue;
    for (int i = 0; i < 5; ++i) {
      UserHistoryPredictor::Entry *duplicate = queue.NewEntry();
      duplicate->set_key("test");
      duplicate->set_value("test");
      queue.Push(duplicate);
    }
    EXPECT_EQ(1, queue.size());

    for (int i = 0; i < 5; ++i) {
      UserHistoryPredictor::Entry *duplicate = queue.NewEntry();
      duplicate->set_key("foo");
      duplicate->set_value("bar");
      queue.Push(duplicate);
    }
    EXPECT_EQ(2, queue.size());
  }
}
1961 
1962 namespace {
1963 
// Returns a copy of |input| without its last UCS4 character. An empty
// |input| yields an empty string.
string RemoveLastUCS4Character(const string &input) {
  const size_t total_chars = Util::CharsLen(input);
  if (total_chars == 0) {
    return "";
  }

  // Re-encode every character except the final one.
  string truncated;
  size_t remaining = total_chars - 1;
  ConstChar32Iterator it(input);
  while (!it.Done() && remaining > 0) {
    Util::UCS4ToUTF8Append(it.Get(), &truncated);
    it.Next();
    --remaining;
  }
  return truncated;
}
1979 
// One scenario for PrivacySensitiveTest: a committed (input, output) pair
// together with the expected outcome of later prediction requests.
struct PrivacySensitiveTestData {
  // When true, the test expects Predict() to fail for this scenario; when
  // false, it expects Predict() to succeed.
  bool is_sensitive;
  // Human-readable label; streamed into the failure message of the test.
  const char *scenario_description;
  // Key that is committed via a conversion segment.
  const char *input;
  // Candidate value that is committed for |input|.
  const char *output;
};

// Readable names for PrivacySensitiveTestData::is_sensitive.
const bool kSensitive = true;
const bool kNonSensitive = false;
1989 
1990 const PrivacySensitiveTestData kNonSensitiveCases[] = {
1991   {
1992     kNonSensitive,  // We might want to revisit this behavior
1993     "Type privacy sensitive number but it is commited as full-width number "
1994     "by mistake.",
1995     "0007",
1996     "0007"
1997   }, {
1998     kNonSensitive,
1999     "Type a ZIP number.",
2000     "100-0001",
2001     "東京都千代田区千代田"
2002   }, {
2003     kNonSensitive,  // We might want to revisit this behavior
2004     "Type privacy sensitive number but the result contains one or more "
2005     "non-ASCII character such as full-width dash.",
2006     "1111-1111",
2007     "1111-1111"
2008   }, {
2009     kNonSensitive,  // We might want to revisit this behavior
2010     "User dictionary contains a credit card number.",
2011     "かーどばんごう",
2012     "0000-0000-0000-0000"
2013   }, {
2014     kNonSensitive,  // We might want to revisit this behavior
2015     "User dictionary contains a credit card number.",
2016     "かーどばんごう",
2017     "0000000000000000"
2018   }, {
2019     kNonSensitive,  // We might want to revisit this behavior
2020     "User dictionary contains privacy sensitive information.",
2021     "ぱすわーど",
2022     "ywwz1sxm"
2023   }, {
2024     kNonSensitive,  // We might want to revisit this behavior
2025     "Input privacy sensitive text by Roman-input mode by mistake and then "
2026     "hit F10 key to convert it to half-alphanumeric text. In this case "
2027     "we assume all the alphabetical characters are consumed by Roman-input "
2028     "rules.",
2029     "いあ1ぼ3ぅ",
2030     "ia1bo3xu"
2031   }, {
2032     kNonSensitive,
2033     "Katakana to English transliteration.",  // http://b/4394325
2034     "おれんじ",
2035     "Orange"
2036   }, {
2037     kNonSensitive,
2038     "Input a very common English word which should be included in our "
2039     "system dictionary by Roman-input mode by mistake and "
2040     "then hit F10 key to convert it to half-alphanumeric text.",
2041     "おらんげ",
2042     "orange"
2043   }, {
2044     kSensitive,
2045     "Input a password-like text.",
2046     "123abc!",
2047     "123abc!",
2048   }, {
2049     kSensitive,
2050     "Input privacy sensitive text by Roman-input mode by mistake and then "
2051     "hit F10 key to convert it to half-alphanumeric text. In this case, "
2052     "there may remain one or more alphabetical characters, which have not "
2053     "been consumed by Roman-input rules.",
2054     "yっwz1sxm",
2055     "ywwz1sxm"
2056   }, {
2057     kNonSensitive,
2058     "Type a very common English word all in lower case which should be "
2059     "included in our system dictionary without capitalization.",
2060     "variable",
2061     "variable"
2062   }, {
2063     kNonSensitive,
2064     "Type a very common English word all in upper case whose lower case "
2065     "should be included in our system dictionary.",
2066     "VARIABLE",
2067     "VARIABLE"
2068   }, {
2069     kNonSensitive,
2070     "Type a very common English word with capitalization whose lower case "
2071     "should be included in our system dictionary.",
2072     "Variable",
2073     "Variable"
2074   }, {
2075     kSensitive,  // We might want to revisit this behavior
2076     "Type a very common English word with random capitalization, which "
2077     "should be treated as case SENSITIVE.",
2078     "vArIaBle",
2079     "vArIaBle"
2080   }, {
2081     kSensitive,
2082     "Type an English word in lower case but only its upper case form is "
2083     "stored in dictionary.",
2084     "upper",
2085     "upper",
2086   }, {
2087     kSensitive,  // We might want to revisit this behavior
2088     "Type just a number.",
2089     "2398402938402934",
2090     "2398402938402934"
2091   }, {
2092     kSensitive,  // We might want to revisit this behavior
2093     "Type an common English word which might be included in our system "
2094     "dictionary with number postfix.",
2095     "Orange10000",
2096     "Orange10000"
2097   },
2098 };
2099 
2100 }  // namespace
2101 
TEST_F(UserHistoryPredictorTest,PrivacySensitiveTest)2102 TEST_F(UserHistoryPredictorTest, PrivacySensitiveTest) {
2103   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
2104 
2105   // Add those words to the mock dictionary that are assumed to exist in privacy
2106   // sensitive filtering.
2107   const char *kEnglishWords[] = {
2108     "variable", "UPPER",
2109   };
2110   for (size_t i = 0; i < arraysize(kEnglishWords); ++i) {
2111     // LookupPredictive is used in UserHistoryPredictor::IsPrivacySensitive().
2112     GetDictionaryMock()->AddLookupExact(
2113         kEnglishWords[i], kEnglishWords[i], kEnglishWords[i], Token::NONE);
2114   }
2115 
2116   for (size_t i = 0; i < arraysize(kNonSensitiveCases); ++i) {
2117     predictor->ClearAllHistory();
2118     predictor->WaitForSyncer();
2119 
2120     const PrivacySensitiveTestData &data = kNonSensitiveCases[i];
2121     const string description(data.scenario_description);
2122     const string input(data.input);
2123     const string output(data.output);
2124     const string &partial_input = RemoveLastUCS4Character(input);
2125     const bool expect_sensitive = data.is_sensitive;
2126 
2127     // Initial commit.
2128     {
2129       Segments segments;
2130       MakeSegmentsForConversion(input, &segments);
2131       AddCandidate(0, output, &segments);
2132       predictor->Finish(*convreq_, &segments);
2133     }
2134 
2135     // TODO(yukawa): Refactor the scenario runner below by making
2136     //     some utility functions.
2137 
2138     // Check suggestion
2139     {
2140       Segments segments;
2141       MakeSegmentsForSuggestion(partial_input, &segments);
2142       if (expect_sensitive) {
2143         EXPECT_FALSE(predictor->Predict(&segments))
2144           << description << " input: " << input << " output: " << output;
2145       } else {
2146         EXPECT_TRUE(predictor->Predict(&segments))
2147           << description << " input: " << input << " output: " << output;
2148       }
2149       segments.Clear();
2150       MakeSegmentsForPrediction(input, &segments);
2151       if (expect_sensitive) {
2152         EXPECT_FALSE(predictor->Predict(&segments))
2153           << description << " input: " << input << " output: " << output;
2154       } else {
2155         EXPECT_TRUE(predictor->Predict(&segments))
2156           << description << " input: " << input << " output: " << output;
2157       }
2158     }
2159 
2160     // Check Prediction
2161     {
2162       Segments segments;
2163       MakeSegmentsForPrediction(partial_input, &segments);
2164       if (expect_sensitive) {
2165         EXPECT_FALSE(predictor->Predict(&segments))
2166           << description << " input: " << input << " output: " << output;
2167       } else {
2168         EXPECT_TRUE(predictor->Predict(&segments))
2169           << description << " input: " << input << " output: " << output;
2170       }
2171       segments.Clear();
2172       MakeSegmentsForPrediction(input, &segments);
2173       if (expect_sensitive) {
2174         EXPECT_FALSE(predictor->Predict(&segments))
2175           << description << " input: " << input << " output: " << output;
2176       } else {
2177         EXPECT_TRUE(predictor->Predict(&segments))
2178           << description << " input: " << input << " output: " << output;
2179       }
2180     }
2181   }
2182 }
2183 
// Documents a known issue: a password-like input that is committed as
// several segments is not treated as privacy sensitive, so it can still be
// suggested and predicted afterwards.
TEST_F(UserHistoryPredictorTest, PrivacySensitiveMultiSegmentsTest) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();

  // Keys used for the look-ups after the commit: the committed text without
  // its last character, and the complete committed text.
  const char kPartialKey[] = "123abc";
  const char kFullKey[] = "123abc!";

  // Commit "123" + "abc!" as two separate segments.
  {
    Segments committed;
    MakeSegmentsForConversion("123", &committed);
    AddSegmentForConversion("abc!", &committed);
    AddCandidate(0, "123", &committed);
    AddCandidate(1, "abc!", &committed);
    predictor->Finish(*convreq_, &committed);
  }

  // Suggestion requests succeed for both keys.
  {
    Segments suggestion;
    MakeSegmentsForSuggestion(kPartialKey, &suggestion);
    EXPECT_TRUE(predictor->Predict(&suggestion));
    suggestion.Clear();
    MakeSegmentsForSuggestion(kFullKey, &suggestion);
    EXPECT_TRUE(predictor->Predict(&suggestion));
  }

  // Prediction requests succeed for both keys as well.
  {
    Segments prediction;
    MakeSegmentsForPrediction(kPartialKey, &prediction);
    EXPECT_TRUE(predictor->Predict(&prediction));
    prediction.Clear();
    MakeSegmentsForPrediction(kFullKey, &prediction);
    EXPECT_TRUE(predictor->Predict(&prediction));
  }
}
2218 
// Saves a storage holding one entry and checks that a second storage bound
// to the same file loads identical contents.
TEST_F(UserHistoryPredictorTest, UserHistoryStorage) {
  const string filename =
      FileUtil::JoinPath(SystemUtil::GetUserProfileDirectory(), "test");

  UserHistoryStorage storage1(filename);

  UserHistoryPredictor::Entry *entry = storage1.user_history_base.add_entries();
  CHECK(entry);
  // Populate both fields. The second call used to be set_key("value"), which
  // overwrote the key and left the value empty.
  entry->set_key("key");
  entry->set_value("value");
  storage1.Save();

  UserHistoryStorage storage2(filename);
  storage2.Load();

  EXPECT_EQ(storage1.user_history_base.DebugString(),
            storage2.user_history_base.DebugString());

  // Remove the temporary file created by Save().
  FileUtil::Unlink(filename);
}
2236 
// Table-driven check of RomanFuzzyPrefixMatch() for the kinds of typing
// slips listed next to each group of rows.
TEST_F(UserHistoryPredictorTest, RomanFuzzyPrefixMatch) {
  const struct {
    const char *str;
    const char *prefix;
    bool expected;
  } kCases[] = {
    // Identical strings are not fuzzy matches.
    {"abc", "abc", false},
    {"a", "a", false},

    // Exact prefixes (including the empty one) are not fuzzy matches.
    {"abc", "a", false},
    {"abc", "ab", false},
    {"abc", "", false},

    // Two adjacent characters swapped.
    {"ab", "ba", true},
    {"abfoo", "bafoo", true},
    {"fooab", "fooba", true},
    {"fooabfoo", "foobafoo", true},

    // Swap combined with a prefix.
    {"fooabfoo", "fooba", true},

    // One character dropped.
    {"abcd", "acd", true},
    {"abcd", "bcd", true},

    // Drop combined with a prefix.
    {"abcdf", "acd", true},
    {"abcdfoo", "bcd", true},

    // "-" mistyped as a neighbouring symbol (voice sound mark).
    {"gu-guru", "gu^guru", true},
    {"gu-guru", "gu=guru", true},
    {"gu-guru", "gu^gu", true},
    {"gu-guru", "gugu", false},

    // Invalid inputs.
    {"", "", false},
    {"", "a", false},
    {"abcde", "defe", false},
  };

  for (const auto &test : kCases) {
    EXPECT_EQ(test.expected,
              UserHistoryPredictor::RomanFuzzyPrefixMatch(test.str,
                                                          test.prefix))
        << "str: " << test.str << " prefix: " << test.prefix;
  }
}
2282 
// Checks which keys MaybeRomanMisspelledKey() flags as possibly containing
// a Roman-input typing slip.
TEST_F(UserHistoryPredictorTest, MaybeRomanMisspelledKey) {
  // Hiragana with a single leftover half-width letter.
  EXPECT_TRUE(UserHistoryPredictor::MaybeRomanMisspelledKey("こんぴゅーt"));
  // Same key with a full-width letter. This line used to repeat the
  // half-width case verbatim, which added no coverage.
  // NOTE(review): assumes a full-width letter is classified like its ASCII
  // counterpart -- confirm against Util::GetScriptType().
  EXPECT_TRUE(UserHistoryPredictor::MaybeRomanMisspelledKey("こんぴゅーｔ"));
  EXPECT_FALSE(UserHistoryPredictor::MaybeRomanMisspelledKey("こんぴゅーた"));
  EXPECT_TRUE(UserHistoryPredictor::MaybeRomanMisspelledKey("ぱsこん"));
  EXPECT_FALSE(UserHistoryPredictor::MaybeRomanMisspelledKey("ぱそこん"));
  EXPECT_TRUE(
      UserHistoryPredictor::MaybeRomanMisspelledKey("おねがいしまうs"));
  EXPECT_FALSE(UserHistoryPredictor::MaybeRomanMisspelledKey("おねがいします"));
  // A single symbol among hiragana.
  EXPECT_TRUE(UserHistoryPredictor::MaybeRomanMisspelledKey("いんた=ねっと"));
  // A lone letter without any hiragana vs. one preceded by "ー".
  EXPECT_FALSE(UserHistoryPredictor::MaybeRomanMisspelledKey("t"));
  EXPECT_TRUE(UserHistoryPredictor::MaybeRomanMisspelledKey("ーt"));
  // Two letters
  EXPECT_FALSE(
      UserHistoryPredictor::MaybeRomanMisspelledKey("おnがいしまうs"));
  // Two unknowns
  EXPECT_FALSE(
      UserHistoryPredictor::MaybeRomanMisspelledKey("お&がい$しまう"));
  // One alpha and one unknown
  EXPECT_FALSE(
      UserHistoryPredictor::MaybeRomanMisspelledKey("お&がいしまうs"));
}
2304 
// Checks the string GetRomanMisspelledKey() derives from a segment key under
// the ROMAN and KANA preedit methods.
TEST_F(UserHistoryPredictorTest, GetRomanMisspelledKey) {
  Segments segments;
  Segment *segment = segments.add_segment();
  segment->set_segment_type(Segment::FREE);
  Segment::Candidate *candidate = segment->add_candidate();
  candidate->value = "test";

  struct KeyExpectation {
    const char *segment_key;
    const char *expected;
  };

  // ROMAN preedit: only the key with a stray trailing letter yields a
  // non-empty result.
  config_->set_preedit_method(config::Config::ROMAN);
  const KeyExpectation kRomanCases[] = {
    {"", ""},
    {"おねがいしまうs", "onegaisimaus"},
    {"おねがいします", ""},
  };
  for (const KeyExpectation &test : kRomanCases) {
    segment->set_key(test.segment_key);
    EXPECT_EQ(test.expected,
              UserHistoryPredictor::GetRomanMisspelledKey(*convreq_, segments))
        << test.segment_key;
  }

  // KANA preedit: the result is empty for both keys.
  config_->set_preedit_method(config::Config::KANA);
  const KeyExpectation kKanaCases[] = {
    {"おねがいしまうs", ""},
    {"おねがいします", ""},
  };
  for (const KeyExpectation &test : kKanaCases) {
    segment->set_key(test.segment_key);
    EXPECT_EQ(test.expected,
              UserHistoryPredictor::GetRomanMisspelledKey(*convreq_, segments))
        << test.segment_key;
  }
}
2336 
// Checks RomanFuzzyLookupEntry() against mistyped Roman spellings of the
// entry key. All calls share one result queue, so the call order is kept.
TEST_F(UserHistoryPredictorTest, RomanFuzzyLookupEntry) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  UserHistoryPredictor::Entry entry;
  UserHistoryPredictor::EntryPriorityQueue results;

  // Looks up |roman_input| against the current |entry|.
  const auto fuzzy_lookup = [&](const char *roman_input) {
    return predictor->RomanFuzzyLookupEntry(roman_input, &entry, &results);
  };

  // Empty key and empty input.
  entry.set_key("");
  EXPECT_FALSE(fuzzy_lookup(""));

  // Inputs with one missing letter hit; an input missing several letters and
  // the correctly typed input do not.
  entry.set_key("よろしく");
  EXPECT_TRUE(fuzzy_lookup("yorosku"));
  EXPECT_TRUE(fuzzy_lookup("yrosiku"));
  EXPECT_TRUE(fuzzy_lookup("yorsiku"));
  EXPECT_FALSE(fuzzy_lookup("yrsk"));
  EXPECT_FALSE(fuzzy_lookup("yorosiku"));

  // "=" typed in place of "-" hits; the correct "-" and an input with an
  // additional missing letter do not.
  entry.set_key("ぐーぐる");
  EXPECT_TRUE(fuzzy_lookup("gu=guru"));
  EXPECT_FALSE(fuzzy_lookup("gu-guru"));
  EXPECT_FALSE(fuzzy_lookup("g=guru"));
}
2357 
namespace {
// One row of the ExpandedLookup* tests below.
struct LookupTestData {
  // Key assigned to the entry that is passed to LookupEntry().
  const string entry_key;
  // Value LookupEntry() is expected to return for that entry.
  const bool expect_result;
};
}  // namespace
2364 
TEST_F(UserHistoryPredictorTest,ExpandedLookupRoman)2365 TEST_F(UserHistoryPredictorTest, ExpandedLookupRoman) {
2366   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
2367   UserHistoryPredictor::Entry entry;
2368   UserHistoryPredictor::EntryPriorityQueue results;
2369 
2370   // Roman
2371   // preedit: "あk"
2372   // input_key: "あk"
2373   // key_base: "あ"
2374   // key_expanded: "か","き","く","け", "こ"
2375   unique_ptr<Trie<string>> expanded(new Trie<string>);
2376   expanded->AddEntry("か", "");
2377   expanded->AddEntry("き", "");
2378   expanded->AddEntry("く", "");
2379   expanded->AddEntry("け", "");
2380   expanded->AddEntry("こ", "");
2381 
2382   const LookupTestData kTests1[] = {
2383     { "", false },
2384     { "あか", true },
2385     { "あき", true },
2386     { "あかい", true },
2387     { "あまい", false },
2388     { "あ", false },
2389     { "さか", false },
2390     { "さき", false },
2391     { "さかい", false },
2392     { "さまい", false },
2393     { "さ", false },
2394   };
2395 
2396   // with expanded
2397   for (size_t i = 0; i < arraysize(kTests1); ++i) {
2398     entry.set_key(kTests1[i].entry_key);
2399     EXPECT_EQ(kTests1[i].expect_result, predictor->LookupEntry(
2400         UserHistoryPredictor::DEFAULT,
2401         "あk", "あ",
2402         expanded.get(), &entry, nullptr, &results))
2403         << kTests1[i].entry_key;
2404   }
2405 
2406   // only expanded
2407   // preedit: "k"
2408   // input_key: ""
2409   // key_base: ""
2410   // key_expanded: "か","き","く","け", "こ"
2411 
2412   const LookupTestData kTests2[] = {
2413     { "", false },
2414     { "か", true },
2415     { "き", true },
2416     { "かい", true },
2417     { "まい", false },
2418     { "も", false },
2419   };
2420 
2421   for (size_t i = 0; i < arraysize(kTests2); ++i) {
2422     entry.set_key(kTests2[i].entry_key);
2423     EXPECT_EQ(kTests2[i].expect_result, predictor->LookupEntry(
2424         UserHistoryPredictor::DEFAULT,
2425         "", "", expanded.get(), &entry, nullptr, &results))
2426         << kTests2[i].entry_key;
2427   }
2428 }
2429 
TEST_F(UserHistoryPredictorTest,ExpandedLookupKana)2430 TEST_F(UserHistoryPredictorTest, ExpandedLookupKana) {
2431   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
2432   UserHistoryPredictor::Entry entry;
2433   UserHistoryPredictor::EntryPriorityQueue results;
2434 
2435   // Kana
2436   // preedit: "あし"
2437   // input_key: "あし"
2438   // key_base: "あ"
2439   // key_expanded: "し","じ"
2440   unique_ptr<Trie<string>> expanded(new Trie<string>);
2441   expanded->AddEntry("し", "");
2442   expanded->AddEntry("じ", "");
2443 
2444   const LookupTestData kTests1[] = {
2445       { "", false },
2446       { "あ", false },
2447       { "あし", true },
2448       { "あじ", true },
2449       { "あしかゆい", true },
2450       { "あじうまい", true },
2451       { "あまにがい", false },
2452       { "あめ", false },
2453       { "まし", false },
2454       { "まじ", false },
2455       { "ましなあじ", false },
2456       { "まじうまい", false },
2457       { "ままにがい", false },
2458       { "まめ", false },
2459   };
2460 
2461   // with expanded
2462   for (size_t i = 0; i < arraysize(kTests1); ++i) {
2463     entry.set_key(kTests1[i].entry_key);
2464     EXPECT_EQ(kTests1[i].expect_result, predictor->LookupEntry(
2465         UserHistoryPredictor::DEFAULT,
2466         "あし", "あ",
2467         expanded.get(), &entry, nullptr, &results))
2468         << kTests1[i].entry_key;
2469   }
2470 
2471   // only expanded
2472   // input_key: "し"
2473   // key_base: ""
2474   // key_expanded: "し","じ"
2475   const LookupTestData kTests2[] = {
2476       { "", false },
2477       { "し", true },
2478       { "じ", true },
2479       { "しかうまい", true },
2480       { "じゅうかい", true },
2481       { "ま", false },
2482       { "まめ", false },
2483   };
2484 
2485   for (size_t i = 0; i < arraysize(kTests2); ++i) {
2486     entry.set_key(kTests2[i].entry_key);
2487     EXPECT_EQ(kTests2[i].expect_result, predictor->LookupEntry(
2488         UserHistoryPredictor::DEFAULT,
2489         "し", "", expanded.get(), &entry, nullptr, &results))
2490         << kTests2[i].entry_key;
2491   }
2492 }
2493 
TEST_F(UserHistoryPredictorTest, GetMatchTypeFromInputRoman) {
  // UserHistoryPredictor::MatchType is private, so the row type of the case
  // tables must be declared inside the test body.
  struct MatchTypeTestData {
    const string target;
    const UserHistoryPredictor::MatchType expect_type;
  };

  // Roman
  // preedit: "あk"
  // input_key: "あ"
  // key_base: "あ"
  // key_expanded: "か","き","く","け", "こ"
  unique_ptr<Trie<string>> expanded(new Trie<string>);
  expanded->AddEntry("か", "か");
  expanded->AddEntry("き", "き");
  expanded->AddEntry("く", "く");
  expanded->AddEntry("け", "け");
  expanded->AddEntry("こ", "こ");

  // Case 1: base "あ" followed by one of the expanded characters.
  const MatchTypeTestData kBaseAndExpandedCases[] = {
      {"", UserHistoryPredictor::NO_MATCH},
      {"い", UserHistoryPredictor::NO_MATCH},
      {"あ", UserHistoryPredictor::RIGHT_PREFIX_MATCH},
      {"あい", UserHistoryPredictor::NO_MATCH},
      {"あか", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"あかい", UserHistoryPredictor::LEFT_PREFIX_MATCH},
  };

  for (const MatchTypeTestData &test : kBaseAndExpandedCases) {
    EXPECT_EQ(test.expect_type,
              UserHistoryPredictor::GetMatchTypeFromInput(
                  "あ", "あ", expanded.get(), test.target))
        << test.target;
  }

  // Case 2: only the expanded part.
  // preedit: "k"
  // input_key: ""
  // key_base: ""
  // key_expanded: "か","き","く","け", "こ"
  const MatchTypeTestData kExpandedOnlyCases[] = {
      {"", UserHistoryPredictor::NO_MATCH},
      {"い", UserHistoryPredictor::NO_MATCH},
      {"いか", UserHistoryPredictor::NO_MATCH},
      {"か", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"かいがい", UserHistoryPredictor::LEFT_PREFIX_MATCH},
  };

  for (const MatchTypeTestData &test : kExpandedOnlyCases) {
    EXPECT_EQ(test.expect_type,
              UserHistoryPredictor::GetMatchTypeFromInput(
                  "", "", expanded.get(), test.target))
        << test.target;
  }
}
2551 
TEST_F(UserHistoryPredictorTest, GetMatchTypeFromInputKana) {
  // UserHistoryPredictor::MatchType is private, so the row type of the case
  // tables must be declared inside the test body.
  struct MatchTypeTestData {
    const string target;
    const UserHistoryPredictor::MatchType expect_type;
  };

  // Kana
  // preedit: "あし"
  // input_key: "あし"
  // key_base: "あ"
  // key_expanded: "し","じ"
  unique_ptr<Trie<string>> expanded(new Trie<string>);
  expanded->AddEntry("し", "し");
  expanded->AddEntry("じ", "じ");

  // Case 1: base "あ" followed by one of the expanded characters.
  const MatchTypeTestData kBaseAndExpandedCases[] = {
      {"", UserHistoryPredictor::NO_MATCH},
      {"い", UserHistoryPredictor::NO_MATCH},
      {"いし", UserHistoryPredictor::NO_MATCH},
      {"あ", UserHistoryPredictor::RIGHT_PREFIX_MATCH},
      {"あし", UserHistoryPredictor::EXACT_MATCH},
      {"あじ", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"あした", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"あじしお", UserHistoryPredictor::LEFT_PREFIX_MATCH},
  };

  for (const MatchTypeTestData &test : kBaseAndExpandedCases) {
    EXPECT_EQ(test.expect_type,
              UserHistoryPredictor::GetMatchTypeFromInput(
                  "あし", "あ", expanded.get(), test.target))
        << test.target;
  }

  // Case 2: only the expanded part.
  // preedit: "し"
  // input_key: "し"
  // key_base: ""
  // key_expanded: "し","じ"
  const MatchTypeTestData kExpandedOnlyCases[] = {
      {"", UserHistoryPredictor::NO_MATCH},
      {"い", UserHistoryPredictor::NO_MATCH},
      {"し", UserHistoryPredictor::EXACT_MATCH},
      {"じ", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"しじみ", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"じかん", UserHistoryPredictor::LEFT_PREFIX_MATCH},
  };

  for (const MatchTypeTestData &test : kExpandedOnlyCases) {
    EXPECT_EQ(test.expect_type,
              UserHistoryPredictor::GetMatchTypeFromInput(
                  "し", "", expanded.get(), test.target))
        << test.target;
  }
}
2609 
2610 namespace {
InitSegmentsFromInputSequence(const string & text,composer::Composer * composer,ConversionRequest * request,Segments * segments)2611 void InitSegmentsFromInputSequence(const string &text,
2612                                    composer::Composer *composer,
2613                                    ConversionRequest *request,
2614                                    Segments *segments) {
2615   DCHECK(composer);
2616   DCHECK(request);
2617   DCHECK(segments);
2618   const char *begin = text.data();
2619   const char *end = text.data() + text.size();
2620   size_t mblen = 0;
2621 
2622   while (begin < end) {
2623     commands::KeyEvent key;
2624     const char32 w = Util::UTF8ToUCS4(begin, end, &mblen);
2625     if (Util::GetCharacterSet(w) == Util::ASCII) {
2626       key.set_key_code(*begin);
2627     } else {
2628       key.set_key_code('?');
2629       key.set_key_string(string(begin, mblen));
2630     }
2631     begin += mblen;
2632     composer->InsertCharacterKeyEvent(key);
2633   }
2634 
2635   request->set_composer(composer);
2636 
2637   segments->set_request_type(Segments::PREDICTION);
2638   Segment *segment = segments->add_segment();
2639   CHECK(segment);
2640   string query;
2641   composer->GetQueryForPrediction(&query);
2642   segment->set_key(query);
2643 }
2644 }  // namespace
2645 
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsRoman) {
  // Types "gu-g" with the romaji table and checks GetInputKeyFromSegments()
  // with the expansion flag turned on and then off.
  table_->LoadFromFile("system://romanji-hiragana.tsv");
  composer_->SetTable(table_.get());
  Segments segments;

  InitSegmentsFromInputSequence("gu-g",
                                composer_.get(),
                                convreq_.get(),
                                &segments);

  {
    // Expansion enabled: the trailing "g" stays in |input_key|, is absent
    // from |base|, and an expansion trie is produced.
    FLAGS_enable_expansion_for_user_history_predictor = true;
    string input_key;
    string base;
    unique_ptr<Trie<string>> expanded;
    UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
                                                  segments,
                                                  &input_key,
                                                  &base,
                                                  &expanded);
    EXPECT_EQ("ぐーg", input_key);
    EXPECT_EQ("ぐー", base);
    // Fatal assertion: |expanded| is dereferenced right below.
    ASSERT_TRUE(expanded != nullptr);
    string value;
    size_t key_length = 0;
    bool has_subtrie = false;
    EXPECT_TRUE(
        expanded->LookUpPrefix("ぐ", &value, &key_length, &has_subtrie));
    EXPECT_EQ("ぐ", value);
  }

  {
    // Expansion disabled: |input_key| equals |base| and no trie is created.
    FLAGS_enable_expansion_for_user_history_predictor = false;
    string input_key;
    string base;
    unique_ptr<Trie<string>> expanded;
    UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
                                                  segments,
                                                  &input_key,
                                                  &base,
                                                  &expanded);
    EXPECT_EQ("ぐー", input_key);
    EXPECT_EQ("ぐー", base);
    EXPECT_TRUE(expanded == nullptr);
  }
}
2692 
2693 namespace {
GetRandomAscii()2694 uint32 GetRandomAscii() {
2695   return static_cast<uint32>(' ') +
2696       Util::Random(static_cast<uint32>('~' - ' '));
2697 }
2698 }  // namespace
2699 
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsRomanRandom) {
  // Smoke test: feeds 1000 short random ASCII sequences through the romaji
  // table.  There are no value expectations; the test only requires that
  // GetInputKeyFromSegments() completes for each input.
  FLAGS_enable_expansion_for_user_history_predictor = true;
  table_->LoadFromFile("system://romanji-hiragana.tsv");
  composer_->SetTable(table_.get());
  Segments segments;

  for (size_t i = 0; i < 1000; ++i) {
    // Start each iteration from a clean state.  InitSegmentsFromInputSequence()
    // appends a segment, so without Clear() |segments| would keep growing.
    composer_->Reset();
    segments.Clear();

    const int len = 1 + Util::Random(4);
    DCHECK_GE(len, 1);
    DCHECK_LE(len, 5);
    string input;
    for (int j = 0; j < len; ++j) {
      input += static_cast<char>(GetRandomAscii());
    }
    InitSegmentsFromInputSequence(input,
                                  composer_.get(),
                                  convreq_.get(),
                                  &segments);
    string input_key;
    string base;
    unique_ptr<Trie<string>> expanded;
    UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
                                                  segments,
                                                  &input_key,
                                                  &base,
                                                  &expanded);
  }
}
2729 
// Found by random test.
// input_key != base due to composer modification.
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsShouldNotCrash) {
  // Regression input "8,+": the test has no value expectations and only
  // requires that GetInputKeyFromSegments() returns.
  FLAGS_enable_expansion_for_user_history_predictor = true;
  table_->LoadFromFile("system://romanji-hiragana.tsv");
  composer_->SetTable(table_.get());

  Segments segments;
  InitSegmentsFromInputSequence(
      "8,+", composer_.get(), convreq_.get(), &segments);

  string input_key;
  string base;
  unique_ptr<Trie<string>> expanded;
  UserHistoryPredictor::GetInputKeyFromSegments(
      *convreq_, segments, &input_key, &base, &expanded);
}
2753 
TEST_F(UserHistoryPredictorTest,GetInputKeyFromSegmentsRomanN)2754 TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsRomanN) {
2755   FLAGS_enable_expansion_for_user_history_predictor = true;
2756   table_->LoadFromFile("system://romanji-hiragana.tsv");
2757   composer_->SetTable(table_.get());
2758   Segments segments;
2759 
2760   {
2761     InitSegmentsFromInputSequence(
2762         "n", composer_.get(), convreq_.get(), &segments);
2763     string input_key;
2764     string base;
2765     unique_ptr<Trie<string>> expanded;
2766     UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
2767                                                   segments,
2768                                                   &input_key,
2769                                                   &base,
2770                                                   &expanded);
2771     EXPECT_EQ("n", input_key);
2772     EXPECT_EQ("", base);
2773     EXPECT_TRUE(expanded != nullptr);
2774     string value;
2775     size_t key_length = 0;
2776     bool has_subtrie = false;
2777     EXPECT_TRUE(
2778         expanded->LookUpPrefix("な", &value, &key_length, &has_subtrie));
2779     EXPECT_EQ("な", value);
2780   }
2781 
2782   composer_->Reset();
2783   segments.Clear();
2784   {
2785     InitSegmentsFromInputSequence(
2786         "nn", composer_.get(), convreq_.get(), &segments);
2787     string input_key;
2788     string base;
2789     unique_ptr<Trie<string>> expanded;
2790     UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
2791                                                   segments,
2792                                                   &input_key,
2793                                                   &base,
2794                                                   &expanded);
2795     EXPECT_EQ("ん", input_key);
2796     EXPECT_EQ("ん", base);
2797     EXPECT_TRUE(expanded == nullptr);
2798   }
2799 
2800   composer_->Reset();
2801   segments.Clear();
2802   {
2803     InitSegmentsFromInputSequence("n'", composer_.get(),
2804                                   convreq_.get(), &segments);
2805     string input_key;
2806     string base;
2807     unique_ptr<Trie<string>> expanded;
2808     UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
2809                                                   segments,
2810                                                   &input_key,
2811                                                   &base,
2812                                                   &expanded);
2813     EXPECT_EQ("ん", input_key);
2814     EXPECT_EQ("ん", base);
2815     EXPECT_TRUE(expanded == nullptr);
2816   }
2817 
2818   composer_->Reset();
2819   segments.Clear();
2820   {
2821     InitSegmentsFromInputSequence("n'n", composer_.get(),
2822                                   convreq_.get(), &segments);
2823     string input_key;
2824     string base;
2825     unique_ptr<Trie<string>> expanded;
2826     UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
2827                                                   segments,
2828                                                   &input_key,
2829                                                   &base,
2830                                                   &expanded);
2831     EXPECT_EQ("んn", input_key);
2832     EXPECT_EQ("ん", base);
2833     EXPECT_TRUE(expanded != nullptr);
2834     string value;
2835     size_t key_length = 0;
2836     bool has_subtrie = false;
2837     EXPECT_TRUE(
2838         expanded->LookUpPrefix("な",
2839                                &value, &key_length, &has_subtrie));
2840     EXPECT_EQ("な", value);
2841   }
2842 }
2843 
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsFlickN) {
  // With the flick table, the key sequence "/" is expected to give input key
  // "ん", an empty base, and an expansion trie that contains "ん".
  FLAGS_enable_expansion_for_user_history_predictor = true;
  table_->LoadFromFile("system://flick-hiragana.tsv");
  composer_->SetTable(table_.get());
  Segments segments;

  {
    InitSegmentsFromInputSequence("/", composer_.get(), convreq_.get(),
                                  &segments);
    string input_key;
    string base;
    unique_ptr<Trie<string>> expanded;
    UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
                                                  segments,
                                                  &input_key,
                                                  &base,
                                                  &expanded);
    EXPECT_EQ("ん", input_key);
    EXPECT_EQ("", base);
    // Fatal assertion: |expanded| is dereferenced right below.
    ASSERT_TRUE(expanded != nullptr);
    string value;
    size_t key_length = 0;
    bool has_subtrie = false;
    EXPECT_TRUE(
        expanded->LookUpPrefix("ん", &value, &key_length, &has_subtrie));
    EXPECT_EQ("ん", value);
  }
}
2872 
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegments12KeyN) {
  // With the 12-key table, the sequence "わ00" is expected to give input key
  // "ん", an empty base, and an expansion trie that contains "ん".
  FLAGS_enable_expansion_for_user_history_predictor = true;
  table_->LoadFromFile("system://12keys-hiragana.tsv");
  composer_->SetTable(table_.get());
  Segments segments;

  {
    InitSegmentsFromInputSequence("わ00",
                                  composer_.get(),
                                  convreq_.get(),
                                  &segments);
    string input_key;
    string base;
    unique_ptr<Trie<string>> expanded;
    UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
                                                  segments,
                                                  &input_key,
                                                  &base,
                                                  &expanded);
    EXPECT_EQ("ん", input_key);
    EXPECT_EQ("", base);
    // Fatal assertion: |expanded| is dereferenced right below.
    ASSERT_TRUE(expanded != nullptr);
    string value;
    size_t key_length = 0;
    bool has_subtrie = false;
    EXPECT_TRUE(
        expanded->LookUpPrefix("ん", &value, &key_length, &has_subtrie));
    EXPECT_EQ("ん", value);
  }
}
2903 
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsKana) {
  // Types "あか" with the kana table and checks GetInputKeyFromSegments()
  // with the expansion flag turned on and then off.
  table_->LoadFromFile("system://kana.tsv");
  composer_->SetTable(table_.get());
  Segments segments;

  InitSegmentsFromInputSequence("あか",
                                composer_.get(), convreq_.get(), &segments);

  {
    // Expansion enabled: the last character "か" is split off from |base|
    // and the expansion trie is expected to contain the voiced form "が".
    FLAGS_enable_expansion_for_user_history_predictor = true;
    string input_key;
    string base;
    unique_ptr<Trie<string>> expanded;
    UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
                                                  segments,
                                                  &input_key,
                                                  &base,
                                                  &expanded);
    EXPECT_EQ("あか", input_key);
    EXPECT_EQ("あ", base);
    // Fatal assertion: |expanded| is dereferenced right below.
    ASSERT_TRUE(expanded != nullptr);
    string value;
    size_t key_length = 0;
    bool has_subtrie = false;
    EXPECT_TRUE(
        expanded->LookUpPrefix("が",
                               &value, &key_length, &has_subtrie));
    EXPECT_EQ("が", value);
  }

  {
    // Expansion disabled: |input_key| equals |base| and no trie is created.
    FLAGS_enable_expansion_for_user_history_predictor = false;
    string input_key;
    string base;
    unique_ptr<Trie<string>> expanded;
    UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
                                                  segments,
                                                  &input_key,
                                                  &base,
                                                  &expanded);
    EXPECT_EQ("あか", input_key);
    EXPECT_EQ("あか", base);
    EXPECT_TRUE(expanded == nullptr);
  }
}
2949 
TEST_F(UserHistoryPredictorTest, RealtimeConversionInnerSegment) {
  // Commits one candidate that carries three inner segment boundaries, then
  // checks that predictions can start from an inner segment's key.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;
  {
    const char kKey[] = "わたしのなまえはなかのです";
    const char kValue[] = "私の名前は中野です";
    MakeSegmentsForPrediction(kKey, &segments);
    Segment::Candidate *committed =
        segments.mutable_segment(0)->add_candidate();
    CHECK(committed);
    committed->Init();
    committed->key = kKey;
    committed->content_key = kKey;
    committed->value = kValue;
    committed->content_value = kValue;
    // The four arguments are byte lengths, matching the UTF-8 sizes of the
    // strings quoted in the comment above each call.
    // "わたしの, 私の", "わたし, 私"
    committed->PushBackInnerSegmentBoundary(12, 6, 9, 3);
    // "なまえは, 名前は", "なまえ, 名前"
    committed->PushBackInnerSegmentBoundary(12, 9, 9, 6);
    // "なかのです, 中野です", "なかの, 中野"
    committed->PushBackInnerSegmentBoundary(15, 12, 9, 6);
  }
  predictor->Finish(*convreq_, &segments);
  segments.Clear();

  // The key of the last inner segment predicts that segment.
  MakeSegmentsForPrediction("なかの", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_TRUE(FindCandidateByValue("中野です", segments));

  // The key of the middle inner segment predicts the segment itself and the
  // segment joined with its successor.
  segments.Clear();
  MakeSegmentsForPrediction("なまえ", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_TRUE(FindCandidateByValue("名前は", segments));
  EXPECT_TRUE(FindCandidateByValue("名前は中野です", segments));
}
2989 
TEST_F(UserHistoryPredictorTest, ZeroQueryFromRealtimeConversion) {
  // Commits a candidate with inner segment boundaries, then commits "私の" as
  // history and checks that an empty (zero-query) request suggests "名前は".
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;
  {
    const char kKey[] = "わたしのなまえはなかのです";
    const char kValue[] = "私の名前は中野です";
    MakeSegmentsForPrediction(kKey, &segments);
    Segment::Candidate *candidate =
        segments.mutable_segment(0)->add_candidate();
    CHECK(candidate);
    candidate->Init();
    candidate->value = kValue;
    candidate->content_value = kValue;
    candidate->key = kKey;
    candidate->content_key = kKey;
    // "わたしの, 私の", "わたし, 私"
    candidate->PushBackInnerSegmentBoundary(12, 6, 9, 3);
    // "なまえは, 名前は", "なまえ, 名前"
    candidate->PushBackInnerSegmentBoundary(12, 9, 9, 6);
    // "なかのです, 中野です", "なかの, 中野"
    candidate->PushBackInnerSegmentBoundary(15, 12, 9, 6);
  }
  predictor->Finish(*convreq_, &segments);
  segments.Clear();

  // Commit "私の" and turn the segment into history context.
  MakeSegmentsForConversion("わたしの", &segments);
  AddCandidate(0, "私の", &segments);
  predictor->Finish(*convreq_, &segments);
  segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

  AddSegmentForSuggestion("", &segments);  // empty request
  // Zero-query suggestion is enabled on the fixture's request object.
  request_->set_zero_query_suggestion(true);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  EXPECT_TRUE(FindCandidateByValue("名前は", segments));
}
3030 
TEST_F(UserHistoryPredictorTest, LongCandidateForMobile) {
  // Under a mobile request, a long phrase committed three times should be
  // predicted from its two-character prefix.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  const char kKey[] = "よろしくおねがいします";
  const char kValue[] = "よろしくお願いします";

  Segments segments;
  for (int commit = 0; commit < 3; ++commit) {
    MakeSegmentsForPrediction(kKey, &segments);
    Segment::Candidate *committed =
        segments.mutable_segment(0)->add_candidate();
    CHECK(committed);
    committed->Init();
    committed->key = kKey;
    committed->content_key = kKey;
    committed->value = kValue;
    committed->content_value = kValue;
    predictor->Finish(*convreq_, &segments);
    segments.Clear();
  }

  MakeSegmentsForPrediction("よろ", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  EXPECT_TRUE(FindCandidateByValue("よろしくお願いします", segments));
}
3060 
TEST_F(UserHistoryPredictorTest, EraseNextEntries) {
  // Builds an entry whose next_entries fingerprints are
  // {100, 10, 30, 10, 100} and erases them one fingerprint at a time.
  UserHistoryPredictor::Entry entry;
  entry.add_next_entries()->set_entry_fp(100);
  entry.add_next_entries()->set_entry_fp(10);
  entry.add_next_entries()->set_entry_fp(30);
  entry.add_next_entries()->set_entry_fp(10);
  entry.add_next_entries()->set_entry_fp(100);

  // A fingerprint that is not present leaves the list unchanged.
  UserHistoryPredictor::EraseNextEntries(1234, &entry);
  EXPECT_EQ(5, entry.next_entries_size());

  // The single occurrence of 30 is erased.
  UserHistoryPredictor::EraseNextEntries(30, &entry);
  ASSERT_EQ(4, entry.next_entries_size());
  for (size_t pos = 0; pos < 4; ++pos) {
    EXPECT_NE(30, entry.next_entries(pos).entry_fp());
  }

  // Both occurrences of 10 are erased.
  UserHistoryPredictor::EraseNextEntries(10, &entry);
  ASSERT_EQ(2, entry.next_entries_size());
  for (size_t pos = 0; pos < 2; ++pos) {
    EXPECT_NE(10, entry.next_entries(pos).entry_fp());
  }

  // Erasing 100 empties the list.
  UserHistoryPredictor::EraseNextEntries(100, &entry);
  EXPECT_EQ(0, entry.next_entries_size());
}
3087 
TEST_F(UserHistoryPredictorTest,RemoveNgramChain)3088 TEST_F(UserHistoryPredictorTest, RemoveNgramChain) {
3089   UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();
3090 
3091   // Set up the following chain of next entries:
3092   // ("abc", "ABC")
3093   // (  "a",   "A") --- ("b", "B") --- ("c", "C")
3094   UserHistoryPredictor::Entry *abc = InsertEntry(predictor, "abc", "ABC");
3095   UserHistoryPredictor::Entry *a = InsertEntry(predictor, "a", "A");
3096   UserHistoryPredictor::Entry *b = AppendEntry(predictor, "b", "B", a);
3097   UserHistoryPredictor::Entry *c = AppendEntry(predictor, "c", "C", b);
3098 
3099   std::vector<UserHistoryPredictor::Entry *> entries;
3100   entries.push_back(abc);
3101   entries.push_back(a);
3102   entries.push_back(b);
3103   entries.push_back(c);
3104 
3105   // The method should return NOT_FOUND for key-value pairs not in the chain.
3106   for (size_t i = 0; i < entries.size(); ++i) {
3107     std::vector<StringPiece> dummy1, dummy2;
3108     EXPECT_EQ(UserHistoryPredictor::NOT_FOUND,
3109               predictor->RemoveNgramChain("hoge", "HOGE", entries[i],
3110                                           &dummy1, 0, &dummy2, 0));
3111   }
3112   // Moreover, all nodes and links should be kept.
3113   for (size_t i = 0; i < entries.size(); ++i) {
3114     EXPECT_FALSE(entries[i]->removed());
3115   }
3116   EXPECT_TRUE(IsConnected(*a, *b));
3117   EXPECT_TRUE(IsConnected(*b, *c));
3118 
3119   {
3120     // Try deleting the chain for "abc". Only the link from "b" to "c" should be
3121     // removed.
3122     std::vector<StringPiece> dummy1, dummy2;
3123     EXPECT_EQ(UserHistoryPredictor::DONE,
3124               predictor->RemoveNgramChain("abc", "ABC", a,
3125                                           &dummy1, 0, &dummy2, 0));
3126     for (size_t i = 0; i < entries.size(); ++i) {
3127       EXPECT_FALSE(entries[i]->removed());
3128     }
3129     EXPECT_TRUE(IsConnected(*a, *b));
3130     EXPECT_FALSE(IsConnected(*b, *c));
3131   }
3132   {
3133     // Try deleting the chain for "a". Since this is the head of the chain, the
3134     // function returns TAIL and nothing should be removed.
3135     std::vector<StringPiece> dummy1, dummy2;
3136     EXPECT_EQ(UserHistoryPredictor::TAIL,
3137               predictor->RemoveNgramChain("a", "A", a,
3138                                           &dummy1, 0, &dummy2, 0));
3139     for (size_t i = 0; i < entries.size(); ++i) {
3140       EXPECT_FALSE(entries[i]->removed());
3141     }
3142     EXPECT_TRUE(IsConnected(*a, *b));
3143     EXPECT_FALSE(IsConnected(*b, *c));
3144   }
3145   {
3146     // Further delete the chain for "ab".  Now all the links should be removed.
3147     std::vector<StringPiece> dummy1, dummy2;
3148     EXPECT_EQ(UserHistoryPredictor::DONE,
3149               predictor->RemoveNgramChain("ab", "AB", a,
3150                                           &dummy1, 0, &dummy2, 0));
3151     for (size_t i = 0; i < entries.size(); ++i) {
3152       EXPECT_FALSE(entries[i]->removed());
3153     }
3154     EXPECT_FALSE(IsConnected(*a, *b));
3155     EXPECT_FALSE(IsConnected(*b, *c));
3156   }
3157 }
3158 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Unigram) {
  // Tests ClearHistoryEntry() for unigram history.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Register the unigram history ("japanese", "Japanese").
  UserHistoryPredictor::Entry *entry =
      InsertEntry(predictor, "japanese", "Japanese");

  // Before deletion, "japan" both suggests and predicts "Japanese".
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));

  // Delete the history; the entry must be flagged as removed.
  EXPECT_TRUE(predictor->ClearHistoryEntry("japanese", "Japanese"));
  EXPECT_TRUE(entry->removed());

  // "Japanese" should never be suggested nor predicted for any proper prefix
  // of the key (lengths 0 .. size - 1).
  const string key = "japanese";
  for (size_t len = 0; len < key.size(); ++len) {
    const string prefix = key.substr(0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Japanese"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Japanese"));
  }
}
3183 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Bigram_DeleteWhole) {
  // Tests ClearHistoryEntry() for bigram history.  This case tests the deletion
  // of whole sentence.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Make the history for ("japaneseinput", "JapaneseInput"). It's assumed that
  // this sentence consists of two segments, "japanese" and "input". So, the
  // following history entries are constructed:
  //   ("japaneseinput", "JapaneseInput")  // Unigram
  //   ("japanese", "Japanese") --- ("input", "Input")  // Bigram chain
  UserHistoryPredictor::Entry *japaneseinput;
  UserHistoryPredictor::Entry *japanese;
  UserHistoryPredictor::Entry *input;
  InitHistory_JapaneseInput(predictor, &japaneseinput, &japanese, &input);

  // Check the predictor functionality for the above history structure.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "input", "Input"));

  // Delete the unigram ("japaneseinput", "JapaneseInput").
  EXPECT_TRUE(predictor->ClearHistoryEntry("japaneseinput", "JapaneseInput"));

  // Only the unigram is flagged as removed; the bigram nodes survive but the
  // link between them is cut.
  EXPECT_TRUE(japaneseinput->removed());
  EXPECT_FALSE(japanese->removed());
  EXPECT_FALSE(input->removed());
  EXPECT_FALSE(IsConnected(*japanese, *input));

  // Now "JapaneseInput" should never be suggested nor predicted.  The value
  // checked here must match the inserted value exactly, including case.
  const string key = "japaneseinput";
  for (size_t i = 0; i < key.size(); ++i) {
    const string prefix = key.substr(0, i);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "JapaneseInput"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "JapaneseInput"));
  }

  // However, predictor should show "Japanese" and "Input".
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
}
3224 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Bigram_DeleteFirst) {
  // Tests ClearHistoryEntry() for bigram history: deleting the first node of
  // the bigram chain.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Build the history for ("japaneseinput", "JapaneseInput"), i.e. the same
  // structure used by ClearHistoryEntry_Bigram_DeleteWhole.
  UserHistoryPredictor::Entry *japaneseinput;
  UserHistoryPredictor::Entry *japanese;
  UserHistoryPredictor::Entry *input;
  InitHistory_JapaneseInput(predictor, &japaneseinput, &japanese, &input);

  // Sanity-check the predictions before anything is deleted.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "input", "Input"));

  // Delete the first bigram node ("japanese", "Japanese").
  EXPECT_TRUE(predictor->ClearHistoryEntry("japanese", "Japanese"));

  // The first node is flagged as removed, yet its connection to the second
  // node is still valid.
  EXPECT_TRUE(japanese->removed());
  EXPECT_FALSE(japaneseinput->removed());
  EXPECT_FALSE(input->removed());
  EXPECT_TRUE(IsConnected(*japanese, *input));

  // Now "Japanese" should never be suggested nor predicted for any proper
  // prefix of the sentence key.
  const string key = "japaneseinput";
  for (size_t len = 0; len < key.size(); ++len) {
    const string prefix = key.substr(0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Japanese"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Japanese"));
  }

  // However, predictor should show "JapaneseInput" and "Input".
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
}
3263 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Bigram_DeleteSecond) {
  // Exercises ClearHistoryEntry() on a bigram history where the entry being
  // deleted is the second (last) node of the bigram chain.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Set up the history for ("japaneseinput", "JapaneseInput").  The helper
  // hands back the entry of the whole sentence and the entries of its two
  // segments, "japanese" and "input".
  UserHistoryPredictor::Entry *whole = nullptr;
  UserHistoryPredictor::Entry *first = nullptr;
  UserHistoryPredictor::Entry *second = nullptr;
  InitHistory_JapaneseInput(predictor, &whole, &first, &second);

  // Sanity check: everything is visible before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "input", "Input"));

  // Remove the tail of the chain, ("input", "Input").
  EXPECT_TRUE(predictor->ClearHistoryEntry("input", "Input"));

  // Only the second node is flagged as removed; the link from the first node
  // is expected to stay intact.
  EXPECT_FALSE(whole->removed());
  EXPECT_FALSE(first->removed());
  EXPECT_TRUE(second->removed());
  EXPECT_TRUE(IsConnected(*first, *second));

  // "Input" must not show up for any proper prefix of its key (the empty
  // prefix included).
  const string deleted_key = "input";
  for (size_t len = 0; len < deleted_key.size(); ++len) {
    const string prefix(deleted_key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Input"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Input"));
  }

  // "Japanese" and "JapaneseInput" are still available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
}
3300 
TEST_F(UserHistoryPredictorTest,ClearHistoryEntry_Trigram_DeleteWhole)3301 TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteWhole) {
3302   // Tests ClearHistoryEntry() for trigram history.  This case tests the
3303   // deletion of the whole sentence.
3304   UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();
3305 
3306   // Make the history for ("japaneseinputmethod", "JapaneseInputMethod"). It's
3307   // assumed that this sentence consists of three segments, "japanese", "input"
3308   // and "method". So, the following history entries are constructed:
3309   //   ("japaneseinputmethod", "JapaneseInputMethod")  // Unigram
3310   //   ("japanese", "Japanese") -- ("input", "Input") -- ("method", "Method")
3311   UserHistoryPredictor::Entry *japaneseinputmethod;
3312   UserHistoryPredictor::Entry *japanese;
3313   UserHistoryPredictor::Entry *input;
3314   UserHistoryPredictor::Entry *method;
3315   InitHistory_JapaneseInputMethod(predictor, &japaneseinputmethod,
3316                                   &japanese, &input, &method);
3317 
3318   // Delete the history of the whole sentence.
3319   EXPECT_TRUE(predictor->ClearHistoryEntry(
3320       "japaneseinputmethod", "JapaneseInputMethod"));
3321 
3322   // Note that only the link from "input" to "method" was removed.
3323   EXPECT_TRUE(japaneseinputmethod->removed());
3324   EXPECT_FALSE(japanese->removed());
3325   EXPECT_FALSE(input->removed());
3326   EXPECT_FALSE(method->removed());
3327   EXPECT_TRUE(IsConnected(*japanese, *input));
3328   EXPECT_FALSE(IsConnected(*input, *method));
3329 
3330   {
3331     // Now "JapaneseInputMethod" should never be suggested nor predicted.
3332     const string key = "japaneseinputmethod";
3333     for (size_t i = 0; i < key.size(); ++i) {
3334       const string &prefix = key.substr(0, i);
3335       EXPECT_FALSE(IsSuggested(predictor, prefix, "JapaneseInputMethod"));
3336       EXPECT_FALSE(IsPredicted(predictor, prefix, "JapaneseInputMethod"));
3337     }
3338   }
3339   {
3340     // Here's a limitation of chain cut.  Since we have cut the link from
3341     // "input" to "method", the predictor cannot show "InputMethod" although it
3342     // could before.  However, since "InputMethod" is not the direct input by
3343     // the user (user's input was "JapaneseInputMethod" in this case), this
3344     // limitation would be acceptable.
3345     const string key = "inputmethod";
3346     for (size_t i = 0; i < key.size(); ++i) {
3347       const string &prefix = key.substr(0, i);
3348       EXPECT_FALSE(IsSuggested(predictor, prefix, "InputMethod"));
3349       EXPECT_FALSE(IsPredicted(predictor, prefix, "InputMethod"));
3350     }
3351   }
3352 
3353   // The following can be still suggested and predicted.
3354   EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
3355   EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
3356   EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
3357   EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
3358 }
3359 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteFirst) {
  // Exercises ClearHistoryEntry() on a trigram history where the entry being
  // deleted is the first node of the chain.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Set up the history for ("japaneseinputmethod", "JapaneseInputMethod") and
  // obtain the entries of the whole sentence and of its three segments.
  UserHistoryPredictor::Entry *whole = nullptr;
  UserHistoryPredictor::Entry *first = nullptr;
  UserHistoryPredictor::Entry *second = nullptr;
  UserHistoryPredictor::Entry *third = nullptr;
  InitHistory_JapaneseInputMethod(predictor, &whole, &first, &second, &third);

  // Sanity check: everything is visible before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));

  // Remove the first node of the chain.
  EXPECT_TRUE(predictor->ClearHistoryEntry("japanese", "Japanese"));

  // Only the first node is flagged as removed; both links stay alive.
  EXPECT_FALSE(whole->removed());
  EXPECT_TRUE(first->removed());
  EXPECT_FALSE(second->removed());
  EXPECT_FALSE(third->removed());
  EXPECT_TRUE(IsConnected(*first, *second));
  EXPECT_TRUE(IsConnected(*second, *third));

  // "Japanese" must not show up for any proper prefix of the sentence key.
  const string sentence_key = "japaneseinputmethod";
  for (size_t len = 0; len < sentence_key.size(); ++len) {
    const string prefix(sentence_key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Japanese"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Japanese"));
  }

  // These remain available for suggestion and prediction.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
}
3410 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteSecond) {
  // Exercises ClearHistoryEntry() on a trigram history where the entry being
  // deleted is the second node of the chain.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Set up the history for ("japaneseinputmethod", "JapaneseInputMethod") and
  // obtain the entries of the whole sentence and of its three segments.
  UserHistoryPredictor::Entry *whole = nullptr;
  UserHistoryPredictor::Entry *first = nullptr;
  UserHistoryPredictor::Entry *second = nullptr;
  UserHistoryPredictor::Entry *third = nullptr;
  InitHistory_JapaneseInputMethod(predictor, &whole, &first, &second, &third);

  // Sanity check: everything is visible before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));

  // Remove the second node of the chain.
  EXPECT_TRUE(predictor->ClearHistoryEntry("input", "Input"));

  // Only the second node is flagged as removed; both links stay alive.
  EXPECT_FALSE(whole->removed());
  EXPECT_FALSE(first->removed());
  EXPECT_TRUE(second->removed());
  EXPECT_FALSE(third->removed());
  EXPECT_TRUE(IsConnected(*first, *second));
  EXPECT_TRUE(IsConnected(*second, *third));

  // "Input" must not show up for any proper prefix of "inputmethod".
  const string tail_key = "inputmethod";
  for (size_t len = 0; len < tail_key.size(); ++len) {
    const string prefix(tail_key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Input"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Input"));
  }

  // These can still be shown by the predictor.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
}
3461 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteThird) {
  // Exercises ClearHistoryEntry() on a trigram history where the entry being
  // deleted is the third node of the chain.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Set up the history for ("japaneseinputmethod", "JapaneseInputMethod") and
  // obtain the entries of the whole sentence and of its three segments.
  UserHistoryPredictor::Entry *whole = nullptr;
  UserHistoryPredictor::Entry *first = nullptr;
  UserHistoryPredictor::Entry *second = nullptr;
  UserHistoryPredictor::Entry *third = nullptr;
  InitHistory_JapaneseInputMethod(predictor, &whole, &first, &second, &third);

  // Sanity check: everything is visible before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));

  // Remove the third node, "method".
  EXPECT_TRUE(predictor->ClearHistoryEntry("method", "Method"));

  // Only the third node is flagged as removed; both links stay alive.
  EXPECT_FALSE(whole->removed());
  EXPECT_FALSE(first->removed());
  EXPECT_FALSE(second->removed());
  EXPECT_TRUE(third->removed());
  EXPECT_TRUE(IsConnected(*first, *second));
  EXPECT_TRUE(IsConnected(*second, *third));

  // "Method" must not show up for any proper prefix of its key.
  const string deleted_key = "method";
  for (size_t len = 0; len < deleted_key.size(); ++len) {
    const string prefix(deleted_key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Method"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Method"));
  }

  // These can still be shown by the predictor.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
}
3512 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteFirstBigram) {
  // Exercises ClearHistoryEntry() on a trigram history where the deleted
  // sentence corresponds to the first two nodes of the chain.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Set up the history for ("japaneseinputmethod", "JapaneseInputMethod") and
  // obtain the entries of the whole sentence and of its three segments.
  UserHistoryPredictor::Entry *whole = nullptr;
  UserHistoryPredictor::Entry *first = nullptr;
  UserHistoryPredictor::Entry *second = nullptr;
  UserHistoryPredictor::Entry *third = nullptr;
  InitHistory_JapaneseInputMethod(predictor, &whole, &first, &second, &third);

  // Sanity check: everything is visible before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));

  // Remove the sentence made of the first two nodes.
  EXPECT_TRUE(predictor->ClearHistoryEntry("japaneseinput", "JapaneseInput"));

  // None of the four tracked entries is flagged as removed; what goes away is
  // the link from "japanese" to "input", while "input" -- "method" survives.
  EXPECT_FALSE(whole->removed());
  EXPECT_FALSE(first->removed());
  EXPECT_FALSE(second->removed());
  EXPECT_FALSE(third->removed());
  EXPECT_FALSE(IsConnected(*first, *second));
  EXPECT_TRUE(IsConnected(*second, *third));

  // "JapaneseInput" must not show up for any proper prefix of the sentence
  // key.
  const string sentence_key = "japaneseinputmethod";
  for (size_t len = 0; len < sentence_key.size(); ++len) {
    const string prefix(sentence_key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "JapaneseInput"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "JapaneseInput"));
  }

  // Everything else, "JapaneseInputMethod" included, is still available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
}
3565 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteSecondBigram) {
  // Exercises ClearHistoryEntry() on a trigram history where the deleted
  // sentence corresponds to the last two nodes of the chain.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Set up the history for ("japaneseinputmethod", "JapaneseInputMethod") and
  // obtain the entries of the whole sentence and of its three segments.
  UserHistoryPredictor::Entry *whole = nullptr;
  UserHistoryPredictor::Entry *first = nullptr;
  UserHistoryPredictor::Entry *second = nullptr;
  UserHistoryPredictor::Entry *third = nullptr;
  InitHistory_JapaneseInputMethod(predictor, &whole, &first, &second, &third);

  // Sanity check: everything is visible before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));

  // Remove the latter bigram.
  EXPECT_TRUE(predictor->ClearHistoryEntry("inputmethod", "InputMethod"));

  // No entry is flagged as removed; only the link from "input" to "method" is
  // cut.
  EXPECT_FALSE(whole->removed());
  EXPECT_FALSE(first->removed());
  EXPECT_FALSE(second->removed());
  EXPECT_FALSE(third->removed());
  EXPECT_TRUE(IsConnected(*first, *second));
  EXPECT_FALSE(IsConnected(*second, *third));

  // "InputMethod" must be neither suggested nor predicted for any proper
  // prefix of its key.
  const string deleted_key = "inputmethod";
  for (size_t len = 0; len < deleted_key.size(); ++len) {
    const string prefix(deleted_key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "InputMethod"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "InputMethod"));
  }

  // These are still available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
}
3616 
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Scenario1) {
  // Common scenario: the user accidentally commits an incomplete romaji
  // sequence, the predictor learns it, and the user then deletes that entry.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Learn the conversion "ぐーぐr" -> "グーグr" three times to emulate the
  // accidental input of an incomplete sequence.
  int remaining = 3;
  while (remaining-- > 0) {
    Segments committed;
    MakeSegmentsForConversion("ぐーぐr", &committed);
    AddCandidate("グーグr", &committed);
    predictor->Finish(*convreq_, &committed);
  }

  // The predictor should have learned "グーグr".
  EXPECT_TRUE(IsSuggested(predictor, "ぐーぐ", "グーグr"));
  EXPECT_TRUE(IsPredicted(predictor, "ぐーぐ", "グーグr"));

  // The user tries deleting the history ("ぐーぐr", "グーグr").
  EXPECT_TRUE(predictor->ClearHistoryEntry("ぐーぐr", "グーグr"));

  // After that, "グーグr" must be shown neither as a suggestion nor as a
  // prediction.
  EXPECT_FALSE(IsSuggested(predictor, "ぐーぐ", "グーグr"));
  EXPECT_FALSE(IsPredicted(predictor, "ぐーぐ", "グーグr"));
}
3642 
TEST_F(UserHistoryPredictorTest,ClearHistoryEntry_Scenario2)3643 TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Scenario2) {
3644   // Tests a common scenario: First, a user inputs a sentence ending with a
3645   // symbol and it's learned by the predictor.  Then, the user deletes the
3646   // history containing the symbol.
3647   UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();
3648 
3649   // Set up history. Convert "きょうもいいてんき!" to "今日もいい天気!" 3 times
3650   // so that the predictor learns the sentence. We assume that this sentence
3651   // consists of three segments: "今日も|いい天気|!".
3652   for (int i = 0; i < 3; ++i) {
3653     Segments segments;
3654     segments.set_request_type(Segments::CONVERSION);
3655 
3656     // The first segment: ("きょうも", "今日も")
3657     Segment *seg = segments.add_segment();
3658     seg->set_key("きょうも");
3659     seg->set_segment_type(Segment::FIXED_VALUE);
3660     Segment::Candidate *candidate = seg->add_candidate();
3661     candidate->Init();
3662     candidate->value = "今日も";
3663     candidate->content_value = "今日";
3664     candidate->key = seg->key();
3665     candidate->content_key = "きょう";
3666 
3667     // The second segment: ("いいてんき", "いい天気")
3668     seg = segments.add_segment();
3669     seg->set_key("いいてんき");
3670     seg->set_segment_type(Segment::FIXED_VALUE);
3671     candidate = seg->add_candidate();
3672     candidate->Init();
3673     candidate->value = "いい天気";
3674     candidate->content_value = candidate->value;
3675     candidate->key = seg->key();
3676     candidate->content_key = seg->key();
3677 
3678     // The third segment: ("!", "!")
3679     seg = segments.add_segment();
3680     seg->set_key("!");
3681     seg->set_segment_type(Segment::FIXED_VALUE);
3682     candidate = seg->add_candidate();
3683     candidate->Init();
3684     candidate->value = "!";
3685     candidate->content_value = "!";
3686     candidate->key = seg->key();
3687     candidate->content_key = seg->key();
3688 
3689     predictor->Finish(*convreq_, &segments);
3690   }
3691 
3692   // Check if the predictor learned the sentence.  Since the symbol is contained
3693   // in one segment, both "今日もいい天気" and "今日もいい天気!" should be
3694   // suggested and predicted.
3695   EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "きょうも", "今日もいい天気"));
3696   EXPECT_TRUE(
3697       IsSuggestedAndPredicted(predictor, "きょうも", "今日もいい天気!"));
3698 
3699   // Now the user deletes the sentence containing the "!".
3700   EXPECT_TRUE(
3701       predictor->ClearHistoryEntry("きょうもいいてんき!", "今日もいい天気!"));
3702 
3703   // The sentence "今日もいい天気" should still be suggested and predicted.
3704   EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "きょうも", "今日もいい天気"));
3705 
3706   // However, "今日もいい天気!" should be neither suggested nor predicted.
3707   EXPECT_FALSE(IsSuggested(predictor, "きょうも", "今日もいい天気!"));
3708   EXPECT_FALSE(IsPredicted(predictor, "きょうも", "今日もいい天気!"));
3709 }
3710 
TEST_F(UserHistoryPredictorTest, ContentWordLearningFromInnerSegmentBoundary) {
  // Commits a single prediction candidate that carries three inner segment
  // boundaries, with content word learning enabled, and then checks which
  // values the predictor offers for the first character of each inner key.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();
  predictor->set_content_word_learning_enabled(true);

  Segments segments;
  {
    const char kReading[] = "とうきょうかなごやにいきたい";
    const char kSurface[] = "東京か名古屋に行きたい";
    MakeSegmentsForPrediction(kReading, &segments);
    Segment::Candidate *learned = segments.mutable_segment(0)->add_candidate();
    learned->Init();
    learned->key = kReading;
    learned->value = kSurface;
    learned->content_key = kReading;
    learned->content_value = kSurface;
    // One call per inner segment, in sentence order.
    // NOTE(review): the four numbers look like UTF-8 byte lengths of key,
    // value, content key and content value -- confirm against
    // Segment::Candidate::PushBackInnerSegmentBoundary().
    learned->PushBackInnerSegmentBoundary(18, 9, 15, 6);
    learned->PushBackInnerSegmentBoundary(12, 12, 9, 9);
    learned->PushBackInnerSegmentBoundary(12, 12, 12, 12);
    predictor->Finish(*convreq_, &segments);
  }

  // "と": both "東京" and "東京か" are expected.
  segments.Clear();
  MakeSegmentsForPrediction("と", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_TRUE(FindCandidateByValue("東京", segments));
  EXPECT_TRUE(FindCandidateByValue("東京か", segments));

  // "な": both "名古屋" and "名古屋に" are expected.
  segments.Clear();
  MakeSegmentsForPrediction("な", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_TRUE(FindCandidateByValue("名古屋", segments));
  EXPECT_TRUE(FindCandidateByValue("名古屋に", segments));

  // "い": "行きたい" is expected.
  segments.Clear();
  MakeSegmentsForPrediction("い", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_TRUE(FindCandidateByValue("行きたい", segments));
}
3750 
TEST_F(UserHistoryPredictorTest, JoinedSegmentsTest_Mobile) {
  // Commits the two-segment conversion "私の" + "名前は" under the mobile
  // request and checks which (joined) candidates come back for several keys.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();
  commands::RequestForUnitTest::FillMobileRequest(request_.get());
  Segments segments;

  MakeSegmentsForConversion("わたしの", &segments);
  AddCandidate(0, "私の", &segments);

  AddSegmentForConversion("なまえは", &segments);
  AddCandidate(1, "名前は", &segments);

  predictor->Finish(*convreq_, &segments);
  segments.Clear();

  // Suggestion for "わたし": only the first segment is expected.
  // The candidate count is checked with ASSERT_EQ because the following lines
  // index into the candidate list; continuing after a mismatch would access a
  // candidate that does not exist.
  MakeSegmentsForSuggestion("わたし", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();

  // Prediction for "わたしの": still only the first segment is expected.
  MakeSegmentsForPrediction("わたしの", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();

  // Prediction for "わたしのな": the joined value of both segments is
  // expected.
  MakeSegmentsForPrediction("わたしのな", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の名前は", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();
}
3792 
TEST_F(UserHistoryPredictorTest, JoinedSegmentsTest_Desktop) {
  // Commits the two-segment conversion "私の" + "名前は" without filling in a
  // mobile request and checks which (joined) candidates come back for several
  // keys.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  MakeSegmentsForConversion("わたしの", &segments);
  AddCandidate(0, "私の", &segments);

  AddSegmentForConversion("なまえは", &segments);
  AddCandidate(1, "名前は", &segments);

  predictor->Finish(*convreq_, &segments);

  segments.Clear();

  // Suggestion for "わたし": the first segment and the joined value are both
  // expected, in this order.
  // The candidate count is checked with ASSERT_EQ because the following lines
  // index into the candidate list; continuing after a mismatch would access a
  // candidate that does not exist.
  MakeSegmentsForSuggestion("わたし", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(2, segments.segment(0).candidates_size());
  EXPECT_EQ("私の", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  EXPECT_EQ("私の名前は", segments.segment(0).candidate(1).value);
  EXPECT_TRUE(segments.segment(0).candidate(1).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();

  // Prediction for "わたしの": only the joined value is expected.
  MakeSegmentsForPrediction("わたしの", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の名前は", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();

  // Prediction for "わたしのな": only the joined value is expected.
  MakeSegmentsForPrediction("わたしのな", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の名前は", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();
}
3838 
TEST_F(UserHistoryPredictorTest, UsageStats) {
  // Checks the "CommitUserHistoryPredictor" and
  // "CommitUserHistoryPredictorZeroQuery" counters after committing
  // candidates whose source_info carries USER_HISTORY_PREDICTOR.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  // Nothing has been committed yet.
  EXPECT_COUNT_STATS("CommitUserHistoryPredictor", 0);
  EXPECT_COUNT_STATS("CommitUserHistoryPredictorZeroQuery", 0);

  // Commit with a non-empty key: only the plain counter goes up.
  MakeSegmentsForConversion("なまえは", &segments);
  AddCandidate(0, "名前は", &segments);
  Segment::Candidate *keyed_commit =
      segments.mutable_conversion_segment(0)->mutable_candidate(0);
  keyed_commit->source_info |= Segment::Candidate::USER_HISTORY_PREDICTOR;
  predictor->Finish(*convreq_, &segments);

  EXPECT_COUNT_STATS("CommitUserHistoryPredictor", 1);
  EXPECT_COUNT_STATS("CommitUserHistoryPredictorZeroQuery", 0);

  segments.Clear();

  // Zero query: commit with an empty key.
  MakeSegmentsForConversion("", &segments);
  AddCandidate(0, "名前は", &segments);
  Segment::Candidate *zero_query_commit =
      segments.mutable_conversion_segment(0)->mutable_candidate(0);
  zero_query_commit->source_info |= Segment::Candidate::USER_HISTORY_PREDICTOR;
  predictor->Finish(*convreq_, &segments);

  // Both the plain and the zero-query counters go up this time.
  EXPECT_COUNT_STATS("CommitUserHistoryPredictor", 2);
  EXPECT_COUNT_STATS("CommitUserHistoryPredictorZeroQuery", 1);
}
3871 
TEST_F(UserHistoryPredictorTest,PunctuationLink_Mobile)3872 TEST_F(UserHistoryPredictorTest, PunctuationLink_Mobile) {
3873   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
3874   predictor->WaitForSyncer();
3875   predictor->ClearAllHistory();
3876   predictor->WaitForSyncer();
3877   commands::RequestForUnitTest::FillMobileRequest(request_.get());
3878   Segments segments;
3879   {
3880     MakeSegmentsForConversion("ございます", &segments);
3881     AddCandidate(0, "ございます", &segments);
3882 
3883     predictor->Finish(*convreq_, &segments);
3884     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3885 
3886     AddSegmentForConversion("!", &segments);
3887     AddCandidate(1, "!", &segments);
3888     predictor->Finish(*convreq_, &segments);
3889 
3890     segments.Clear();
3891     MakeSegmentsForSuggestion("ございま", &segments);
3892     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
3893     EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
3894     EXPECT_FALSE(FindCandidateByValue("ございます!", segments));
3895 
3896     // Zero query from "ございます" -> "!"
3897     segments.Clear();
3898     MakeSegmentsForConversion("ございます", &segments);
3899     AddCandidate(0, "ございます", &segments);
3900     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3901     AddSegmentForSuggestion("", &segments);  // empty request
3902     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
3903     EXPECT_EQ("!", segments.conversion_segment(0).candidate(0).value);
3904   }
3905 
3906   predictor->ClearAllHistory();
3907   predictor->WaitForSyncer();
3908 
3909   {
3910     MakeSegmentsForConversion("!", &segments);
3911     AddCandidate(0, "!", &segments);
3912 
3913     predictor->Finish(*convreq_, &segments);
3914     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3915 
3916     AddSegmentForConversion("ございます", &segments);
3917     AddCandidate(1, "ございます", &segments);
3918     predictor->Finish(*convreq_, &segments);
3919 
3920     // Zero query from "!" -> no suggestion
3921     segments.Clear();
3922     MakeSegmentsForConversion("!", &segments);
3923     AddCandidate(0, "!", &segments);
3924     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3925     AddSegmentForSuggestion("", &segments);  // empty request
3926     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
3927   }
3928 
3929   predictor->ClearAllHistory();
3930   predictor->WaitForSyncer();
3931 
3932   {
3933     MakeSegmentsForConversion("ございます!", &segments);
3934     AddCandidate(0, "ございます!", &segments);
3935 
3936     predictor->Finish(*convreq_, &segments);
3937     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3938 
3939     AddSegmentForConversion("よろしくおねがいします", &segments);
3940     AddCandidate(1, "よろしくお願いします", &segments);
3941     predictor->Finish(*convreq_, &segments);
3942 
3943     // Zero query from "!" -> no suggestion
3944     segments.Clear();
3945     MakeSegmentsForConversion("!", &segments);
3946     AddCandidate(0, "!", &segments);
3947     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3948     AddSegmentForSuggestion("", &segments);  // empty request
3949     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
3950 
3951     // Zero query from "ございます!" -> no suggestion
3952     segments.Clear();
3953     MakeSegmentsForConversion("ございます!", &segments);
3954     AddCandidate(0, "ございます!", &segments);
3955     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3956     AddSegmentForSuggestion("", &segments);  // empty request
3957     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
3958   }
3959 
3960   predictor->ClearAllHistory();
3961   predictor->WaitForSyncer();
3962 
3963   {
3964     MakeSegmentsForConversion("ございます", &segments);
3965     AddCandidate(0, "ございます", &segments);
3966 
3967     predictor->Finish(*convreq_, &segments);
3968     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3969 
3970     AddSegmentForConversion("!よろしくおねがいします", &segments);
3971     AddCandidate(1, "!よろしくお願いします", &segments);
3972     predictor->Finish(*convreq_, &segments);
3973 
3974     segments.Clear();
3975     MakeSegmentsForSuggestion("ございま", &segments);
3976     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
3977     EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
3978     EXPECT_FALSE(
3979         FindCandidateByValue("ございます!よろしくお願いします", segments));
3980 
3981     // Zero query from "ございます" -> no suggestion
3982     segments.Clear();
3983     MakeSegmentsForConversion("ございます", &segments);
3984     AddCandidate(0, "ございます", &segments);
3985     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
3986     AddSegmentForSuggestion("", &segments);  // empty request
3987     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
3988   }
3989 }
3990 
TEST_F(UserHistoryPredictorTest,PunctuationLink_Desktop)3991 TEST_F(UserHistoryPredictorTest, PunctuationLink_Desktop) {
3992   UserHistoryPredictor *predictor = GetUserHistoryPredictor();
3993   predictor->WaitForSyncer();
3994   predictor->ClearAllHistory();
3995   predictor->WaitForSyncer();
3996   Segments segments;
3997   {
3998     MakeSegmentsForConversion("ございます", &segments);
3999     AddCandidate(0, "ございます", &segments);
4000 
4001     predictor->Finish(*convreq_, &segments);
4002     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
4003 
4004     AddSegmentForConversion("!", &segments);
4005     AddCandidate(1, "!", &segments);
4006     predictor->Finish(*convreq_, &segments);
4007 
4008     segments.Clear();
4009     MakeSegmentsForSuggestion("ございま", &segments);
4010     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
4011     EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
4012     EXPECT_FALSE(FindCandidateByValue("ございます!", segments));
4013 
4014     segments.Clear();
4015     MakeSegmentsForSuggestion("ございます", &segments);
4016     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
4017     EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
4018     EXPECT_FALSE(FindCandidateByValue("ございます!", segments));
4019   }
4020 
4021   predictor->ClearAllHistory();
4022   predictor->WaitForSyncer();
4023 
4024   {
4025     MakeSegmentsForConversion("!", &segments);
4026     AddCandidate(0, "!", &segments);
4027 
4028     predictor->Finish(*convreq_, &segments);
4029     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
4030 
4031     AddSegmentForConversion("よろしくおねがいします", &segments);
4032     AddCandidate(1, "よろしくお願いします", &segments);
4033     predictor->Finish(*convreq_, &segments);
4034 
4035     segments.Clear();
4036     MakeSegmentsForSuggestion("!", &segments);
4037     EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
4038   }
4039 
4040   predictor->ClearAllHistory();
4041   predictor->WaitForSyncer();
4042 
4043   {
4044     MakeSegmentsForConversion("ございます!", &segments);
4045     AddCandidate(0, "ございます!", &segments);
4046 
4047     predictor->Finish(*convreq_, &segments);
4048     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
4049 
4050     AddSegmentForConversion("よろしくおねがいします", &segments);
4051     AddCandidate(1, "よろしくお願いします", &segments);
4052     predictor->Finish(*convreq_, &segments);
4053 
4054     segments.Clear();
4055     MakeSegmentsForSuggestion("ございます", &segments);
4056     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
4057     EXPECT_EQ("ございます!",
4058               segments.conversion_segment(0).candidate(0).value);
4059     EXPECT_FALSE(
4060         FindCandidateByValue("ございます!よろしくお願いします", segments));
4061 
4062     segments.Clear();
4063     MakeSegmentsForSuggestion("ございます!", &segments);
4064     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
4065     EXPECT_EQ("ございます!",
4066               segments.conversion_segment(0).candidate(0).value);
4067     EXPECT_FALSE(
4068         FindCandidateByValue("ございます!よろしくお願いします", segments));
4069   }
4070 
4071   predictor->ClearAllHistory();
4072   predictor->WaitForSyncer();
4073 
4074   {
4075     MakeSegmentsForConversion("ございます", &segments);
4076     AddCandidate(0, "ございます", &segments);
4077 
4078     predictor->Finish(*convreq_, &segments);
4079     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
4080 
4081     AddSegmentForConversion("!よろしくおねがいします", &segments);
4082     AddCandidate(1, "!よろしくお願いします", &segments);
4083     predictor->Finish(*convreq_, &segments);
4084 
4085     segments.Clear();
4086     MakeSegmentsForSuggestion("ございます", &segments);
4087     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
4088     EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
4089     EXPECT_FALSE(FindCandidateByValue("ございます!", segments));
4090     EXPECT_FALSE(
4091         FindCandidateByValue("ございます!よろしくお願いします", segments));
4092   }
4093 
4094   predictor->ClearAllHistory();
4095   predictor->WaitForSyncer();
4096 
4097   {
4098     // Note that "よろしくお願いします:よろしくおねがいします" is the sentence
4099     // like candidate. Please refer to user_history_predictor.cc
4100     MakeSegmentsForConversion("よろしくおねがいします", &segments);
4101     AddCandidate(0, "よろしくお願いします", &segments);
4102 
4103     predictor->Finish(*convreq_, &segments);
4104     segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
4105 
4106     AddSegmentForConversion("!", &segments);
4107     AddCandidate(1, "!", &segments);
4108     predictor->Finish(*convreq_, &segments);
4109 
4110     segments.Clear();
4111     MakeSegmentsForSuggestion("よろしくおねがいします", &segments);
4112     EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
4113     EXPECT_TRUE(FindCandidateByValue("よろしくお願いします!", segments));
4114   }
4115 }
4116 
4117 }  // namespace mozc
4118