1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "prediction/user_history_predictor.h"
31
32 #include <memory>
33 #include <set>
34 #include <string>
35
36 #include "base/file_util.h"
37 #include "base/logging.h"
38 #include "base/password_manager.h"
39 #include "base/port.h"
40 #include "base/system_util.h"
41 #include "base/util.h"
42 #include "composer/composer.h"
43 #include "composer/table.h"
44 #include "config/config_handler.h"
45 #include "converter/segments.h"
46 #include "data_manager/testing/mock_data_manager.h"
47 #include "dictionary/dictionary_mock.h"
48 #include "dictionary/suppression_dictionary.h"
49 #include "protocol/commands.pb.h"
50 #include "protocol/config.pb.h"
51 #include "request/conversion_request.h"
52 #include "session/request_test_util.h"
53 #include "testing/base/public/googletest.h"
54 #include "testing/base/public/gunit.h"
55 #include "usage_stats/usage_stats.h"
56 #include "usage_stats/usage_stats_testing_util.h"
57
58 DECLARE_bool(enable_expansion_for_user_history_predictor);
59
60 namespace mozc {
61 namespace {
62
63 using std::unique_ptr;
64
65 using commands::Request;
66 using config::Config;
67 using dictionary::DictionaryMock;
68 using dictionary::SuppressionDictionary;
69 using dictionary::Token;
70
AddSegmentForSuggestion(const string & key,Segments * segments)71 void AddSegmentForSuggestion(const string &key, Segments *segments) {
72 segments->set_max_prediction_candidates_size(10);
73 segments->set_request_type(Segments::SUGGESTION);
74 Segment *seg = segments->add_segment();
75 seg->set_key(key);
76 seg->set_segment_type(Segment::FIXED_VALUE);
77 }
78
MakeSegmentsForSuggestion(const string & key,Segments * segments)79 void MakeSegmentsForSuggestion(const string &key, Segments *segments) {
80 segments->Clear();
81 AddSegmentForSuggestion(key, segments);
82 }
83
AddSegmentForPrediction(const string & key,Segments * segments)84 void AddSegmentForPrediction(const string &key, Segments *segments) {
85 segments->set_max_prediction_candidates_size(10);
86 segments->set_request_type(Segments::PREDICTION);
87 Segment *seg = segments->add_segment();
88 seg->set_key(key);
89 seg->set_segment_type(Segment::FIXED_VALUE);
90 }
91
MakeSegmentsForPrediction(const string & key,Segments * segments)92 void MakeSegmentsForPrediction(const string &key, Segments *segments) {
93 segments->Clear();
94 AddSegmentForPrediction(key, segments);
95 }
96
AddSegmentForConversion(const string & key,Segments * segments)97 void AddSegmentForConversion(const string &key, Segments *segments) {
98 segments->set_request_type(Segments::CONVERSION);
99 Segment *seg = segments->add_segment();
100 seg->set_key(key);
101 seg->set_segment_type(Segment::FIXED_VALUE);
102 }
103
MakeSegmentsForConversion(const string & key,Segments * segments)104 void MakeSegmentsForConversion(const string &key, Segments *segments) {
105 segments->Clear();
106 AddSegmentForConversion(key, segments);
107 }
108
AddCandidate(size_t index,const string & value,Segments * segments)109 void AddCandidate(size_t index, const string &value, Segments *segments) {
110 Segment::Candidate *candidate =
111 segments->mutable_segment(index)->add_candidate();
112 CHECK(candidate);
113 candidate->Init();
114 candidate->value = value;
115 candidate->content_value = value;
116 candidate->key = segments->segment(index).key();
117 candidate->content_key = segments->segment(index).key();
118 }
119
AddCandidateWithDescription(size_t index,const string & value,const string & desc,Segments * segments)120 void AddCandidateWithDescription(size_t index,
121 const string &value,
122 const string &desc,
123 Segments *segments) {
124 Segment::Candidate *candidate =
125 segments->mutable_segment(index)->add_candidate();
126 CHECK(candidate);
127 candidate->Init();
128 candidate->value = value;
129 candidate->content_value = value;
130 candidate->key = segments->segment(index).key();
131 candidate->content_key = segments->segment(index).key();
132 candidate->description = desc;
133 }
134
AddCandidate(const string & value,Segments * segments)135 void AddCandidate(const string &value, Segments *segments) {
136 AddCandidate(0, value, segments);
137 }
138
AddCandidateWithDescription(const string & value,const string & desc,Segments * segments)139 void AddCandidateWithDescription(const string &value,
140 const string &desc,
141 Segments *segments) {
142 AddCandidateWithDescription(0, value, desc, segments);
143 }
144
FindCandidateByValue(const string & value,const Segments & segments)145 bool FindCandidateByValue(const string &value, const Segments &segments) {
146 for (size_t i = 0;
147 i < segments.conversion_segment(0).candidates_size(); ++i) {
148 if (segments.conversion_segment(0).candidate(i).value == value) {
149 return true;
150 }
151 }
152 return false;
153 }
154 } // namespace
155
156 class UserHistoryPredictorTest : public ::testing::Test {
157 public:
UserHistoryPredictorTest()158 UserHistoryPredictorTest()
159 : default_expansion_(FLAGS_enable_expansion_for_user_history_predictor) {
160 }
161
~UserHistoryPredictorTest()162 ~UserHistoryPredictorTest() override {
163 FLAGS_enable_expansion_for_user_history_predictor = default_expansion_;
164 }
165
166 protected:
SetUp()167 void SetUp() override {
168 SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
169 request_.reset(new Request);
170 config_.reset(new Config);
171 config::ConfigHandler::GetDefaultConfig(config_.get());
172 table_.reset(new composer::Table);
173 composer_.reset(
174 new composer::Composer(table_.get(), request_.get(), config_.get()));
175 convreq_.reset(
176 new ConversionRequest(composer_.get(), request_.get(), config_.get()));
177 data_and_predictor_.reset(CreateDataAndPredictor());
178
179 mozc::usage_stats::UsageStats::ClearAllStatsForTest();
180 }
181
TearDown()182 void TearDown() override {
183 FLAGS_enable_expansion_for_user_history_predictor = default_expansion_;
184
185 mozc::usage_stats::UsageStats::ClearAllStatsForTest();
186 }
187
GetUserHistoryPredictor()188 UserHistoryPredictor *GetUserHistoryPredictor() {
189 return data_and_predictor_->predictor.get();
190 }
191
GetUserHistoryPredictorWithClearedHistory()192 UserHistoryPredictor *GetUserHistoryPredictorWithClearedHistory() {
193 UserHistoryPredictor *predictor = data_and_predictor_->predictor.get();
194 predictor->WaitForSyncer();
195 predictor->ClearAllHistory();
196 predictor->WaitForSyncer();
197 return predictor;
198 }
199
GetDictionaryMock()200 DictionaryMock *GetDictionaryMock() {
201 return data_and_predictor_->dictionary.get();
202 }
203
GetSuppressionDictionary()204 SuppressionDictionary *GetSuppressionDictionary() {
205 return data_and_predictor_->suppression_dictionary.get();
206 }
207
IsSuggested(UserHistoryPredictor * predictor,const string & key,const string & value)208 static bool IsSuggested(UserHistoryPredictor *predictor,
209 const string &key, const string &value) {
210 const ConversionRequest conversion_request;
211 Segments segments;
212 MakeSegmentsForSuggestion(key, &segments);
213 return predictor->PredictForRequest(conversion_request, &segments) &&
214 FindCandidateByValue(value, segments);
215 }
216
IsPredicted(UserHistoryPredictor * predictor,const string & key,const string & value)217 static bool IsPredicted(UserHistoryPredictor *predictor,
218 const string &key, const string &value) {
219 const ConversionRequest conversion_request;
220 Segments segments;
221 MakeSegmentsForPrediction(key, &segments);
222 return predictor->PredictForRequest(conversion_request, &segments) &&
223 FindCandidateByValue(value, segments);
224 }
225
IsSuggestedAndPredicted(UserHistoryPredictor * predictor,const string & key,const string & value)226 static bool IsSuggestedAndPredicted(UserHistoryPredictor *predictor,
227 const string &key, const string &value) {
228 return IsSuggested(predictor, key, value) &&
229 IsPredicted(predictor, key, value);
230 }
231
InsertEntry(UserHistoryPredictor * predictor,const string & key,const string & value)232 static UserHistoryPredictor::Entry *InsertEntry(
233 UserHistoryPredictor *predictor,
234 const string &key, const string &value) {
235 UserHistoryPredictor::Entry *e =
236 &predictor->dic_->Insert(predictor->Fingerprint(key, value))->value;
237 e->set_key(key);
238 e->set_value(value);
239 e->set_removed(false);
240 return e;
241 }
242
AppendEntry(UserHistoryPredictor * predictor,const string & key,const string & value,UserHistoryPredictor::Entry * prev)243 static UserHistoryPredictor::Entry *AppendEntry(
244 UserHistoryPredictor *predictor,
245 const string &key, const string &value,
246 UserHistoryPredictor::Entry *prev) {
247 prev->add_next_entries()->set_entry_fp(
248 predictor->Fingerprint(key, value));
249 UserHistoryPredictor::Entry *e = InsertEntry(predictor, key, value);
250 return e;
251 }
252
IsConnected(const UserHistoryPredictor::Entry & prev,const UserHistoryPredictor::Entry & next)253 static bool IsConnected(const UserHistoryPredictor::Entry &prev,
254 const UserHistoryPredictor::Entry &next) {
255 const uint32 fp =
256 UserHistoryPredictor::Fingerprint(next.key(), next.value());
257 for (size_t i = 0; i < prev.next_entries_size(); ++i) {
258 if (prev.next_entries(i).entry_fp() == fp) {
259 return true;
260 }
261 }
262 return false;
263 }
264
265 // Helper function to create a test case for bigram history deletion.
InitHistory_JapaneseInput(UserHistoryPredictor * predictor,UserHistoryPredictor::Entry ** japaneseinput,UserHistoryPredictor::Entry ** japanese,UserHistoryPredictor::Entry ** input)266 static void InitHistory_JapaneseInput(
267 UserHistoryPredictor *predictor,
268 UserHistoryPredictor::Entry **japaneseinput,
269 UserHistoryPredictor::Entry **japanese,
270 UserHistoryPredictor::Entry **input) {
271 // Make the history for ("japaneseinput", "JapaneseInput"). It's assumed
272 // that this sentence consists of two segments, "japanese" and "input". So,
273 // the following history entries are constructed:
274 // ("japaneseinput", "JapaneseInput") // Unigram
275 // ("japanese", "Japanese") --- ("input", "Input") // Bigram chain
276 *japaneseinput = InsertEntry(predictor, "japaneseinput", "JapaneseInput");
277 *japanese = InsertEntry(predictor, "japanese", "Japanese");
278 *input = AppendEntry(predictor, "input", "Input", *japanese);
279
280 // Check the predictor functionality for the above history structure.
281 EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
282 EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
283 EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "input", "Input"));
284 }
285
286 // Helper function to create a test case for trigram history deletion.
InitHistory_JapaneseInputMethod(UserHistoryPredictor * predictor,UserHistoryPredictor::Entry ** japaneseinputmethod,UserHistoryPredictor::Entry ** japanese,UserHistoryPredictor::Entry ** input,UserHistoryPredictor::Entry ** method)287 static void InitHistory_JapaneseInputMethod(
288 UserHistoryPredictor *predictor,
289 UserHistoryPredictor::Entry **japaneseinputmethod,
290 UserHistoryPredictor::Entry **japanese,
291 UserHistoryPredictor::Entry **input,
292 UserHistoryPredictor::Entry **method) {
293 // Make the history for ("japaneseinputmethod", "JapaneseInputMethod"). It's
294 // assumed that this sentence consists of three segments, "japanese",
295 // "input" and "method". So, the following history entries are constructed:
296 // ("japaneseinputmethod", "JapaneseInputMethod") // Unigram
297 // ("japanese", "Japanese") -- ("input", "Input") -- ("method", "Method")
298 *japaneseinputmethod =
299 InsertEntry(predictor, "japaneseinputmethod", "JapaneseInputMethod");
300 *japanese = InsertEntry(predictor, "japanese", "Japanese");
301 *input = AppendEntry(predictor, "input", "Input", *japanese);
302 *method = AppendEntry(predictor, "method", "Method", *input);
303
304 // Check the predictor functionality for the above history structure.
305 EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
306 EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
307 EXPECT_TRUE(IsSuggestedAndPredicted(predictor,
308 "japan", "JapaneseInputMethod"));
309 EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
310 EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
311 EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
312 }
313
314 unique_ptr<composer::Composer> composer_;
315 unique_ptr<composer::Table> table_;
316 unique_ptr<ConversionRequest> convreq_;
317 unique_ptr<Config> config_;
318 unique_ptr<Request> request_;
319
320 private:
321 struct DataAndPredictor {
322 unique_ptr<DictionaryMock> dictionary;
323 unique_ptr<SuppressionDictionary> suppression_dictionary;
324 unique_ptr<UserHistoryPredictor> predictor;
325 dictionary::POSMatcher pos_matcher;
326 };
327
CreateDataAndPredictor() const328 DataAndPredictor *CreateDataAndPredictor() const {
329 DataAndPredictor *ret = new DataAndPredictor;
330 testing::MockDataManager data_manager;
331 ret->dictionary.reset(new DictionaryMock);
332 ret->suppression_dictionary.reset(new SuppressionDictionary);
333 ret->pos_matcher.Set(data_manager.GetPOSMatcherData());
334 ret->predictor.reset(
335 new UserHistoryPredictor(ret->dictionary.get(),
336 &ret->pos_matcher,
337 ret->suppression_dictionary.get(),
338 false));
339 return ret;
340 }
341
342 const bool default_expansion_;
343 unique_ptr<DataAndPredictor> data_and_predictor_;
344 mozc::usage_stats::scoped_usage_stats_enabler usage_stats_enabler_;
345 };
346
// Walks the fixture's predictor through its basic life cycle: empty history,
// learning through Finish(), config switches that must disable suggestion,
// read-only mode, and ClearAllHistory().
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorTest) {
  {
    UserHistoryPredictor *predictor = GetUserHistoryPredictor();
    predictor->WaitForSyncer();

    // Nothing happens for a suggestion request before anything is learned.
    {
      Segments segments;
      MakeSegmentsForSuggestion("てすと", &segments);
      EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
      EXPECT_EQ(0, segments.segment(0).candidates_size());
    }

    // Nothing happens for a prediction request either.
    {
      Segments segments;
      MakeSegmentsForPrediction("てすと", &segments);
      EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
      EXPECT_EQ(0, segments.segment(0).candidates_size());
    }

    // Learn one committed sentence and look it up by a key prefix.
    {
      Segments segments;
      MakeSegmentsForConversion("わたしのなまえはなかのです", &segments);
      AddCandidate("私の名前は中野です", &segments);
      predictor->Finish(*convreq_, &segments);

      segments.Clear();
      MakeSegmentsForSuggestion("わたしの", &segments);
      EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
      EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
      EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
                  Segment::Candidate::USER_HISTORY_PREDICTOR);

      segments.Clear();
      MakeSegmentsForPrediction("わたしの", &segments);
      EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
      EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
      EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
                  Segment::Candidate::USER_HISTORY_PREDICTOR);
    }

    // Finish() under a non-learning history level must not learn anything.
    {
      config::Config::HistoryLearningLevel no_learning_levels[] = {
          config::Config::READ_ONLY, config::Config::NO_HISTORY};
      for (config::Config::HistoryLearningLevel level : no_learning_levels) {
        config_->set_history_learning_level(level);

        Segments segments;
        MakeSegmentsForConversion("こんにちはさようなら", &segments);
        AddCandidate("今日はさようなら", &segments);
        predictor->Finish(*convreq_, &segments);

        segments.Clear();
        MakeSegmentsForSuggestion("こんにちは", &segments);
        EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
        MakeSegmentsForPrediction("こんにちは", &segments);
        EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
      }
      config_->set_history_learning_level(config::Config::DEFAULT_HISTORY);
    }

    // Sync, then wait for the syncer instead of sleeping for a fixed time.
    predictor->Sync();
    predictor->WaitForSyncer();
  }

  // Look the learned entry up again under various config settings.
  {
    UserHistoryPredictor *predictor = GetUserHistoryPredictor();
    predictor->WaitForSyncer();
    Segments segments;

    // Turn suggestion off in three different ways.
    {
      Segments disabled_segments;
      config_->set_use_history_suggest(false);

      MakeSegmentsForSuggestion("わたしの", &disabled_segments);
      EXPECT_FALSE(
          predictor->PredictForRequest(*convreq_, &disabled_segments));

      config_->set_use_history_suggest(true);
      config_->set_incognito_mode(true);

      MakeSegmentsForSuggestion("わたしの", &disabled_segments);
      EXPECT_FALSE(
          predictor->PredictForRequest(*convreq_, &disabled_segments));

      config_->set_incognito_mode(false);
      config_->set_history_learning_level(config::Config::NO_HISTORY);

      MakeSegmentsForSuggestion("わたしの", &disabled_segments);
      EXPECT_FALSE(
          predictor->PredictForRequest(*convreq_, &disabled_segments));
    }

    // Turn suggestion back on by restoring the default config.
    { config::ConfigHandler::GetDefaultConfig(config_.get()); }

    // The learned entry is reproduced.
    MakeSegmentsForSuggestion("わたしの", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);

    segments.Clear();
    MakeSegmentsForPrediction("わたしの", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);

    // Exact match.
    segments.Clear();
    MakeSegmentsForSuggestion("わたしのなまえはなかのです", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);

    segments.Clear();
    MakeSegmentsForPrediction("わたしのなまえはなかのです", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);

    // The sentence committed while learning was disabled is still unknown.
    segments.Clear();
    MakeSegmentsForSuggestion("こんにちはさようなら", &segments);
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));

    segments.Clear();
    MakeSegmentsForPrediction("こんにちはさようなら", &segments);
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));

    // Read only mode should show suggestion.
    {
      config_->set_history_learning_level(config::Config::READ_ONLY);
      MakeSegmentsForSuggestion("わたしの", &segments);
      EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
      EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);

      segments.Clear();
      MakeSegmentsForPrediction("わたしの", &segments);
      EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
      EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
      config_->set_history_learning_level(config::Config::DEFAULT_HISTORY);
    }

    // Clear the history.
    predictor->ClearAllHistory();
    predictor->WaitForSyncer();
  }

  // After the clear nothing is reproduced any more.  The check is performed
  // twice, as in the original test.
  for (int round = 0; round < 2; ++round) {
    UserHistoryPredictor *predictor = GetUserHistoryPredictor();
    predictor->WaitForSyncer();
    Segments segments;

    MakeSegmentsForSuggestion("わたしの", &segments);
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));

    MakeSegmentsForPrediction("わたしの", &segments);
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
  }
}
522
// Segments that contain multiple segments and whose request type is not
// CONVERSION used to be unsupported.
// This separate test case covers such Segments.
// Commits a two-segment SUGGESTION result through Finish() and verifies that
// the learned entries are recorded as suggestion entries and can be retrieved
// by a later suggestion request.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorTest_suggestion) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Register input histories via Finish method.
  {
    Segments segments;
    MakeSegmentsForSuggestion("かまた", &segments);
    AddCandidate(0, "火魔汰", &segments);
    AddSegmentForSuggestion("ま", &segments);
    AddCandidate(1, "摩", &segments);
    predictor->Finish(*convreq_, &segments);

    // All added items must be suggestion entries.  The walk continues until
    // |element| itself is null so that the last element is checked too and an
    // empty cache is not dereferenced.
    for (const UserHistoryPredictor::DicCache::Element *element =
             predictor->dic_->Head();
         element != nullptr; element = element->next) {
      const user_history_predictor::UserHistory::Entry &entry = element->value;
      EXPECT_TRUE(entry.has_suggestion_freq() && entry.suggestion_freq() == 1);
      EXPECT_TRUE(!entry.has_conversion_freq() && entry.conversion_freq() == 0);
    }
  }

  // Obtain input histories via Predict method.
  {
    Segments segments;
    MakeSegmentsForSuggestion("かま", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    std::set<string> expected_candidates;
    expected_candidates.insert("火魔汰");
    // We can get this entry even if the Segments' type is not CONVERSION.
    expected_candidates.insert("火魔汰摩");
    // Every returned candidate must be one of the expected values, and each
    // expected value may appear at most once.
    for (size_t i = 0; i < segments.segment(0).candidates_size(); ++i) {
      SCOPED_TRACE(segments.segment(0).candidate(i).value);
      EXPECT_EQ(
          1, expected_candidates.erase(segments.segment(0).candidate(i).value));
    }
  }
}
567
// Verifies that the description attached to a committed candidate is stored
// in the history and returned together with the predicted candidate.
TEST_F(UserHistoryPredictorTest, DescriptionTest) {
#ifdef DEBUG
  const char kDescription[] = "テスト History";
#else
  const char kDescription[] = "テスト";
#endif  // DEBUG

  {
    UserHistoryPredictor *predictor = GetUserHistoryPredictor();
    predictor->WaitForSyncer();

    // Learn one committed sentence that carries a description.
    {
      Segments segments;
      MakeSegmentsForConversion("わたしのなまえはなかのです", &segments);
      AddCandidateWithDescription("私の名前は中野です", kDescription,
                                  &segments);
      predictor->Finish(*convreq_, &segments);

      MakeSegmentsForSuggestion("わたしの", &segments);
      EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
      EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
      EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);

      segments.Clear();
      MakeSegmentsForPrediction("わたしの", &segments);
      EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
      EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
      EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);
    }

    // sync
    predictor->Sync();
  }

  // Look the learned entry up again under various config settings.
  {
    UserHistoryPredictor *predictor = GetUserHistoryPredictor();
    predictor->WaitForSyncer();
    Segments segments;

    // Turn suggestion off.
    {
      Segments disabled_segments;
      config_->set_use_history_suggest(false);
      predictor->WaitForSyncer();

      MakeSegmentsForSuggestion("わたしの", &disabled_segments);
      EXPECT_FALSE(
          predictor->PredictForRequest(*convreq_, &disabled_segments));

      config_->set_use_history_suggest(true);
      config_->set_incognito_mode(true);

      MakeSegmentsForSuggestion("わたしの", &disabled_segments);
      EXPECT_FALSE(
          predictor->PredictForRequest(*convreq_, &disabled_segments));
    }

    // Turn suggestion back on by restoring the default config.
    {
      config::ConfigHandler::GetDefaultConfig(config_.get());
      predictor->WaitForSyncer();
    }

    // The learned entry is reproduced together with its description.
    MakeSegmentsForSuggestion("わたしの", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
    EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);

    segments.Clear();
    MakeSegmentsForPrediction("わたしの", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
    EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);

    // Exact match, suggestion.
    segments.Clear();
    MakeSegmentsForSuggestion("わたしのなまえはなかのです", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
    EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);

    // Exact match, prediction.  The original repeated the suggestion request
    // here, so prediction was never exercised for the exact-match key.
    segments.Clear();
    MakeSegmentsForPrediction("わたしのなまえはなかのです", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
    EXPECT_EQ(kDescription, segments.segment(0).candidate(0).description);

    // Clear the history.
    predictor->ClearAllHistory();
    predictor->WaitForSyncer();
  }

  // After the clear nothing is reproduced any more.  The check is performed
  // twice, as in the original test.
  for (int round = 0; round < 2; ++round) {
    UserHistoryPredictor *predictor = GetUserHistoryPredictor();
    predictor->WaitForSyncer();
    Segments segments;

    MakeSegmentsForSuggestion("わたしの", &segments);
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));

    MakeSegmentsForPrediction("わたしの", &segments);
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
  }
}
689
// Commits one sentence with the request type switched to SUGGESTION and one
// as a plain CONVERSION, then checks that ClearUnusedHistory() removes only
// the latter and that the result is unchanged after another Sync().
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorUnusedHistoryTest) {
  // Phase 1: learn both sentences.
  {
    UserHistoryPredictor *const history = GetUserHistoryPredictor();
    history->WaitForSyncer();

    Segments committed;

    // First sentence: built as a conversion, committed as a suggestion.
    MakeSegmentsForConversion("わたしのなまえはなかのです", &committed);
    AddCandidate("私の名前は中野です", &committed);
    committed.set_request_type(Segments::SUGGESTION);
    history->Finish(*convreq_, &committed);

    // Second sentence: committed as a conversion.
    committed.Clear();
    MakeSegmentsForConversion("ひろすえりょうこ", &committed);
    AddCandidate("広末涼子", &committed);
    committed.set_request_type(Segments::CONVERSION);
    history->Finish(*convreq_, &committed);

    history->Sync();
  }

  // Phase 2: both are suggested, then the unused history is cleared.
  {
    UserHistoryPredictor *const history = GetUserHistoryPredictor();
    history->WaitForSyncer();
    Segments query;

    MakeSegmentsForSuggestion("わたしの", &query);
    EXPECT_TRUE(history->Predict(&query));
    EXPECT_EQ("私の名前は中野です", query.segment(0).candidate(0).value);

    query.Clear();
    MakeSegmentsForSuggestion("ひろすえ", &query);
    EXPECT_TRUE(history->Predict(&query));
    EXPECT_EQ("広末涼子", query.segment(0).candidate(0).value);

    history->ClearUnusedHistory();
    history->WaitForSyncer();

    // The sentence committed as a suggestion survives ...
    query.Clear();
    MakeSegmentsForSuggestion("わたしの", &query);
    EXPECT_TRUE(history->Predict(&query));
    EXPECT_EQ("私の名前は中野です", query.segment(0).candidate(0).value);

    // ... while the conversion-only one is gone.
    query.Clear();
    MakeSegmentsForSuggestion("ひろすえ", &query);
    EXPECT_FALSE(history->Predict(&query));

    history->Sync();
  }

  // Phase 3: the same holds after the sync has completed.
  {
    UserHistoryPredictor *const history = GetUserHistoryPredictor();
    history->WaitForSyncer();
    Segments query;

    MakeSegmentsForSuggestion("わたしの", &query);
    EXPECT_TRUE(history->Predict(&query));
    EXPECT_EQ("私の名前は中野です", query.segment(0).candidate(0).value);

    query.Clear();
    MakeSegmentsForSuggestion("ひろすえ", &query);
    EXPECT_FALSE(history->Predict(&query));
  }
}
759
// Verifies that Revert() undoes what the preceding Finish() learned.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorRevertTest) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  Segments segments, segments2;
  MakeSegmentsForConversion("わたしのなまえはなかのです", &segments);
  AddCandidate("私の名前は中野です", &segments);

  predictor->Finish(*convreq_, &segments);

  // Before Revert, Suggest works.  The prediction result lives in |segments2|;
  // |segments| still holds the committed input and must not be used for this
  // check.  ASSERT keeps the candidate access below from running on an empty
  // result.
  MakeSegmentsForSuggestion("わたしの", &segments2);
  ASSERT_TRUE(predictor->Predict(&segments2));
  EXPECT_EQ("私の名前は中野です", segments2.segment(0).candidate(0).value);

  // Call revert here
  predictor->Revert(&segments);

  segments.Clear();
  MakeSegmentsForSuggestion("わたしの", &segments);

  EXPECT_FALSE(predictor->Predict(&segments));
  EXPECT_EQ(0, segments.segment(0).candidates_size());

  // A second attempt must fail the same way.
  EXPECT_FALSE(predictor->Predict(&segments));
  EXPECT_EQ(0, segments.segment(0).candidates_size());
}
789
// Verifies that ClearAllHistory() also resets how often an entry was
// committed: after ten commits, a clear and a single new commit, the one
// character key "t" no longer yields a suggestion while "testte" still does.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorClearTest) {
  UserHistoryPredictor *const history = GetUserHistoryPredictor();
  history->WaitForSyncer();

  // Commit "testtest" ten times.
  for (int remaining = 10; remaining > 0; --remaining) {
    Segments committed;
    MakeSegmentsForConversion("testtest", &committed);
    AddCandidate("テストテスト", &committed);
    history->Finish(*convreq_, &committed);
  }

  history->ClearAllHistory();
  history->WaitForSyncer();

  // Commit "testtest" exactly once more.
  {
    Segments committed;
    MakeSegmentsForConversion("testtest", &committed);
    AddCandidate("テストテスト", &committed);
    history->Finish(*convreq_, &committed);
  }

  // The frequency has been cleared as well.
  {
    Segments query;
    MakeSegmentsForSuggestion("t", &query);
    EXPECT_FALSE(history->Predict(&query));

    query.Clear();
    MakeSegmentsForSuggestion("testte", &query);
    EXPECT_TRUE(history->Predict(&query));
  }
}
824
// Commits a sentence followed by a separate punctuation segment and expects
// both the bare sentence and the sentence with the trailing punctuation to be
// offered, in that order, for prediction and for suggestion.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorTrailingPunctuation) {
  UserHistoryPredictor *const history =
      GetUserHistoryPredictorWithClearedHistory();

  Segments work;

  // Segment 0 is the sentence, segment 1 the punctuation mark.
  MakeSegmentsForConversion("わたしのなまえはなかのです", &work);
  AddCandidate(0, "私の名前は中野です", &work);
  AddSegmentForConversion("。", &work);
  AddCandidate(1, "。", &work);

  history->Finish(*convreq_, &work);

  // Prediction.
  work.Clear();
  MakeSegmentsForPrediction("わたしの", &work);
  EXPECT_TRUE(history->Predict(&work));
  EXPECT_EQ(2, work.segment(0).candidates_size());
  EXPECT_EQ("私の名前は中野です", work.segment(0).candidate(0).value);
  EXPECT_EQ("私の名前は中野です。", work.segment(0).candidate(1).value);

  // Suggestion.
  work.Clear();
  MakeSegmentsForSuggestion("わたしの", &work);
  EXPECT_TRUE(history->Predict(&work));
  EXPECT_EQ(2, work.segment(0).candidates_size());
  EXPECT_EQ("私の名前は中野です", work.segment(0).candidate(0).value);
  EXPECT_EQ("私の名前は中野です。", work.segment(0).candidate(1).value);
}
857
// With the mobile request filled in, learning "です。" as a single segment is
// expected not to produce any prediction for the key "です".
TEST_F(UserHistoryPredictorTest, TrailingPunctuation_Mobile) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();
  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  // Learn the value that already ends with the punctuation.
  Segments mobile_segments;
  MakeSegmentsForConversion("です。", &mobile_segments);
  AddCandidate(0, "です。", &mobile_segments);
  predictor->Finish(*convreq_, &mobile_segments);
  mobile_segments.Clear();

  // Query with the key lacking the punctuation.
  MakeSegmentsForPrediction("です", &mobile_segments);
  const bool predicted = predictor->Predict(&mobile_segments);
  EXPECT_FALSE(predicted);
}
877
// Exercises learning across a history segment when one of the two committed
// values is the punctuation "。".
TEST_F(UserHistoryPredictorTest, HistoryToPunctuation) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  // Commits |first_value| by prediction, turns that segment into a history
  // segment, and then commits |second_value| as the following segment.
  const auto commit_in_sequence = [&](const char *first_key,
                                      const char *first_value,
                                      const char *second_key,
                                      const char *second_value) {
    MakeSegmentsForPrediction(first_key, &segments);
    AddCandidate(0, first_value, &segments);
    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForPrediction(second_key, &segments);
    AddCandidate(1, second_value, &segments);
    predictor->Finish(*convreq_, &segments);
  };

  // Scenario 1: the user has committed "亜" by prediction and then commits
  // "。". The unigram "亜" is learned, but the bigram "亜。" should not be.
  commit_in_sequence("あ", "亜", "。", "。");

  segments.Clear();
  MakeSegmentsForPrediction("あ", &segments);
  ASSERT_TRUE(predictor->Predict(&segments)) << segments.DebugString();
  EXPECT_EQ("亜", segments.segment(0).candidate(0).value);

  // Scenario 2: the reverse order of Scenario 1, i.e. "。亜". Nothing is
  // suggested from the symbol "。".
  segments.Clear();
  commit_in_sequence("。", "。", "あ", "亜");

  segments.Clear();
  MakeSegmentsForPrediction("。", &segments);
  EXPECT_FALSE(predictor->Predict(&segments)) << segments.DebugString();

  // Scenario 3: when the history segment looks like a sentence and the
  // committed value is a punctuation, the concatenated entry is learned too.
  segments.Clear();
  commit_in_sequence("おつかれさまです", "お疲れ様です", "。", "。");

  segments.Clear();
  MakeSegmentsForPrediction("おつかれ", &segments);
  ASSERT_TRUE(predictor->Predict(&segments)) << segments.DebugString();
  EXPECT_EQ("お疲れ様です", segments.segment(0).candidate(0).value);
  EXPECT_EQ("お疲れ様です。", segments.segment(0).candidate(1).value);
}
938
// Learns "。" followed by a sentence and expects prediction and suggestion for
// the sentence prefix to return exactly one candidate: the sentence itself.
TEST_F(UserHistoryPredictorTest, UserHistoryPredictorPreceedingPunctuation) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  // Commit the punctuation first, then the sentence, in one Finish() call.
  MakeSegmentsForConversion("。", &segments);
  AddCandidate(0, "。", &segments);
  AddSegmentForConversion("わたしのなまえはなかのです", &segments);
  AddCandidate(1, "私の名前は中野です", &segments);
  predictor->Finish(*convreq_, &segments);

  // Verifies the candidates currently held by the first segment.
  const auto expect_sentence_only = [&segments]() {
    EXPECT_EQ(1, segments.segment(0).candidates_size());
    EXPECT_EQ("私の名前は中野です", segments.segment(0).candidate(0).value);
  };

  // Prediction.
  segments.Clear();
  MakeSegmentsForPrediction("わたしの", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  expect_sentence_only();

  // Suggestion.
  segments.Clear();
  MakeSegmentsForSuggestion("わたしの", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  expect_sentence_only();
}
969
namespace {
// One row of the table driving the StartsWithPunctuations test.
struct StartsWithPunctuationsTestData {
  // Character that is learned at the head of the text and later used, on its
  // own, as the suggestion/prediction query.
  const char *first_character;
  // Value Predict() is expected to return for that query.
  bool expected_result;
};
}  // namespace
976
// For every table row, learns text beginning with the row's character (both as
// two segments and as one segment) and checks whether querying with that
// character alone produces a result.
TEST_F(UserHistoryPredictorTest, StartsWithPunctuations) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  const StartsWithPunctuationsTestData kTestCases[] = {
      {"。", false}, {"、", false}, {"?", false}, {"!", false}, {"ぬ", true},
  };

  for (const StartsWithPunctuationsTestData &test_case : kTestCases) {
    // Every row starts from an empty history.
    predictor->WaitForSyncer();
    predictor->ClearAllHistory();
    predictor->WaitForSyncer();

    const string first_char = test_case.first_character;
    const bool expected = test_case.expected_result;
    Segments segments;

    // Learn from two segments.
    MakeSegmentsForConversion(first_char, &segments);
    AddCandidate(0, first_char, &segments);
    AddSegmentForConversion("てすとぶんしょう", &segments);
    AddCandidate(1, "テスト文章", &segments);
    predictor->Finish(*convreq_, &segments);
    segments.Clear();

    // Learn from one segment.
    MakeSegmentsForConversion(first_char + "てすとぶんしょう", &segments);
    AddCandidate(0, first_char + "テスト文章", &segments);
    predictor->Finish(*convreq_, &segments);
    segments.Clear();

    // Suggestion.
    MakeSegmentsForSuggestion(first_char, &segments);
    AddCandidate(0, first_char, &segments);
    EXPECT_EQ(expected, predictor->Predict(&segments))
        << "Suggest from " << first_char;
    segments.Clear();

    // Prediction.
    MakeSegmentsForPrediction(first_char, &segments);
    EXPECT_EQ(expected, predictor->Predict(&segments))
        << "Predict from " << first_char;
  }
}
1022
// Checks PredictForRequest() with an empty key (zero query): it must return
// nothing without history segments or when the request has
// zero_query_suggestion disabled, and must return a result when a history
// segment exists and the request has it enabled.
TEST_F(UserHistoryPredictorTest, ZeroQuerySuggestionTest) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // The fixture's request enables zero query suggestion...
  request_->set_zero_query_suggestion(true);

  // ...while this separate request/conversion-request pair has it disabled.
  commands::Request non_zero_query_request;
  non_zero_query_request.set_zero_query_suggestion(false);
  ConversionRequest non_zero_query_conversion_request(
      composer_.get(), &non_zero_query_request, config_.get());

  Segments segments;

  // No history segments
  segments.Clear();
  MakeSegmentsForSuggestion("", &segments);
  EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));

  // Part 1: "太郎は" is committed and kept as history; three different
  // followers are then committed one after another behind it.
  {
    segments.Clear();

    MakeSegmentsForConversion("たろうは", &segments);
    AddCandidate(0, "太郎は", &segments);
    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("はなこに", &segments);
    AddCandidate(1, "花子に", &segments);
    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);

    // Replace the second segment with "京都".
    segments.pop_back_segment();
    AddSegmentForConversion("きょうと", &segments);
    AddCandidate(1, "京都", &segments);
    // NOTE(review): presumably the sleep makes this commit's timestamp
    // distinct from the previous one -- confirm.
    Util::Sleep(2000);
    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);

    // Replace the second segment with "大阪", the last one committed.
    segments.pop_back_segment();
    AddSegmentForConversion("おおさか", &segments);
    AddCandidate(1, "大阪", &segments);
    Util::Sleep(2000);
    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);

    // Zero query suggestion is disabled.
    segments.pop_back_segment();
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_FALSE(predictor->PredictForRequest(non_zero_query_conversion_request,
                                              &segments));

    // Same empty request, but with zero query suggestion enabled.
    segments.pop_back_segment();
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    // last-pushed segment is "大阪"
    EXPECT_EQ("大阪", segments.segment(1).candidate(0).value);
    EXPECT_EQ("おおさか", segments.segment(1).candidate(0).key);
    EXPECT_TRUE(segments.segment(1).candidate(0).source_info &
                Segment::Candidate::USER_HISTORY_PREDICTOR);

    // Non-empty one-character requests behind the same history segment.
    segments.pop_back_segment();
    AddSegmentForSuggestion("は", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

    segments.pop_back_segment();
    AddSegmentForSuggestion("た", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

    segments.pop_back_segment();
    AddSegmentForSuggestion("き", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

    segments.pop_back_segment();
    AddSegmentForSuggestion("お", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  // Part 2: both segments are committed by a single Finish() call; the history
  // segment is then rebuilt by hand before querying.
  {
    segments.Clear();
    MakeSegmentsForConversion("たろうは", &segments);
    AddCandidate(0, "太郎は", &segments);

    AddSegmentForConversion("はなこに", &segments);
    AddCandidate(1, "花子に", &segments);
    predictor->Finish(*convreq_, &segments);

    segments.Clear();
    MakeSegmentsForConversion("たろうは", &segments);
    AddCandidate(0, "太郎は", &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    // Zero query suggestion is disabled.
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_FALSE(predictor->PredictForRequest(non_zero_query_conversion_request,
                                              &segments));

    // Same empty request, but with zero query suggestion enabled.
    segments.pop_back_segment();
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

    segments.pop_back_segment();
    AddSegmentForSuggestion("は", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));

    segments.pop_back_segment();
    AddSegmentForSuggestion("た", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  }
}
1137
// Learns a five-phrase sentence one phrase per Finish() call (earlier phrases
// become history segments), then checks which key prefixes yield suggestions
// and that a later-learned continuation is returned first.
TEST_F(UserHistoryPredictorTest, MultiSegmentsMultiInput) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  struct Phrase {
    const char *key;
    const char *value;
  };
  const Phrase kPhrases[] = {
      {"たろうは", "太郎は"}, {"はなこに", "花子に"},
      {"むずかしい", "難しい"}, {"ほんを", "本を"},
      {"よませた", "読ませた"},
  };
  const size_t kLastIndex = 4;

  size_t index = 0;
  for (const Phrase &phrase : kPhrases) {
    if (index == 0) {
      MakeSegmentsForConversion(phrase.key, &segments);
    } else {
      // From the third phrase on, conversion segments are cleared before the
      // new phrase is appended.
      if (index >= 2) {
        segments.clear_conversion_segments();
      }
      AddSegmentForConversion(phrase.key, &segments);
    }
    AddCandidate(index, phrase.value, &segments);
    predictor->Finish(*convreq_, &segments);
    // Every phrase except the last one is turned into a history segment.
    if (index != kLastIndex) {
      segments.mutable_segment(index)->set_segment_type(Segment::HISTORY);
    }
    ++index;
  }

  struct Query {
    const char *key;
    bool expected;
  };
  const Query kQueries[] = {
      {"た", false},
      {"たろうは", true},
      {"ろうは", false},
      {"たろうははな", true},
      {"はなこにむ", true},
      {"むずかし", true},
      {"はなこにむずかしいほ", true},
      {"ほんをよま", true},
  };
  for (const Query &query : kQueries) {
    segments.Clear();
    MakeSegmentsForSuggestion(query.key, &segments);
    EXPECT_EQ(query.expected, predictor->Predict(&segments)) << query.key;
  }

  // NOTE(review): presumably separates the timestamps of the old and the new
  // entries -- confirm.
  Util::Sleep(1000);

  // Add new entry "たろうはよしこに/太郎は良子に"
  segments.Clear();
  MakeSegmentsForConversion("たろうは", &segments);
  AddCandidate(0, "太郎は", &segments);
  predictor->Finish(*convreq_, &segments);
  segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

  AddSegmentForConversion("よしこに", &segments);
  AddCandidate(1, "良子に", &segments);
  predictor->Finish(*convreq_, &segments);
  segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);

  // The newly added continuation is expected as the top candidate.
  segments.Clear();
  MakeSegmentsForSuggestion("たろうは", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  const Segment::Candidate &top = segments.segment(0).candidate(0);
  EXPECT_EQ("太郎は良子に", top.value);
  EXPECT_TRUE(top.source_info & Segment::Candidate::USER_HISTORY_PREDICTOR);
}
1226
// Learns a five-phrase sentence with a single Finish() call, then checks which
// key prefixes yield suggestions and that a later-learned continuation is
// returned first.
TEST_F(UserHistoryPredictorTest, MultiSegmentsSingleInput) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  struct Phrase {
    const char *key;
    const char *value;
  };
  const Phrase kPhrases[] = {
      {"たろうは", "太郎は"}, {"はなこに", "花子に"},
      {"むずかしい", "難しい"}, {"ほんを", "本を"},
      {"よませた", "読ませた"},
  };

  // Build all five conversion segments, then commit them at once.
  size_t index = 0;
  for (const Phrase &phrase : kPhrases) {
    if (index == 0) {
      MakeSegmentsForConversion(phrase.key, &segments);
    } else {
      AddSegmentForConversion(phrase.key, &segments);
    }
    AddCandidate(index, phrase.value, &segments);
    ++index;
  }
  predictor->Finish(*convreq_, &segments);

  struct Query {
    const char *key;
    bool expected;
  };
  const Query kQueries[] = {
      {"たろうは", true},
      {"た", false},
      {"たろうははな", true},
      {"ろうははな", false},
      {"はなこにむ", true},
      {"むずかし", true},
      {"はなこにむずかしいほ", true},
      {"ほんをよま", true},
  };
  for (const Query &query : kQueries) {
    segments.Clear();
    MakeSegmentsForSuggestion(query.key, &segments);
    EXPECT_EQ(query.expected, predictor->Predict(&segments)) << query.key;
  }

  // NOTE(review): presumably separates the timestamps of the old and the new
  // entries -- confirm.
  Util::Sleep(1000);

  // Add new entry "たろうはよしこに/太郎は良子に"
  segments.Clear();
  MakeSegmentsForConversion("たろうは", &segments);
  AddCandidate(0, "太郎は", &segments);
  predictor->Finish(*convreq_, &segments);
  segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

  AddSegmentForConversion("よしこに", &segments);
  AddCandidate(1, "良子に", &segments);
  predictor->Finish(*convreq_, &segments);
  segments.mutable_segment(1)->set_segment_type(Segment::HISTORY);

  // The newly added continuation is expected as the top candidate.
  segments.Clear();
  MakeSegmentsForSuggestion("たろうは", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  const Segment::Candidate &top = segments.segment(0).candidate(0);
  EXPECT_EQ("太郎は良子に", top.value);
  EXPECT_TRUE(top.source_info & Segment::Candidate::USER_HISTORY_PREDICTOR);
}
1305
// Learns two sentences of the shape "X は / 、 / Y です / 。" and expects the
// query "とうきょうは、" to be completed with the first sentence's own
// continuation.
TEST_F(UserHistoryPredictorTest, Regression2843371_Case1) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  struct Part {
    const char *key;
    const char *value;
  };
  // Commits the four conversion segments in |parts| with one Finish() call
  // and clears |segments| afterwards.
  const auto commit_and_clear = [&](const Part (&parts)[4]) {
    for (size_t i = 0; i < 4; ++i) {
      if (i == 0) {
        MakeSegmentsForConversion(parts[i].key, &segments);
      } else {
        AddSegmentForConversion(parts[i].key, &segments);
      }
      AddCandidate(i, parts[i].value, &segments);
    }
    predictor->Finish(*convreq_, &segments);
    segments.Clear();
  };

  const Part kTokyoSentence[4] = {
      {"とうきょうは", "東京は"},
      {"、", "、"},
      {"にほんです", "日本です"},
      {"。", "。"},
  };
  const Part kRamenSentence[4] = {
      {"らーめんは", "ラーメンは"},
      {"、", "、"},
      {"めんるいです", "麺類です"},
      {"。", "。"},
  };

  commit_and_clear(kTokyoSentence);
  Util::Sleep(1000);
  commit_and_clear(kRamenSentence);

  MakeSegmentsForSuggestion("とうきょうは、", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));

  const Segment::Candidate &top = segments.segment(0).candidate(0);
  EXPECT_EQ("東京は、日本です", top.value);
  EXPECT_TRUE(top.source_info & Segment::Candidate::USER_HISTORY_PREDICTOR);
}
1355
// Learns one sentence containing two parenthesized parts and expects the query
// "えど(" to be completed with the content of the first parenthesis, also when
// Predict() is called a second time on the same segments.
TEST_F(UserHistoryPredictorTest, Regression2843371_Case2) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  struct Part {
    const char *key;
    const char *value;
  };
  const Part kSentence[] = {
      {"えど", "江戸"},
      {"(", "("},
      {"とうきょう", "東京"},
      {")", ")"},
      {"は", "は"},
      {"えぞ", "蝦夷"},
      {"(", "("},
      {"ほっかいどう", "北海道"},
      {")", ")"},
      {"ではない", "ではない"},
      {"。", "。"},
  };

  // Build all conversion segments, then commit them at once.
  size_t index = 0;
  for (const Part &part : kSentence) {
    if (index == 0) {
      MakeSegmentsForConversion(part.key, &segments);
    } else {
      AddSegmentForConversion(part.key, &segments);
    }
    AddCandidate(index, part.value, &segments);
    ++index;
  }
  predictor->Finish(*convreq_, &segments);

  segments.Clear();
  MakeSegmentsForSuggestion("えど(", &segments);

  // Predict twice without rebuilding the segments; both rounds must agree.
  for (int round = 0; round < 2; ++round) {
    EXPECT_TRUE(predictor->Predict(&segments));
    EXPECT_EQ("江戸(東京", segments.segment(0).candidate(0).value);
    EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
                Segment::Candidate::USER_HISTORY_PREDICTOR);
  }
}
1413
// Learns two sentences of the shape "「 / X / 」 / は / Y / 。" and expects the
// query "「やま」は" to be completed with the first sentence's own
// continuation.
TEST_F(UserHistoryPredictorTest, Regression2843371_Case3) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  struct Part {
    const char *key;
    const char *value;
  };
  // Builds the six conversion segments in |parts| and commits them with one
  // Finish() call. |segments| is left as it is after the commit.
  const auto commit_sentence = [&](const Part (&parts)[6]) {
    for (size_t i = 0; i < 6; ++i) {
      if (i == 0) {
        MakeSegmentsForConversion(parts[i].key, &segments);
      } else {
        AddSegmentForConversion(parts[i].key, &segments);
      }
      AddCandidate(i, parts[i].value, &segments);
    }
    predictor->Finish(*convreq_, &segments);
  };

  const Part kMountainSentence[6] = {
      {"「", "「"}, {"やま", "山"},     {"」", "」"},
      {"は", "は"}, {"たかい", "高い"}, {"。", "。"},
  };
  const Part kSeaSentence[6] = {
      {"「", "「"}, {"うみ", "海"},     {"」", "」"},
      {"は", "は"}, {"ふかい", "深い"}, {"。", "。"},
  };

  commit_sentence(kMountainSentence);

  Util::Sleep(2000);

  segments.Clear();
  commit_sentence(kSeaSentence);
  segments.Clear();

  MakeSegmentsForSuggestion("「やま」は", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));

  const Segment::Candidate &top = segments.segment(0).candidate(0);
  EXPECT_EQ("「山」は高い", top.value);
  EXPECT_TRUE(top.source_info & Segment::Candidate::USER_HISTORY_PREDICTOR);
}
1475
// Learns "そうです" followed by a segment whose value starts with "。" and
// expects the suggestion for "そうです" to be the two values joined together.
TEST_F(UserHistoryPredictorTest, Regression2843775) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  // Commit both segments with a single Finish() call.
  MakeSegmentsForConversion("そうです", &segments);
  AddCandidate(0, "そうです", &segments);
  AddSegmentForConversion("。よろしくおねがいします", &segments);
  AddCandidate(1, "。よろしくお願いします", &segments);
  predictor->Finish(*convreq_, &segments);
  segments.Clear();

  MakeSegmentsForSuggestion("そうです", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));

  const Segment::Candidate &top = segments.segment(0).candidate(0);
  EXPECT_EQ("そうです。よろしくお願いします", top.value);
  EXPECT_TRUE(top.source_info & Segment::Candidate::USER_HISTORY_PREDICTOR);
}
1502
// Learns a sentence in which "(" and ")" each occur twice and checks that
// suggestions never splice text from around one occurrence onto the other.
TEST_F(UserHistoryPredictorTest, DuplicateString) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  struct Part {
    const char *key;
    const char *value;
  };
  const Part kSentence[] = {
      {"らいおん", "ライオン"},
      {"(", "("},
      {"もうじゅう", "猛獣"},
      {")と", ")と"},
      {"ぞうりむし", "ゾウリムシ"},
      {"(", "("},
      {"びせいぶつ", "微生物"},
      {")", ")"},
  };

  // Build all conversion segments, then commit them at once.
  size_t index = 0;
  for (const Part &part : kSentence) {
    if (index == 0) {
      MakeSegmentsForConversion(part.key, &segments);
    } else {
      AddSegmentForConversion(part.key, &segments);
    }
    AddCandidate(index, part.value, &segments);
    ++index;
  }
  predictor->Finish(*convreq_, &segments);

  // Expects that no candidate of the first segment contains |needle|.
  const auto expect_not_suggested = [&segments](const char *needle) {
    for (int i = 0; i < segments.segment(0).candidates_size(); ++i) {
      EXPECT_EQ(string::npos,
                segments.segment(0).candidate(i).value.find(needle));
    }
  };

  segments.Clear();
  MakeSegmentsForSuggestion("ぞうりむし", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  // "猛獣" should not be found
  expect_not_suggested("猛獣");

  segments.Clear();
  MakeSegmentsForSuggestion("らいおん", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  expect_not_suggested("ライオン(微生物");
}
1558
namespace {
// One step of the randomized scenario executed by SyncTest.
//
// The struct is only used inside this file, so it lives in an unnamed
// namespace (like the other test-data struct here). That gives it internal
// linkage and keeps it from colliding with another type of the same name
// linked into the test binary.
struct Command {
  // Operation to perform on the predictor for this step.
  enum Type {
    LOOKUP,  // Predict() with |key|.
    INSERT,  // Finish() a conversion of |key| into |value|.
    SYNC,    // Sync().
    WAIT,    // WaitForSyncer().
  };
  Type type;
  string key;    // Reading used by LOOKUP and INSERT.
  string value;  // Committed value used by INSERT.
  // A default-constructed command is a LOOKUP with empty key and value.
  Command() : type(LOOKUP) {}
};
}  // namespace
1571
// Stress test: runs 10000 randomly chosen lookup / insert / sync / wait
// operations against the predictor. It has no expectations of its own; it
// passes as long as the sequence completes.
TEST_F(UserHistoryPredictorTest, SyncTest) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();

  // Prepare the scenario: "<i>key" / "<i>value" plus a random operation.
  std::vector<Command> commands(10000);
  size_t serial = 0;
  for (Command &command : commands) {
    const std::string number = std::to_string(static_cast<uint32>(serial));
    command.key = number + "key";
    command.value = number + "value";
    ++serial;

    // 0 -> WAIT, below 10 -> SYNC, below 50 -> INSERT, otherwise LOOKUP.
    const int dice = Util::Random(100);
    command.type = (dice == 0)   ? Command::WAIT
                   : (dice < 10) ? Command::SYNC
                   : (dice < 50) ? Command::INSERT
                                 : Command::LOOKUP;
  }

  // Kind of stress test
  Segments segments;
  for (const Command &command : commands) {
    if (command.type == Command::SYNC) {
      predictor->Sync();
    } else if (command.type == Command::WAIT) {
      predictor->WaitForSyncer();
    } else if (command.type == Command::INSERT) {
      segments.Clear();
      MakeSegmentsForConversion(command.key, &segments);
      AddCandidate(command.value, &segments);
      predictor->Finish(*convreq_, &segments);
    } else if (command.type == Command::LOOKUP) {
      segments.Clear();
      MakeSegmentsForSuggestion(command.key, &segments);
      predictor->Predict(&segments);
    }
  }
}
1618
// Pins the value GetMatchType() returns for each combination of empty, equal,
// unrelated and prefix-related argument pairs.
TEST_F(UserHistoryPredictorTest, GetMatchTypeTest) {
  typedef UserHistoryPredictor Predictor;

  // An empty second argument never matches, even when the first is empty too.
  EXPECT_EQ(Predictor::NO_MATCH, Predictor::GetMatchType("test", ""));
  EXPECT_EQ(Predictor::NO_MATCH, Predictor::GetMatchType("", ""));

  // Only the first argument is empty.
  EXPECT_EQ(Predictor::LEFT_EMPTY_MATCH, Predictor::GetMatchType("", "test"));

  // Unrelated and identical strings.
  EXPECT_EQ(Predictor::NO_MATCH, Predictor::GetMatchType("foo", "bar"));
  EXPECT_EQ(Predictor::EXACT_MATCH, Predictor::GetMatchType("foo", "foo"));

  // One argument is a proper prefix of the other.
  EXPECT_EQ(Predictor::LEFT_PREFIX_MATCH,
            Predictor::GetMatchType("foo", "foobar"));
  EXPECT_EQ(Predictor::RIGHT_PREFIX_MATCH,
            Predictor::GetMatchType("foobar", "foo"));
}
1641
// Checks that the fingerprint helpers agree with each other for the same
// key/value pair, and that the entry type takes part in the fingerprint.
TEST_F(UserHistoryPredictorTest, FingerPrintTest) {
  typedef UserHistoryPredictor Predictor;

  const char kKey[] = "abc";
  const char kValue[] = "ABC";

  Predictor::Entry entry;
  entry.set_key(kKey);
  entry.set_value(kValue);

  // Fingerprints computed from the raw key/value and from the entry.
  const uint32 fp_plain = Predictor::Fingerprint(kKey, kValue);
  const uint32 fp_entry = Predictor::EntryFingerprint(entry);
  const uint32 fp_default_type =
      Predictor::Fingerprint(kKey, kValue, Predictor::Entry::DEFAULT_ENTRY);
  const uint32 fp_clean_all =
      Predictor::Fingerprint(kKey, kValue, Predictor::Entry::CLEAN_ALL_EVENT);
  const uint32 fp_clean_unused = Predictor::Fingerprint(
      kKey, kValue, Predictor::Entry::CLEAN_UNUSED_EVENT);

  // Appends a candidate whose (content) key and value are kKey / kValue.
  const auto add_matching_candidate = [&](Segment *target) {
    Segment::Candidate *candidate = target->add_candidate();
    candidate->key = kKey;
    candidate->content_key = kKey;
    candidate->value = kValue;
    candidate->content_value = kValue;
  };

  // Segment whose own key equals the candidate key.
  Segment same_key_segment;
  same_key_segment.set_key(kKey);
  add_matching_candidate(&same_key_segment);
  const uint32 fp_segment = Predictor::SegmentFingerprint(same_key_segment);

  // Segment whose own key ("ab") differs from the candidate key.
  Segment other_key_segment;
  other_key_segment.set_key("ab");
  add_matching_candidate(&other_key_segment);
  const uint32 fp_segment_other_key =
      Predictor::SegmentFingerprint(other_key_segment);

  // The default entry type gives the same fingerprint as the two-argument
  // form and as the entry itself.
  EXPECT_EQ(fp_plain, fp_entry);
  EXPECT_EQ(fp_plain, fp_default_type);
  // The other entry types give different fingerprints.
  EXPECT_NE(fp_plain, fp_clean_all);
  EXPECT_NE(fp_plain, fp_clean_unused);
  EXPECT_NE(fp_clean_all, fp_clean_unused);
  // The segment fingerprint matches the entry fingerprints, whatever the
  // segment's own key is.
  EXPECT_EQ(fp_segment, fp_entry);
  EXPECT_EQ(fp_segment, fp_plain);
  EXPECT_EQ(fp_segment, fp_segment_other_key);
}
1701
// Checks that Uint32ToString() and StringToUint32() round-trip, and that
// StringToUint32() returns 0 for inputs of the wrong size.
TEST_F(UserHistoryPredictorTest, Uint32ToStringTest) {
  typedef UserHistoryPredictor Predictor;

  // Two hand-picked values.
  EXPECT_EQ(123, Predictor::StringToUint32(Predictor::Uint32ToString(123)));
  EXPECT_EQ(12141,
            Predictor::StringToUint32(Predictor::Uint32ToString(12141)));

  // Every value in [0, 10000).
  for (uint32 number = 0; number < 10000; ++number) {
    EXPECT_EQ(number,
              Predictor::StringToUint32(Predictor::Uint32ToString(number)));
  }

  // invalid input
  EXPECT_EQ(0, Predictor::StringToUint32(""));

  // not 4byte
  EXPECT_EQ(0, Predictor::StringToUint32("abcdef"));
}
1723
// Pins the relative ordering GetScore() produces for entries that differ in
// last access time, value length and bigram boost.
TEST_F(UserHistoryPredictorTest, GetScore) {
  // Sets the three fields every case below starts from.
  const auto fill = [](UserHistoryPredictor::Entry *entry, const char *key,
                       const char *value, uint32 last_access_time) {
    entry->set_key(key);
    entry->set_value(value);
    entry->set_last_access_time(last_access_time);
  };

  // latest value has higher score.
  {
    UserHistoryPredictor::Entry older, newer;
    fill(&older, "abc", "ABC", 10);
    fill(&newer, "foo", "ABC", 20);

    EXPECT_GT(UserHistoryPredictor::GetScore(newer),
              UserHistoryPredictor::GetScore(older));
  }

  // shorter value has higher score.
  {
    UserHistoryPredictor::Entry shorter, longer;
    fill(&shorter, "abc", "ABC", 10);
    fill(&longer, "foo", "ABCD", 10);

    EXPECT_GT(UserHistoryPredictor::GetScore(shorter),
              UserHistoryPredictor::GetScore(longer));
  }

  // bigram boost makes the entry stronger
  {
    UserHistoryPredictor::Entry plain, boosted;
    fill(&plain, "abc", "ABC", 10);
    fill(&boosted, "foo", "ABC", 10);
    boosted.set_bigram_boost(true);

    EXPECT_GT(UserHistoryPredictor::GetScore(boosted),
              UserHistoryPredictor::GetScore(plain));
  }

  // bigram boost wins even against an entry that has a shorter value and a
  // later access time.
  {
    UserHistoryPredictor::Entry boosted, plain;
    fill(&boosted, "abc", "ABCD", 10);
    boosted.set_bigram_boost(true);
    fill(&plain, "foo", "ABC", 50);

    EXPECT_GT(UserHistoryPredictor::GetScore(boosted),
              UserHistoryPredictor::GetScore(plain));
  }
}
1791
// Exercises IsValidEntry() and IsValidEntryIgnoringRemovedField() against
// the removed flag, the special CLEAN_* entry types, the emoji carrier
// argument and the suppression dictionary.
TEST_F(UserHistoryPredictorTest, IsValidEntry) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();

  UserHistoryPredictor::Entry target;

  // A default-constructed entry is accepted.
  EXPECT_TRUE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));

  target.set_key("key");
  target.set_value("value");

  EXPECT_TRUE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  // The removed flag only affects IsValidEntry().
  target.set_removed(true);
  EXPECT_FALSE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  target.set_removed(false);
  EXPECT_TRUE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  // CLEAN_* event entries are rejected by both predicates.
  target.set_entry_type(UserHistoryPredictor::Entry::CLEAN_ALL_EVENT);
  EXPECT_FALSE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  target.set_entry_type(UserHistoryPredictor::Entry::CLEAN_UNUSED_EVENT);
  EXPECT_FALSE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  target.set_removed(true);
  EXPECT_FALSE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  // Clearing the entry makes it acceptable again.
  target.Clear();
  EXPECT_TRUE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  // An entry described as emoji is accepted only when the carrier argument
  // includes UNICODE_EMOJI.
  target.Clear();
  target.set_key("key");
  target.set_value("value");
  target.set_description("絵文字");
  EXPECT_TRUE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));
  EXPECT_FALSE(predictor->IsValidEntry(target, 0));
  EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(target, 0));

  // An android pua emoji example. (Note: 0xFE000 is in the region).
  Util::UCS4ToUTF8(0xFE000, target.mutable_value());

  EXPECT_FALSE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  EXPECT_FALSE(predictor->IsValidEntry(target, 0));
  EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(target, 0));

  EXPECT_TRUE(predictor->IsValidEntry(target, Request::DOCOMO_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::DOCOMO_EMOJI));

  EXPECT_TRUE(predictor->IsValidEntry(target, Request::SOFTBANK_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::SOFTBANK_EMOJI));

  EXPECT_TRUE(predictor->IsValidEntry(target, Request::KDDI_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::KDDI_EMOJI));

  // Register ("foo", "bar") in the suppression dictionary; only the exactly
  // matching key/value pair is expected to be rejected.
  SuppressionDictionary *suppression = GetSuppressionDictionary();
  DCHECK(suppression);
  suppression->Lock();
  suppression->AddEntry("foo", "bar");
  suppression->UnLock();

  target.set_key("key");
  target.set_value("value");
  EXPECT_TRUE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_TRUE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  target.set_key("foo");
  target.set_value("bar");
  EXPECT_FALSE(predictor->IsValidEntry(target, Request::UNICODE_EMOJI));
  EXPECT_FALSE(predictor->IsValidEntryIgnoringRemovedField(
      target, Request::UNICODE_EMOJI));

  // Empty the suppression dictionary again before the test ends.
  suppression->Lock();
  suppression->Clear();
  suppression->UnLock();
}
1886
// Checks which entry attributes make IsValidSuggestion() accept an entry.
// Every call passes 1 as the second argument.
TEST_F(UserHistoryPredictorTest, IsValidSuggestion) {
  UserHistoryPredictor::Entry candidate;

  // A default entry is rejected in DEFAULT mode.
  EXPECT_FALSE(UserHistoryPredictor::IsValidSuggestion(
      UserHistoryPredictor::DEFAULT, 1, candidate));

  // Bigram boost alone is enough in DEFAULT mode.
  candidate.set_bigram_boost(true);
  EXPECT_TRUE(UserHistoryPredictor::IsValidSuggestion(
      UserHistoryPredictor::DEFAULT, 1, candidate));

  // Without the boost the entry is still accepted in
  // ZERO_QUERY_SUGGESTION mode.
  candidate.set_bigram_boost(false);
  EXPECT_TRUE(UserHistoryPredictor::IsValidSuggestion(
      UserHistoryPredictor::ZERO_QUERY_SUGGESTION, 1, candidate));

  // A conversion frequency of 10 is enough in DEFAULT mode without boost.
  candidate.set_bigram_boost(false);
  candidate.set_conversion_freq(10);
  EXPECT_TRUE(UserHistoryPredictor::IsValidSuggestion(
      UserHistoryPredictor::DEFAULT, 1, candidate));
}
1906
// Covers UserHistoryPredictor::EntryPriorityQueue: allocation through
// NewEntry(), the order in which Pop() returns pushed entries, and the
// queue size after pushing entries with identical key/value pairs.
TEST_F(UserHistoryPredictorTest, EntryPriorityQueueTest) {
  const int kSize = 10000;

  // Entries obtained from NewEntry() are never released by the test itself;
  // they are expected to be removed automatically together with the queue.
  {
    UserHistoryPredictor::EntryPriorityQueue queue;
    for (int i = 0; i < kSize; ++i) {
      EXPECT_NE(nullptr, queue.NewEntry());
    }
  }

  // Entries are pushed with strictly increasing last_access_time, and Pop()
  // is expected to hand them back in reverse push order until it returns
  // nullptr.
  {
    UserHistoryPredictor::EntryPriorityQueue queue;
    std::vector<UserHistoryPredictor::Entry *> expected;
    for (int i = 0; i < kSize; ++i) {
      UserHistoryPredictor::Entry *entry = queue.NewEntry();
      entry->set_key("test" + std::to_string(i));
      entry->set_value("test" + std::to_string(i));
      entry->set_last_access_time(i + 1000);
      expected.push_back(entry);
      EXPECT_TRUE(queue.Push(entry));
    }

    int n = kSize - 1;
    while (true) {
      const UserHistoryPredictor::Entry *entry = queue.Pop();
      if (entry == nullptr) {
        break;
      }
      EXPECT_EQ(expected[n], entry);
      --n;
    }
    // Every pushed entry must have been popped exactly once.
    EXPECT_EQ(-1, n);
  }

  // Pushing several entries with the same key/value pair grows the queue by
  // one only; the return value of Push() is deliberately not checked here.
  {
    UserHistoryPredictor::EntryPriorityQueue queue;
    for (int i = 0; i < 5; ++i) {
      UserHistoryPredictor::Entry *entry = queue.NewEntry();
      entry->set_key("test");
      entry->set_value("test");
      queue.Push(entry);
    }
    EXPECT_EQ(1, queue.size());

    for (int i = 0; i < 5; ++i) {
      UserHistoryPredictor::Entry *entry = queue.NewEntry();
      entry->set_key("foo");
      entry->set_value("bar");
      queue.Push(entry);
    }

    EXPECT_EQ(2, queue.size());
  }
}
1961
1962 namespace {
1963
// Returns a copy of |input| without its last character, where characters
// are counted with Util::CharsLen() and iterated with ConstChar32Iterator.
// Returns an empty string when |input| contains no characters.
string RemoveLastUCS4Character(const string &input) {
  const size_t total_chars = Util::CharsLen(input);
  if (total_chars == 0) {
    return "";
  }

  string result;
  // Number of leading characters that still have to be copied.
  size_t remaining = total_chars - 1;
  ConstChar32Iterator iter(input);
  while (!iter.Done() && remaining > 0) {
    Util::UCS4ToUTF8Append(iter.Get(), &result);
    iter.Next();
    --remaining;
  }
  return result;
}
1979
// One scenario for PrivacySensitiveTest: a committed (input, output) pair
// and whether the predictor is expected to treat it as privacy sensitive.
struct PrivacySensitiveTestData {
  // True when no prediction is expected for this pair.
  bool is_sensitive;
  // Human readable explanation, streamed into failure messages.
  const char *scenario_description;
  // Key that is committed.
  const char *input;
  // Value that is committed for |input|.
  const char *output;
};

const bool kSensitive = true;
const bool kNonSensitive = false;

// Scenario table consumed by PrivacySensitiveTest. Despite its name it holds
// both sensitive and non-sensitive scenarios.
const PrivacySensitiveTestData kNonSensitiveCases[] = {
    {kNonSensitive,  // We might want to revisit this behavior
     "Type privacy sensitive number but it is commited as full-width number "
     "by mistake.",
     "0007",
     "0007"},
    {kNonSensitive,
     "Type a ZIP number.",
     "100-0001",
     "東京都千代田区千代田"},
    {kNonSensitive,  // We might want to revisit this behavior
     "Type privacy sensitive number but the result contains one or more "
     "non-ASCII character such as full-width dash.",
     "1111-1111",
     "1111-1111"},
    {kNonSensitive,  // We might want to revisit this behavior
     "User dictionary contains a credit card number.",
     "かーどばんごう",
     "0000-0000-0000-0000"},
    {kNonSensitive,  // We might want to revisit this behavior
     "User dictionary contains a credit card number.",
     "かーどばんごう",
     "0000000000000000"},
    {kNonSensitive,  // We might want to revisit this behavior
     "User dictionary contains privacy sensitive information.",
     "ぱすわーど",
     "ywwz1sxm"},
    {kNonSensitive,  // We might want to revisit this behavior
     "Input privacy sensitive text by Roman-input mode by mistake and then "
     "hit F10 key to convert it to half-alphanumeric text. In this case "
     "we assume all the alphabetical characters are consumed by Roman-input "
     "rules.",
     "いあ1ぼ3ぅ",
     "ia1bo3xu"},
    {kNonSensitive,
     "Katakana to English transliteration.",  // http://b/4394325
     "おれんじ",
     "Orange"},
    {kNonSensitive,
     "Input a very common English word which should be included in our "
     "system dictionary by Roman-input mode by mistake and "
     "then hit F10 key to convert it to half-alphanumeric text.",
     "おらんげ",
     "orange"},
    {kSensitive,
     "Input a password-like text.",
     "123abc!",
     "123abc!"},
    {kSensitive,
     "Input privacy sensitive text by Roman-input mode by mistake and then "
     "hit F10 key to convert it to half-alphanumeric text. In this case, "
     "there may remain one or more alphabetical characters, which have not "
     "been consumed by Roman-input rules.",
     "yっwz1sxm",
     "ywwz1sxm"},
    {kNonSensitive,
     "Type a very common English word all in lower case which should be "
     "included in our system dictionary without capitalization.",
     "variable",
     "variable"},
    {kNonSensitive,
     "Type a very common English word all in upper case whose lower case "
     "should be included in our system dictionary.",
     "VARIABLE",
     "VARIABLE"},
    {kNonSensitive,
     "Type a very common English word with capitalization whose lower case "
     "should be included in our system dictionary.",
     "Variable",
     "Variable"},
    {kSensitive,  // We might want to revisit this behavior
     "Type a very common English word with random capitalization, which "
     "should be treated as case SENSITIVE.",
     "vArIaBle",
     "vArIaBle"},
    {kSensitive,
     "Type an English word in lower case but only its upper case form is "
     "stored in dictionary.",
     "upper",
     "upper"},
    {kSensitive,  // We might want to revisit this behavior
     "Type just a number.",
     "2398402938402934",
     "2398402938402934"},
    {kSensitive,  // We might want to revisit this behavior
     "Type an common English word which might be included in our system "
     "dictionary with number postfix.",
     "Orange10000",
     "Orange10000"},
};
2099
2100 } // namespace
2101
// Commits every (input, output) pair of kNonSensitiveCases into a freshly
// cleared history and checks that Predict() succeeds exactly for the
// scenarios that are not flagged as privacy sensitive.
TEST_F(UserHistoryPredictorTest, PrivacySensitiveTest) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();

  // Add those words to the mock dictionary that are assumed to exist in
  // privacy sensitive filtering. They are registered through
  // AddLookupExact() with the same string as key and value.
  const char *kEnglishWords[] = {
      "variable", "UPPER",
  };
  for (const char *word : kEnglishWords) {
    GetDictionaryMock()->AddLookupExact(word, word, word, Token::NONE);
  }

  for (const PrivacySensitiveTestData &data : kNonSensitiveCases) {
    predictor->ClearAllHistory();
    predictor->WaitForSyncer();

    const string description(data.scenario_description);
    const string input(data.input);
    const string output(data.output);
    const string partial_input = RemoveLastUCS4Character(input);
    const bool expect_sensitive = data.is_sensitive;

    // Runs Predict() on |segments| and expects failure for sensitive
    // scenarios and success otherwise.
    const auto expect_prediction = [&](Segments *segments) {
      if (expect_sensitive) {
        EXPECT_FALSE(predictor->Predict(segments))
            << description << " input: " << input << " output: " << output;
      } else {
        EXPECT_TRUE(predictor->Predict(segments))
            << description << " input: " << input << " output: " << output;
      }
    };

    // Initial commit.
    {
      Segments segments;
      MakeSegmentsForConversion(input, &segments);
      AddCandidate(0, output, &segments);
      predictor->Finish(*convreq_, &segments);
    }

    // TODO(yukawa): Refactor the scenario runner below by making
    // some utility functions.

    // Check suggestion: the partial input goes through the suggestion
    // segments, the full input through the prediction segments.
    {
      Segments segments;
      MakeSegmentsForSuggestion(partial_input, &segments);
      expect_prediction(&segments);

      segments.Clear();
      MakeSegmentsForPrediction(input, &segments);
      expect_prediction(&segments);
    }

    // Check prediction with both the partial and the full input.
    {
      Segments segments;
      MakeSegmentsForPrediction(partial_input, &segments);
      expect_prediction(&segments);

      segments.Clear();
      MakeSegmentsForPrediction(input, &segments);
      expect_prediction(&segments);
    }
  }
}
2183
// Documents a known issue: a password-like input that is committed as
// multiple segments is not treated as privacy sensitive, so it can be
// predicted afterwards.
TEST_F(UserHistoryPredictorTest, PrivacySensitiveMultiSegmentsTest) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();

  // Commit "123" and "abc!" as two separate segments.
  {
    Segments committed;
    MakeSegmentsForConversion("123", &committed);
    AddSegmentForConversion("abc!", &committed);
    AddCandidate(0, "123", &committed);
    AddCandidate(1, "abc!", &committed);
    predictor->Finish(*convreq_, &committed);
  }

  // Suggestion succeeds for both the partial and the complete input.
  {
    Segments query;
    MakeSegmentsForSuggestion("123abc", &query);
    EXPECT_TRUE(predictor->Predict(&query));

    query.Clear();
    MakeSegmentsForSuggestion("123abc!", &query);
    EXPECT_TRUE(predictor->Predict(&query));
  }

  // Prediction succeeds for both the partial and the complete input.
  {
    Segments query;
    MakeSegmentsForPrediction("123abc", &query);
    EXPECT_TRUE(predictor->Predict(&query));

    query.Clear();
    MakeSegmentsForPrediction("123abc!", &query);
    EXPECT_TRUE(predictor->Predict(&query));
  }
}
2218
// Round-trips a single entry through UserHistoryStorage: saves it with one
// storage object, loads it with a second one bound to the same file and
// compares the debug strings of both histories.
TEST_F(UserHistoryPredictorTest, UserHistoryStorage) {
  const string filename =
      FileUtil::JoinPath(SystemUtil::GetUserProfileDirectory(), "test");

  UserHistoryStorage storage1(filename);

  UserHistoryPredictor::Entry *entry = storage1.user_history_base.add_entries();
  CHECK(entry);
  entry->set_key("key");
  // Populate the value field as well; calling set_key() a second time would
  // only overwrite the key and leave the value empty.
  entry->set_value("value");
  storage1.Save();

  UserHistoryStorage storage2(filename);
  storage2.Load();

  EXPECT_EQ(storage1.user_history_base.DebugString(),
            storage2.user_history_base.DebugString());

  // Remove the temporary storage file.
  FileUtil::Unlink(filename);
}
2236
// Table of expectations for UserHistoryPredictor::RomanFuzzyPrefixMatch().
// Per the cases below, identical strings and exact prefixes are NOT fuzzy
// matches, while a single swap, deletion or voice-sound-mark substitution is.
TEST_F(UserHistoryPredictorTest, RomanFuzzyPrefixMatch) {
  // Short local alias for the function under test.
  const auto fuzzy_match = [](const string &str, const string &prefix) {
    return UserHistoryPredictor::RomanFuzzyPrefixMatch(str, prefix);
  };

  // same
  EXPECT_FALSE(fuzzy_match("abc", "abc"));
  EXPECT_FALSE(fuzzy_match("a", "a"));

  // exact prefix
  EXPECT_FALSE(fuzzy_match("abc", "a"));
  EXPECT_FALSE(fuzzy_match("abc", "ab"));
  EXPECT_FALSE(fuzzy_match("abc", ""));

  // swap
  EXPECT_TRUE(fuzzy_match("ab", "ba"));
  EXPECT_TRUE(fuzzy_match("abfoo", "bafoo"));
  EXPECT_TRUE(fuzzy_match("fooab", "fooba"));
  EXPECT_TRUE(fuzzy_match("fooabfoo", "foobafoo"));

  // swap + prefix
  EXPECT_TRUE(fuzzy_match("fooabfoo", "fooba"));

  // deletion
  EXPECT_TRUE(fuzzy_match("abcd", "acd"));
  EXPECT_TRUE(fuzzy_match("abcd", "bcd"));

  // deletion + prefix
  EXPECT_TRUE(fuzzy_match("abcdf", "acd"));
  EXPECT_TRUE(fuzzy_match("abcdfoo", "bcd"));

  // voice sound mark
  EXPECT_TRUE(fuzzy_match("gu-guru", "gu^guru"));
  EXPECT_TRUE(fuzzy_match("gu-guru", "gu=guru"));
  EXPECT_TRUE(fuzzy_match("gu-guru", "gu^gu"));
  EXPECT_FALSE(fuzzy_match("gu-guru", "gugu"));

  // Invalid
  EXPECT_FALSE(fuzzy_match("", ""));
  EXPECT_FALSE(fuzzy_match("", "a"));
  EXPECT_FALSE(fuzzy_match("abcde", "defe"));
}
2282
// Expectations for UserHistoryPredictor::MaybeRomanMisspelledKey() on keys
// that mix hiragana with stray alphabet or symbol characters.
TEST_F(UserHistoryPredictorTest, MaybeRomanMisspelledKey) {
  // Short local alias for the function under test.
  const auto maybe_misspelled = [](const string &key) {
    return UserHistoryPredictor::MaybeRomanMisspelledKey(key);
  };

  EXPECT_TRUE(maybe_misspelled("こんぴゅーt"));
  EXPECT_TRUE(maybe_misspelled("こんぴゅーt"));
  EXPECT_FALSE(maybe_misspelled("こんぴゅーた"));
  EXPECT_TRUE(maybe_misspelled("ぱsこん"));
  EXPECT_FALSE(maybe_misspelled("ぱそこん"));
  EXPECT_TRUE(maybe_misspelled("おねがいしまうs"));
  EXPECT_FALSE(maybe_misspelled("おねがいします"));
  EXPECT_TRUE(maybe_misspelled("いんた=ねっと"));
  EXPECT_FALSE(maybe_misspelled("t"));
  EXPECT_TRUE(maybe_misspelled("ーt"));
  EXPECT_FALSE(maybe_misspelled("おnがいしまうs"));
  // Two unknowns
  EXPECT_FALSE(maybe_misspelled("お&がい$しまう"));
  // One alpha and one unknown
  EXPECT_FALSE(maybe_misspelled("お&がいしまうs"));
}
2304
// Checks GetRomanMisspelledKey() for a single FREE segment under both the
// ROMAN and the KANA preedit methods.
TEST_F(UserHistoryPredictorTest, GetRomanMisspelledKey) {
  Segments segments;
  Segment *segment = segments.add_segment();
  segment->set_segment_type(Segment::FREE);
  Segment::Candidate *candidate = segment->add_candidate();
  candidate->value = "test";

  // ROMAN preedit: only the key with a trailing stray alphabet yields a
  // romanized key.
  config_->set_preedit_method(config::Config::ROMAN);

  segment->set_key("");
  EXPECT_EQ("",
            UserHistoryPredictor::GetRomanMisspelledKey(*convreq_, segments));

  segment->set_key("おねがいしまうs");
  EXPECT_EQ("onegaisimaus",
            UserHistoryPredictor::GetRomanMisspelledKey(*convreq_, segments));

  segment->set_key("おねがいします");
  EXPECT_EQ("",
            UserHistoryPredictor::GetRomanMisspelledKey(*convreq_, segments));

  // KANA preedit: the result is always empty.
  config_->set_preedit_method(config::Config::KANA);

  segment->set_key("おねがいしまうs");
  EXPECT_EQ("",
            UserHistoryPredictor::GetRomanMisspelledKey(*convreq_, segments));

  segment->set_key("おねがいします");
  EXPECT_EQ("",
            UserHistoryPredictor::GetRomanMisspelledKey(*convreq_, segments));
}
2336
// Checks RomanFuzzyLookupEntry() with romanized queries against entries
// whose keys are hiragana.
TEST_F(UserHistoryPredictorTest, RomanFuzzyLookupEntry) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  UserHistoryPredictor::Entry history_entry;
  UserHistoryPredictor::EntryPriorityQueue queue;

  // Empty key and empty query never match.
  history_entry.set_key("");
  EXPECT_FALSE(predictor->RomanFuzzyLookupEntry("", &history_entry, &queue));

  // One missing letter matches; too many missing letters or the exact
  // romanization do not.
  history_entry.set_key("よろしく");
  EXPECT_TRUE(
      predictor->RomanFuzzyLookupEntry("yorosku", &history_entry, &queue));
  EXPECT_TRUE(
      predictor->RomanFuzzyLookupEntry("yrosiku", &history_entry, &queue));
  EXPECT_TRUE(
      predictor->RomanFuzzyLookupEntry("yorsiku", &history_entry, &queue));
  EXPECT_FALSE(
      predictor->RomanFuzzyLookupEntry("yrsk", &history_entry, &queue));
  EXPECT_FALSE(
      predictor->RomanFuzzyLookupEntry("yorosiku", &history_entry, &queue));

  // '=' typed in place of '-' matches; the exact romanization and a query
  // with an additional missing letter do not.
  history_entry.set_key("ぐーぐる");
  EXPECT_TRUE(
      predictor->RomanFuzzyLookupEntry("gu=guru", &history_entry, &queue));
  EXPECT_FALSE(
      predictor->RomanFuzzyLookupEntry("gu-guru", &history_entry, &queue));
  EXPECT_FALSE(
      predictor->RomanFuzzyLookupEntry("g=guru", &history_entry, &queue));
}
2357
2358 namespace {
2359 struct LookupTestData {
2360 const string entry_key;
2361 const bool expect_result;
2362 };
2363 } // namespace
2364
// Checks LookupEntry() with an expansion trie as produced for Roman input,
// where a trailing "k" expands to the "か" row.
TEST_F(UserHistoryPredictorTest, ExpandedLookupRoman) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  UserHistoryPredictor::Entry entry;
  UserHistoryPredictor::EntryPriorityQueue results;

  // Roman
  // preedit: "あk"
  // input_key: "あk"
  // key_base: "あ"
  // key_expanded: "か","き","く","け", "こ"
  unique_ptr<Trie<string>> expanded(new Trie<string>);
  const char *kExpandedKeys[] = {"か", "き", "く", "け", "こ"};
  for (const char *expanded_key : kExpandedKeys) {
    expanded->AddEntry(expanded_key, "");
  }

  // Lookup with both a key base and the expansion.
  const LookupTestData kWithBaseCases[] = {
      {"", false},
      {"あか", true},
      {"あき", true},
      {"あかい", true},
      {"あまい", false},
      {"あ", false},
      {"さか", false},
      {"さき", false},
      {"さかい", false},
      {"さまい", false},
      {"さ", false},
  };

  for (const LookupTestData &test : kWithBaseCases) {
    entry.set_key(test.entry_key);
    const bool found = predictor->LookupEntry(
        UserHistoryPredictor::DEFAULT, "あk", "あ", expanded.get(), &entry,
        nullptr, &results);
    EXPECT_EQ(test.expect_result, found) << test.entry_key;
  }

  // only expanded
  // preedit: "k"
  // input_key: ""
  // key_base: ""
  // key_expanded: "か","き","く","け", "こ"
  const LookupTestData kExpandedOnlyCases[] = {
      {"", false},
      {"か", true},
      {"き", true},
      {"かい", true},
      {"まい", false},
      {"も", false},
  };

  for (const LookupTestData &test : kExpandedOnlyCases) {
    entry.set_key(test.entry_key);
    const bool found = predictor->LookupEntry(
        UserHistoryPredictor::DEFAULT, "", "", expanded.get(), &entry,
        nullptr, &results);
    EXPECT_EQ(test.expect_result, found) << test.entry_key;
  }
}
2429
// Checks LookupEntry() with an expansion trie as produced for Kana input,
// where "し" expands to "し" and "じ".
TEST_F(UserHistoryPredictorTest, ExpandedLookupKana) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  UserHistoryPredictor::Entry entry;
  UserHistoryPredictor::EntryPriorityQueue results;

  // Kana
  // preedit: "あし"
  // input_key: "あし"
  // key_base: "あ"
  // key_expanded: "し","じ"
  unique_ptr<Trie<string>> expanded(new Trie<string>);
  const char *kExpandedKeys[] = {"し", "じ"};
  for (const char *expanded_key : kExpandedKeys) {
    expanded->AddEntry(expanded_key, "");
  }

  // Lookup with both a key base and the expansion.
  const LookupTestData kWithBaseCases[] = {
      {"", false},
      {"あ", false},
      {"あし", true},
      {"あじ", true},
      {"あしかゆい", true},
      {"あじうまい", true},
      {"あまにがい", false},
      {"あめ", false},
      {"まし", false},
      {"まじ", false},
      {"ましなあじ", false},
      {"まじうまい", false},
      {"ままにがい", false},
      {"まめ", false},
  };

  for (const LookupTestData &test : kWithBaseCases) {
    entry.set_key(test.entry_key);
    const bool found = predictor->LookupEntry(
        UserHistoryPredictor::DEFAULT, "あし", "あ", expanded.get(), &entry,
        nullptr, &results);
    EXPECT_EQ(test.expect_result, found) << test.entry_key;
  }

  // only expanded
  // input_key: "し"
  // key_base: ""
  // key_expanded: "し","じ"
  const LookupTestData kExpandedOnlyCases[] = {
      {"", false},
      {"し", true},
      {"じ", true},
      {"しかうまい", true},
      {"じゅうかい", true},
      {"ま", false},
      {"まめ", false},
  };

  for (const LookupTestData &test : kExpandedOnlyCases) {
    entry.set_key(test.entry_key);
    const bool found = predictor->LookupEntry(
        UserHistoryPredictor::DEFAULT, "し", "", expanded.get(), &entry,
        nullptr, &results);
    EXPECT_EQ(test.expect_result, found) << test.entry_key;
  }
}
2493
// Checks the match type returned by GetMatchTypeFromInput() when the
// expansion trie holds the "か" row, as for Roman input ending in "k".
TEST_F(UserHistoryPredictorTest, GetMatchTypeFromInputRoman) {
  // We have to define this here,
  // because UserHistoryPredictor::MatchType is private
  struct MatchTypeTestData {
    const string target;
    const UserHistoryPredictor::MatchType expect_type;
  };

  // Roman
  // preedit: "あk"
  // input_key: "あ"
  // key_base: "あ"
  // key_expanded: "か","き","く","け", "こ"
  unique_ptr<Trie<string>> expanded(new Trie<string>);
  const char *kExpandedKeys[] = {"か", "き", "く", "け", "こ"};
  for (const char *expanded_key : kExpandedKeys) {
    // Each expanded key is stored with itself as the value.
    expanded->AddEntry(expanded_key, expanded_key);
  }

  const MatchTypeTestData kWithBaseCases[] = {
      {"", UserHistoryPredictor::NO_MATCH},
      {"い", UserHistoryPredictor::NO_MATCH},
      {"あ", UserHistoryPredictor::RIGHT_PREFIX_MATCH},
      {"あい", UserHistoryPredictor::NO_MATCH},
      {"あか", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"あかい", UserHistoryPredictor::LEFT_PREFIX_MATCH},
  };

  for (const MatchTypeTestData &test : kWithBaseCases) {
    EXPECT_EQ(test.expect_type,
              UserHistoryPredictor::GetMatchTypeFromInput(
                  "あ", "あ", expanded.get(), test.target))
        << test.target;
  }

  // only expanded
  // preedit: "k"
  // input_key: ""
  // key_base: ""
  // key_expanded: "か","き","く","け", "こ"
  const MatchTypeTestData kExpandedOnlyCases[] = {
      {"", UserHistoryPredictor::NO_MATCH},
      {"い", UserHistoryPredictor::NO_MATCH},
      {"いか", UserHistoryPredictor::NO_MATCH},
      {"か", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"かいがい", UserHistoryPredictor::LEFT_PREFIX_MATCH},
  };

  for (const MatchTypeTestData &test : kExpandedOnlyCases) {
    EXPECT_EQ(test.expect_type,
              UserHistoryPredictor::GetMatchTypeFromInput(
                  "", "", expanded.get(), test.target))
        << test.target;
  }
}
2551
// Checks the match type returned by GetMatchTypeFromInput() when the
// expansion trie holds "し" and "じ", as for Kana input.
TEST_F(UserHistoryPredictorTest, GetMatchTypeFromInputKana) {
  // We have to define this here,
  // because UserHistoryPredictor::MatchType is private
  struct MatchTypeTestData {
    const string target;
    const UserHistoryPredictor::MatchType expect_type;
  };

  // Kana
  // preedit: "あし"
  // input_key: "あし"
  // key_base: "あ"
  // key_expanded: "し","じ"
  unique_ptr<Trie<string>> expanded(new Trie<string>);
  const char *kExpandedKeys[] = {"し", "じ"};
  for (const char *expanded_key : kExpandedKeys) {
    // Each expanded key is stored with itself as the value.
    expanded->AddEntry(expanded_key, expanded_key);
  }

  const MatchTypeTestData kWithBaseCases[] = {
      {"", UserHistoryPredictor::NO_MATCH},
      {"い", UserHistoryPredictor::NO_MATCH},
      {"いし", UserHistoryPredictor::NO_MATCH},
      {"あ", UserHistoryPredictor::RIGHT_PREFIX_MATCH},
      {"あし", UserHistoryPredictor::EXACT_MATCH},
      {"あじ", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"あした", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"あじしお", UserHistoryPredictor::LEFT_PREFIX_MATCH},
  };

  for (const MatchTypeTestData &test : kWithBaseCases) {
    EXPECT_EQ(test.expect_type,
              UserHistoryPredictor::GetMatchTypeFromInput(
                  "あし", "あ", expanded.get(), test.target))
        << test.target;
  }

  // only expanded
  // preedit: "し"
  // input_key: "し"
  // key_base: ""
  // key_expanded: "し","じ"
  const MatchTypeTestData kExpandedOnlyCases[] = {
      {"", UserHistoryPredictor::NO_MATCH},
      {"い", UserHistoryPredictor::NO_MATCH},
      {"し", UserHistoryPredictor::EXACT_MATCH},
      {"じ", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"しじみ", UserHistoryPredictor::LEFT_PREFIX_MATCH},
      {"じかん", UserHistoryPredictor::LEFT_PREFIX_MATCH},
  };

  for (const MatchTypeTestData &test : kExpandedOnlyCases) {
    EXPECT_EQ(test.expect_type,
              UserHistoryPredictor::GetMatchTypeFromInput(
                  "し", "", expanded.get(), test.target))
        << test.target;
  }
}
2609
2610 namespace {
InitSegmentsFromInputSequence(const string & text,composer::Composer * composer,ConversionRequest * request,Segments * segments)2611 void InitSegmentsFromInputSequence(const string &text,
2612 composer::Composer *composer,
2613 ConversionRequest *request,
2614 Segments *segments) {
2615 DCHECK(composer);
2616 DCHECK(request);
2617 DCHECK(segments);
2618 const char *begin = text.data();
2619 const char *end = text.data() + text.size();
2620 size_t mblen = 0;
2621
2622 while (begin < end) {
2623 commands::KeyEvent key;
2624 const char32 w = Util::UTF8ToUCS4(begin, end, &mblen);
2625 if (Util::GetCharacterSet(w) == Util::ASCII) {
2626 key.set_key_code(*begin);
2627 } else {
2628 key.set_key_code('?');
2629 key.set_key_string(string(begin, mblen));
2630 }
2631 begin += mblen;
2632 composer->InsertCharacterKeyEvent(key);
2633 }
2634
2635 request->set_composer(composer);
2636
2637 segments->set_request_type(Segments::PREDICTION);
2638 Segment *segment = segments->add_segment();
2639 CHECK(segment);
2640 string query;
2641 composer->GetQueryForPrediction(&query);
2642 segment->set_key(query);
2643 }
2644 } // namespace
2645
// Checks GetInputKeyFromSegments() for the Roman input "gu-g" with the
// expansion flag switched on and off.
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsRoman) {
  table_->LoadFromFile("system://romanji-hiragana.tsv");
  composer_->SetTable(table_.get());
  Segments segments;

  InitSegmentsFromInputSequence("gu-g", composer_.get(), convreq_.get(),
                                &segments);

  // Expansion enabled: the trailing "g" stays in the input key and an
  // expansion trie is produced.
  {
    FLAGS_enable_expansion_for_user_history_predictor = true;
    string key_with_pending;
    string key_base;
    unique_ptr<Trie<string>> expansion;
    UserHistoryPredictor::GetInputKeyFromSegments(
        *convreq_, segments, &key_with_pending, &key_base, &expansion);
    EXPECT_EQ("ぐーg", key_with_pending);
    EXPECT_EQ("ぐー", key_base);
    EXPECT_TRUE(expansion != nullptr);

    string found_value;
    size_t matched_length = 0;
    bool has_subtrie = false;
    EXPECT_TRUE(expansion->LookUpPrefix("ぐ", &found_value, &matched_length,
                                        &has_subtrie));
    EXPECT_EQ("ぐ", found_value);
  }

  // Expansion disabled: input key and base are identical and no trie is
  // produced.
  {
    FLAGS_enable_expansion_for_user_history_predictor = false;
    string key_with_pending;
    string key_base;
    unique_ptr<Trie<string>> expansion;
    UserHistoryPredictor::GetInputKeyFromSegments(
        *convreq_, segments, &key_with_pending, &key_base, &expansion);
    EXPECT_EQ("ぐー", key_with_pending);
    EXPECT_EQ("ぐー", key_base);
    EXPECT_TRUE(expansion == nullptr);
  }
}
2692
2693 namespace {
GetRandomAscii()2694 uint32 GetRandomAscii() {
2695 return static_cast<uint32>(' ') +
2696 Util::Random(static_cast<uint32>('~' - ' '));
2697 }
2698 } // namespace
2699
// Smoke test: feeds 1000 short random ASCII inputs through the composer and
// GetInputKeyFromSegments() with expansion enabled. Nothing is asserted on
// the results; the test only checks that the calls complete.
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsRomanRandom) {
  FLAGS_enable_expansion_for_user_history_predictor = true;
  table_->LoadFromFile("system://romanji-hiragana.tsv");
  composer_->SetTable(table_.get());
  Segments segments;

  for (size_t i = 0; i < 1000; ++i) {
    // Start every iteration from a clean composer AND clean segments.
    // InitSegmentsFromInputSequence() appends a segment on each call, so
    // without Clear() the segments would accumulate stale keys that no
    // longer correspond to the composer's current input.
    composer_->Reset();
    segments.Clear();

    const int len = 1 + Util::Random(4);
    DCHECK_GE(len, 1);
    DCHECK_LE(len, 5);
    string input;
    for (int j = 0; j < len; ++j) {
      // GetRandomAscii() returns a uint32 code; narrow it explicitly.
      input += static_cast<char>(GetRandomAscii());
    }
    InitSegmentsFromInputSequence(input, composer_.get(), convreq_.get(),
                                  &segments);
    string input_key;
    string base;
    unique_ptr<Trie<string>> expanded;
    UserHistoryPredictor::GetInputKeyFromSegments(*convreq_, segments,
                                                  &input_key, &base,
                                                  &expanded);
  }
}
2729
2730 // Found by random test.
2731 // input_key != base by compoesr modification.
TEST_F(UserHistoryPredictorTest,GetInputKeyFromSegmentsShouldNotCrash)2732 TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsShouldNotCrash) {
2733 FLAGS_enable_expansion_for_user_history_predictor = true;
2734 table_->LoadFromFile("system://romanji-hiragana.tsv");
2735 composer_->SetTable(table_.get());
2736 Segments segments;
2737
2738 {
2739 InitSegmentsFromInputSequence("8,+",
2740 composer_.get(),
2741 convreq_.get(),
2742 &segments);
2743 string input_key;
2744 string base;
2745 unique_ptr<Trie<string>> expanded;
2746 UserHistoryPredictor::GetInputKeyFromSegments(*convreq_,
2747 segments,
2748 &input_key,
2749 &base,
2750 &expanded);
2751 }
2752 }
2753
// Checks the key/base/expansion results for several romaji inputs that
// involve "n": "n", "nn", "n'" and "n'n".
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsRomanN) {
  FLAGS_enable_expansion_for_user_history_predictor = true;
  table_->LoadFromFile("system://romanji-hiragana.tsv");
  composer_->SetTable(table_.get());
  Segments segments;

  // "n": the key stays "n", the base is empty and the expansion holds "な".
  {
    InitSegmentsFromInputSequence("n",
                                  composer_.get(),
                                  convreq_.get(),
                                  &segments);
    string key, key_base;
    unique_ptr<Trie<string>> trie;
    UserHistoryPredictor::GetInputKeyFromSegments(
        *convreq_, segments, &key, &key_base, &trie);
    EXPECT_EQ("n", key);
    EXPECT_EQ("", key_base);
    EXPECT_TRUE(trie != nullptr);

    string found;
    size_t matched_length = 0;
    bool subtrie_exists = false;
    EXPECT_TRUE(trie->LookUpPrefix(
        "な", &found, &matched_length, &subtrie_exists));
    EXPECT_EQ("な", found);
  }

  // "nn": key and base are both "ん" and no expansion is produced.
  composer_->Reset();
  segments.Clear();
  {
    InitSegmentsFromInputSequence("nn",
                                  composer_.get(),
                                  convreq_.get(),
                                  &segments);
    string key, key_base;
    unique_ptr<Trie<string>> trie;
    UserHistoryPredictor::GetInputKeyFromSegments(
        *convreq_, segments, &key, &key_base, &trie);
    EXPECT_EQ("ん", key);
    EXPECT_EQ("ん", key_base);
    EXPECT_TRUE(trie == nullptr);
  }

  // "n'": same expectations as "nn".
  composer_->Reset();
  segments.Clear();
  {
    InitSegmentsFromInputSequence("n'",
                                  composer_.get(),
                                  convreq_.get(),
                                  &segments);
    string key, key_base;
    unique_ptr<Trie<string>> trie;
    UserHistoryPredictor::GetInputKeyFromSegments(
        *convreq_, segments, &key, &key_base, &trie);
    EXPECT_EQ("ん", key);
    EXPECT_EQ("ん", key_base);
    EXPECT_TRUE(trie == nullptr);
  }

  // "n'n": key is "んn", base is "ん" and the expansion holds "な".
  composer_->Reset();
  segments.Clear();
  {
    InitSegmentsFromInputSequence("n'n",
                                  composer_.get(),
                                  convreq_.get(),
                                  &segments);
    string key, key_base;
    unique_ptr<Trie<string>> trie;
    UserHistoryPredictor::GetInputKeyFromSegments(
        *convreq_, segments, &key, &key_base, &trie);
    EXPECT_EQ("んn", key);
    EXPECT_EQ("ん", key_base);
    EXPECT_TRUE(trie != nullptr);

    string found;
    size_t matched_length = 0;
    bool subtrie_exists = false;
    EXPECT_TRUE(trie->LookUpPrefix(
        "な", &found, &matched_length, &subtrie_exists));
    EXPECT_EQ("な", found);
  }
}
2843
// With the flick table, the input "/" must give the key "ん", an empty base
// and an expansion in which "ん" can be looked up.
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsFlickN) {
  FLAGS_enable_expansion_for_user_history_predictor = true;
  table_->LoadFromFile("system://flick-hiragana.tsv");
  composer_->SetTable(table_.get());

  Segments segments;
  InitSegmentsFromInputSequence(
      "/", composer_.get(), convreq_.get(), &segments);

  string key, key_base;
  unique_ptr<Trie<string>> trie;
  UserHistoryPredictor::GetInputKeyFromSegments(
      *convreq_, segments, &key, &key_base, &trie);
  EXPECT_EQ("ん", key);
  EXPECT_EQ("", key_base);
  EXPECT_TRUE(trie != nullptr);

  string found;
  size_t matched_length = 0;
  bool subtrie_exists = false;
  EXPECT_TRUE(trie->LookUpPrefix(
      "ん", &found, &matched_length, &subtrie_exists));
  EXPECT_EQ("ん", found);
}
2872
// With the 12-keys table, the input "わ00" must give the key "ん", an empty
// base and an expansion in which "ん" can be looked up.
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegments12KeyN) {
  FLAGS_enable_expansion_for_user_history_predictor = true;
  table_->LoadFromFile("system://12keys-hiragana.tsv");
  composer_->SetTable(table_.get());

  Segments segments;
  InitSegmentsFromInputSequence(
      "わ00", composer_.get(), convreq_.get(), &segments);

  string key, key_base;
  unique_ptr<Trie<string>> trie;
  UserHistoryPredictor::GetInputKeyFromSegments(
      *convreq_, segments, &key, &key_base, &trie);
  EXPECT_EQ("ん", key);
  EXPECT_EQ("", key_base);
  EXPECT_TRUE(trie != nullptr);

  string found;
  size_t matched_length = 0;
  bool subtrie_exists = false;
  EXPECT_TRUE(trie->LookUpPrefix(
      "ん", &found, &matched_length, &subtrie_exists));
  EXPECT_EQ("ん", found);
}
2903
// Kana input "あか" evaluated twice: once with the expansion flag on and once
// with it off.
TEST_F(UserHistoryPredictorTest, GetInputKeyFromSegmentsKana) {
  table_->LoadFromFile("system://kana.tsv");
  composer_->SetTable(table_.get());

  Segments segments;
  InitSegmentsFromInputSequence(
      "あか", composer_.get(), convreq_.get(), &segments);

  // Expansion enabled: base is "あ" and "が" is found in the expansion.
  {
    FLAGS_enable_expansion_for_user_history_predictor = true;
    string key, key_base;
    unique_ptr<Trie<string>> trie;
    UserHistoryPredictor::GetInputKeyFromSegments(
        *convreq_, segments, &key, &key_base, &trie);
    EXPECT_EQ("あか", key);
    EXPECT_EQ("あ", key_base);
    EXPECT_TRUE(trie != nullptr);

    string found;
    size_t matched_length = 0;
    bool subtrie_exists = false;
    EXPECT_TRUE(trie->LookUpPrefix(
        "が", &found, &matched_length, &subtrie_exists));
    EXPECT_EQ("が", found);
  }

  // Expansion disabled: key and base are identical and there is no trie.
  {
    FLAGS_enable_expansion_for_user_history_predictor = false;
    string key, key_base;
    unique_ptr<Trie<string>> trie;
    UserHistoryPredictor::GetInputKeyFromSegments(
        *convreq_, segments, &key, &key_base, &trie);
    EXPECT_EQ("あか", key);
    EXPECT_EQ("あか", key_base);
    EXPECT_TRUE(trie == nullptr);
  }
}
2949
// Commits one candidate that carries three inner segment boundaries, then
// checks which values are predicted from the keys "なかの" and "なまえ".
TEST_F(UserHistoryPredictorTest, RealtimeConversionInnerSegment) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;
  {
    const char kSentenceKey[] = "わたしのなまえはなかのです";
    const char kSentenceValue[] = "私の名前は中野です";
    MakeSegmentsForPrediction(kSentenceKey, &segments);

    Segment::Candidate *sentence =
        segments.mutable_segment(0)->add_candidate();
    CHECK(sentence);
    sentence->Init();
    sentence->value = kSentenceValue;
    sentence->content_value = kSentenceValue;
    sentence->key = kSentenceKey;
    sentence->content_key = kSentenceKey;

    // "わたしの, 私の", "わたし, 私"
    sentence->PushBackInnerSegmentBoundary(12, 6, 9, 3);
    // "なまえは, 名前は", "なまえ, 名前"
    sentence->PushBackInnerSegmentBoundary(12, 9, 9, 6);
    // "なかのです, 中野です", "なかの, 中野"
    sentence->PushBackInnerSegmentBoundary(15, 12, 9, 6);
  }
  predictor->Finish(*convreq_, &segments);
  segments.Clear();

  // The last inner segment is predictable from its content key.
  MakeSegmentsForPrediction("なかの", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_TRUE(FindCandidateByValue("中野です", segments));
  segments.Clear();

  // The middle inner segment is predictable alone and joined with the
  // segment after it.
  MakeSegmentsForPrediction("なまえ", &segments);
  EXPECT_TRUE(predictor->Predict(&segments));
  EXPECT_TRUE(FindCandidateByValue("名前は", segments));
  EXPECT_TRUE(FindCandidateByValue("名前は中野です", segments));
}
2989
// Commits a candidate with inner segment boundaries, then commits "私の" and
// turns it into a history segment.  With zero query suggestion enabled and an
// empty request, "名前は" is expected among the candidates.
TEST_F(UserHistoryPredictorTest, ZeroQueryFromRealtimeConversion) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;
  {
    const char kKey[] = "わたしのなまえはなかのです";
    const char kValue[] = "私の名前は中野です";
    MakeSegmentsForPrediction(kKey, &segments);
    Segment::Candidate *candidate =
        segments.mutable_segment(0)->add_candidate();
    CHECK(candidate);
    candidate->Init();
    candidate->value = kValue;
    candidate->content_value = kValue;
    candidate->key = kKey;
    candidate->content_key = kKey;
    // "わたしの, 私の", "わたし, 私"
    candidate->PushBackInnerSegmentBoundary(12, 6, 9, 3);
    // "なまえは, 名前は", "なまえ, 名前"
    candidate->PushBackInnerSegmentBoundary(12, 9, 9, 6);
    // "なかのです, 中野です", "なかの, 中野"
    candidate->PushBackInnerSegmentBoundary(15, 12, 9, 6);
  }
  predictor->Finish(*convreq_, &segments);
  segments.Clear();

  // Commit "私の" and mark its segment as history.
  MakeSegmentsForConversion("わたしの", &segments);
  AddCandidate(0, "私の", &segments);
  predictor->Finish(*convreq_, &segments);
  segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

  AddSegmentForSuggestion("", &segments);  // empty request
  // The option is set on the fixture's |request_|.  The unused local
  // commands::Request that used to be declared here has been dropped.
  request_->set_zero_query_suggestion(true);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  EXPECT_TRUE(FindCandidateByValue("名前は", segments));
}
3030
// Under a mobile request, a long sentence committed three times must be
// predicted from the short key "よろ".
TEST_F(UserHistoryPredictorTest, LongCandidateForMobile) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  commands::RequestForUnitTest::FillMobileRequest(request_.get());

  const char kSentenceKey[] = "よろしくおねがいします";
  const char kSentenceValue[] = "よろしくお願いします";

  Segments segments;
  // Commit the same sentence three times.
  for (int round = 0; round < 3; ++round) {
    MakeSegmentsForPrediction(kSentenceKey, &segments);
    Segment::Candidate *sentence =
        segments.mutable_segment(0)->add_candidate();
    CHECK(sentence);
    sentence->Init();
    sentence->value = kSentenceValue;
    sentence->content_value = kSentenceValue;
    sentence->key = kSentenceKey;
    sentence->content_key = kSentenceKey;
    predictor->Finish(*convreq_, &segments);
    segments.Clear();
  }

  MakeSegmentsForPrediction("よろ", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  EXPECT_TRUE(FindCandidateByValue("よろしくお願いします", segments));
}
3060
// EraseNextEntries() must drop every next entry carrying the given
// fingerprint and leave the others untouched.
TEST_F(UserHistoryPredictorTest, EraseNextEntries) {
  // Next-entry fingerprints, in order: 100, 10, 30, 10, 100.
  UserHistoryPredictor::Entry entry;
  const int kFingerprints[] = {100, 10, 30, 10, 100};
  for (size_t k = 0; k < 5; ++k) {
    entry.add_next_entries()->set_entry_fp(kFingerprints[k]);
  }

  // A fingerprint that is absent removes nothing.
  UserHistoryPredictor::EraseNextEntries(1234, &entry);
  EXPECT_EQ(5, entry.next_entries_size());

  // 30 occurs once.
  UserHistoryPredictor::EraseNextEntries(30, &entry);
  ASSERT_EQ(4, entry.next_entries_size());
  for (size_t k = 0; k != 4; ++k) {
    EXPECT_NE(30, entry.next_entries(k).entry_fp());
  }

  // 10 occurs twice.
  UserHistoryPredictor::EraseNextEntries(10, &entry);
  ASSERT_EQ(2, entry.next_entries_size());
  for (size_t k = 0; k != 2; ++k) {
    EXPECT_NE(10, entry.next_entries(k).entry_fp());
  }

  // Only the two 100s are left; erasing them empties the list.
  UserHistoryPredictor::EraseNextEntries(100, &entry);
  EXPECT_EQ(0, entry.next_entries_size());
}
3087
// Exercises RemoveNgramChain() on a small chain and checks both its return
// codes and which links survive each call.
TEST_F(UserHistoryPredictorTest, RemoveNgramChain) {
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // History layout used by this test:
  // ("abc", "ABC")
  // (  "a",   "A") --- ("b", "B") --- ("c", "C")
  UserHistoryPredictor::Entry *abc = InsertEntry(predictor, "abc", "ABC");
  UserHistoryPredictor::Entry *a = InsertEntry(predictor, "a", "A");
  UserHistoryPredictor::Entry *b = AppendEntry(predictor, "b", "B", a);
  UserHistoryPredictor::Entry *c = AppendEntry(predictor, "c", "C", b);

  std::vector<UserHistoryPredictor::Entry *> all_entries;
  all_entries.push_back(abc);
  all_entries.push_back(a);
  all_entries.push_back(b);
  all_entries.push_back(c);

  // A key-value pair that is not part of the chain yields NOT_FOUND, no matter
  // which entry the search starts from.
  for (size_t k = 0; k != all_entries.size(); ++k) {
    std::vector<StringPiece> key_parts, value_parts;
    EXPECT_EQ(UserHistoryPredictor::NOT_FOUND,
              predictor->RemoveNgramChain("hoge", "HOGE", all_entries[k],
                                          &key_parts, 0, &value_parts, 0));
  }
  // Those calls must leave every node and every link in place.
  for (size_t k = 0; k != all_entries.size(); ++k) {
    EXPECT_FALSE(all_entries[k]->removed());
  }
  EXPECT_TRUE(IsConnected(*a, *b));
  EXPECT_TRUE(IsConnected(*b, *c));

  {
    // Removing the chain for "abc" cuts only the link from "b" to "c".
    std::vector<StringPiece> key_parts, value_parts;
    EXPECT_EQ(UserHistoryPredictor::DONE,
              predictor->RemoveNgramChain("abc", "ABC", a,
                                          &key_parts, 0, &value_parts, 0));
    for (size_t k = 0; k != all_entries.size(); ++k) {
      EXPECT_FALSE(all_entries[k]->removed());
    }
    EXPECT_TRUE(IsConnected(*a, *b));
    EXPECT_FALSE(IsConnected(*b, *c));
  }
  {
    // "a" is the head of the chain, so the call returns TAIL and removes
    // nothing.
    std::vector<StringPiece> key_parts, value_parts;
    EXPECT_EQ(UserHistoryPredictor::TAIL,
              predictor->RemoveNgramChain("a", "A", a,
                                          &key_parts, 0, &value_parts, 0));
    for (size_t k = 0; k != all_entries.size(); ++k) {
      EXPECT_FALSE(all_entries[k]->removed());
    }
    EXPECT_TRUE(IsConnected(*a, *b));
    EXPECT_FALSE(IsConnected(*b, *c));
  }
  {
    // Removing the chain for "ab" as well leaves no link at all.
    std::vector<StringPiece> key_parts, value_parts;
    EXPECT_EQ(UserHistoryPredictor::DONE,
              predictor->RemoveNgramChain("ab", "AB", a,
                                          &key_parts, 0, &value_parts, 0));
    for (size_t k = 0; k != all_entries.size(); ++k) {
      EXPECT_FALSE(all_entries[k]->removed());
    }
    EXPECT_FALSE(IsConnected(*a, *b));
    EXPECT_FALSE(IsConnected(*b, *c));
  }
}
3158
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Unigram) {
  // ClearHistoryEntry() applied to a history that holds one unigram.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Register the unigram ("japanese", "Japanese").
  UserHistoryPredictor::Entry *entry =
      InsertEntry(predictor, "japanese", "Japanese");

  // Before deletion, "japan" leads to "Japanese" in both modes.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));

  // Delete the entry; it must then be flagged as removed.
  EXPECT_TRUE(predictor->ClearHistoryEntry("japanese", "Japanese"));
  EXPECT_TRUE(entry->removed());

  // No proper prefix of the key (the empty one included) may produce
  // "Japanese" any more.
  const string key("japanese");
  for (size_t len = 0; len != key.size(); ++len) {
    const string prefix(key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Japanese"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Japanese"));
  }
}
3183
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Bigram_DeleteWhole) {
  // Tests ClearHistoryEntry() for bigram history.  This case tests the deletion
  // of whole sentence.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Make the history for ("japaneseinput", "JapaneseInput").  It's assumed that
  // this sentence consists of two segments, "japanese" and "input".  So, the
  // following history entries are constructed:
  //   ("japaneseinput", "JapaneseInput")  // Unigram
  //   ("japanese", "Japanese") --- ("input", "Input")  // Bigram chain
  UserHistoryPredictor::Entry *japaneseinput;
  UserHistoryPredictor::Entry *japanese;
  UserHistoryPredictor::Entry *input;
  InitHistory_JapaneseInput(predictor, &japaneseinput, &japanese, &input);

  // Check the predictor functionality for the above history structure.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "input", "Input"));

  // Delete the unigram ("japaneseinput", "JapaneseInput").
  EXPECT_TRUE(predictor->ClearHistoryEntry("japaneseinput", "JapaneseInput"));

  // The unigram is removed and the bigram link is cut; the two bigram nodes
  // themselves stay.
  EXPECT_TRUE(japaneseinput->removed());
  EXPECT_FALSE(japanese->removed());
  EXPECT_FALSE(input->removed());
  EXPECT_FALSE(IsConnected(*japanese, *input));

  // Now "JapaneseInput" should never be suggested nor predicted.  The value
  // checked here must match the stored one exactly; the former spelling
  // "Japaneseinput" never existed in the history, so the check was vacuous.
  const string key = "japaneseinput";
  for (size_t i = 0; i < key.size(); ++i) {
    const string &prefix = key.substr(0, i);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "JapaneseInput"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "JapaneseInput"));
  }

  // However, predictor should show "Japanese" and "Input".
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
}
3224
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Bigram_DeleteFirst) {
  // ClearHistoryEntry() on a bigram history: the first node of the chain is
  // deleted.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Build the history for ("japaneseinput", "JapaneseInput"), the same
  // structure ClearHistoryEntry_Bigram_DeleteWhole uses.
  UserHistoryPredictor::Entry *whole_entry;
  UserHistoryPredictor::Entry *first_entry;
  UserHistoryPredictor::Entry *second_entry;
  InitHistory_JapaneseInput(predictor,
                            &whole_entry, &first_entry, &second_entry);

  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "input", "Input"));

  // Remove the first bigram node ("japanese", "Japanese").
  EXPECT_TRUE(predictor->ClearHistoryEntry("japanese", "Japanese"));

  // The first node is flagged as removed, yet its link to the second node
  // remains.
  EXPECT_FALSE(whole_entry->removed());
  EXPECT_TRUE(first_entry->removed());
  EXPECT_FALSE(second_entry->removed());
  EXPECT_TRUE(IsConnected(*first_entry, *second_entry));

  // "Japanese" must not come back for any proper prefix of "japaneseinput".
  const string key("japaneseinput");
  for (size_t len = 0; len != key.size(); ++len) {
    const string prefix(key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Japanese"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Japanese"));
  }

  // "JapaneseInput" and "Input" remain available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
}
3263
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Bigram_DeleteSecond) {
  // ClearHistoryEntry() on a bigram history: the second node of the chain is
  // deleted.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Build the history for ("japaneseinput", "JapaneseInput"), the same
  // structure ClearHistoryEntry_Bigram_DeleteWhole uses.
  UserHistoryPredictor::Entry *whole_entry;
  UserHistoryPredictor::Entry *first_entry;
  UserHistoryPredictor::Entry *second_entry;
  InitHistory_JapaneseInput(predictor,
                            &whole_entry, &first_entry, &second_entry);

  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "input", "Input"));

  // Remove the second bigram node ("input", "Input").
  EXPECT_TRUE(predictor->ClearHistoryEntry("input", "Input"));

  // Only the second node is flagged as removed; the link to it remains.
  EXPECT_FALSE(whole_entry->removed());
  EXPECT_FALSE(first_entry->removed());
  EXPECT_TRUE(second_entry->removed());
  EXPECT_TRUE(IsConnected(*first_entry, *second_entry));

  // "Input" must not come back for any proper prefix of "input".
  const string key("input");
  for (size_t len = 0; len != key.size(); ++len) {
    const string prefix(key, 0, len);
    EXPECT_FALSE(IsSuggested(predictor, prefix, "Input"));
    EXPECT_FALSE(IsPredicted(predictor, prefix, "Input"));
  }

  // "Japanese" and "JapaneseInput" remain available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
}
3300
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteWhole) {
  // ClearHistoryEntry() on a trigram history: the whole sentence is deleted.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Build the history for ("japaneseinputmethod", "JapaneseInputMethod").
  // The sentence is assumed to consist of the three segments "japanese",
  // "input" and "method", which gives these entries:
  //   ("japaneseinputmethod", "JapaneseInputMethod")  // Unigram
  //   ("japanese", "Japanese") -- ("input", "Input") -- ("method", "Method")
  UserHistoryPredictor::Entry *whole_entry;
  UserHistoryPredictor::Entry *first_entry;
  UserHistoryPredictor::Entry *second_entry;
  UserHistoryPredictor::Entry *third_entry;
  InitHistory_JapaneseInputMethod(predictor, &whole_entry,
                                  &first_entry, &second_entry, &third_entry);

  // Remove the history of the whole sentence.
  EXPECT_TRUE(predictor->ClearHistoryEntry("japaneseinputmethod",
                                           "JapaneseInputMethod"));

  // Besides the unigram, only the link from "input" to "method" is gone.
  EXPECT_TRUE(whole_entry->removed());
  EXPECT_FALSE(first_entry->removed());
  EXPECT_FALSE(second_entry->removed());
  EXPECT_FALSE(third_entry->removed());
  EXPECT_TRUE(IsConnected(*first_entry, *second_entry));
  EXPECT_FALSE(IsConnected(*second_entry, *third_entry));

  {
    // "JapaneseInputMethod" must not come back for any proper prefix of its
    // key.
    const string key("japaneseinputmethod");
    for (size_t len = 0; len != key.size(); ++len) {
      const string prefix(key, 0, len);
      EXPECT_FALSE(IsSuggested(predictor, prefix, "JapaneseInputMethod"));
      EXPECT_FALSE(IsPredicted(predictor, prefix, "JapaneseInputMethod"));
    }
  }
  {
    // Limitation of cutting the chain: with the link from "input" to "method"
    // gone, "InputMethod" can no longer be shown although it could before.
    // The user typed "JapaneseInputMethod", not "InputMethod", so this is
    // considered acceptable.
    const string key("inputmethod");
    for (size_t len = 0; len != key.size(); ++len) {
      const string prefix(key, 0, len);
      EXPECT_FALSE(IsSuggested(predictor, prefix, "InputMethod"));
      EXPECT_FALSE(IsPredicted(predictor, prefix, "InputMethod"));
    }
  }

  // These remain available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
}
3359
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteFirst) {
  // ClearHistoryEntry() on a trigram history: the first node is deleted.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Same history structure as in ClearHistoryEntry_Trigram_DeleteWhole.
  UserHistoryPredictor::Entry *whole_entry;
  UserHistoryPredictor::Entry *first_entry;
  UserHistoryPredictor::Entry *second_entry;
  UserHistoryPredictor::Entry *third_entry;
  InitHistory_JapaneseInputMethod(predictor, &whole_entry,
                                  &first_entry, &second_entry, &third_entry);

  // Everything is available before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));

  // Remove the first node of the chain.
  EXPECT_TRUE(predictor->ClearHistoryEntry("japanese", "Japanese"));

  // Only the first node is flagged as removed; both links survive.
  EXPECT_FALSE(whole_entry->removed());
  EXPECT_TRUE(first_entry->removed());
  EXPECT_FALSE(second_entry->removed());
  EXPECT_FALSE(third_entry->removed());
  EXPECT_TRUE(IsConnected(*first_entry, *second_entry));
  EXPECT_TRUE(IsConnected(*second_entry, *third_entry));

  {
    // "Japanese" must not come back for any proper prefix of the sentence
    // key.
    const string key("japaneseinputmethod");
    for (size_t len = 0; len != key.size(); ++len) {
      const string prefix(key, 0, len);
      EXPECT_FALSE(IsSuggested(predictor, prefix, "Japanese"));
      EXPECT_FALSE(IsPredicted(predictor, prefix, "Japanese"));
    }
  }

  // These remain available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
}
3410
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteSecond) {
  // ClearHistoryEntry() on a trigram history: the second node is deleted.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Same history structure as in ClearHistoryEntry_Trigram_DeleteWhole.
  UserHistoryPredictor::Entry *whole_entry;
  UserHistoryPredictor::Entry *first_entry;
  UserHistoryPredictor::Entry *second_entry;
  UserHistoryPredictor::Entry *third_entry;
  InitHistory_JapaneseInputMethod(predictor, &whole_entry,
                                  &first_entry, &second_entry, &third_entry);

  // Everything is available before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));

  // Remove the second node of the chain.
  EXPECT_TRUE(predictor->ClearHistoryEntry("input", "Input"));

  // Only the second node is flagged as removed; both links survive.
  EXPECT_FALSE(whole_entry->removed());
  EXPECT_FALSE(first_entry->removed());
  EXPECT_TRUE(second_entry->removed());
  EXPECT_FALSE(third_entry->removed());
  EXPECT_TRUE(IsConnected(*first_entry, *second_entry));
  EXPECT_TRUE(IsConnected(*second_entry, *third_entry));

  {
    // "Input" must not come back for any proper prefix of "inputmethod".
    const string key("inputmethod");
    for (size_t len = 0; len != key.size(); ++len) {
      const string prefix(key, 0, len);
      EXPECT_FALSE(IsSuggested(predictor, prefix, "Input"));
      EXPECT_FALSE(IsPredicted(predictor, prefix, "Input"));
    }
  }

  // These remain available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));
}
3461
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteThird) {
  // ClearHistoryEntry() on a trigram history: the third node is deleted.
  UserHistoryPredictor *predictor = GetUserHistoryPredictorWithClearedHistory();

  // Same history structure as in ClearHistoryEntry_Trigram_DeleteWhole.
  UserHistoryPredictor::Entry *whole_entry;
  UserHistoryPredictor::Entry *first_entry;
  UserHistoryPredictor::Entry *second_entry;
  UserHistoryPredictor::Entry *third_entry;
  InitHistory_JapaneseInputMethod(predictor, &whole_entry,
                                  &first_entry, &second_entry, &third_entry);

  // Everything is available before the deletion.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "meth", "Method"));

  // Remove the third node, "method".
  EXPECT_TRUE(predictor->ClearHistoryEntry("method", "Method"));

  // Only the third node is flagged as removed; both links survive.
  EXPECT_FALSE(whole_entry->removed());
  EXPECT_FALSE(first_entry->removed());
  EXPECT_FALSE(second_entry->removed());
  EXPECT_TRUE(third_entry->removed());
  EXPECT_TRUE(IsConnected(*first_entry, *second_entry));
  EXPECT_TRUE(IsConnected(*second_entry, *third_entry));

  {
    // "Method" must not come back for any proper prefix of "method".
    const string key("method");
    for (size_t len = 0; len != key.size(); ++len) {
      const string prefix(key, 0, len);
      EXPECT_FALSE(IsSuggested(predictor, prefix, "Method"));
      EXPECT_FALSE(IsPredicted(predictor, prefix, "Method"));
    }
  }

  // These remain available.
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "Japanese"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "japan", "JapaneseInput"));
  EXPECT_TRUE(
      IsSuggestedAndPredicted(predictor, "japan", "JapaneseInputMethod"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "Input"));
  EXPECT_TRUE(IsSuggestedAndPredicted(predictor, "inpu", "InputMethod"));
}
3512
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteFirstBigram) {
  // Verifies ClearHistoryEntry() on a trigram history when the cleared entry
  // is the sentence made of the first two nodes of the trigram.
  UserHistoryPredictor *const history_predictor =
      GetUserHistoryPredictorWithClearedHistory();

  struct KeyValue {
    const char *key;
    const char *value;
  };

  // Same history structure as in ClearHistoryEntry_Trigram_DeleteWhole.
  UserHistoryPredictor::Entry *sentence_entry = nullptr;
  UserHistoryPredictor::Entry *first_node = nullptr;
  UserHistoryPredictor::Entry *second_node = nullptr;
  UserHistoryPredictor::Entry *third_node = nullptr;
  InitHistory_JapaneseInputMethod(history_predictor, &sentence_entry,
                                  &first_node, &second_node, &third_node);

  // Everything is suggested and predicted before the deletion.
  const KeyValue kBeforeDeletion[] = {
      {"japan", "Japanese"},
      {"japan", "JapaneseInput"},
      {"japan", "JapaneseInputMethod"},
      {"inpu", "Input"},
      {"inpu", "InputMethod"},
      {"meth", "Method"},
  };
  for (const KeyValue &kv : kBeforeDeletion) {
    EXPECT_TRUE(IsSuggestedAndPredicted(history_predictor, kv.key, kv.value))
        << kv.key << " -> " << kv.value;
  }

  // Clear the sentence that consists of the first two nodes.
  EXPECT_TRUE(
      history_predictor->ClearHistoryEntry("japaneseinput", "JapaneseInput"));

  // No entry is flagged as removed; only the link from "japanese" to "input"
  // is gone, while the link from "input" to "method" is kept.
  EXPECT_FALSE(sentence_entry->removed());
  EXPECT_FALSE(first_node->removed());
  EXPECT_FALSE(second_node->removed());
  EXPECT_FALSE(third_node->removed());
  EXPECT_FALSE(IsConnected(*first_node, *second_node));
  EXPECT_TRUE(IsConnected(*second_node, *third_node));

  // "JapaneseInput" must be neither suggested nor predicted for any proper
  // prefix (the empty one included) of the full sentence key.
  const string sentence_key = "japaneseinputmethod";
  for (size_t length = 0; length < sentence_key.size(); ++length) {
    const string partial_key = sentence_key.substr(0, length);
    EXPECT_FALSE(IsSuggested(history_predictor, partial_key, "JapaneseInput"));
    EXPECT_FALSE(IsPredicted(history_predictor, partial_key, "JapaneseInput"));
  }

  // The remaining values, "JapaneseInputMethod" among them, are still
  // available.
  const KeyValue kAfterDeletion[] = {
      {"japan", "Japanese"},
      {"japan", "JapaneseInputMethod"},
      {"inpu", "Input"},
      {"inpu", "InputMethod"},
      {"meth", "Method"},
  };
  for (const KeyValue &kv : kAfterDeletion) {
    EXPECT_TRUE(IsSuggestedAndPredicted(history_predictor, kv.key, kv.value))
        << kv.key << " -> " << kv.value;
  }
}
3565
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Trigram_DeleteSecondBigram) {
  // Verifies ClearHistoryEntry() on a trigram history when the cleared entry
  // is the latter bigram ("input" followed by "method") of the trigram.
  UserHistoryPredictor *const history_predictor =
      GetUserHistoryPredictorWithClearedHistory();

  struct KeyValue {
    const char *key;
    const char *value;
  };

  // Same history structure as in ClearHistoryEntry_Trigram_DeleteWhole.
  UserHistoryPredictor::Entry *sentence_entry = nullptr;
  UserHistoryPredictor::Entry *first_node = nullptr;
  UserHistoryPredictor::Entry *second_node = nullptr;
  UserHistoryPredictor::Entry *third_node = nullptr;
  InitHistory_JapaneseInputMethod(history_predictor, &sentence_entry,
                                  &first_node, &second_node, &third_node);

  // Everything is suggested and predicted before the deletion.
  const KeyValue kBeforeDeletion[] = {
      {"japan", "Japanese"},
      {"japan", "JapaneseInput"},
      {"japan", "JapaneseInputMethod"},
      {"inpu", "Input"},
      {"inpu", "InputMethod"},
      {"meth", "Method"},
  };
  for (const KeyValue &kv : kBeforeDeletion) {
    EXPECT_TRUE(IsSuggestedAndPredicted(history_predictor, kv.key, kv.value))
        << kv.key << " -> " << kv.value;
  }

  // Clear the latter bigram.
  EXPECT_TRUE(
      history_predictor->ClearHistoryEntry("inputmethod", "InputMethod"));

  // No entry is flagged as removed; only the link from "input" to "method"
  // is gone.
  EXPECT_FALSE(sentence_entry->removed());
  EXPECT_FALSE(first_node->removed());
  EXPECT_FALSE(second_node->removed());
  EXPECT_FALSE(third_node->removed());
  EXPECT_TRUE(IsConnected(*first_node, *second_node));
  EXPECT_FALSE(IsConnected(*second_node, *third_node));

  // "InputMethod" must be neither suggested nor predicted for any proper
  // prefix (the empty one included) of its key.
  const string bigram_key = "inputmethod";
  for (size_t length = 0; length < bigram_key.size(); ++length) {
    const string partial_key = bigram_key.substr(0, length);
    EXPECT_FALSE(IsSuggested(history_predictor, partial_key, "InputMethod"));
    EXPECT_FALSE(IsPredicted(history_predictor, partial_key, "InputMethod"));
  }

  // The remaining values are still available.
  const KeyValue kAfterDeletion[] = {
      {"japan", "Japanese"},
      {"japan", "JapaneseInput"},
      {"japan", "JapaneseInputMethod"},
      {"inpu", "Input"},
      {"meth", "Method"},
  };
  for (const KeyValue &kv : kAfterDeletion) {
    EXPECT_TRUE(IsSuggestedAndPredicted(history_predictor, kv.key, kv.value))
        << kv.key << " -> " << kv.value;
  }
}
3616
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Scenario1) {
  // Common scenario: a user accidentally commits an incomplete romaji
  // sequence, the predictor learns it, and the user then deletes it.
  const char kIncompleteKey[] = "ぐーぐr";
  const char kIncompleteValue[] = "グーグr";
  const char kQuery[] = "ぐーぐ";

  UserHistoryPredictor *const history_predictor =
      GetUserHistoryPredictorWithClearedHistory();

  // Commit the conversion "ぐーぐr" -> "グーグr" three times to emulate the
  // accidental input of an incomplete sequence.
  for (int remaining = 3; remaining > 0; --remaining) {
    Segments committed;
    MakeSegmentsForConversion(kIncompleteKey, &committed);
    AddCandidate(kIncompleteValue, &committed);
    history_predictor->Finish(*convreq_, &committed);
  }

  // The predictor has learned "グーグr".
  EXPECT_TRUE(IsSuggested(history_predictor, kQuery, kIncompleteValue));
  EXPECT_TRUE(IsPredicted(history_predictor, kQuery, kIncompleteValue));

  // The user tries deleting the history entry ("ぐーぐr", "グーグr").
  EXPECT_TRUE(
      history_predictor->ClearHistoryEntry(kIncompleteKey, kIncompleteValue));

  // "グーグr" must no longer show up, neither for suggestion nor for
  // prediction.
  EXPECT_FALSE(IsSuggested(history_predictor, kQuery, kIncompleteValue));
  EXPECT_FALSE(IsPredicted(history_predictor, kQuery, kIncompleteValue));
}
3642
TEST_F(UserHistoryPredictorTest, ClearHistoryEntry_Scenario2) {
  // Common scenario: a user commits a sentence ending with a symbol, the
  // predictor learns it, and the user then deletes the history entry that
  // contains the symbol.
  UserHistoryPredictor *const history_predictor =
      GetUserHistoryPredictorWithClearedHistory();

  // Appends one FIXED_VALUE segment holding a single candidate whose fields
  // are filled from the arguments.
  const auto append_fixed_segment =
      [](Segments *target, const char *key, const char *value,
         const char *content_key, const char *content_value) {
        Segment *segment = target->add_segment();
        segment->set_key(key);
        segment->set_segment_type(Segment::FIXED_VALUE);
        Segment::Candidate *cand = segment->add_candidate();
        cand->Init();
        cand->value = value;
        cand->content_value = content_value;
        cand->key = segment->key();
        cand->content_key = content_key;
      };

  // Commit "きょうもいいてんき!" -> "今日もいい天気!" three times so that the
  // sentence is learned.  The sentence is assumed to consist of the three
  // segments "今日も|いい天気|!".
  for (int round = 0; round < 3; ++round) {
    Segments sentence;
    sentence.set_request_type(Segments::CONVERSION);

    // ("きょうも", "今日も") with content word ("きょう", "今日").
    append_fixed_segment(&sentence, "きょうも", "今日も", "きょう", "今日");
    // ("いいてんき", "いい天気"); content key/value equal key/value.
    append_fixed_segment(&sentence, "いいてんき", "いい天気", "いいてんき",
                         "いい天気");
    // ("!", "!"); content key/value equal key/value.
    append_fixed_segment(&sentence, "!", "!", "!", "!");

    history_predictor->Finish(*convreq_, &sentence);
  }

  // The symbol lives in its own segment, so the sentence is available both
  // without and with the trailing "!".
  EXPECT_TRUE(
      IsSuggestedAndPredicted(history_predictor, "きょうも", "今日もいい天気"));
  EXPECT_TRUE(IsSuggestedAndPredicted(history_predictor, "きょうも",
                                      "今日もいい天気!"));

  // The user deletes the sentence that contains "!".
  EXPECT_TRUE(history_predictor->ClearHistoryEntry("きょうもいいてんき!",
                                                   "今日もいい天気!"));

  // "今日もいい天気" stays available...
  EXPECT_TRUE(
      IsSuggestedAndPredicted(history_predictor, "きょうも", "今日もいい天気"));

  // ...whereas "今日もいい天気!" is neither suggested nor predicted anymore.
  EXPECT_FALSE(IsSuggested(history_predictor, "きょうも", "今日もいい天気!"));
  EXPECT_FALSE(IsPredicted(history_predictor, "きょうも", "今日もいい天気!"));
}
3710
TEST_F(UserHistoryPredictorTest, ContentWordLearningFromInnerSegmentBoundary) {
  // Commits one prediction candidate that carries three inner segment
  // boundaries, with content word learning enabled, and checks which values
  // can be predicted afterwards.
  UserHistoryPredictor *const history_predictor =
      GetUserHistoryPredictorWithClearedHistory();
  history_predictor->set_content_word_learning_enabled(true);

  Segments segments;

  // Learning phase.
  const char kSentenceKey[] = "とうきょうかなごやにいきたい";
  const char kSentenceValue[] = "東京か名古屋に行きたい";
  MakeSegmentsForPrediction(kSentenceKey, &segments);
  Segment::Candidate *sentence = segments.mutable_segment(0)->add_candidate();
  sentence->Init();
  sentence->key = kSentenceKey;
  sentence->value = kSentenceValue;
  sentence->content_key = kSentenceKey;
  sentence->content_value = kSentenceValue;
  // NOTE(review): the four numbers look like UTF-8 byte lengths of key, value,
  // content key and content value of each inner segment (e.g. 18/9/15/6 fits
  // "とうきょうか"/"東京か"/"とうきょう"/"東京") -- confirm against the
  // declaration of PushBackInnerSegmentBoundary().
  sentence->PushBackInnerSegmentBoundary(18, 9, 15, 6);
  sentence->PushBackInnerSegmentBoundary(12, 12, 9, 9);
  sentence->PushBackInnerSegmentBoundary(12, 12, 12, 12);
  history_predictor->Finish(*convreq_, &segments);

  // Resets |segments| to a prediction request for |key| and runs Predict().
  const auto predict_for = [&](const char *key) {
    segments.Clear();
    MakeSegmentsForPrediction(key, &segments);
    return history_predictor->Predict(&segments);
  };

  EXPECT_TRUE(predict_for("と"));
  EXPECT_TRUE(FindCandidateByValue("東京", segments));
  EXPECT_TRUE(FindCandidateByValue("東京か", segments));

  EXPECT_TRUE(predict_for("な"));
  EXPECT_TRUE(FindCandidateByValue("名古屋", segments));
  EXPECT_TRUE(FindCandidateByValue("名古屋に", segments));

  EXPECT_TRUE(predict_for("い"));
  EXPECT_TRUE(FindCandidateByValue("行きたい", segments));
}
3750
TEST_F(UserHistoryPredictorTest, JoinedSegmentsTest_Mobile) {
  // Commits the two-segment conversion "私の|名前は" with the request filled
  // by FillMobileRequest(), then checks the candidates returned for growing
  // prefixes of the committed key.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();
  commands::RequestForUnitTest::FillMobileRequest(request_.get());
  Segments segments;

  MakeSegmentsForConversion("わたしの", &segments);
  AddCandidate(0, "私の", &segments);

  AddSegmentForConversion("なまえは", &segments);
  AddCandidate(1, "名前は", &segments);

  predictor->Finish(*convreq_, &segments);
  segments.Clear();

  // Suggestion for "わたし": only the first segment is returned.
  MakeSegmentsForSuggestion("わたし", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  // ASSERT rather than EXPECT: candidate(0) is read right below and must not
  // be touched when the candidate count is not the expected one.
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();

  // Prediction for "わたしの": still only the first segment.
  MakeSegmentsForPrediction("わたしの", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();

  // Prediction for "わたしのな": the joined value of both segments.
  MakeSegmentsForPrediction("わたしのな", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の名前は", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();
}
3792
TEST_F(UserHistoryPredictorTest, JoinedSegmentsTest_Desktop) {
  // Commits the two-segment conversion "私の|名前は" without filling a mobile
  // request, then checks the candidates returned for growing prefixes of the
  // committed key.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  Segments segments;

  MakeSegmentsForConversion("わたしの", &segments);
  AddCandidate(0, "私の", &segments);

  AddSegmentForConversion("なまえは", &segments);
  AddCandidate(1, "名前は", &segments);

  predictor->Finish(*convreq_, &segments);

  segments.Clear();

  // Suggestion for "わたし": the first segment and the joined value.
  MakeSegmentsForSuggestion("わたし", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  // ASSERT rather than EXPECT: candidate(0) and candidate(1) are read right
  // below and must not be touched when the candidate count is not the
  // expected one.
  ASSERT_EQ(2, segments.segment(0).candidates_size());
  EXPECT_EQ("私の", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  EXPECT_EQ("私の名前は", segments.segment(0).candidate(1).value);
  EXPECT_TRUE(segments.segment(0).candidate(1).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();

  // Prediction for "わたしの": only the joined value.
  MakeSegmentsForPrediction("わたしの", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の名前は", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();

  // Prediction for "わたしのな": only the joined value.
  MakeSegmentsForPrediction("わたしのな", &segments);
  EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
  ASSERT_EQ(1, segments.segment(0).candidates_size());
  EXPECT_EQ("私の名前は", segments.segment(0).candidate(0).value);
  EXPECT_TRUE(segments.segment(0).candidate(0).source_info &
              Segment::Candidate::USER_HISTORY_PREDICTOR);
  segments.Clear();
}
3838
TEST_F(UserHistoryPredictorTest, UsageStats) {
  // Checks the "CommitUserHistoryPredictor" and
  // "CommitUserHistoryPredictorZeroQuery" counters after committing
  // candidates flagged as coming from the user history predictor.
  UserHistoryPredictor *const history_predictor = GetUserHistoryPredictor();
  history_predictor->WaitForSyncer();
  history_predictor->ClearAllHistory();
  history_predictor->WaitForSyncer();

  Segments segments;

  // Builds a conversion for |key| with the candidate "名前は", flags that
  // candidate with USER_HISTORY_PREDICTOR and commits it via Finish().
  const auto commit_history_candidate = [&](const char *key) {
    MakeSegmentsForConversion(key, &segments);
    AddCandidate(0, "名前は", &segments);
    segments.mutable_conversion_segment(0)
        ->mutable_candidate(0)
        ->source_info |= Segment::Candidate::USER_HISTORY_PREDICTOR;
    history_predictor->Finish(*convreq_, &segments);
  };

  // Nothing has been committed yet.
  EXPECT_COUNT_STATS("CommitUserHistoryPredictor", 0);
  EXPECT_COUNT_STATS("CommitUserHistoryPredictorZeroQuery", 0);

  // Commit with a non-empty key.
  commit_history_candidate("なまえは");
  EXPECT_COUNT_STATS("CommitUserHistoryPredictor", 1);
  EXPECT_COUNT_STATS("CommitUserHistoryPredictorZeroQuery", 0);

  segments.Clear();

  // Commit with an empty key (zero query): both counters go up.
  commit_history_candidate("");
  EXPECT_COUNT_STATS("CommitUserHistoryPredictor", 2);
  EXPECT_COUNT_STATS("CommitUserHistoryPredictorZeroQuery", 1);
}
3871
TEST_F(UserHistoryPredictorTest, PunctuationLink_Mobile) {
  // Checks how history that involves the punctuation "!" is used for
  // suggestion and for zero query (empty request following a HISTORY segment)
  // when the request is filled by FillMobileRequest().  Four scenarios are
  // run; the history is cleared before each of them.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();
  commands::RequestForUnitTest::FillMobileRequest(request_.get());
  Segments segments;
  {
    // Scenario 1: commit "ございます", then commit "!" with "ございます" as
    // the history segment.
    MakeSegmentsForConversion("ございます", &segments);
    AddCandidate(0, "ございます", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("!", &segments);
    AddCandidate(1, "!", &segments);
    predictor->Finish(*convreq_, &segments);

    // Suggestion from the prefix "ございま" yields "ございます" first and
    // never the joined "ございます!".
    segments.Clear();
    MakeSegmentsForSuggestion("ございま", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
    EXPECT_FALSE(FindCandidateByValue("ございます!", segments));

    // Zero query from "ございます" -> "!"
    segments.Clear();
    MakeSegmentsForConversion("ございます", &segments);
    AddCandidate(0, "ございます", &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("!", segments.conversion_segment(0).candidate(0).value);
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  {
    // Scenario 2: commit "!", then commit "ございます" with "!" as the history
    // segment.
    MakeSegmentsForConversion("!", &segments);
    AddCandidate(0, "!", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("ございます", &segments);
    AddCandidate(1, "ございます", &segments);
    predictor->Finish(*convreq_, &segments);

    // Zero query from "!" -> no suggestion
    segments.Clear();
    MakeSegmentsForConversion("!", &segments);
    AddCandidate(0, "!", &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  {
    // Scenario 3: commit "ございます!" (punctuation at the end of the value),
    // then commit "よろしくお願いします" with it as the history segment.
    MakeSegmentsForConversion("ございます!", &segments);
    AddCandidate(0, "ございます!", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("よろしくおねがいします", &segments);
    AddCandidate(1, "よろしくお願いします", &segments);
    predictor->Finish(*convreq_, &segments);

    // Zero query from "!" -> no suggestion
    segments.Clear();
    MakeSegmentsForConversion("!", &segments);
    AddCandidate(0, "!", &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));

    // Zero query from "ございます!" -> no suggestion
    segments.Clear();
    MakeSegmentsForConversion("ございます!", &segments);
    AddCandidate(0, "ございます!", &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  {
    // Scenario 4: commit "ございます", then commit "!よろしくお願いします"
    // (punctuation at the beginning of the value) with "ございます" as the
    // history segment.
    MakeSegmentsForConversion("ございます", &segments);
    AddCandidate(0, "ございます", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("!よろしくおねがいします", &segments);
    AddCandidate(1, "!よろしくお願いします", &segments);
    predictor->Finish(*convreq_, &segments);

    // Suggestion from the prefix "ございま" yields "ございます" first and
    // never the joined sentence.
    segments.Clear();
    MakeSegmentsForSuggestion("ございま", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
    EXPECT_FALSE(
        FindCandidateByValue("ございます!よろしくお願いします", segments));

    // Zero query from "ございます" -> no suggestion
    segments.Clear();
    MakeSegmentsForConversion("ございます", &segments);
    AddCandidate(0, "ございます", &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
    AddSegmentForSuggestion("", &segments);  // empty request
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
  }
}
3990
TEST_F(UserHistoryPredictorTest, PunctuationLink_Desktop) {
  // Checks how history that involves the punctuation "!" is used for
  // suggestion when no mobile request is filled in.  Five scenarios are run;
  // the history is cleared before each of them.
  UserHistoryPredictor *predictor = GetUserHistoryPredictor();
  predictor->WaitForSyncer();
  predictor->ClearAllHistory();
  predictor->WaitForSyncer();
  Segments segments;
  {
    // Scenario 1: commit "ございます", then commit "!" with "ございます" as
    // the history segment.
    MakeSegmentsForConversion("ございます", &segments);
    AddCandidate(0, "ございます", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("!", &segments);
    AddCandidate(1, "!", &segments);
    predictor->Finish(*convreq_, &segments);

    // Neither the prefix "ございま" nor the full key "ございます" yields the
    // joined "ございます!"; "ございます" is the first candidate in both
    // cases.
    segments.Clear();
    MakeSegmentsForSuggestion("ございま", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
    EXPECT_FALSE(FindCandidateByValue("ございます!", segments));

    segments.Clear();
    MakeSegmentsForSuggestion("ございます", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
    EXPECT_FALSE(FindCandidateByValue("ございます!", segments));
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  {
    // Scenario 2: commit "!", then commit "よろしくお願いします" with "!" as
    // the history segment.
    MakeSegmentsForConversion("!", &segments);
    AddCandidate(0, "!", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("よろしくおねがいします", &segments);
    AddCandidate(1, "よろしくお願いします", &segments);
    predictor->Finish(*convreq_, &segments);

    // Suggestion for the key "!" yields nothing.
    segments.Clear();
    MakeSegmentsForSuggestion("!", &segments);
    EXPECT_FALSE(predictor->PredictForRequest(*convreq_, &segments));
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  {
    // Scenario 3: commit "ございます!" (punctuation at the end of the value),
    // then commit "よろしくお願いします" with it as the history segment.
    MakeSegmentsForConversion("ございます!", &segments);
    AddCandidate(0, "ございます!", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("よろしくおねがいします", &segments);
    AddCandidate(1, "よろしくお願いします", &segments);
    predictor->Finish(*convreq_, &segments);

    // "ございます!" is the first candidate for both "ございます" and
    // "ございます!", and the joined sentence is never returned.
    segments.Clear();
    MakeSegmentsForSuggestion("ございます", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("ございます!",
              segments.conversion_segment(0).candidate(0).value);
    EXPECT_FALSE(
        FindCandidateByValue("ございます!よろしくお願いします", segments));

    segments.Clear();
    MakeSegmentsForSuggestion("ございます!", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("ございます!",
              segments.conversion_segment(0).candidate(0).value);
    EXPECT_FALSE(
        FindCandidateByValue("ございます!よろしくお願いします", segments));
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  {
    // Scenario 4: commit "ございます", then commit "!よろしくお願いします"
    // (punctuation at the beginning of the value) with "ございます" as the
    // history segment.
    MakeSegmentsForConversion("ございます", &segments);
    AddCandidate(0, "ございます", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("!よろしくおねがいします", &segments);
    AddCandidate(1, "!よろしくお願いします", &segments);
    predictor->Finish(*convreq_, &segments);

    // Only "ございます" itself is expected; neither "ございます!" nor the
    // joined sentence is returned.
    segments.Clear();
    MakeSegmentsForSuggestion("ございます", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_EQ("ございます", segments.conversion_segment(0).candidate(0).value);
    EXPECT_FALSE(FindCandidateByValue("ございます!", segments));
    EXPECT_FALSE(
        FindCandidateByValue("ございます!よろしくお願いします", segments));
  }

  predictor->ClearAllHistory();
  predictor->WaitForSyncer();

  {
    // Scenario 5: unlike the scenarios above, the joined value with the
    // trailing "!" is expected to be returned here.
    // Note that "よろしくお願いします:よろしくおねがいします" is the sentence
    // like candidate.  Please refer to user_history_predictor.cc
    MakeSegmentsForConversion("よろしくおねがいします", &segments);
    AddCandidate(0, "よろしくお願いします", &segments);

    predictor->Finish(*convreq_, &segments);
    segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);

    AddSegmentForConversion("!", &segments);
    AddCandidate(1, "!", &segments);
    predictor->Finish(*convreq_, &segments);

    segments.Clear();
    MakeSegmentsForSuggestion("よろしくおねがいします", &segments);
    EXPECT_TRUE(predictor->PredictForRequest(*convreq_, &segments));
    EXPECT_TRUE(FindCandidateByValue("よろしくお願いします!", segments));
  }
}
4116
4117 } // namespace mozc
4118