1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "rewriter/language_aware_rewriter.h"
31 
32 #include <memory>
33 #include <string>
34 
35 #include "base/logging.h"
36 #include "base/util.h"
37 #include "composer/composer.h"
38 #include "composer/table.h"
39 #include "config/config_handler.h"
40 #include "converter/segments.h"
41 #include "data_manager/testing/mock_data_manager.h"
42 #include "dictionary/dictionary_mock.h"
43 #include "dictionary/pos_matcher.h"
44 #include "protocol/commands.pb.h"
45 #include "protocol/config.pb.h"
46 #include "request/conversion_request.h"
47 #include "testing/base/public/gunit.h"
48 #include "testing/base/public/mozctest.h"
49 #include "usage_stats/usage_stats.h"
50 #include "usage_stats/usage_stats_testing_util.h"
51 
52 namespace mozc {
53 namespace {
54 
55 using std::unique_ptr;
56 
57 using dictionary::DictionaryMock;
58 using dictionary::Token;
59 
InsertASCIISequence(const string & text,composer::Composer * composer)60 void InsertASCIISequence(const string &text, composer::Composer *composer) {
61   for (size_t i = 0; i < text.size(); ++i) {
62     commands::KeyEvent key;
63     key.set_key_code(text[i]);
64     composer->InsertCharacterKeyEvent(key);
65   }
66 }
67 
68 }  // namespace
69 
70 class LanguageAwareRewriterTest : public ::testing::Test {
71  protected:
72   // Workaround for C2512 error (no default appropriate constructor) on MSVS.
LanguageAwareRewriterTest()73   LanguageAwareRewriterTest() {}
~LanguageAwareRewriterTest()74   ~LanguageAwareRewriterTest() override {}
75 
SetUp()76   void SetUp() override {
77     usage_stats::UsageStats::ClearAllStatsForTest();
78     dictionary_mock_.reset(new DictionaryMock);
79   }
80 
TearDown()81   void TearDown() override {
82     dictionary_mock_.reset();
83     usage_stats::UsageStats::ClearAllStatsForTest();
84   }
85 
CreateLanguageAwareRewriter() const86   LanguageAwareRewriter *CreateLanguageAwareRewriter() const {
87     return new LanguageAwareRewriter(
88         dictionary::POSMatcher(data_manager_.GetPOSMatcherData()),
89         dictionary_mock_.get());
90   }
91 
RewriteWithLanguageAwareInput(const LanguageAwareRewriter * rewriter,const string & key,string * composition,Segments * segments)92   bool RewriteWithLanguageAwareInput(const LanguageAwareRewriter *rewriter,
93                                      const string &key, string *composition,
94                                      Segments *segments) {
95     commands::Request client_request;
96     client_request.set_language_aware_input(
97         commands::Request::LANGUAGE_AWARE_SUGGESTION);
98 
99     composer::Table table;
100     config::Config default_config;
101     table.InitializeWithRequestAndConfig(client_request, default_config,
102                                          data_manager_);
103 
104     composer::Composer composer(&table, &client_request, &default_config);
105     InsertASCIISequence(key, &composer);
106     composer.GetStringForPreedit(composition);
107 
108     // Perform the rewrite command.
109     segments->set_request_type(Segments::SUGGESTION);
110     if (segments->conversion_segments_size() == 0) {
111       segments->add_segment();
112     }
113     Segment *segment = segments->mutable_conversion_segment(0);
114     segment->set_key(*composition);
115     ConversionRequest request(&composer, &client_request, &default_config);
116 
117     return rewriter->Rewrite(request, segments);
118   }
119 
120   unique_ptr<DictionaryMock> dictionary_mock_;
121   usage_stats::scoped_usage_stats_enabler usage_stats_enabler_;
122 
123   const testing::MockDataManager data_manager_;
124 
125  private:
126   const testing::ScopedTmpUserProfileDirectory tmp_profile_dir_;
127 };
128 
129 namespace {
130 
PushFrontCandidate(const string & data,Segment * segment)131 void PushFrontCandidate(const string &data, Segment *segment) {
132   Segment::Candidate *candidate = segment->push_front_candidate();
133   candidate->Init();
134   candidate->value = data;
135   candidate->key = data;
136   candidate->content_value = data;
137   candidate->content_key = data;
138 }
139 }  // namespace
140 
TEST_F(LanguageAwareRewriterTest,LanguageAwareInput)141 TEST_F(LanguageAwareRewriterTest, LanguageAwareInput) {
142   dictionary_mock_->AddLookupExact("house", "house", "house", Token::NONE);
143   dictionary_mock_->AddLookupExact("query", "query", "query", Token::NONE);
144   dictionary_mock_->AddLookupExact("google", "google", "google", Token::NONE);
145   dictionary_mock_->AddLookupExact("naru", "naru", "naru", Token::NONE);
146   dictionary_mock_->AddLookupExact("なる", "なる", "naru", Token::NONE);
147 
148   unique_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());
149 
150   const string &kPrefix = "→ ";
151   const string &kDidYouMean = "もしかして";
152 
153   {
154     // "python" is composed to "pyてょn", but "python" should be suggested,
155     // because alphabet characters are in the middle of the word.
156     string composition;
157     Segments segments;
158     EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "python",
159                                               &composition, &segments));
160 
161     EXPECT_EQ("pyてょn", composition);
162     const Segment::Candidate &candidate =
163         segments.conversion_segment(0).candidate(0);
164     EXPECT_EQ("python", candidate.key);
165     EXPECT_EQ("python", candidate.value);
166     EXPECT_EQ(kPrefix, candidate.prefix);
167     EXPECT_EQ(kDidYouMean, candidate.description);
168   }
169 
170   {
171     // "mozuk" is composed to "もずk", then "mozuk" is not suggested.
172     // The tailing alphabet characters are not counted.
173     string composition;
174     Segments segments;
175     EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "mozuk",
176                                                &composition, &segments));
177 
178     EXPECT_EQ("もずk", composition);
179     EXPECT_EQ(0, segments.conversion_segment(0).candidates_size());
180   }
181 
182   {
183     // "house" is composed to "ほうせ".  Since "house" is in the dictionary
184     // dislike the above "mozuk" case, "house" should be suggested.
185     string composition;
186     Segments segments;
187 
188     if (segments.conversion_segments_size() == 0) {
189       segments.add_segment();
190     }
191 
192     Segment *segment = segments.mutable_conversion_segment(0);
193     // Add three candidates.
194     // => ["cand0", "cand1", "cand2"]
195     PushFrontCandidate("cand2", segment);
196     PushFrontCandidate("cand1", segment);
197     PushFrontCandidate("cand0", segment);
198     EXPECT_EQ(3, segment->candidates_size());
199 
200     // "house" should be inserted as the 3rd candidate (b/w cand1 and cand2).
201     // => ["cand0", "cand1", "house", "cand2"]
202     EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "house",
203                                               &composition, &segments));
204     EXPECT_EQ(4, segment->candidates_size());
205 
206     EXPECT_EQ("ほうせ", composition);
207     const Segment::Candidate &candidate =
208         segments.conversion_segment(0).candidate(2);
209     EXPECT_EQ("house", candidate.key);
210     EXPECT_EQ("house", candidate.value);
211     EXPECT_EQ(kPrefix, candidate.prefix);
212     EXPECT_EQ(kDidYouMean, candidate.description);
213   }
214 
215   {
216     // "query" is composed to "くえry".  Since "query" is in the dictionary
217     // dislike the above "mozuk" case, "query" should be suggested.
218     string composition;
219     Segments segments;
220     EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "query",
221                                               &composition, &segments));
222 
223     EXPECT_EQ("くえry", composition);
224     const Segment::Candidate &candidate =
225         segments.conversion_segment(0).candidate(0);
226     EXPECT_EQ("query", candidate.key);
227     EXPECT_EQ("query", candidate.value);
228     EXPECT_EQ(kPrefix, candidate.prefix);
229     EXPECT_EQ(kDidYouMean, candidate.description);
230   }
231 
232   {
233     // "google" is composed to "google" by mode_switching_handler.
234     // If the suggestion is equal to the composition, that suggestion
235     // is not added.
236     string composition;
237     Segments segments;
238     EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "google",
239                                                &composition, &segments));
240     EXPECT_EQ("google", composition);
241   }
242 
243   {
244     // The key "なる" has two value "naru" and "なる".
245     // In this case, language aware rewriter should not be triggered.
246     string composition;
247     Segments segments;
248     EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "naru",
249                                                &composition, &segments));
250 
251     EXPECT_EQ("なる", composition);
252     EXPECT_EQ(0, segments.conversion_segment(0).candidates_size());
253   }
254 }
255 
TEST_F(LanguageAwareRewriterTest,LanguageAwareInputUsageStats)256 TEST_F(LanguageAwareRewriterTest, LanguageAwareInputUsageStats) {
257   unique_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());
258 
259   EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionTriggered");
260   EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionCommitted");
261 
262   const string kPyTeyoN = "pyてょn";
263 
264   {
265     // "python" is composed to "pyてょn", but "python" should be suggested,
266     // because alphabet characters are in the middle of the word.
267     string composition;
268     Segments segments;
269     EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "python",
270                                               &composition, &segments));
271     EXPECT_EQ(kPyTeyoN, composition);
272 
273     const Segment::Candidate &candidate =
274         segments.conversion_segment(0).candidate(0);
275     EXPECT_EQ("python", candidate.key);
276     EXPECT_EQ("python", candidate.value);
277 
278     EXPECT_COUNT_STATS("LanguageAwareSuggestionTriggered", 1);
279     EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionCommitted");
280   }
281 
282   {
283     // Call Rewrite with "python" again, then call Finish.  Both ...Triggered
284     // and ...Committed should be incremented.
285     // Note, RewriteWithLanguageAwareInput is not used here, because
286     // Finish also requires ConversionRequest.
287     string composition;
288     Segments segments;
289 
290     commands::Request client_request;
291     client_request.set_language_aware_input(
292         commands::Request::LANGUAGE_AWARE_SUGGESTION);
293 
294     composer::Table table;
295     config::Config default_config;
296     table.InitializeWithRequestAndConfig(client_request, default_config,
297                                          data_manager_);
298 
299     composer::Composer composer(&table, &client_request, &default_config);
300     InsertASCIISequence("python", &composer);
301     composer.GetStringForPreedit(&composition);
302     EXPECT_EQ(kPyTeyoN, composition);
303 
304     // Perform the rewrite command.
305     segments.set_request_type(Segments::SUGGESTION);
306     Segment *segment = segments.add_segment();
307     segment->set_key(composition);
308     ConversionRequest request(&composer, &client_request, &default_config);
309 
310     EXPECT_TRUE(rewriter->Rewrite(request, &segments));
311 
312     EXPECT_COUNT_STATS("LanguageAwareSuggestionTriggered", 2);
313 
314     segment->set_segment_type(Segment::FIXED_VALUE);
315     EXPECT_LT(0, segment->candidates_size());
316     rewriter->Finish(request, &segments);
317     EXPECT_COUNT_STATS("LanguageAwareSuggestionCommitted", 1);
318   }
319 }
320 
// Checks that a composition consisting only of full width ASCII characters
// does not get its half width spelling added as a suggestion.
TEST_F(LanguageAwareRewriterTest, NotRewriteFullWidthAsciiToHalfWidthAscii) {
  unique_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());

  {
    // "1d*=" is composed to "１ｄ＊＝", which are the full width ascii
    // characters of "1d*=". We do not want to rewrite full width ascii to
    // half width ascii by LanguageAwareRewriter.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "1d*=",
                                               &composition, &segments));
    EXPECT_EQ("１ｄ＊＝", composition);
  }

  {
    // "xyzw" is composed to "ｘｙｚｗ". Do not rewrite.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "xyzw",
                                               &composition, &segments));
    EXPECT_EQ("ｘｙｚｗ", composition);
  }
}
344 
// Checks the value of LanguageAwareRewriter::capability() for several
// combinations of special romanji table and preedit method.
TEST_F(LanguageAwareRewriterTest, IsDisabledInTwelveKeyLayout) {
  dictionary_mock_->AddLookupExact("query", "query", "query", Token::NONE);

  // One row per combination under test, with the capability value expected
  // from the rewriter for it.
  struct CapabilityCase {
    commands::Request::SpecialRomanjiTable romanji_table;
    config::Config::PreeditMethod preedit_method;
    int expected_capability;
  };
  const CapabilityCase kCases[] = {
      // Enabled combinations.
      {commands::Request::DEFAULT_TABLE, config::Config::ROMAN,
       RewriterInterface::SUGGESTION | RewriterInterface::PREDICTION},
      {commands::Request::QWERTY_MOBILE_TO_HIRAGANA, config::Config::ROMAN,
       RewriterInterface::SUGGESTION | RewriterInterface::PREDICTION},
      // Disabled combinations.
      {commands::Request::DEFAULT_TABLE, config::Config::KANA,
       RewriterInterface::NOT_AVAILABLE},
      {commands::Request::TWELVE_KEYS_TO_HIRAGANA, config::Config::ROMAN,
       RewriterInterface::NOT_AVAILABLE},
      {commands::Request::TOGGLE_FLICK_TO_HIRAGANA, config::Config::ROMAN,
       RewriterInterface::NOT_AVAILABLE},
      {commands::Request::GODAN_TO_HIRAGANA, config::Config::ROMAN,
       RewriterInterface::NOT_AVAILABLE},
  };

  unique_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());
  for (const auto &test_case : kCases) {
    // Client request selecting the romanji table for this row.
    commands::Request client_request;
    client_request.set_language_aware_input(
        commands::Request::LANGUAGE_AWARE_SUGGESTION);
    client_request.set_special_romanji_table(test_case.romanji_table);

    // User config selecting the preedit method for this row.
    config::Config user_config;
    user_config.set_preedit_method(test_case.preedit_method);

    composer::Table composition_table;
    composition_table.InitializeWithRequestAndConfig(client_request,
                                                     user_config,
                                                     data_manager_);

    composer::Composer typed_input(&composition_table, &client_request,
                                   &user_config);
    InsertASCIISequence("query", &typed_input);

    ConversionRequest conversion_request(&typed_input, &client_request,
                                         &user_config);
    EXPECT_EQ(test_case.expected_capability,
              rewriter->capability(conversion_request));
  }
}
389 
390 }  // namespace mozc
391