// Copyright 2010-2018, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "rewriter/language_aware_rewriter.h"
31
32 #include <memory>
33 #include <string>
34
35 #include "base/logging.h"
36 #include "base/util.h"
37 #include "composer/composer.h"
38 #include "composer/table.h"
39 #include "config/config_handler.h"
40 #include "converter/segments.h"
41 #include "data_manager/testing/mock_data_manager.h"
42 #include "dictionary/dictionary_mock.h"
43 #include "dictionary/pos_matcher.h"
44 #include "protocol/commands.pb.h"
45 #include "protocol/config.pb.h"
46 #include "request/conversion_request.h"
47 #include "testing/base/public/gunit.h"
48 #include "testing/base/public/mozctest.h"
49 #include "usage_stats/usage_stats.h"
50 #include "usage_stats/usage_stats_testing_util.h"
51
52 namespace mozc {
53 namespace {
54
55 using std::unique_ptr;
56
57 using dictionary::DictionaryMock;
58 using dictionary::Token;
59
InsertASCIISequence(const string & text,composer::Composer * composer)60 void InsertASCIISequence(const string &text, composer::Composer *composer) {
61 for (size_t i = 0; i < text.size(); ++i) {
62 commands::KeyEvent key;
63 key.set_key_code(text[i]);
64 composer->InsertCharacterKeyEvent(key);
65 }
66 }
67
68 } // namespace
69
70 class LanguageAwareRewriterTest : public ::testing::Test {
71 protected:
72 // Workaround for C2512 error (no default appropriate constructor) on MSVS.
LanguageAwareRewriterTest()73 LanguageAwareRewriterTest() {}
~LanguageAwareRewriterTest()74 ~LanguageAwareRewriterTest() override {}
75
SetUp()76 void SetUp() override {
77 usage_stats::UsageStats::ClearAllStatsForTest();
78 dictionary_mock_.reset(new DictionaryMock);
79 }
80
TearDown()81 void TearDown() override {
82 dictionary_mock_.reset();
83 usage_stats::UsageStats::ClearAllStatsForTest();
84 }
85
CreateLanguageAwareRewriter() const86 LanguageAwareRewriter *CreateLanguageAwareRewriter() const {
87 return new LanguageAwareRewriter(
88 dictionary::POSMatcher(data_manager_.GetPOSMatcherData()),
89 dictionary_mock_.get());
90 }
91
RewriteWithLanguageAwareInput(const LanguageAwareRewriter * rewriter,const string & key,string * composition,Segments * segments)92 bool RewriteWithLanguageAwareInput(const LanguageAwareRewriter *rewriter,
93 const string &key, string *composition,
94 Segments *segments) {
95 commands::Request client_request;
96 client_request.set_language_aware_input(
97 commands::Request::LANGUAGE_AWARE_SUGGESTION);
98
99 composer::Table table;
100 config::Config default_config;
101 table.InitializeWithRequestAndConfig(client_request, default_config,
102 data_manager_);
103
104 composer::Composer composer(&table, &client_request, &default_config);
105 InsertASCIISequence(key, &composer);
106 composer.GetStringForPreedit(composition);
107
108 // Perform the rewrite command.
109 segments->set_request_type(Segments::SUGGESTION);
110 if (segments->conversion_segments_size() == 0) {
111 segments->add_segment();
112 }
113 Segment *segment = segments->mutable_conversion_segment(0);
114 segment->set_key(*composition);
115 ConversionRequest request(&composer, &client_request, &default_config);
116
117 return rewriter->Rewrite(request, segments);
118 }
119
120 unique_ptr<DictionaryMock> dictionary_mock_;
121 usage_stats::scoped_usage_stats_enabler usage_stats_enabler_;
122
123 const testing::MockDataManager data_manager_;
124
125 private:
126 const testing::ScopedTmpUserProfileDirectory tmp_profile_dir_;
127 };
128
129 namespace {
130
PushFrontCandidate(const string & data,Segment * segment)131 void PushFrontCandidate(const string &data, Segment *segment) {
132 Segment::Candidate *candidate = segment->push_front_candidate();
133 candidate->Init();
134 candidate->value = data;
135 candidate->key = data;
136 candidate->content_value = data;
137 candidate->content_key = data;
138 }
139 } // namespace
140
// Checks, for several ASCII key sequences, whether the rewriter adds a
// candidate holding the raw typed text (with the "did you mean" prefix and
// description) when language-aware suggestion is enabled.
TEST_F(LanguageAwareRewriterTest, LanguageAwareInput) {
  dictionary_mock_->AddLookupExact("house", "house", "house", Token::NONE);
  dictionary_mock_->AddLookupExact("query", "query", "query", Token::NONE);
  dictionary_mock_->AddLookupExact("google", "google", "google", Token::NONE);
  dictionary_mock_->AddLookupExact("naru", "naru", "naru", Token::NONE);
  dictionary_mock_->AddLookupExact("なる", "なる", "naru", Token::NONE);

  unique_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());

  // Held by value (like kPyTeyoN in LanguageAwareInputUsageStats) rather than
  // as references bound to temporaries.
  const string kPrefix = "→ ";
  const string kDidYouMean = "もしかして";

  // NOTE(review): NotRewriteFullWidthAsciiToHalfWidthAscii in this file
  // describes unconverted ASCII as being composed to full-width characters.
  // Confirm that the Latin letters in the expected compositions for "python",
  // "mozuk" and "query" below have not been width-normalized.

  {
    // "python" is composed to "pyてょn", but "python" should be suggested,
    // because alphabet characters are in the middle of the word.
    string composition;
    Segments segments;
    EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "python",
                                              &composition, &segments));

    EXPECT_EQ("pyてょn", composition);
    // Fatal guard: candidate(0) below must not be read when nothing was
    // added.
    ASSERT_LT(0, segments.conversion_segment(0).candidates_size());
    const Segment::Candidate &candidate =
        segments.conversion_segment(0).candidate(0);
    EXPECT_EQ("python", candidate.key);
    EXPECT_EQ("python", candidate.value);
    EXPECT_EQ(kPrefix, candidate.prefix);
    EXPECT_EQ(kDidYouMean, candidate.description);
  }

  {
    // "mozuk" is composed to "もずk", then "mozuk" is not suggested.
    // The trailing alphabet characters are not counted.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "mozuk",
                                               &composition, &segments));

    EXPECT_EQ("もずk", composition);
    EXPECT_EQ(0, segments.conversion_segment(0).candidates_size());
  }

  {
    // "house" is composed to "ほうせ".  Since "house" is in the dictionary,
    // unlike the above "mozuk" case, "house" should be suggested.
    string composition;
    Segments segments;

    if (segments.conversion_segments_size() == 0) {
      segments.add_segment();
    }

    Segment *segment = segments.mutable_conversion_segment(0);
    // Add three candidates.
    // => ["cand0", "cand1", "cand2"]
    PushFrontCandidate("cand2", segment);
    PushFrontCandidate("cand1", segment);
    PushFrontCandidate("cand0", segment);
    EXPECT_EQ(3, segment->candidates_size());

    // "house" should be inserted as the 3rd candidate (b/w cand1 and cand2).
    // => ["cand0", "cand1", "house", "cand2"]
    EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "house",
                                              &composition, &segments));
    // Fatal: candidate(2) below is only valid once the insertion happened.
    ASSERT_EQ(4, segment->candidates_size());

    EXPECT_EQ("ほうせ", composition);
    const Segment::Candidate &candidate =
        segments.conversion_segment(0).candidate(2);
    EXPECT_EQ("house", candidate.key);
    EXPECT_EQ("house", candidate.value);
    EXPECT_EQ(kPrefix, candidate.prefix);
    EXPECT_EQ(kDidYouMean, candidate.description);
  }

  {
    // "query" is composed to "くえry".  Since "query" is in the dictionary,
    // unlike the above "mozuk" case, "query" should be suggested.
    string composition;
    Segments segments;
    EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "query",
                                              &composition, &segments));

    EXPECT_EQ("くえry", composition);
    // Fatal guard: candidate(0) below must not be read when nothing was
    // added.
    ASSERT_LT(0, segments.conversion_segment(0).candidates_size());
    const Segment::Candidate &candidate =
        segments.conversion_segment(0).candidate(0);
    EXPECT_EQ("query", candidate.key);
    EXPECT_EQ("query", candidate.value);
    EXPECT_EQ(kPrefix, candidate.prefix);
    EXPECT_EQ(kDidYouMean, candidate.description);
  }

  {
    // "google" is composed to "google" by mode_switching_handler.
    // If the suggestion is equal to the composition, that suggestion
    // is not added.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "google",
                                               &composition, &segments));
    EXPECT_EQ("google", composition);
  }

  {
    // The key "なる" has two values, "naru" and "なる".
    // In this case, language aware rewriter should not be triggered.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "naru",
                                               &composition, &segments));

    EXPECT_EQ("なる", composition);
    EXPECT_EQ(0, segments.conversion_segment(0).candidates_size());
  }
}
255
// Checks that Rewrite() bumps the "LanguageAwareSuggestionTriggered" counter
// and that Finish() on a FIXED_VALUE segment bumps
// "LanguageAwareSuggestionCommitted".
TEST_F(LanguageAwareRewriterTest, LanguageAwareInputUsageStats) {
  unique_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());

  EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionTriggered");
  EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionCommitted");

  // NOTE(review): NotRewriteFullWidthAsciiToHalfWidthAscii in this file
  // describes unconverted ASCII as being composed to full-width characters.
  // Confirm that the Latin letters here have not been width-normalized.
  const string kPyTeyoN = "pyてょn";

  {
    // "python" is composed to "pyてょn", but "python" should be suggested,
    // because alphabet characters are in the middle of the word.
    string composition;
    Segments segments;
    EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "python",
                                              &composition, &segments));
    EXPECT_EQ(kPyTeyoN, composition);

    // Fatal guard: candidate(0) below must not be read when nothing was
    // added.
    ASSERT_LT(0, segments.conversion_segment(0).candidates_size());
    const Segment::Candidate &candidate =
        segments.conversion_segment(0).candidate(0);
    EXPECT_EQ("python", candidate.key);
    EXPECT_EQ("python", candidate.value);

    EXPECT_COUNT_STATS("LanguageAwareSuggestionTriggered", 1);
    EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionCommitted");
  }

  {
    // Call Rewrite with "python" again, then call Finish.  Both ...Triggered
    // and ...Committed should be incremented.
    // Note, RewriteWithLanguageAwareInput is not used here, because
    // Finish also requires ConversionRequest.
    string composition;
    Segments segments;

    commands::Request client_request;
    client_request.set_language_aware_input(
        commands::Request::LANGUAGE_AWARE_SUGGESTION);

    composer::Table table;
    config::Config default_config;
    table.InitializeWithRequestAndConfig(client_request, default_config,
                                         data_manager_);

    composer::Composer composer(&table, &client_request, &default_config);
    InsertASCIISequence("python", &composer);
    composer.GetStringForPreedit(&composition);
    EXPECT_EQ(kPyTeyoN, composition);

    // Perform the rewrite command.
    segments.set_request_type(Segments::SUGGESTION);
    Segment *segment = segments.add_segment();
    segment->set_key(composition);
    ConversionRequest request(&composer, &client_request, &default_config);

    EXPECT_TRUE(rewriter->Rewrite(request, &segments));

    EXPECT_COUNT_STATS("LanguageAwareSuggestionTriggered", 2);

    // Mark the segment as committed before calling Finish().
    segment->set_segment_type(Segment::FIXED_VALUE);
    EXPECT_LT(0, segment->candidates_size());
    rewriter->Finish(request, &segments);
    EXPECT_COUNT_STATS("LanguageAwareSuggestionCommitted", 1);
  }
}
320
// Checks that the rewriter does not offer the half-width key sequence as a
// suggestion when the composition is just its full-width form.
TEST_F(LanguageAwareRewriterTest, NotRewriteFullWidthAsciiToHalfWidthAscii) {
  unique_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());

  // The expected compositions below are written with full-width characters
  // on purpose: the keys are typed as half-width ASCII, and the point of the
  // test is that the two forms differ yet no rewrite happens.

  {
    // "1d*=" is composed to "１ｄ＊＝", which are the full width ascii
    // characters of "1d*=".  We do not want to rewrite full width ascii to
    // half width ascii by LanguageAwareRewriter.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "1d*=",
                                               &composition, &segments));
    EXPECT_EQ("１ｄ＊＝", composition);
  }

  {
    // "xyzw" is composed to "ｘｙｚｗ".  Do not rewrite.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "xyzw",
                                               &composition, &segments));
    EXPECT_EQ("ｘｙｚｗ", composition);
  }
}
344
// Checks the value returned by capability() for several combinations of
// special romanji table and preedit method, with language-aware suggestion
// requested in every case.
TEST_F(LanguageAwareRewriterTest, IsDisabledInTwelveKeyLayout) {
  dictionary_mock_->AddLookupExact("query", "query", "query", Token::NONE);

  struct {
    commands::Request::SpecialRomanjiTable table;
    config::Config::PreeditMethod preedit_method;
    // Expected capability() result (RewriterInterface flags).
    int type;
  } const kParams[] = {
      // Enabled combinations.
      {commands::Request::DEFAULT_TABLE, config::Config::ROMAN,
       RewriterInterface::SUGGESTION | RewriterInterface::PREDICTION},
      {commands::Request::QWERTY_MOBILE_TO_HIRAGANA, config::Config::ROMAN,
       RewriterInterface::SUGGESTION | RewriterInterface::PREDICTION},
      // Disabled combinations.
      {commands::Request::DEFAULT_TABLE, config::Config::KANA,
       RewriterInterface::NOT_AVAILABLE},
      {commands::Request::TWELVE_KEYS_TO_HIRAGANA, config::Config::ROMAN,
       RewriterInterface::NOT_AVAILABLE},
      {commands::Request::TOGGLE_FLICK_TO_HIRAGANA, config::Config::ROMAN,
       RewriterInterface::NOT_AVAILABLE},
      {commands::Request::GODAN_TO_HIRAGANA, config::Config::ROMAN,
       RewriterInterface::NOT_AVAILABLE},
  };

  unique_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());
  for (const auto &param : kParams) {
    commands::Request request;
    request.set_language_aware_input(
        commands::Request::LANGUAGE_AWARE_SUGGESTION);
    request.set_special_romanji_table(param.table);

    config::Config config;
    config.set_preedit_method(param.preedit_method);

    composer::Table table;
    table.InitializeWithRequestAndConfig(request, config, data_manager_);

    composer::Composer composer(&table, &request, &config);
    InsertASCIISequence("query", &composer);

    ConversionRequest conv_request(&composer, &request, &config);
    EXPECT_EQ(param.type, rewriter->capability(conv_request));
  }
}
389
390 } // namespace mozc
391