1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "rewriter/number_rewriter.h"
31
32 #include <cstddef>
33 #include <memory>
34 #include <string>
35
36 #include "base/logging.h"
37 #include "base/port.h"
38 #include "base/util.h"
39 #include "config/config_handler.h"
40 #include "converter/segments.h"
41 #include "request/conversion_request.h"
42 #include "data_manager/testing/mock_data_manager.h"
43 #include "dictionary/pos_matcher.h"
44 #include "protocol/commands.pb.h"
45 #include "testing/base/public/gunit.h"
46 #include "testing/base/public/mozctest.h"
47
// printf-style conversion specification for printing a size_t value.
// The standard length modifier for size_t is 'z' ("%zu"); MSVC has supported
// it since Visual Studio 2015 (_MSC_VER >= 1900).  Older MSVC versions only
// offer the Microsoft-specific 'I' modifier.  "%lu" is not a safe substitute
// there: unsigned long is 32 bits wide on 64-bit Windows while size_t is
// 64 bits wide.
#if defined(_MSC_VER) && _MSC_VER < 1900
#define SIZE_T_PRINTF_FORMAT "%Iu"
#else  // defined(_MSC_VER) && _MSC_VER < 1900
#define SIZE_T_PRINTF_FORMAT "%zu"
#endif  // defined(_MSC_VER) && _MSC_VER < 1900
55
56 namespace mozc {
57 namespace {
58
59 using dictionary::POSMatcher;
60
// Candidate descriptions that the tests below expect on rewritten number
// candidates.  They are compared against Segment::Candidate::description.
constexpr char kKanjiDescription[] = "漢数字";
constexpr char kArabicDescription[] = "数字";
constexpr char kOldKanjiDescription[] = "大字";
constexpr char kMaruNumberDescription[] = "丸数字";
constexpr char kRomanCapitalDescription[] = "ローマ数字(大文字)";
constexpr char kRomanNoCapitalDescription[] = "ローマ数字(小文字)";
67
FindValue(const Segment & segment,const string & value)68 bool FindValue(const Segment &segment, const string &value) {
69 for (size_t i = 0; i < segment.candidates_size(); ++i) {
70 if (segment.candidate(i).value == value) {
71 return true;
72 }
73 }
74 return false;
75 }
76
SetupSegments(const POSMatcher & pos_matcher,const string & candidate_value,Segments * segments)77 Segment *SetupSegments(const POSMatcher& pos_matcher,
78 const string &candidate_value, Segments *segments) {
79 segments->Clear();
80 Segment *segment = segments->push_back_segment();
81 Segment::Candidate *candidate = segment->add_candidate();
82 candidate->Init();
83 candidate->lid = pos_matcher.GetNumberId();
84 candidate->rid = pos_matcher.GetNumberId();
85 candidate->value = candidate_value;
86 candidate->content_value = candidate_value;
87 return segment;
88 }
89
HasDescription(const Segment & segment,const string & description)90 bool HasDescription(const Segment &segment, const string &description) {
91 for (size_t i = 0; i < segment.candidates_size(); ++i) {
92 if (segment.candidate(i).description == description) {
93 return true;
94 }
95 }
96 return false;
97 }
98
99 // Find candiadte id
FindCandidateId(const Segment & segment,const string & value,int * id)100 bool FindCandidateId(const Segment &segment, const string &value, int *id) {
101 for (size_t i = 0; i < segment.candidates_size(); ++i) {
102 if (segment.candidate(i).value == value) {
103 *id = i;
104 return true;
105 }
106 }
107 return false;
108 }
109 } // namespace
110
111 class NumberRewriterTest : public ::testing::Test {
112 protected:
113 // Explicitly define constructor to prevent Visual C++ from
114 // considering this class as POD.
NumberRewriterTest()115 NumberRewriterTest() {}
116
SetUp()117 void SetUp() override {
118 pos_matcher_.Set(mock_data_manager_.GetPOSMatcherData());
119 }
120
CreateNumberRewriter()121 NumberRewriter *CreateNumberRewriter() {
122 return new NumberRewriter(&mock_data_manager_);
123 }
124
125 const testing::MockDataManager mock_data_manager_;
126 POSMatcher pos_matcher_;
127 const ConversionRequest default_request_;
128
129 private:
130 testing::ScopedTmpUserProfileDirectory tmp_profile_dir_;
131 };
132
namespace {
// One expected candidate.  The tests compare each field with the field of the
// same name on the Segment::Candidate found at the same index.
struct ExpectResult {
  const char *value;          // Expected Candidate::value.
  const char *content_value;  // Expected Candidate::content_value.
  const char *description;    // Expected Candidate::description.
};
}  // namespace
140
// Checks the complete, ordered candidate list produced for a single number
// candidate "012" whose value and content_value are identical.
TEST_F(NumberRewriterTest, BasicTest) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetNumberId();
  candidate->value = "012";
  candidate->content_value = "012";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Expected candidates in order; the first row is the input candidate.
  const ExpectResult kExpectResults[] = {
      {"012", "012", ""},
      {"〇一二", "〇一二", kKanjiDescription},
      {"012", "012", kArabicDescription},
      {"十二", "十二", kKanjiDescription},
      {"壱拾弐", "壱拾弐", kOldKanjiDescription},
      {"Ⅻ", "Ⅻ", kRomanCapitalDescription},
      {"ⅻ", "ⅻ", kRomanNoCapitalDescription},
      {"⑫", "⑫", kMaruNumberDescription},
      {"0xc", "0xc", "16進数"},
      {"014", "014", "8進数"},
      {"0b1100", "0b1100", "2進数"},
  };

  // The loop below indexes candidates by position, so a size mismatch must
  // stop the test here rather than let it read past the candidate list.
  const size_t kExpectResultSize = arraysize(kExpectResults);
  ASSERT_EQ(kExpectResultSize, seg->candidates_size());

  for (size_t i = 0; i < kExpectResultSize; ++i) {
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    EXPECT_EQ(kExpectResults[i].value, seg->candidate(i).value);
    EXPECT_EQ(kExpectResults[i].content_value,
              seg->candidate(i).content_value);
    EXPECT_EQ(kExpectResults[i].description,
              seg->candidate(i).description);
  }
  seg->clear_candidates();
}
182
// Checks how many candidates the rewriter leaves in the segment for each
// Segments::RequestType when the input candidate is "012".
TEST_F(NumberRewriterTest, RequestType) {
  struct TestData {
    Segments::RequestType request_type;
    // size_t so that it compares with candidates_size() without a
    // signed/unsigned mismatch.
    size_t expected_candidate_number;
  };
  const TestData kTestDataList[] = {
      {Segments::CONVERSION, 11},  // 11 comes from BasicTest
      {Segments::REVERSE_CONVERSION, 8},
      {Segments::PREDICTION, 8},
      {Segments::SUGGESTION, 8},
  };

  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  for (size_t i = 0; i < arraysize(kTestDataList); ++i) {
    // Identifies the failing table row in the test output.
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    const TestData &test_data = kTestDataList[i];
    Segments segments;
    segments.set_request_type(test_data.request_type);
    Segment *seg = segments.push_back_segment();
    Segment::Candidate *candidate = seg->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->value = "012";
    candidate->content_value = "012";
    EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
    EXPECT_EQ(test_data.expected_candidate_number, seg->candidates_size());
  }
}
217
// Same as BasicTest, but the candidate value carries the suffix "が" while
// content_value is the bare number.  Each expected value keeps the suffix and
// each expected content_value does not.
TEST_F(NumberRewriterTest, BasicTestWithSuffix) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetNumberId();
  candidate->value = "012が";
  candidate->content_value = "012";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Expected candidates in order; the first row is the input candidate.
  const ExpectResult kExpectResults[] = {
      {"012が", "012", ""},
      {"〇一二が", "〇一二", kKanjiDescription},
      {"012が", "012", kArabicDescription},
      {"十二が", "十二", kKanjiDescription},
      {"壱拾弐が", "壱拾弐", kOldKanjiDescription},
      {"Ⅻが", "Ⅻ", kRomanCapitalDescription},
      {"ⅻが", "ⅻ", kRomanNoCapitalDescription},
      {"⑫が", "⑫", kMaruNumberDescription},
      {"0xcが", "0xc", "16進数"},
      {"014が", "014", "8進数"},
      {"0b1100が", "0b1100", "2進数"},
  };

  // The loop below indexes candidates by position, so a size mismatch must
  // stop the test here rather than let it read past the candidate list.
  const size_t kExpectResultSize = arraysize(kExpectResults);
  ASSERT_EQ(kExpectResultSize, seg->candidates_size());

  for (size_t i = 0; i < kExpectResultSize; ++i) {
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    EXPECT_EQ(kExpectResults[i].value, seg->candidate(i).value);
    EXPECT_EQ(kExpectResults[i].content_value,
              seg->candidate(i).content_value);
    EXPECT_EQ(kExpectResults[i].description,
              seg->candidate(i).description);
  }

  seg->clear_candidates();
}
260
// A kanji number followed by a counter suffix ("十五個", rid = counter suffix
// word id) is expected to gain exactly one extra candidate, "15個", with an
// empty description.
TEST_F(NumberRewriterTest, BasicTestWithNumberSuffix) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetCounterSuffixWordId();
  candidate->value = "十五個";
  candidate->content_value = "十五個";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Candidates 0 and 1 are indexed below, so the size check must be fatal.
  ASSERT_EQ(2, seg->candidates_size());

  EXPECT_EQ("十五個", seg->candidate(0).value);
  EXPECT_EQ("十五個", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("15個", seg->candidate(1).value);
  EXPECT_EQ("15個", seg->candidate(1).content_value);
  EXPECT_EQ("", seg->candidate(1).description);
  seg->clear_candidates();
}
286
// Two kanji-number-plus-counter candidates ("十五回", "十五階") are each
// expected to be followed directly by their Arabic-digit counterpart.
TEST_F(NumberRewriterTest, TestWithMultipleNumberSuffix) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetCounterSuffixWordId();
  candidate->value = "十五回";
  candidate->content_value = "十五回";
  candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetCounterSuffixWordId();
  candidate->value = "十五階";
  candidate->content_value = "十五階";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Candidates 0..3 are indexed below, so the size check must be fatal.
  ASSERT_EQ(4, seg->candidates_size());

  EXPECT_EQ("十五回", seg->candidate(0).value);
  EXPECT_EQ("十五回", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("15回", seg->candidate(1).value);
  EXPECT_EQ("15回", seg->candidate(1).content_value);
  EXPECT_EQ("", seg->candidate(1).description);

  EXPECT_EQ("十五階", seg->candidate(2).value);
  EXPECT_EQ("十五階", seg->candidate(2).content_value);
  EXPECT_EQ("", seg->candidate(2).description);

  EXPECT_EQ("15階", seg->candidate(3).value);
  EXPECT_EQ("15階", seg->candidate(3).content_value);
  EXPECT_EQ("", seg->candidate(3).description);

  seg->clear_candidates();
}
328
// Checks at which inputs the circled-number and Roman-numeral candidates
// start and stop being generated.
TEST_F(NumberRewriterTest, SpecialFormBoundaries) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());
  Segments segments;

  struct BoundaryCase {
    const char *input;
    bool has_maru;   // A candidate described as kMaruNumberDescription exists.
    bool has_roman;  // Candidates with both Roman descriptions exist.
  };
  const BoundaryCase kCases[] = {
      {"0", false, false},   // Special forms don't have zeros.
      {"1", true, true},     // "1" has special forms.
      {"12", true, true},    // "12" has every special form.
      {"13", true, false},   // "13" doesn't have roman forms.
      {"50", true, false},   // "50" has circled numerics.
      {"51", false, false},  // "51" doesn't have special forms.
  };

  for (const BoundaryCase &boundary : kCases) {
    // Identifies the failing input, since all cases share these lines.
    SCOPED_TRACE(boundary.input);
    const Segment *seg =
        SetupSegments(pos_matcher_, boundary.input, &segments);
    EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
    EXPECT_EQ(boundary.has_maru,
              HasDescription(*seg, kMaruNumberDescription));
    EXPECT_EQ(boundary.has_roman,
              HasDescription(*seg, kRomanCapitalDescription));
    EXPECT_EQ(boundary.has_roman,
              HasDescription(*seg, kRomanNoCapitalDescription));
  }
}
375
// An empty first candidate must be left alone while the number candidate
// "0" that follows it is still rewritten.
TEST_F(NumberRewriterTest, OneOfCandidatesIsEmpty) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *first_candidate = seg->add_candidate();
  first_candidate->Init();

  // this candidate should be skipped
  first_candidate->value = "";
  first_candidate->content_value = first_candidate->value;

  Segment::Candidate *second_candidate = seg->add_candidate();
  second_candidate->Init();

  second_candidate->value = "0";
  second_candidate->lid = pos_matcher_.GetNumberId();
  second_candidate->rid = pos_matcher_.GetNumberId();
  second_candidate->content_value = second_candidate->value;

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Candidates 0..4 are indexed below.  Stop here if fewer exist instead of
  // reading past the end of the candidate list.
  ASSERT_GE(seg->candidates_size(), 5u);

  EXPECT_EQ("", seg->candidate(0).value);
  EXPECT_EQ("", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("0", seg->candidate(1).value);
  EXPECT_EQ("0", seg->candidate(1).content_value);
  EXPECT_EQ("", seg->candidate(1).description);

  EXPECT_EQ("〇", seg->candidate(2).value);
  EXPECT_EQ("〇", seg->candidate(2).content_value);
  EXPECT_EQ(kKanjiDescription, seg->candidate(2).description);

  EXPECT_EQ("0", seg->candidate(3).value);
  EXPECT_EQ("0", seg->candidate(3).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(3).description);

  EXPECT_EQ("零", seg->candidate(4).value);
  EXPECT_EQ("零", seg->candidate(4).content_value);
  EXPECT_EQ(kOldKanjiDescription, seg->candidate(4).description);

  seg->clear_candidates();
}
420
// A candidate that is not a number ("タンポポ", default POS ids after Init())
// must not trigger any rewrite.
TEST_F(NumberRewriterTest, RewriteDoesNotHappen) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *non_number = seg->add_candidate();
  non_number->Init();
  non_number->value = "タンポポ";
  non_number->content_value = non_number->value;

  // Rewrite() must report that nothing was rewritten ...
  const bool rewritten =
      number_rewriter->Rewrite(default_request_, &segments);
  EXPECT_FALSE(rewritten);

  // ... and the number of candidates must be unchanged.
  EXPECT_EQ(1, seg->candidates_size());

  seg->clear_candidates();
}
440
// Checks the complete candidate list produced for the number candidate "0".
TEST_F(NumberRewriterTest, NumberIsZero) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetNumberId();
  candidate->value = "0";
  candidate->content_value = "0";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Candidates 0..3 are indexed below, so the size check must be fatal.
  ASSERT_EQ(4, seg->candidates_size());

  EXPECT_EQ("0", seg->candidate(0).value);
  EXPECT_EQ("0", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("〇", seg->candidate(1).value);
  EXPECT_EQ("〇", seg->candidate(1).content_value);
  EXPECT_EQ(kKanjiDescription, seg->candidate(1).description);

  EXPECT_EQ("0", seg->candidate(2).value);
  EXPECT_EQ("0", seg->candidate(2).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(2).description);

  EXPECT_EQ("零", seg->candidate(3).value);
  EXPECT_EQ("零", seg->candidate(3).content_value);
  EXPECT_EQ(kOldKanjiDescription, seg->candidate(3).description);

  seg->clear_candidates();
}
475
// Checks the complete candidate list produced for the number candidate "00".
TEST_F(NumberRewriterTest, NumberIsZeroZero) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetNumberId();
  candidate->value = "00";
  candidate->content_value = "00";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Candidates 0..3 are indexed below, so the size check must be fatal.
  ASSERT_EQ(4, seg->candidates_size());

  EXPECT_EQ("00", seg->candidate(0).value);
  EXPECT_EQ("00", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("〇〇", seg->candidate(1).value);
  EXPECT_EQ("〇〇", seg->candidate(1).content_value);
  EXPECT_EQ(kKanjiDescription, seg->candidate(1).description);

  EXPECT_EQ("00", seg->candidate(2).value);
  EXPECT_EQ("00", seg->candidate(2).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(2).description);

  EXPECT_EQ("零", seg->candidate(3).value);
  EXPECT_EQ("零", seg->candidate(3).content_value);
  EXPECT_EQ(kOldKanjiDescription, seg->candidate(3).description);

  seg->clear_candidates();
}
510
// Checks the complete candidate list produced for the 19-digit number
// 1000000000000000000 (10^18).
TEST_F(NumberRewriterTest, NumberIs19Digit) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetNumberId();
  candidate->value = "1000000000000000000";
  candidate->content_value = "1000000000000000000";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Expected candidates in order; the first row is the input candidate.
  // NOTE(review): rows 3/4 and rows 5/6 are byte-identical here.  Presumably
  // one row of each pair was meant to be a different (e.g. full-width)
  // spelling -- confirm against the rewriter's actual output.
  const ExpectResult kExpectResults[] = {
      {"1000000000000000000", "1000000000000000000", ""},
      {"一〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇",
       "一〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇", kKanjiDescription},
      {"1000000000000000000",
       "1000000000000000000", kArabicDescription},
      {"1,000,000,000,000,000,000", "1,000,000,000,000,000,000",
       kArabicDescription},
      {"1,000,000,000,000,000,000",
       "1,000,000,000,000,000,000",
       kArabicDescription},
      {"100京", "100京", kArabicDescription},
      {"100京", "100京", kArabicDescription},
      {"百京", "百京", kKanjiDescription},
      {"壱百京", "壱百京", kOldKanjiDescription},
      {"0xde0b6b3a7640000", "0xde0b6b3a7640000", "16進数"},
      {"067405553164731000000", "067405553164731000000", "8進数"},
      {"0b110111100000101101101011001110100111011001000000000000000000",
       "0b110111100000101101101011001110100111011001000000000000000000",
       "2進数"},
  };

  // The loop below indexes candidates by position, so a size mismatch must
  // stop the test here rather than let it read past the candidate list.
  const size_t kExpectResultSize = arraysize(kExpectResults);
  ASSERT_EQ(kExpectResultSize, seg->candidates_size());

  for (size_t i = 0; i < kExpectResultSize; ++i) {
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    EXPECT_EQ(kExpectResults[i].value, seg->candidate(i).value);
    EXPECT_EQ(kExpectResults[i].content_value,
              seg->candidate(i).content_value);
    EXPECT_EQ(kExpectResults[i].description,
              seg->candidate(i).description);
  }

  seg->clear_candidates();
}
561
// Checks the complete candidate list produced for 2^64, which does not fit
// into a 64-bit unsigned integer.  The expected list contains no
// hexadecimal, octal or binary rows.
TEST_F(NumberRewriterTest, NumberIsGreaterThanUInt64Max) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetNumberId();
  candidate->value = "18446744073709551616";  // 2^64
  candidate->content_value = "18446744073709551616";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Expected candidates in order; the first row is the input candidate.
  // NOTE(review): rows 3/4 and rows 5/6 are byte-identical here.  Presumably
  // one row of each pair was meant to be a different (e.g. full-width)
  // spelling -- confirm against the rewriter's actual output.
  const ExpectResult kExpectResults[] = {
      {"18446744073709551616",
       "18446744073709551616",
       ""},
      {"一八四四六七四四〇七三七〇九五五一六一六",
       "一八四四六七四四〇七三七〇九五五一六一六",
       kKanjiDescription},
      {"18446744073709551616",
       "18446744073709551616",
       kArabicDescription},
      {"18,446,744,073,709,551,616",
       "18,446,744,073,709,551,616",
       kArabicDescription},
      {"18,446,744,073,709,551,616",
       "18,446,744,073,709,551,616",
       kArabicDescription},
      {"1844京6744兆737億955万1616",
       "1844京6744兆737億955万1616",
       kArabicDescription},
      {"1844京6744兆737億955万1616",
       "1844京6744兆737億955万1616",
       kArabicDescription},
      {"千八百四十四京六千七百四十四兆七百三十七億九百五十五万千六百十六",
       "千八百四十四京六千七百四十四兆七百三十七億九百五十五万千六百十六",
       kKanjiDescription},
      {"壱阡八百四拾四京六阡七百四拾四兆七百参拾七億九百五拾五萬壱阡六百壱拾六",
       "壱阡八百四拾四京六阡七百四拾四兆七百参拾七億九百五拾五萬壱阡六百壱拾六",
       kOldKanjiDescription},
  };

  // The loop below indexes candidates by position, so a size mismatch must
  // stop the test here rather than let it read past the candidate list.
  const size_t kExpectResultSize = arraysize(kExpectResults);
  ASSERT_EQ(kExpectResultSize, seg->candidates_size());

  for (size_t i = 0; i < kExpectResultSize; ++i) {
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    EXPECT_EQ(kExpectResults[i].value, seg->candidate(i).value);
    EXPECT_EQ(kExpectResults[i].content_value,
              seg->candidate(i).content_value);
    EXPECT_EQ(kExpectResults[i].description,
              seg->candidate(i).description);
  }

  seg->clear_candidates();
}
620
// Checks the complete candidate list produced for 10^100 (a 101-digit
// number), including the final "Googol" candidate.
TEST_F(NumberRewriterTest, NumberIsGoogol) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetNumberId();

  // 10^100 as "100000 ... 0": "1" followed by one hundred "0"s.
  const string input = "1" + string(100, '0');

  candidate->value = input;
  candidate->content_value = input;

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Candidates 0..5 are indexed below, so the size check must be fatal.
  ASSERT_EQ(6, seg->candidates_size());

  EXPECT_EQ(input, seg->candidate(0).value);
  EXPECT_EQ(input, seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  // 10^100 as "一〇〇〇〇〇 ... 〇".  "〇" is a multi-byte character, so it is
  // appended in a loop rather than with the string(count, char) constructor.
  string expected2 = "一";
  for (size_t i = 0; i < 100; ++i) {
    expected2 += "〇";
  }
  EXPECT_EQ(expected2, seg->candidate(1).value);
  EXPECT_EQ(expected2, seg->candidate(1).content_value);
  EXPECT_EQ(kKanjiDescription, seg->candidate(1).description);

  // 10^100 as "100000 ... 0"
  const string expected3 = "1" + string(100, '0');
  EXPECT_EQ(expected3, seg->candidate(2).value);
  EXPECT_EQ(expected3, seg->candidate(2).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(2).description);

  // 10,000, ... ,000: the leading "10" followed by 33 groups of ",000"
  // (101 digits = 2 + 33 * 3).
  string expected1 = "10";
  for (size_t i = 0; i < 100 / 3; ++i) {
    expected1 += ",000";
  }
  EXPECT_EQ(expected1, seg->candidate(3).value);
  EXPECT_EQ(expected1, seg->candidate(3).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(3).description);

  // "10,000, ... ,000"
  // NOTE(review): expected4 is built exactly like expected1.  Presumably it
  // was meant to be a different (e.g. full-width) spelling -- confirm
  // against the rewriter's actual output.
  string expected4 = "10";  // "10"
  for (size_t i = 0; i < 100 / 3; ++i) {
    expected4 += ",000";
  }
  EXPECT_EQ(expected4, seg->candidate(4).value);
  EXPECT_EQ(expected4, seg->candidate(4).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(4).description);

  EXPECT_EQ("Googol", seg->candidate(5).value);
  EXPECT_EQ("Googol", seg->candidate(5).content_value);
  EXPECT_EQ("", seg->candidate(5).description);

  seg->clear_candidates();
}
690
// If the kanji candidate is ranked higher before the segments are rewritten,
// it must still rank above the Arabic-digit candidate afterwards.
TEST_F(NumberRewriterTest, RankingForKanjiCandidate) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  {
    Segment *segment = segments.add_segment();
    // A test assertion rather than DCHECK, so that the precondition is also
    // enforced in builds where DCHECK is compiled out.
    ASSERT_TRUE(segment != nullptr);
    segment->set_key("さんびゃく");
    // NOTE(review): add_candidate() is called twice and only the second
    // candidate is initialized and filled in, so the segment starts with an
    // extra candidate at index 0 that this test never touches.  Kept as-is
    // to preserve the tested input; confirm whether this is intentional.
    segment->add_candidate();
    Segment::Candidate *candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->key = "さんびゃく";
    candidate->value = "三百";
    candidate->content_value = "三百";
  }

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
  // segment(0) is accessed below, so an empty Segments must abort the test.
  ASSERT_NE(0, segments.segments_size());
  int kanji_pos = 0, arabic_pos = 0;
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "三百", &kanji_pos));
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "300", &arabic_pos));
  EXPECT_LT(kanji_pos, arabic_pos);
}
718
// Modify the existing ranking even if the converter returns unusual results
// due to dictionary noise, etc.: "参百" is given first and "三百" second, and
// after the rewrite "三百" must come before "参百".
TEST_F(NumberRewriterTest, ModifyExsistingRanking) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  {
    Segment *segment = segments.add_segment();
    // A test assertion rather than DCHECK, so that the precondition is also
    // enforced in builds where DCHECK is compiled out.
    ASSERT_TRUE(segment != nullptr);
    segment->set_key("さんびゃく");
    Segment::Candidate *candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->key = "さんびゃく";
    candidate->value = "参百";
    candidate->content_value = "参百";

    candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->key = "さんびゃく";
    candidate->value = "三百";
    candidate->content_value = "三百";
  }

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
  int kanji_pos = 0, old_kanji_pos = 0;
  // segment(0) is accessed below, so an empty Segments must abort the test.
  ASSERT_NE(0, segments.segments_size());
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "三百", &kanji_pos));
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "参百", &old_kanji_pos));
  EXPECT_LT(kanji_pos, old_kanji_pos);
}
753
// The segment starts with "壱" (unknown POS) followed by "一" (number POS).
// After the rewrite "一" must be at index 0 and "壱" must appear later,
// carrying the number POS id on both sides.
TEST_F(NumberRewriterTest, EraseExistingCandidates) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  {
    Segment *segment = segments.add_segment();
    // A test assertion rather than DCHECK, so that the precondition is also
    // enforced in builds where DCHECK is compiled out.
    ASSERT_TRUE(segment != nullptr);
    segment->set_key("いち");
    Segment::Candidate *candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetUnknownId();  // Not number POS
    candidate->rid = pos_matcher_.GetUnknownId();
    candidate->key = "いち";
    candidate->content_key = "いち";
    candidate->value = "壱";
    candidate->content_value = "壱";

    candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();  // Number POS
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->key = "いち";
    candidate->content_key = "いち";
    candidate->value = "一";
    candidate->content_value = "一";
  }

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // "一" becomes the base candidate, instead of "壱"
  int base_pos = 0;
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "一", &base_pos));
  EXPECT_EQ(0, base_pos);

  // Daiji will be inserted with new correct POS ids.
  int daiji_pos = 0;
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "壱", &daiji_pos));
  EXPECT_GT(daiji_pos, 0);
  EXPECT_EQ(pos_matcher_.GetNumberId(),
            segments.segment(0).candidate(daiji_pos).lid);
  EXPECT_EQ(pos_matcher_.GetNumberId(),
            segments.segment(0).candidate(daiji_pos).rid);
}
797
// Checks which inputs gain comma-separated Arabic candidates and which do
// not.  In each table row, column 0 is the input and columns 1 and 2 are the
// values looked up among the resulting candidates.
// NOTE(review): columns 1 and 2 are byte-identical in every row here.
// Presumably one of them was meant to be a different (e.g. full-width)
// spelling -- confirm against the rewriter's actual output.
TEST_F(NumberRewriterTest, SeparatedArabicsTest) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  // Rows whose separated forms must be present after the rewrite.
  const char *kSuccess[][3] = {
      {"1000", "1,000", "1,000"},
      {"12345678", "12,345,678", "12,345,678"},
      {"1234.5", "1,234.5", "1,234.5"},
  };

  for (const auto &row : kSuccess) {
    const char *const input = row[0];
    Segments segments;
    Segment *seg = segments.push_back_segment();
    Segment::Candidate *number = seg->add_candidate();
    number->Init();
    number->lid = pos_matcher_.GetNumberId();
    number->rid = pos_matcher_.GetNumberId();
    number->value = input;
    number->content_value = input;
    EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
    EXPECT_TRUE(FindValue(segments.segment(0), row[1]))
        << "Input : " << input;
    EXPECT_TRUE(FindValue(segments.segment(0), row[2]))
        << "Input : " << input;
  }

  // Rows whose separated forms must be absent after the rewrite.
  const char *kFail[][3] = {
      {"123", ",123", ",123"},
      {"999", ",999", ",999"},
      {"0000", "0,000", "0,000"},
  };

  for (const auto &row : kFail) {
    const char *const input = row[0];
    Segments segments;
    Segment *seg = segments.push_back_segment();
    Segment::Candidate *number = seg->add_candidate();
    number->Init();
    number->lid = pos_matcher_.GetNumberId();
    number->rid = pos_matcher_.GetNumberId();
    number->value = input;
    number->content_value = input;
    EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
    EXPECT_FALSE(FindValue(segments.segment(0), row[1]))
        << "Input : " << input;
    EXPECT_FALSE(FindValue(segments.segment(0), row[2]))
        << "Input : " << input;
  }
}
847
848 // Consider the case where user dictionaries contain following entry.
849 // - Reading: "はやぶさ"
850 // - Value: "8823"
851 // - POS: GeneralNoun (not *Number*)
852 // In this case, NumberRewriter should not clear
853 // Segment::Candidate::USER_DICTIONARY bit in the base candidate.
TEST_F(NumberRewriterTest, PreserveUserDictionaryAttibute) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  // Build a single general-noun candidate whose value happens to be digits
  // and which carries the USER_DICTIONARY attribute.
  Segments segments;
  Segment::Candidate *base = segments.push_back_segment()->add_candidate();
  base->Init();
  base->key = "はやぶさ";
  base->content_key = base->key;
  base->value = "8823";
  base->content_value = base->value;
  base->lid = pos_matcher_.GetGeneralNounId();
  base->rid = pos_matcher_.GetGeneralNounId();
  base->cost = 5925;
  base->wcost = 5000;
  base->attributes = Segment::Candidate::USER_DICTIONARY |
                     Segment::Candidate::NO_VARIANTS_EXPANSION;

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // After rewriting, some candidate with the original value must still have
  // the USER_DICTIONARY bit set.  Stop scanning at the first match.
  const Segment &rewritten = segments.segment(0);
  bool base_candidate_found = false;
  for (size_t i = 0;
       !base_candidate_found && i < rewritten.candidates_size(); ++i) {
    const Segment::Candidate &current = rewritten.candidate(i);
    base_candidate_found =
        current.value == "8823" &&
        (current.attributes & Segment::Candidate::USER_DICTIONARY) != 0;
  }
  EXPECT_TRUE(base_candidate_found);
}
891
TEST_F(NumberRewriterTest, DuplicateCandidateTest) {
  // To reproduce issue b/6714268.
  //
  // Verifies that NumberRewriter::capability() follows the request's
  // mixed_conversion flag: ALL when it is set, CONVERSION when it is cleared.
  // (A second, never-used rewriter instance was previously created here; it
  // has been dropped.)
  ConversionRequest convreq;
  commands::Request request;
  // |convreq| only stores a pointer, so later changes to |request| are
  // observed by the capability() calls below.
  convreq.set_request(&request);
  std::unique_ptr<NumberRewriter> rewriter(CreateNumberRewriter());

  {
    request.set_mixed_conversion(true);
    EXPECT_EQ(RewriterInterface::ALL, rewriter->capability(convreq));
  }

  {
    request.set_mixed_conversion(false);
    EXPECT_EQ(RewriterInterface::CONVERSION, rewriter->capability(convreq));
  }
}
910
TEST_F(NumberRewriterTest, NonNumberNounTest) {
  // A general noun that merely contains a numeral character must be left
  // alone, e.g. "百舌鳥" must not become "100舌鳥".
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  seg->set_key("もず");

  Segment::Candidate *noun = seg->add_candidate();
  noun->Init();
  noun->lid = pos_matcher_.GetGeneralNounId();
  noun->rid = pos_matcher_.GetGeneralNounId();
  noun->key = "もず";
  noun->content_key = noun->key;
  noun->value = "百舌鳥";
  noun->content_value = noun->value;

  // Rewrite() is expected to report that nothing was changed.
  EXPECT_FALSE(number_rewriter->Rewrite(default_request_, &segments));
}
927
TEST_F(NumberRewriterTest, RewriteForPartialSuggestion_b16765535) {
  const char kBubun[] = "部分";

  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());
  Segments segments;

  // First segment: a number candidate marked as a partial suggestion.
  Segment::Candidate *partial = segments.push_back_segment()->add_candidate();
  partial->Init();
  partial->key = "090";
  partial->content_key = "090";
  partial->value = "090";
  partial->content_value = "090";
  partial->lid = pos_matcher_.GetNumberId();
  partial->rid = pos_matcher_.GetNumberId();
  partial->description = kBubun;
  partial->attributes = Segment::Candidate::PARTIALLY_KEY_CONSUMED;
  partial->consumed_key_size = 3;

  // Second segment: a plain "-" candidate with default POS ids.
  Segment::Candidate *hyphen = segments.push_back_segment()->add_candidate();
  hyphen->Init();
  hyphen->key = "-";
  hyphen->content_key = "-";
  hyphen->value = "-";
  hyphen->content_value = "-";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  ASSERT_EQ(2, segments.conversion_segments_size());
  const Segment &seg = segments.conversion_segment(0);
  ASSERT_LE(2, seg.candidates_size());

  // Every candidate in the first segment, including the ones added by the
  // rewriter, must keep the description prefix and the partial-consumption
  // attribute.
  for (size_t idx = 0; idx < seg.candidates_size(); ++idx) {
    const Segment::Candidate &current = seg.candidate(idx);
    EXPECT_TRUE(Util::StartsWith(current.description, kBubun));
    EXPECT_TRUE(
        current.attributes & Segment::Candidate::PARTIALLY_KEY_CONSUMED);
  }
}
968
TEST_F(NumberRewriterTest, RewriteForPartialSuggestion_b19470020) {
  const char kBubun[] = "部分";

  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());
  Segments segments;

  // One segment whose key is longer than the key of its only candidate; the
  // candidate is flagged as consuming just part of the key.
  Segment *input_segment = segments.push_back_segment();
  input_segment->set_key("ひとりひとぱっく");
  Segment::Candidate *partial = input_segment->add_candidate();
  partial->Init();
  partial->key = "ひとり";
  partial->content_key = "ひとり";
  partial->value = "一人";
  partial->content_value = "一人";
  partial->lid = pos_matcher_.GetNumberId();
  partial->rid = pos_matcher_.GetNumberId();
  partial->description = kBubun;
  partial->attributes = Segment::Candidate::PARTIALLY_KEY_CONSUMED;
  partial->consumed_key_size = 3;

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  ASSERT_EQ(1, segments.conversion_segments_size());
  const Segment &seg = segments.conversion_segment(0);
  ASSERT_LE(2, seg.candidates_size());

  // Every candidate whose value is "1人" must carry over the consumed key
  // size, the description prefix and the partial-consumption attribute; at
  // least one such candidate must exist.
  bool found_halfwidth = false;
  for (size_t idx = 0; idx < seg.candidates_size(); ++idx) {
    const Segment::Candidate &current = seg.candidate(idx);
    if (current.value == "1人") {
      found_halfwidth = true;
      EXPECT_EQ(3, current.consumed_key_size);
      EXPECT_TRUE(Util::StartsWith(current.description, kBubun));
      EXPECT_TRUE(
          current.attributes & Segment::Candidate::PARTIALLY_KEY_CONSUMED);
    }
  }
  EXPECT_TRUE(found_halfwidth);
}
1008
TEST_F(NumberRewriterTest, RewritePhonePrefix_b16668386) {
  // A candidate such as "090-" (left id: number, right id: general symbol)
  // is expected to be left untouched: Rewrite() must return false.
  const char kPhonePrefix[] = "090-";

  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment::Candidate *prefix = segments.push_back_segment()->add_candidate();
  prefix->Init();
  prefix->key = kPhonePrefix;
  prefix->content_key = kPhonePrefix;
  prefix->value = kPhonePrefix;
  prefix->content_value = kPhonePrefix;
  prefix->lid = pos_matcher_.GetNumberId();
  prefix->rid = pos_matcher_.GetGeneralSymbolId();

  EXPECT_FALSE(number_rewriter->Rewrite(default_request_, &segments));
}
1025
1026 } // namespace mozc
1027