1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "rewriter/number_rewriter.h"
31 
32 #include <cstddef>
33 #include <memory>
34 #include <string>
35 
36 #include "base/logging.h"
37 #include "base/port.h"
38 #include "base/util.h"
39 #include "config/config_handler.h"
40 #include "converter/segments.h"
41 #include "request/conversion_request.h"
42 #include "data_manager/testing/mock_data_manager.h"
43 #include "dictionary/pos_matcher.h"
44 #include "protocol/commands.pb.h"
45 #include "testing/base/public/gunit.h"
46 #include "testing/base/public/mozctest.h"
47 
// printf-style conversion specification for size_t values.
// The standard 'z' length modifier ("%zu") is used everywhere except MSVC,
// where the Microsoft-specific 'I' size prefix ("%Iu") is used instead.
// "%lu" must not be used there: unsigned long is 32 bits wide on 64-bit
// Windows while size_t is 64 bits wide.
#ifdef _MSC_VER
#define SIZE_T_PRINTF_FORMAT "%Iu"
#else  // _MSC_VER
#define SIZE_T_PRINTF_FORMAT "%zu"
#endif  // _MSC_VER
55 
56 namespace mozc {
57 namespace {
58 
59 using dictionary::POSMatcher;
60 
// Candidate description strings that the tests in this file compare against
// Segment::Candidate::description.
// "Kanji numerals".
constexpr char kKanjiDescription[] = "漢数字";
// "Numerals".
constexpr char kArabicDescription[] = "数字";
// "Daiji" (formal kanji numerals).
constexpr char kOldKanjiDescription[] = "大字";
// "Circled numerals".
constexpr char kMaruNumberDescription[] = "丸数字";
// "Roman numerals (uppercase)".
constexpr char kRomanCapitalDescription[] = "ローマ数字(大文字)";
// "Roman numerals (lowercase)".
constexpr char kRomanNoCapitalDescription[] = "ローマ数字(小文字)";
67 
FindValue(const Segment & segment,const string & value)68 bool FindValue(const Segment &segment, const string &value) {
69   for (size_t i = 0; i < segment.candidates_size(); ++i) {
70     if (segment.candidate(i).value == value) {
71       return true;
72     }
73   }
74   return false;
75 }
76 
SetupSegments(const POSMatcher & pos_matcher,const string & candidate_value,Segments * segments)77 Segment *SetupSegments(const POSMatcher& pos_matcher,
78                        const string &candidate_value, Segments *segments) {
79   segments->Clear();
80   Segment *segment = segments->push_back_segment();
81   Segment::Candidate *candidate = segment->add_candidate();
82   candidate->Init();
83   candidate->lid = pos_matcher.GetNumberId();
84   candidate->rid = pos_matcher.GetNumberId();
85   candidate->value = candidate_value;
86   candidate->content_value = candidate_value;
87   return segment;
88 }
89 
HasDescription(const Segment & segment,const string & description)90 bool HasDescription(const Segment &segment, const string &description) {
91   for (size_t i = 0; i < segment.candidates_size(); ++i) {
92     if (segment.candidate(i).description == description) {
93       return true;
94     }
95   }
96   return false;
97 }
98 
99 // Find candiadte id
FindCandidateId(const Segment & segment,const string & value,int * id)100 bool FindCandidateId(const Segment &segment, const string &value, int *id) {
101   for (size_t i = 0; i < segment.candidates_size(); ++i) {
102     if (segment.candidate(i).value == value) {
103       *id = i;
104       return true;
105     }
106   }
107   return false;
108 }
109 }  // namespace
110 
111 class NumberRewriterTest : public ::testing::Test {
112  protected:
113   // Explicitly define constructor to prevent Visual C++ from
114   // considering this class as POD.
NumberRewriterTest()115   NumberRewriterTest() {}
116 
SetUp()117   void SetUp() override {
118     pos_matcher_.Set(mock_data_manager_.GetPOSMatcherData());
119   }
120 
CreateNumberRewriter()121   NumberRewriter *CreateNumberRewriter() {
122     return new NumberRewriter(&mock_data_manager_);
123   }
124 
125   const testing::MockDataManager mock_data_manager_;
126   POSMatcher pos_matcher_;
127   const ConversionRequest default_request_;
128 
129  private:
130   testing::ScopedTmpUserProfileDirectory tmp_profile_dir_;
131 };
132 
namespace {
// One expected candidate, used by the table-driven tests below. Each field is
// compared against the Segment::Candidate member of the same name.
struct ExpectResult {
  // Expected Segment::Candidate::value.
  const char *value;
  // Expected Segment::Candidate::content_value.
  const char *content_value;
  // Expected Segment::Candidate::description.
  const char *description;
};
}  // namespace
140 
// Checks the complete, ordered candidate list (value, content_value and
// description) produced for the input "012" with the default request.
TEST_F(NumberRewriterTest, BasicTest) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = SetupSegments(pos_matcher_, "012", &segments);

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  const ExpectResult kExpectResults[] = {
      {"012", "012", ""},
      {"〇一二", "〇一二", kKanjiDescription},
      {"012", "012", kArabicDescription},
      {"十二", "十二", kKanjiDescription},
      {"壱拾弐", "壱拾弐", kOldKanjiDescription},
      {"Ⅻ", "Ⅻ", kRomanCapitalDescription},
      {"ⅻ", "ⅻ", kRomanNoCapitalDescription},
      {"⑫", "⑫", kMaruNumberDescription},
      {"0xc", "0xc", "16進数"},
      {"014", "014", "8進数"},
      {"0b1100", "0b1100", "2進数"},
  };

  const size_t kExpectResultSize = arraysize(kExpectResults);
  // Fatal on mismatch: the loop below indexes candidates up to
  // kExpectResultSize and must not run past the end of the segment.
  ASSERT_EQ(kExpectResultSize, seg->candidates_size());

  for (size_t i = 0; i < kExpectResultSize; ++i) {
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    EXPECT_EQ(kExpectResults[i].value, seg->candidate(i).value);
    EXPECT_EQ(kExpectResults[i].content_value,
              seg->candidate(i).content_value);
    EXPECT_EQ(kExpectResults[i].description,
              seg->candidate(i).description);
  }
  seg->clear_candidates();
}
182 
// Checks how many candidates the rewriter leaves in the segment for the input
// "012" under each Segments::RequestType.
TEST_F(NumberRewriterTest, RequestType) {
  // Pairs a request type with the candidate count expected after Rewrite().
  struct TestCase {
    Segments::RequestType request_type;
    int expected_candidate_count;
  };
  const TestCase kTestCases[] = {
      {Segments::CONVERSION, 11},  // 11 comes from BasicTest
      {Segments::REVERSE_CONVERSION, 8},
      {Segments::PREDICTION, 8},
      {Segments::SUGGESTION, 8},
  };

  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  for (const TestCase &test_case : kTestCases) {
    Segments segments;
    segments.set_request_type(test_case.request_type);
    Segment *seg = segments.push_back_segment();
    Segment::Candidate *number_candidate = seg->add_candidate();
    number_candidate->Init();
    number_candidate->lid = pos_matcher_.GetNumberId();
    number_candidate->rid = pos_matcher_.GetNumberId();
    number_candidate->value = "012";
    number_candidate->content_value = "012";
    EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
    EXPECT_EQ(test_case.expected_candidate_count, seg->candidates_size());
  }
}
217 
// Same as BasicTest, but the candidate value carries the trailing "が" while
// content_value is the bare number "012". Every rewritten value is expected
// to keep the suffix and every content_value to omit it.
TEST_F(NumberRewriterTest, BasicTestWithSuffix) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetNumberId();
  candidate->value = "012が";
  candidate->content_value = "012";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  const ExpectResult kExpectResults[] = {
      {"012が", "012", ""},
      {"〇一二が", "〇一二", kKanjiDescription},
      {"012が", "012", kArabicDescription},
      {"十二が", "十二", kKanjiDescription},
      {"壱拾弐が", "壱拾弐", kOldKanjiDescription},
      {"Ⅻが", "Ⅻ", kRomanCapitalDescription},
      {"ⅻが", "ⅻ", kRomanNoCapitalDescription},
      {"⑫が", "⑫", kMaruNumberDescription},
      {"0xcが", "0xc", "16進数"},
      {"014が", "014", "8進数"},
      {"0b1100が", "0b1100", "2進数"},
  };

  const size_t kExpectResultSize = arraysize(kExpectResults);
  // Fatal on mismatch so the indexed accesses below stay in range.
  ASSERT_EQ(kExpectResultSize, seg->candidates_size());

  for (size_t i = 0; i < kExpectResultSize; ++i) {
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    EXPECT_EQ(kExpectResults[i].value, seg->candidate(i).value);
    EXPECT_EQ(kExpectResults[i].content_value,
              seg->candidate(i).content_value);
    EXPECT_EQ(kExpectResults[i].description,
              seg->candidate(i).description);
  }

  seg->clear_candidates();
}
260 
// A kanji number followed by a counter suffix ("十五個", with rid set to the
// counter-suffix POS id) is expected to gain exactly one extra candidate, the
// Arabic form "15個", with an empty description.
TEST_F(NumberRewriterTest, BasicTestWithNumberSuffix) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetCounterSuffixWordId();
  candidate->value = "十五個";
  candidate->content_value = "十五個";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Fatal on mismatch so the indexed accesses below stay in range.
  ASSERT_EQ(2, seg->candidates_size());

  EXPECT_EQ("十五個", seg->candidate(0).value);
  EXPECT_EQ("十五個", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("15個", seg->candidate(1).value);
  EXPECT_EQ("15個", seg->candidate(1).content_value);
  EXPECT_EQ("", seg->candidate(1).description);
  seg->clear_candidates();
}
286 
// Two kanji-number candidates with different counter suffixes ("十五回" and
// "十五階") are each expected to be followed by their Arabic counterpart,
// giving four candidates in total.
TEST_F(NumberRewriterTest, TestWithMultipleNumberSuffix) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetCounterSuffixWordId();
  candidate->value = "十五回";
  candidate->content_value = "十五回";
  candidate = seg->add_candidate();
  candidate->Init();
  candidate->lid = pos_matcher_.GetNumberId();
  candidate->rid = pos_matcher_.GetCounterSuffixWordId();
  candidate->value = "十五階";
  candidate->content_value = "十五階";

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Fatal on mismatch so the indexed accesses below stay in range.
  ASSERT_EQ(4, seg->candidates_size());

  EXPECT_EQ("十五回", seg->candidate(0).value);
  EXPECT_EQ("十五回", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("15回", seg->candidate(1).value);
  EXPECT_EQ("15回", seg->candidate(1).content_value);
  EXPECT_EQ("", seg->candidate(1).description);

  EXPECT_EQ("十五階", seg->candidate(2).value);
  EXPECT_EQ("十五階", seg->candidate(2).content_value);
  EXPECT_EQ("", seg->candidate(2).description);

  EXPECT_EQ("15階", seg->candidate(3).value);
  EXPECT_EQ("15階", seg->candidate(3).content_value);
  EXPECT_EQ("", seg->candidate(3).description);

  seg->clear_candidates();
}
328 
// Checks, for inputs around the range boundaries, whether circled-number and
// Roman-numeral candidates are generated.
TEST_F(NumberRewriterTest, SpecialFormBoundaries) {
  // |expect_roman| applies to both the capital and the non-capital Roman
  // descriptions, which share the same expectation for every input here.
  struct BoundaryCase {
    const char *input;
    bool expect_maru;
    bool expect_roman;
  };
  const BoundaryCase kBoundaryCases[] = {
      {"0", false, false},   // Special forms don't have zeros.
      {"1", true, true},     // "1" has special forms.
      {"12", true, true},    // "12" has every special form.
      {"13", true, false},   // "13" doesn't have roman forms.
      {"50", true, false},   // "50" has circled numerics.
      {"51", false, false},  // "51" doesn't have special forms.
  };

  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());
  Segments segments;

  for (const BoundaryCase &boundary_case : kBoundaryCases) {
    // SetupSegments() clears |segments|, so the same object is reused.
    const Segment *seg =
        SetupSegments(pos_matcher_, boundary_case.input, &segments);
    EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments))
        << "Input : " << boundary_case.input;
    EXPECT_EQ(boundary_case.expect_maru,
              HasDescription(*seg, kMaruNumberDescription))
        << "Input : " << boundary_case.input;
    EXPECT_EQ(boundary_case.expect_roman,
              HasDescription(*seg, kRomanCapitalDescription))
        << "Input : " << boundary_case.input;
    EXPECT_EQ(boundary_case.expect_roman,
              HasDescription(*seg, kRomanNoCapitalDescription))
        << "Input : " << boundary_case.input;
  }
}
375 
// A leading candidate with an empty value is expected to be left in place,
// with the rewrite applied to the following number candidate "0".
TEST_F(NumberRewriterTest, OneOfCandidatesIsEmpty) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *first_candidate = seg->add_candidate();
  first_candidate->Init();

  // this candidate should be skipped
  first_candidate->value = "";
  first_candidate->content_value = first_candidate->value;

  Segment::Candidate *second_candidate = seg->add_candidate();
  second_candidate->Init();

  second_candidate->value = "0";
  second_candidate->lid = pos_matcher_.GetNumberId();
  second_candidate->rid = pos_matcher_.GetNumberId();
  second_candidate->content_value = second_candidate->value;

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Candidates 0..4 are inspected below; stop here instead of reading out of
  // range if the rewriter produced fewer than that.
  ASSERT_LE(5u, seg->candidates_size());

  EXPECT_EQ("", seg->candidate(0).value);
  EXPECT_EQ("", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("0", seg->candidate(1).value);
  EXPECT_EQ("0", seg->candidate(1).content_value);
  EXPECT_EQ("", seg->candidate(1).description);

  EXPECT_EQ("〇", seg->candidate(2).value);
  EXPECT_EQ("〇", seg->candidate(2).content_value);
  EXPECT_EQ(kKanjiDescription, seg->candidate(2).description);

  EXPECT_EQ("0", seg->candidate(3).value);
  EXPECT_EQ("0", seg->candidate(3).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(3).description);

  EXPECT_EQ("零", seg->candidate(4).value);
  EXPECT_EQ("零", seg->candidate(4).content_value);
  EXPECT_EQ(kOldKanjiDescription, seg->candidate(4).description);

  seg->clear_candidates();
}
420 
// A candidate that is not a number ("タンポポ", with no number POS ids set)
// must make Rewrite() return false and leave the candidate list untouched.
TEST_F(NumberRewriterTest, RewriteDoesNotHappen) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  Segment::Candidate *non_number = seg->add_candidate();
  non_number->Init();
  non_number->value = "タンポポ";
  non_number->content_value = non_number->value;

  // Number rewrite should not occur.
  const bool rewritten =
      number_rewriter->Rewrite(default_request_, &segments);
  EXPECT_FALSE(rewritten);

  // The number of candidates should be maintained.
  EXPECT_EQ(1, seg->candidates_size());

  seg->clear_candidates();
}
440 
// Checks the exact four candidates expected for the input "0".
TEST_F(NumberRewriterTest, NumberIsZero) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = SetupSegments(pos_matcher_, "0", &segments);

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Fatal on mismatch so the indexed accesses below stay in range.
  ASSERT_EQ(4, seg->candidates_size());

  EXPECT_EQ("0", seg->candidate(0).value);
  EXPECT_EQ("0", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("〇", seg->candidate(1).value);
  EXPECT_EQ("〇", seg->candidate(1).content_value);
  EXPECT_EQ(kKanjiDescription, seg->candidate(1).description);

  EXPECT_EQ("0", seg->candidate(2).value);
  EXPECT_EQ("0", seg->candidate(2).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(2).description);

  EXPECT_EQ("零", seg->candidate(3).value);
  EXPECT_EQ("零", seg->candidate(3).content_value);
  EXPECT_EQ(kOldKanjiDescription, seg->candidate(3).description);

  seg->clear_candidates();
}
475 
// Checks the exact four candidates expected for the input "00".
TEST_F(NumberRewriterTest, NumberIsZeroZero) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = SetupSegments(pos_matcher_, "00", &segments);

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // Fatal on mismatch so the indexed accesses below stay in range.
  ASSERT_EQ(4, seg->candidates_size());

  EXPECT_EQ("00", seg->candidate(0).value);
  EXPECT_EQ("00", seg->candidate(0).content_value);
  EXPECT_EQ("", seg->candidate(0).description);

  EXPECT_EQ("〇〇", seg->candidate(1).value);
  EXPECT_EQ("〇〇", seg->candidate(1).content_value);
  EXPECT_EQ(kKanjiDescription, seg->candidate(1).description);

  EXPECT_EQ("00", seg->candidate(2).value);
  EXPECT_EQ("00", seg->candidate(2).content_value);
  EXPECT_EQ(kArabicDescription, seg->candidate(2).description);

  EXPECT_EQ("零", seg->candidate(3).value);
  EXPECT_EQ("零", seg->candidate(3).content_value);
  EXPECT_EQ(kOldKanjiDescription, seg->candidate(3).description);

  seg->clear_candidates();
}
510 
// Checks the complete, ordered candidate list for the 19-digit input
// "1000000000000000000".
TEST_F(NumberRewriterTest, NumberIs19Digit) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg =
      SetupSegments(pos_matcher_, "1000000000000000000", &segments);

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  const ExpectResult kExpectResults[] = {
      {"1000000000000000000", "1000000000000000000", ""},
      {"一〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇",
       "一〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇〇", kKanjiDescription},
      {"1000000000000000000",
       "1000000000000000000", kArabicDescription},
      {"1,000,000,000,000,000,000", "1,000,000,000,000,000,000",
       kArabicDescription},
      {"1,000,000,000,000,000,000",
       "1,000,000,000,000,000,000",
       kArabicDescription},
      {"100京", "100京", kArabicDescription},
      {"100京", "100京", kArabicDescription},
      {"百京", "百京", kKanjiDescription},
      {"壱百京", "壱百京", kOldKanjiDescription},
      {"0xde0b6b3a7640000", "0xde0b6b3a7640000", "16進数"},
      {"067405553164731000000", "067405553164731000000", "8進数"},
      {"0b110111100000101101101011001110100111011001000000000000000000",
       "0b110111100000101101101011001110100111011001000000000000000000",
       "2進数"},
  };

  const size_t kExpectResultSize = arraysize(kExpectResults);
  // Fatal on mismatch so the indexed accesses below stay in range.
  ASSERT_EQ(kExpectResultSize, seg->candidates_size());

  for (size_t i = 0; i < kExpectResultSize; ++i) {
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    EXPECT_EQ(kExpectResults[i].value, seg->candidate(i).value);
    EXPECT_EQ(kExpectResults[i].content_value,
              seg->candidate(i).content_value);
    EXPECT_EQ(kExpectResults[i].description,
              seg->candidate(i).description);
  }

  seg->clear_candidates();
}
561 
// Checks the complete, ordered candidate list for 2^64, a value that does not
// fit in an unsigned 64-bit integer. The expected list contains no
// hexadecimal, octal or binary entries.
TEST_F(NumberRewriterTest, NumberIsGreaterThanUInt64Max) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = SetupSegments(pos_matcher_, "18446744073709551616",  // 2^64
                               &segments);

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  const ExpectResult kExpectResults[] = {
      {"18446744073709551616",
       "18446744073709551616",
       ""},
      {"一八四四六七四四〇七三七〇九五五一六一六",
       "一八四四六七四四〇七三七〇九五五一六一六",
       kKanjiDescription},
      {"18446744073709551616",
       "18446744073709551616",
       kArabicDescription},
      {"18,446,744,073,709,551,616",
       "18,446,744,073,709,551,616",
       kArabicDescription},
      {"18,446,744,073,709,551,616",
       "18,446,744,073,709,551,616",
       kArabicDescription},
      {"1844京6744兆737億955万1616",
       "1844京6744兆737億955万1616",
       kArabicDescription},
      {"1844京6744兆737億955万1616",
       "1844京6744兆737億955万1616",
       kArabicDescription},
      {"千八百四十四京六千七百四十四兆七百三十七億九百五十五万千六百十六",
       "千八百四十四京六千七百四十四兆七百三十七億九百五十五万千六百十六",
       kKanjiDescription},
      {"壱阡八百四拾四京六阡七百四拾四兆七百参拾七億九百五拾五萬壱阡六百壱拾六",
       "壱阡八百四拾四京六阡七百四拾四兆七百参拾七億九百五拾五萬壱阡六百壱拾六",
       kOldKanjiDescription},
  };

  const size_t kExpectResultSize = arraysize(kExpectResults);
  // Fatal on mismatch so the indexed accesses below stay in range.
  ASSERT_EQ(kExpectResultSize, seg->candidates_size());

  for (size_t i = 0; i < kExpectResultSize; ++i) {
    SCOPED_TRACE(Util::StringPrintf("i = " SIZE_T_PRINTF_FORMAT, i));
    EXPECT_EQ(kExpectResults[i].value, seg->candidate(i).value);
    EXPECT_EQ(kExpectResults[i].content_value,
              seg->candidate(i).content_value);
    EXPECT_EQ(kExpectResults[i].description,
              seg->candidate(i).description);
  }

  seg->clear_candidates();
}
620 
TEST_F(NumberRewriterTest,NumberIsGoogol)621 TEST_F(NumberRewriterTest, NumberIsGoogol) {
622   std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());
623 
624   Segments segments;
625   Segment *seg = segments.push_back_segment();
626   Segment::Candidate *candidate = seg->add_candidate();
627   candidate->Init();
628   candidate->lid = pos_matcher_.GetNumberId();
629   candidate->rid = pos_matcher_.GetNumberId();
630 
631   // 10^100 as "100000 ... 0"
632   string input = "1";
633   for (size_t i = 0; i < 100; ++i) {
634     input += "0";
635   }
636 
637   candidate->value = input;
638   candidate->content_value = input;
639 
640   EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
641 
642   EXPECT_EQ(6, seg->candidates_size());
643 
644   EXPECT_EQ(input, seg->candidate(0).value);
645   EXPECT_EQ(input, seg->candidate(0).content_value);
646   EXPECT_EQ("", seg->candidate(0).description);
647 
648   // 10^100 as "一〇〇〇〇〇 ... 〇"
649   string expected2 = "一";
650   for (size_t i = 0; i < 100; ++i) {
651     expected2 += "〇";
652   }
653   EXPECT_EQ(expected2, seg->candidate(1).value);
654   EXPECT_EQ(expected2, seg->candidate(1).content_value);
655   EXPECT_EQ(kKanjiDescription, seg->candidate(1).description);
656 
657   // 10^100 as "100000 ... 0"
658   string expected3 = "1";
659   for (size_t i = 0; i < 100; ++i) {
660     expected3 += "0";
661   }
662   EXPECT_EQ(expected3, seg->candidate(2).value);
663   EXPECT_EQ(expected3, seg->candidate(2).content_value);
664   EXPECT_EQ(kArabicDescription, seg->candidate(2).description);
665 
666   // 10,000, ... ,000
667   string expected1 = "10";
668   for (size_t i = 0; i < 100 / 3; ++i) {
669     expected1 += ",000";
670   }
671   EXPECT_EQ(expected1, seg->candidate(3).value);
672   EXPECT_EQ(expected1, seg->candidate(3).content_value);
673   EXPECT_EQ(kArabicDescription, seg->candidate(3).description);
674 
675   // "10,000, ... ,000"
676   string expected4 = "10";  // "10"
677   for (size_t i = 0; i < 100 / 3; ++i) {
678     expected4 += ",000";
679   }
680   EXPECT_EQ(expected4, seg->candidate(4).value);
681   EXPECT_EQ(expected4, seg->candidate(4).content_value);
682   EXPECT_EQ(kArabicDescription, seg->candidate(4).description);
683 
684   EXPECT_EQ("Googol", seg->candidate(5).value);
685   EXPECT_EQ("Googol", seg->candidate(5).content_value);
686   EXPECT_EQ("", seg->candidate(5).description);
687 
688   seg->clear_candidates();
689 }
690 
// If the kanji candidate is ranked higher before the segments are rewritten,
// the kanji form should keep a higher ranking than the Arabic form afterwards.
TEST_F(NumberRewriterTest, RankingForKanjiCandidate) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  {
    Segment *segment = segments.add_segment();
    // Checked with a gtest assertion so it is enforced in every build mode.
    ASSERT_TRUE(segment != nullptr);
    segment->set_key("さんびゃく");
    // NOTE(review): this first candidate is added but Init() is never called
    // on it and none of its fields are set; only the second candidate below
    // is populated. It looks accidental -- confirm before removing, since
    // dropping it changes the input given to the rewriter.
    Segment::Candidate *candidate = segment->add_candidate();
    candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->key = "さんびゃく";
    candidate->value = "三百";
    candidate->content_value = "三百";
  }

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
  // Fatal: segment(0) is dereferenced below.
  ASSERT_NE(0, segments.segments_size());
  int kanji_pos = 0, arabic_pos = 0;
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "三百", &kanji_pos));
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "300", &arabic_pos));
  EXPECT_LT(kanji_pos, arabic_pos);
}
718 
// Modify the existing ranking even if the converter returns unusual results
// due to dictionary noise, etc.: with "参百" initially ranked above "三百",
// "三百" is expected to end up ahead of "参百" after the rewrite.
TEST_F(NumberRewriterTest, ModifyExsistingRanking) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  {
    Segment *segment = segments.add_segment();
    // Checked with a gtest assertion so it is enforced in every build mode.
    ASSERT_TRUE(segment != nullptr);
    segment->set_key("さんびゃく");
    Segment::Candidate *candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->key = "さんびゃく";
    candidate->value = "参百";
    candidate->content_value = "参百";

    candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->key = "さんびゃく";
    candidate->value = "三百";
    candidate->content_value = "三百";
  }

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
  int kanji_pos = 0, old_kanji_pos = 0;
  // Fatal: segment(0) is dereferenced below.
  ASSERT_NE(0, segments.segments_size());
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "三百", &kanji_pos));
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "参百", &old_kanji_pos));
  EXPECT_LT(kanji_pos, old_kanji_pos);
}
753 
// A pre-existing "壱" candidate with a non-number POS is expected to be
// replaced: "一" (number POS) becomes the top candidate, and "壱" reappears
// further down carrying the number POS ids.
TEST_F(NumberRewriterTest, EraseExistingCandidates) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  Segments segments;
  {
    Segment *segment = segments.add_segment();
    // Checked with a gtest assertion so it is enforced in every build mode.
    ASSERT_TRUE(segment != nullptr);
    segment->set_key("いち");
    Segment::Candidate *candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetUnknownId();  // Not number POS
    candidate->rid = pos_matcher_.GetUnknownId();
    candidate->key = "いち";
    candidate->content_key = "いち";
    candidate->value = "壱";
    candidate->content_value = "壱";

    candidate = segment->add_candidate();
    candidate->Init();
    candidate->lid = pos_matcher_.GetNumberId();  // Number POS
    candidate->rid = pos_matcher_.GetNumberId();
    candidate->key = "いち";
    candidate->content_key = "いち";
    candidate->value = "一";
    candidate->content_value = "一";
  }

  EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));

  // "一" becomes the base candidate, instead of "壱"
  int base_pos = 0;
  EXPECT_TRUE(FindCandidateId(segments.segment(0), "一", &base_pos));
  EXPECT_EQ(0, base_pos);

  // Daiji will be inserted with new correct POS ids.
  int daiji_pos = 0;
  // Fatal: the lid/rid checks below are meaningless if "壱" was not found.
  ASSERT_TRUE(FindCandidateId(segments.segment(0), "壱", &daiji_pos));
  EXPECT_GT(daiji_pos, 0);
  EXPECT_EQ(pos_matcher_.GetNumberId(),
            segments.segment(0).candidate(daiji_pos).lid);
  EXPECT_EQ(pos_matcher_.GetNumberId(),
            segments.segment(0).candidate(daiji_pos).rid);
}
797 
// Checks which inputs gain comma-separated Arabic candidates. Each table row
// is {input, expected value, expected value}; the second and third columns
// are looked up independently among the rewritten candidates.
TEST_F(NumberRewriterTest, SeparatedArabicsTest) {
  std::unique_ptr<NumberRewriter> number_rewriter(CreateNumberRewriter());

  // Inputs whose separated forms must appear among the candidates.
  const char *kSuccess[][3] = {
      {"1000", "1,000", "1,000"},
      {"12345678", "12,345,678", "12,345,678"},
      {"1234.5", "1,234.5", "1,234.5"},
  };

  for (const auto &row : kSuccess) {
    Segments segments;
    SetupSegments(pos_matcher_, row[0], &segments);
    EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
    const Segment &rewritten = segments.segment(0);
    EXPECT_TRUE(FindValue(rewritten, row[1])) << "Input : " << row[0];
    EXPECT_TRUE(FindValue(rewritten, row[2])) << "Input : " << row[0];
  }

  // Inputs whose listed separated forms must NOT appear.
  const char *kFail[][3] = {
      {"123", ",123", ",123"},
      {"999", ",999", ",999"},
      {"0000", "0,000", "0,000"},
  };

  for (const auto &row : kFail) {
    Segments segments;
    SetupSegments(pos_matcher_, row[0], &segments);
    EXPECT_TRUE(number_rewriter->Rewrite(default_request_, &segments));
    const Segment &rewritten = segments.segment(0);
    EXPECT_FALSE(FindValue(rewritten, row[1])) << "Input : " << row[0];
    EXPECT_FALSE(FindValue(rewritten, row[2])) << "Input : " << row[0];
  }
}
847 
848 // Consider the case where user dictionaries contain following entry.
849 // - Reading: "はやぶさ"
850 // - Value: "8823"
851 // - POS: GeneralNoun (not *Number*)
852 // In this case, NumberRewriter should not clear
853 // Segment::Candidate::USER_DICTIONARY bit in the base candidate.
TEST_F(NumberRewriterTest, PreserveUserDictionaryAttibute) {
  std::unique_ptr<NumberRewriter> rewriter(CreateNumberRewriter());

  // Build one segment holding a single general-noun candidate "8823" (read
  // "はやぶさ") flagged as coming from the user dictionary.
  Segments segments;
  {
    Segment *segment = segments.push_back_segment();
    Segment::Candidate *entry = segment->add_candidate();
    entry->Init();
    entry->key = "はやぶさ";
    entry->content_key = entry->key;
    entry->value = "8823";
    entry->content_value = entry->value;
    entry->lid = pos_matcher_.GetGeneralNounId();
    entry->rid = pos_matcher_.GetGeneralNounId();
    entry->cost = 5925;
    entry->wcost = 5000;
    entry->attributes = Segment::Candidate::USER_DICTIONARY |
                        Segment::Candidate::NO_VARIANTS_EXPANSION;
  }

  EXPECT_TRUE(rewriter->Rewrite(default_request_, &segments));

  // At least one "8823" candidate must still carry the USER_DICTIONARY bit.
  const Segment &rewritten = segments.segment(0);
  bool base_candidate_found = false;
  for (size_t idx = 0; idx < rewritten.candidates_size(); ++idx) {
    const Segment::Candidate &current = rewritten.candidate(idx);
    if (current.value != "8823") {
      continue;
    }
    if (current.attributes & Segment::Candidate::USER_DICTIONARY) {
      base_candidate_found = true;
      break;
    }
  }
  EXPECT_TRUE(base_candidate_found);
}
891 
TEST_F(NumberRewriterTest, DuplicateCandidateTest) {
  // To reproduce issue b/6714268.
  // NOTE(review): the assertions below only exercise capability() under the
  // two mixed_conversion settings; nothing here inspects candidates, so the
  // test name may be stale -- confirm against the referenced issue.
  ConversionRequest convreq;
  commands::Request request;
  // convreq keeps a pointer to `request`, so the set_mixed_conversion() calls
  // below are made on the same object that capability() is handed.
  convreq.set_request(&request);
  std::unique_ptr<NumberRewriter> rewriter(CreateNumberRewriter());

  // With mixed conversion enabled, ALL is the expected capability.
  request.set_mixed_conversion(true);
  EXPECT_EQ(RewriterInterface::ALL, rewriter->capability(convreq));

  // With mixed conversion disabled, CONVERSION is the expected capability.
  request.set_mixed_conversion(false);
  EXPECT_EQ(RewriterInterface::CONVERSION, rewriter->capability(convreq));
}
910 
TEST_F(NumberRewriterTest, NonNumberNounTest) {
  // A general noun that merely contains a kanji numeral ("百舌鳥") must be
  // left alone, i.e. not turned into something like "100舌鳥"; Rewrite() is
  // expected to report that nothing changed.
  std::unique_ptr<NumberRewriter> rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *seg = segments.push_back_segment();
  seg->set_key("もず");

  Segment::Candidate *noun = seg->add_candidate();
  noun->Init();
  noun->lid = pos_matcher_.GetGeneralNounId();
  noun->rid = pos_matcher_.GetGeneralNounId();
  noun->key = "もず";
  noun->content_key = noun->key;
  noun->value = "百舌鳥";
  noun->content_value = noun->value;

  EXPECT_FALSE(rewriter->Rewrite(default_request_, &segments));
}
927 
TEST_F(NumberRewriterTest, RewriteForPartialSuggestion_b16765535) {
  // Two segments: a partially-consumed number candidate "090" described as
  // "部分", followed by a plain "-" candidate. After Rewrite(), every
  // candidate of the first segment must keep both the description prefix and
  // the PARTIALLY_KEY_CONSUMED attribute.
  std::unique_ptr<NumberRewriter> rewriter(CreateNumberRewriter());

  const char kBubun[] = "部分";
  Segments segments;
  {
    Segment *number_segment = segments.push_back_segment();
    Segment::Candidate *number = number_segment->add_candidate();
    number->Init();
    number->key = "090";
    number->content_key = "090";
    number->value = "090";
    number->content_value = "090";
    number->lid = pos_matcher_.GetNumberId();
    number->rid = pos_matcher_.GetNumberId();
    number->description = kBubun;
    number->attributes = Segment::Candidate::PARTIALLY_KEY_CONSUMED;
    number->consumed_key_size = 3;
  }
  {
    Segment *hyphen_segment = segments.push_back_segment();
    Segment::Candidate *hyphen = hyphen_segment->add_candidate();
    hyphen->Init();
    hyphen->key = "-";
    hyphen->content_key = "-";
    hyphen->value = "-";
    hyphen->content_value = "-";
  }
  EXPECT_TRUE(rewriter->Rewrite(default_request_, &segments));

  ASSERT_EQ(2, segments.conversion_segments_size());
  const Segment &first = segments.conversion_segment(0);
  ASSERT_LE(2, first.candidates_size());
  for (size_t idx = 0; idx < first.candidates_size(); ++idx) {
    const Segment::Candidate &current = first.candidate(idx);
    EXPECT_TRUE(Util::StartsWith(current.description, kBubun));
    EXPECT_TRUE(
        current.attributes & Segment::Candidate::PARTIALLY_KEY_CONSUMED);
  }
}
968 
TEST_F(NumberRewriterTest, RewriteForPartialSuggestion_b19470020) {
  // One segment keyed "ひとりひとぱっく" whose only candidate "一人" consumes
  // three characters of the key. After Rewrite(), a "1人" candidate must
  // exist and must carry the same consumed size, description prefix and
  // PARTIALLY_KEY_CONSUMED attribute.
  std::unique_ptr<NumberRewriter> rewriter(CreateNumberRewriter());

  const char kBubun[] = "部分";
  Segments segments;
  {
    Segment *segment = segments.push_back_segment();
    segment->set_key("ひとりひとぱっく");
    Segment::Candidate *partial = segment->add_candidate();
    partial->Init();
    partial->key = "ひとり";
    partial->content_key = "ひとり";
    partial->value = "一人";
    partial->content_value = "一人";
    partial->lid = pos_matcher_.GetNumberId();
    partial->rid = pos_matcher_.GetNumberId();
    partial->description = kBubun;
    partial->attributes = Segment::Candidate::PARTIALLY_KEY_CONSUMED;
    partial->consumed_key_size = 3;
  }
  EXPECT_TRUE(rewriter->Rewrite(default_request_, &segments));

  ASSERT_EQ(1, segments.conversion_segments_size());
  const Segment &rewritten = segments.conversion_segment(0);
  ASSERT_LE(2, rewritten.candidates_size());

  // Only candidates whose value is "1人" are inspected; all others are
  // ignored by this test.
  bool found_halfwidth = false;
  for (size_t idx = 0; idx < rewritten.candidates_size(); ++idx) {
    const Segment::Candidate &current = rewritten.candidate(idx);
    if (current.value == "1人") {
      found_halfwidth = true;
      EXPECT_EQ(3, current.consumed_key_size);
      EXPECT_TRUE(Util::StartsWith(current.description, kBubun));
      EXPECT_TRUE(
          current.attributes & Segment::Candidate::PARTIALLY_KEY_CONSUMED);
    }
  }
  EXPECT_TRUE(found_halfwidth);
}
1008 
TEST_F(NumberRewriterTest, RewritePhonePrefix_b16668386) {
  // A candidate "090-" whose left POS is number but whose right POS is
  // general symbol: Rewrite() is expected to return false for it.
  std::unique_ptr<NumberRewriter> rewriter(CreateNumberRewriter());

  Segments segments;
  Segment *segment = segments.push_back_segment();
  Segment::Candidate *prefix = segment->add_candidate();
  prefix->Init();
  prefix->key = "090-";
  prefix->content_key = "090-";
  prefix->value = "090-";
  prefix->content_value = "090-";
  prefix->lid = pos_matcher_.GetNumberId();
  prefix->rid = pos_matcher_.GetGeneralSymbolId();

  EXPECT_FALSE(rewriter->Rewrite(default_request_, &segments));
}
1025 
1026 }  // namespace mozc
1027