1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include <iostream>
31 #include <map>
32 #include <memory>
33 #include <string>
34 #include <utility>
35 #include <vector>
36 
37 #include "base/file_stream.h"
38 #include "base/file_util.h"
39 #include "base/flags.h"
40 #include "base/init_mozc.h"
41 #include "base/logging.h"
42 #include "base/number_util.h"
43 #include "base/port.h"
44 #include "base/singleton.h"
45 #include "base/system_util.h"
46 #include "base/util.h"
47 #include "composer/composer.h"
48 #include "composer/table.h"
49 #include "converter/converter_interface.h"
50 #include "converter/lattice.h"
51 #include "converter/pos_id_printer.h"
52 #include "converter/segments.h"
53 #include "data_manager/data_manager.h"
54 #include "engine/engine.h"
55 #include "protocol/commands.pb.h"
56 #include "protocol/config.pb.h"
57 #include "request/conversion_request.h"
58 #include "session/request_test_util.h"
59 
// Forwarded to Segments::set_max_conversion_candidates_size() before every
// command is executed.
DEFINE_int32(max_conversion_candidates_size, 200, "maximum candidates size");
// When non-empty, main() installs this path via
// SystemUtil::SetUserProfileDirectory().
DEFINE_string(user_profile_dir, "", "path to user profile directory");
// Name looked up by SelectDataFileFromName() / SelectIdDefFromName() to fill
// in --engine_data and --id_def when those flags are left empty.
DEFINE_string(engine, "default",
              "Shortcut to select engine_data from name: (default|oss|mock)");
// Chooses between Engine::CreateDesktopEngine() and
// Engine::CreateMobileEngine() in main(); any other value is reported via
// LOG(FATAL).
DEFINE_string(engine_type, "desktop", "Engine type: (desktop|mobile)");
// When true, main() prints the segments after each successful command.
DEFINE_bool(output_debug_string, true, "output debug string for each input");
// When true, PrintSegment() also lists meta candidates (with negative
// indices) before the regular candidates.
DEFINE_bool(show_meta_candidates, false, "if true, show meta candidates");

// Advanced options for data files.  These are automatically set when --engine
// is used but they can be overridden by specifying these flags.
// --engine_data and --magic are passed together to DataManager::InitFromFile().
DEFINE_string(engine_data, "", "Path to engine data file");
DEFINE_string(magic, "", "Expected magic number of data file");
// Path opened by PosIdPrintUtil to build the POS ID printer.
DEFINE_string(id_def, "",
              "id.def file for POS IDs. If provided, show human readable "
              "POS instead of ID number");
75 
76 namespace mozc {
77 namespace {
78 
79 using composer::Composer;
80 using composer::Table;
81 using config::Config;
82 
83 // Wrapper class for pos id printing
84 class PosIdPrintUtil {
85  public:
IdToString(int id)86   static string IdToString(int id) {
87     return Singleton<PosIdPrintUtil>::get()->IdToStringInternal(id);
88   }
89 
90  private:
PosIdPrintUtil()91   PosIdPrintUtil() :
92       pos_id_(new InputFileStream(FLAGS_id_def.c_str())),
93       pos_id_printer_(new internal::PosIdPrinter(pos_id_.get())) {}
94 
IdToStringInternal(int id) const95   string IdToStringInternal(int id) const {
96     const string &pos_string = pos_id_printer_->IdToString(id);
97     if (pos_string.empty()) {
98       return std::to_string(id);
99     }
100     return Util::StringPrintf("%s (%d)", pos_string.c_str(), id);
101   }
102 
103   std::unique_ptr<InputFileStream> pos_id_;
104   std::unique_ptr<internal::PosIdPrinter> pos_id_printer_;
105 
106   friend class Singleton<PosIdPrintUtil>;
107   DISALLOW_COPY_AND_ASSIGN(PosIdPrintUtil);
108 };
109 
SegmentTypeToString(Segment::SegmentType type)110 string SegmentTypeToString(Segment::SegmentType type) {
111 #define RETURN_STR(val) case Segment::val: return #val
112   switch (type) {
113     RETURN_STR(FREE);
114     RETURN_STR(FIXED_BOUNDARY);
115     RETURN_STR(FIXED_VALUE);
116     RETURN_STR(SUBMITTED);
117     RETURN_STR(HISTORY);
118     default:
119       return "UNKNOWN";
120   }
121 #undef RETURN_STR
122 }
123 
CandidateAttributesToString(uint32 attrs)124 string CandidateAttributesToString(uint32 attrs) {
125   std::vector<string> v;
126 #define ADD_STR(fieldname)                       \
127   do {                                           \
128     if (attrs & Segment::Candidate::fieldname)   \
129       v.push_back(#fieldname);                   \
130   } while (false)
131 
132   ADD_STR(BEST_CANDIDATE);
133   ADD_STR(RERANKED);
134   ADD_STR(NO_HISTORY_LEARNING);
135   ADD_STR(NO_SUGGEST_LEARNING);
136   ADD_STR(CONTEXT_SENSITIVE);
137   ADD_STR(SPELLING_CORRECTION);
138   ADD_STR(NO_VARIANTS_EXPANSION);
139   ADD_STR(NO_EXTRA_DESCRIPTION);
140   ADD_STR(REALTIME_CONVERSION);
141   ADD_STR(USER_DICTIONARY);
142   ADD_STR(COMMAND_CANDIDATE);
143   ADD_STR(PARTIALLY_KEY_CONSUMED);
144   ADD_STR(TYPING_CORRECTION);
145   ADD_STR(AUTO_PARTIAL_SUGGESTION);
146   ADD_STR(USER_HISTORY_PREDICTION);
147 
148 #undef ADD_STR
149   string s;
150   Util::JoinStrings(v, " | ", &s);
151   return s;
152 }
153 
NumberStyleToString(NumberUtil::NumberString::Style style)154 string NumberStyleToString(NumberUtil::NumberString::Style style) {
155 #define RETURN_STR(val) case NumberUtil::NumberString::val: return #val
156   switch (style) {
157     RETURN_STR(DEFAULT_STYLE);
158     RETURN_STR(NUMBER_SEPARATED_ARABIC_HALFWIDTH);
159     RETURN_STR(NUMBER_SEPARATED_ARABIC_FULLWIDTH);
160     RETURN_STR(NUMBER_ARABIC_AND_KANJI_HALFWIDTH);
161     RETURN_STR(NUMBER_ARABIC_AND_KANJI_FULLWIDTH);
162     RETURN_STR(NUMBER_KANJI);
163     RETURN_STR(NUMBER_OLD_KANJI);
164     RETURN_STR(NUMBER_ROMAN_CAPITAL);
165     RETURN_STR(NUMBER_ROMAN_SMALL);
166     RETURN_STR(NUMBER_CIRCLED);
167     RETURN_STR(NUMBER_KANJI_ARABIC);
168     RETURN_STR(NUMBER_HEX);
169     RETURN_STR(NUMBER_OCT);
170     RETURN_STR(NUMBER_BIN);
171     default:
172       return "UNKNOWN";
173   }
174 #undef RETURN_STR
175 }
176 
InnerSegmentBoundaryToString(const Segment::Candidate & cand)177 string InnerSegmentBoundaryToString(const Segment::Candidate &cand) {
178   if (cand.inner_segment_boundary.empty()) {
179     return "";
180   }
181   std::vector<string> pieces;
182   for (Segment::Candidate::InnerSegmentIterator iter(&cand);
183        !iter.Done(); iter.Next()) {
184     string s = "<";
185     s.append(iter.GetKey().data(), iter.GetKey().size());
186     s.append(", ");
187     s.append(iter.GetValue().data(), iter.GetValue().size());
188     s.append(", ");
189     s.append(iter.GetContentKey().data(), iter.GetContentKey().size());
190     s.append(", ");
191     s.append(iter.GetContentValue().data(), iter.GetContentValue().size());
192     s.append(1, '>');
193     pieces.push_back(s);
194   }
195   string s;
196   Util::JoinStrings(pieces, " | ", &s);
197   return s;
198 }
199 
PrintCandidate(const Segment & parent,int num,const Segment::Candidate & cand,std::ostream * os)200 void PrintCandidate(const Segment &parent, int num,
201                     const Segment::Candidate &cand, std::ostream *os) {
202   std::vector<string> lines;
203   if (parent.key() != cand.key) {
204     lines.push_back("key: " + cand.key);
205   }
206   lines.push_back("content_vk: " + cand.content_value +
207                   "  " + cand.content_key);
208   lines.push_back(Util::StringPrintf(
209       "cost: %d  scost: %d  wcost: %d",
210       cand.cost, cand.structure_cost, cand.wcost));
211   lines.push_back("lid: " + PosIdPrintUtil::IdToString(cand.lid));
212   lines.push_back("rid: " + PosIdPrintUtil::IdToString(cand.rid));
213   lines.push_back("attr: " + CandidateAttributesToString(cand.attributes));
214   lines.push_back("num_style: " + NumberStyleToString(cand.style));
215   const string &segbdd_str = InnerSegmentBoundaryToString(cand);
216   if (!segbdd_str.empty()) {
217     lines.push_back("segbdd: " + segbdd_str);
218   }
219 
220   (*os) << "  " << num << " " << cand.value << std::endl;
221   for (size_t i = 0; i < lines.size(); ++i) {
222     if (!lines[i].empty()) {
223       (*os) << "       " << lines[i] << std::endl;
224     }
225   }
226 }
227 
PrintSegment(size_t num,size_t segments_size,const Segment & segment,std::ostream * os)228 void PrintSegment(size_t num, size_t segments_size, const Segment &segment,
229                   std::ostream *os) {
230   (*os) << "---------- Segment " << num << "/" << segments_size << " ["
231         << SegmentTypeToString(segment.segment_type()) << "] ----------"
232         << std::endl
233         << segment.key() << std::endl;
234   if (FLAGS_show_meta_candidates) {
235     for (int i = 0; i < segment.meta_candidates_size(); ++i) {
236       PrintCandidate(segment, -i - 1, segment.meta_candidate(i), os);
237     }
238   }
239   for (size_t i = 0; i < segment.candidates_size(); ++i) {
240     PrintCandidate(segment, i, segment.candidate(i), os);
241   }
242 }
243 
PrintSegments(const Segments & segments,std::ostream * os)244 void PrintSegments(const Segments &segments, std::ostream *os) {
245   for (size_t i = 0; i < segments.segments_size(); ++i) {
246     PrintSegment(i, segments.segments_size(), segments.segment(i), os);
247   }
248 }
249 
ExecCommand(const ConverterInterface & converter,Segments * segments,const string & line,const commands::Request & request)250 bool ExecCommand(const ConverterInterface &converter,
251                  Segments *segments,
252                  const string &line,
253                  const commands::Request &request) {
254   std::vector<string> fields;
255   Util::SplitStringUsing(line, "\t ", &fields);
256 
257 #define CHECK_FIELDS_LENGTH(length) \
258   if (fields.size() < (length)) { \
259      return false; \
260   }
261 
262   CHECK_FIELDS_LENGTH(1);
263 
264   const string &func = fields[0];
265 
266   const Config config;
267 
268   segments->set_max_conversion_candidates_size(
269       FLAGS_max_conversion_candidates_size);
270 
271   if (func == "startconversion" || func == "start" || func == "s") {
272     CHECK_FIELDS_LENGTH(2);
273     Table table;
274     Composer composer(&table, &request, &config);
275     composer.SetPreeditTextForTestOnly(fields[1]);
276     ConversionRequest conversion_request(&composer, &request, &config);
277     return converter.StartConversionForRequest(conversion_request, segments);
278   } else if (func == "convertwithnodeinfo" || func == "cn") {
279     CHECK_FIELDS_LENGTH(5);
280     Lattice::SetDebugDisplayNode(
281         NumberUtil::SimpleAtoi(fields[2]),  // begin pos
282         NumberUtil::SimpleAtoi(fields[3]),  // end pos
283         fields[4]);
284     const bool result = converter.StartConversion(segments, fields[1]);
285     Lattice::ResetDebugDisplayNode();
286     return result;
287   } else if (func == "reverseconversion" || func == "reverse" || func == "r") {
288     CHECK_FIELDS_LENGTH(2);
289     return converter.StartReverseConversion(segments, fields[1]);
290   } else if (func == "startprediction" || func == "predict" || func == "p") {
291     Table table;
292     Composer composer(&table, &request, &config);
293     if (fields.size() >= 2) {
294       composer.SetPreeditTextForTestOnly(fields[1]);
295       ConversionRequest conversion_request(&composer, &request, &config);
296       return converter.StartPredictionForRequest(conversion_request, segments);
297     } else {
298       ConversionRequest conversion_request(&composer, &request, &config);
299       return converter.StartPredictionForRequest(conversion_request, segments);
300     }
301   } else if (func == "startsuggestion" || func == "suggest") {
302     Table table;
303     Composer composer(&table, &request, &config);
304     if (fields.size() >= 2) {
305       composer.SetPreeditTextForTestOnly(fields[1]);
306       ConversionRequest conversion_request(&composer, &request, &config);
307       return converter.StartSuggestionForRequest(conversion_request, segments);
308     } else {
309       ConversionRequest conversion_request(&composer, &request, &config);
310       return converter.StartSuggestionForRequest(conversion_request, segments);
311     }
312   } else if (func == "finishconversion" || func == "finish") {
313     Table table;
314     Composer composer(&table, &request, &config);
315     ConversionRequest conversion_request(&composer, &request, &config);
316     return converter.FinishConversion(conversion_request, segments);
317   } else if (func == "resetconversion" || func == "reset") {
318     return converter.ResetConversion(segments);
319   } else if (func == "cancelconversion" || func == "cancel") {
320     return converter.CancelConversion(segments);
321   } else if (func == "commitsegmentvalue" || func == "commit" || func == "c") {
322     CHECK_FIELDS_LENGTH(3);
323     return converter.CommitSegmentValue(segments,
324                                         NumberUtil::SimpleAtoi(fields[1]),
325                                         NumberUtil::SimpleAtoi(fields[2]));
326   } else if (func == "commitallandfinish") {
327     for (int i = 0; i < segments->conversion_segments_size(); ++i) {
328       if (segments->conversion_segment(i).segment_type() !=
329             Segment::FIXED_VALUE) {
330         if (!(converter.CommitSegmentValue(segments, i, 0))) return false;
331       }
332     }
333     Table table;
334     Composer composer(&table, &request, &config);
335     ConversionRequest conversion_request(&composer, &request, &config);
336     return converter.FinishConversion(conversion_request, segments);
337   } else if (func == "focussegmentvalue" || func == "focus") {
338     CHECK_FIELDS_LENGTH(3);
339     return converter.FocusSegmentValue(segments,
340                                        NumberUtil::SimpleAtoi(fields[1]),
341                                        NumberUtil::SimpleAtoi(fields[2]));
342   } else if (func == "commitfirstsegment") {
343     CHECK_FIELDS_LENGTH(2);
344     std::vector<size_t> singleton_vector;
345     singleton_vector.push_back(NumberUtil::SimpleAtoi(fields[1]));
346     return converter.CommitSegments(segments, singleton_vector);
347   } else if (func == "freesegmentvalue" || func == "free") {
348     CHECK_FIELDS_LENGTH(2);
349     return converter.FreeSegmentValue(segments,
350                                       NumberUtil::SimpleAtoi(fields[1]));
351   } else if (func == "resizesegment" || func == "resize") {
352     const ConversionRequest request;
353     if (fields.size() == 3) {
354       return converter.ResizeSegment(segments,
355                                      request,
356                                      NumberUtil::SimpleAtoi(fields[1]),
357                                      NumberUtil::SimpleAtoi(fields[2]));
358     } else if (fields.size() > 3) {
359       std::vector<uint8> new_arrays;
360       for (size_t i = 3; i < fields.size(); ++i) {
361         new_arrays.push_back(
362             static_cast<uint8>(NumberUtil::SimpleAtoi(fields[i])));
363       }
364       return converter.ResizeSegment(segments,
365                                      request,
366                                      NumberUtil::SimpleAtoi(fields[1]),
367                                      NumberUtil::SimpleAtoi(fields[2]),
368                                      &new_arrays[0],
369                                      new_arrays.size());
370     }
371   } else if (func == "disableuserhistory") {
372     segments->set_user_history_enabled(false);
373   } else if (func == "enableuserhistory") {
374     segments->set_user_history_enabled(true);
375   } else {
376     LOG(WARNING) << "Unknown command: " <<  func;
377     return false;
378   }
379 
380 #undef CHECK_FIELDS_LENGTH
381   return true;
382 }
383 
SelectDataFileFromName(const string & mozc_runfiles_dir,const string & engine_name)384 std::pair<string, string> SelectDataFileFromName(
385     const string &mozc_runfiles_dir, const string &engine_name) {
386   struct {
387     const char *engine_name;
388     const char *path;
389     const char *magic;
390   } kNameAndPath[] = {
391     {"default", "data_manager/oss/mozc.data", "\xEFMOZC\r\n"},
392     {"oss", "data_manager/oss/mozc.data", "\xEFMOZC\r\n"},
393     {"mock", "data_manager/testing/mock_mozc.data", "MOCK"},
394   };
395   for (const auto &entry : kNameAndPath) {
396     if (engine_name == entry.engine_name) {
397       return std::pair<string, string>(
398           FileUtil::JoinPath(mozc_runfiles_dir, entry.path),
399           entry.magic);
400     }
401   }
402   return std::pair<string, string>("", "");
403 }
404 
// Maps an engine shortcut name ("default", "oss" or "mock") to the path of
// its id.def file under |mozc_runfiles_dir|.  Returns an empty string when
// |engine_name| is not recognized.
string SelectIdDefFromName(const string &mozc_runfiles_dir,
                           const string &engine_name) {
  struct IdDefEntry {
    const char *name;
    const char *relative_path;
  };
  static const IdDefEntry kIdDefs[] = {
    {"default", "data/dictionary_oss/id.def"},
    {"oss", "data/dictionary_oss/id.def"},
    {"mock", "data/test/dictionary/id.def"},
  };

  string id_def_path;
  for (const IdDefEntry &candidate : kIdDefs) {
    if (engine_name == candidate.name) {
      id_def_path =
          FileUtil::JoinPath(mozc_runfiles_dir, candidate.relative_path);
      break;
    }
  }
  return id_def_path;
}
422 
423 }  // namespace
424 }  // namespace mozc
425 
main(int argc,char ** argv)426 int main(int argc, char **argv) {
427   mozc::InitMozc(argv[0], &argc, &argv, false);
428 
429   if (!FLAGS_user_profile_dir.empty()) {
430     mozc::SystemUtil::SetUserProfileDirectory(FLAGS_user_profile_dir);
431   }
432 
433   string mozc_runfiles_dir = ".";
434   if (FLAGS_engine_data.empty()) {
435     const auto path_and_magic = mozc::SelectDataFileFromName(mozc_runfiles_dir,
436                                                              FLAGS_engine);
437     FLAGS_engine_data = path_and_magic.first;
438     FLAGS_magic = path_and_magic.second;
439   }
440   CHECK(!FLAGS_engine_data.empty())
441       << "--engine_data or --engine is invalid: "
442       << "--engine_data=" << FLAGS_engine_data << " "
443       << "--engine=" << FLAGS_engine;
444 
445   if (FLAGS_id_def.empty()) {
446     FLAGS_id_def = mozc::SelectIdDefFromName(mozc_runfiles_dir, FLAGS_engine);
447   }
448 
449   std::cout << "Engine type: " << FLAGS_engine_type
450             << "\nData file: " << FLAGS_engine_data
451             << "\nid.def: " << FLAGS_id_def << std::endl;
452 
453   std::unique_ptr<mozc::DataManager> data_manager(new mozc::DataManager);
454   const auto status = data_manager->InitFromFile(FLAGS_engine_data,
455                                                  FLAGS_magic);
456   CHECK_EQ(status, mozc::DataManager::Status::OK);
457 
458   mozc::commands::Request request;
459   std::unique_ptr<mozc::EngineInterface> engine;
460   if (FLAGS_engine_type == "desktop") {
461     engine = mozc::Engine::CreateDesktopEngine(std::move(data_manager));
462   } else if (FLAGS_engine_type == "mobile") {
463     engine = mozc::Engine::CreateMobileEngine(std::move(data_manager));
464     mozc::commands::RequestForUnitTest::FillMobileRequest(&request);
465   } else {
466     LOG(FATAL) << "Invalid type: --engine_type=" << FLAGS_engine_type;
467     return 0;
468   }
469 
470   mozc::ConverterInterface *converter = engine->GetConverter();
471   CHECK(converter);
472 
473   mozc::Segments segments;
474   string line;
475 
476   while (!getline(std::cin, line).fail()) {
477     if (mozc::ExecCommand(*converter, &segments, line, request)) {
478       if (FLAGS_output_debug_string) {
479         mozc::PrintSegments(segments, &std::cout);
480       }
481     } else {
482       std::cout << "ExecCommand() return false" << std::endl;
483     }
484   }
485   return 0;
486 }
487