1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include <iostream>
31 #include <map>
32 #include <memory>
33 #include <string>
34 #include <utility>
35 #include <vector>
36
37 #include "base/file_stream.h"
38 #include "base/file_util.h"
39 #include "base/flags.h"
40 #include "base/init_mozc.h"
41 #include "base/logging.h"
42 #include "base/number_util.h"
43 #include "base/port.h"
44 #include "base/singleton.h"
45 #include "base/system_util.h"
46 #include "base/util.h"
47 #include "composer/composer.h"
48 #include "composer/table.h"
49 #include "converter/converter_interface.h"
50 #include "converter/lattice.h"
51 #include "converter/pos_id_printer.h"
52 #include "converter/segments.h"
53 #include "data_manager/data_manager.h"
54 #include "engine/engine.h"
55 #include "protocol/commands.pb.h"
56 #include "protocol/config.pb.h"
57 #include "request/conversion_request.h"
58 #include "session/request_test_util.h"
59
60 DEFINE_int32(max_conversion_candidates_size, 200, "maximum candidates size");
61 DEFINE_string(user_profile_dir, "", "path to user profile directory");
62 DEFINE_string(engine, "default",
63 "Shortcut to select engine_data from name: (default|oss|mock)");
64 DEFINE_string(engine_type, "desktop", "Engine type: (desktop|mobile)");
65 DEFINE_bool(output_debug_string, true, "output debug string for each input");
66 DEFINE_bool(show_meta_candidates, false, "if true, show meta candidates");
67
68 // Advanced options for data files. These are automatically set when --engine
69 // is used but they can be overridden by specifying these flags.
70 DEFINE_string(engine_data, "", "Path to engine data file");
71 DEFINE_string(magic, "", "Expected magic number of data file");
72 DEFINE_string(id_def, "",
73 "id.def file for POS IDs. If provided, show human readable "
74 "POS instead of ID number");
75
76 namespace mozc {
77 namespace {
78
79 using composer::Composer;
80 using composer::Table;
81 using config::Config;
82
83 // Wrapper class for pos id printing
84 class PosIdPrintUtil {
85 public:
IdToString(int id)86 static string IdToString(int id) {
87 return Singleton<PosIdPrintUtil>::get()->IdToStringInternal(id);
88 }
89
90 private:
PosIdPrintUtil()91 PosIdPrintUtil() :
92 pos_id_(new InputFileStream(FLAGS_id_def.c_str())),
93 pos_id_printer_(new internal::PosIdPrinter(pos_id_.get())) {}
94
IdToStringInternal(int id) const95 string IdToStringInternal(int id) const {
96 const string &pos_string = pos_id_printer_->IdToString(id);
97 if (pos_string.empty()) {
98 return std::to_string(id);
99 }
100 return Util::StringPrintf("%s (%d)", pos_string.c_str(), id);
101 }
102
103 std::unique_ptr<InputFileStream> pos_id_;
104 std::unique_ptr<internal::PosIdPrinter> pos_id_printer_;
105
106 friend class Singleton<PosIdPrintUtil>;
107 DISALLOW_COPY_AND_ASSIGN(PosIdPrintUtil);
108 };
109
SegmentTypeToString(Segment::SegmentType type)110 string SegmentTypeToString(Segment::SegmentType type) {
111 #define RETURN_STR(val) case Segment::val: return #val
112 switch (type) {
113 RETURN_STR(FREE);
114 RETURN_STR(FIXED_BOUNDARY);
115 RETURN_STR(FIXED_VALUE);
116 RETURN_STR(SUBMITTED);
117 RETURN_STR(HISTORY);
118 default:
119 return "UNKNOWN";
120 }
121 #undef RETURN_STR
122 }
123
CandidateAttributesToString(uint32 attrs)124 string CandidateAttributesToString(uint32 attrs) {
125 std::vector<string> v;
126 #define ADD_STR(fieldname) \
127 do { \
128 if (attrs & Segment::Candidate::fieldname) \
129 v.push_back(#fieldname); \
130 } while (false)
131
132 ADD_STR(BEST_CANDIDATE);
133 ADD_STR(RERANKED);
134 ADD_STR(NO_HISTORY_LEARNING);
135 ADD_STR(NO_SUGGEST_LEARNING);
136 ADD_STR(CONTEXT_SENSITIVE);
137 ADD_STR(SPELLING_CORRECTION);
138 ADD_STR(NO_VARIANTS_EXPANSION);
139 ADD_STR(NO_EXTRA_DESCRIPTION);
140 ADD_STR(REALTIME_CONVERSION);
141 ADD_STR(USER_DICTIONARY);
142 ADD_STR(COMMAND_CANDIDATE);
143 ADD_STR(PARTIALLY_KEY_CONSUMED);
144 ADD_STR(TYPING_CORRECTION);
145 ADD_STR(AUTO_PARTIAL_SUGGESTION);
146 ADD_STR(USER_HISTORY_PREDICTION);
147
148 #undef ADD_STR
149 string s;
150 Util::JoinStrings(v, " | ", &s);
151 return s;
152 }
153
NumberStyleToString(NumberUtil::NumberString::Style style)154 string NumberStyleToString(NumberUtil::NumberString::Style style) {
155 #define RETURN_STR(val) case NumberUtil::NumberString::val: return #val
156 switch (style) {
157 RETURN_STR(DEFAULT_STYLE);
158 RETURN_STR(NUMBER_SEPARATED_ARABIC_HALFWIDTH);
159 RETURN_STR(NUMBER_SEPARATED_ARABIC_FULLWIDTH);
160 RETURN_STR(NUMBER_ARABIC_AND_KANJI_HALFWIDTH);
161 RETURN_STR(NUMBER_ARABIC_AND_KANJI_FULLWIDTH);
162 RETURN_STR(NUMBER_KANJI);
163 RETURN_STR(NUMBER_OLD_KANJI);
164 RETURN_STR(NUMBER_ROMAN_CAPITAL);
165 RETURN_STR(NUMBER_ROMAN_SMALL);
166 RETURN_STR(NUMBER_CIRCLED);
167 RETURN_STR(NUMBER_KANJI_ARABIC);
168 RETURN_STR(NUMBER_HEX);
169 RETURN_STR(NUMBER_OCT);
170 RETURN_STR(NUMBER_BIN);
171 default:
172 return "UNKNOWN";
173 }
174 #undef RETURN_STR
175 }
176
InnerSegmentBoundaryToString(const Segment::Candidate & cand)177 string InnerSegmentBoundaryToString(const Segment::Candidate &cand) {
178 if (cand.inner_segment_boundary.empty()) {
179 return "";
180 }
181 std::vector<string> pieces;
182 for (Segment::Candidate::InnerSegmentIterator iter(&cand);
183 !iter.Done(); iter.Next()) {
184 string s = "<";
185 s.append(iter.GetKey().data(), iter.GetKey().size());
186 s.append(", ");
187 s.append(iter.GetValue().data(), iter.GetValue().size());
188 s.append(", ");
189 s.append(iter.GetContentKey().data(), iter.GetContentKey().size());
190 s.append(", ");
191 s.append(iter.GetContentValue().data(), iter.GetContentValue().size());
192 s.append(1, '>');
193 pieces.push_back(s);
194 }
195 string s;
196 Util::JoinStrings(pieces, " | ", &s);
197 return s;
198 }
199
PrintCandidate(const Segment & parent,int num,const Segment::Candidate & cand,std::ostream * os)200 void PrintCandidate(const Segment &parent, int num,
201 const Segment::Candidate &cand, std::ostream *os) {
202 std::vector<string> lines;
203 if (parent.key() != cand.key) {
204 lines.push_back("key: " + cand.key);
205 }
206 lines.push_back("content_vk: " + cand.content_value +
207 " " + cand.content_key);
208 lines.push_back(Util::StringPrintf(
209 "cost: %d scost: %d wcost: %d",
210 cand.cost, cand.structure_cost, cand.wcost));
211 lines.push_back("lid: " + PosIdPrintUtil::IdToString(cand.lid));
212 lines.push_back("rid: " + PosIdPrintUtil::IdToString(cand.rid));
213 lines.push_back("attr: " + CandidateAttributesToString(cand.attributes));
214 lines.push_back("num_style: " + NumberStyleToString(cand.style));
215 const string &segbdd_str = InnerSegmentBoundaryToString(cand);
216 if (!segbdd_str.empty()) {
217 lines.push_back("segbdd: " + segbdd_str);
218 }
219
220 (*os) << " " << num << " " << cand.value << std::endl;
221 for (size_t i = 0; i < lines.size(); ++i) {
222 if (!lines[i].empty()) {
223 (*os) << " " << lines[i] << std::endl;
224 }
225 }
226 }
227
PrintSegment(size_t num,size_t segments_size,const Segment & segment,std::ostream * os)228 void PrintSegment(size_t num, size_t segments_size, const Segment &segment,
229 std::ostream *os) {
230 (*os) << "---------- Segment " << num << "/" << segments_size << " ["
231 << SegmentTypeToString(segment.segment_type()) << "] ----------"
232 << std::endl
233 << segment.key() << std::endl;
234 if (FLAGS_show_meta_candidates) {
235 for (int i = 0; i < segment.meta_candidates_size(); ++i) {
236 PrintCandidate(segment, -i - 1, segment.meta_candidate(i), os);
237 }
238 }
239 for (size_t i = 0; i < segment.candidates_size(); ++i) {
240 PrintCandidate(segment, i, segment.candidate(i), os);
241 }
242 }
243
PrintSegments(const Segments & segments,std::ostream * os)244 void PrintSegments(const Segments &segments, std::ostream *os) {
245 for (size_t i = 0; i < segments.segments_size(); ++i) {
246 PrintSegment(i, segments.segments_size(), segments.segment(i), os);
247 }
248 }
249
ExecCommand(const ConverterInterface & converter,Segments * segments,const string & line,const commands::Request & request)250 bool ExecCommand(const ConverterInterface &converter,
251 Segments *segments,
252 const string &line,
253 const commands::Request &request) {
254 std::vector<string> fields;
255 Util::SplitStringUsing(line, "\t ", &fields);
256
257 #define CHECK_FIELDS_LENGTH(length) \
258 if (fields.size() < (length)) { \
259 return false; \
260 }
261
262 CHECK_FIELDS_LENGTH(1);
263
264 const string &func = fields[0];
265
266 const Config config;
267
268 segments->set_max_conversion_candidates_size(
269 FLAGS_max_conversion_candidates_size);
270
271 if (func == "startconversion" || func == "start" || func == "s") {
272 CHECK_FIELDS_LENGTH(2);
273 Table table;
274 Composer composer(&table, &request, &config);
275 composer.SetPreeditTextForTestOnly(fields[1]);
276 ConversionRequest conversion_request(&composer, &request, &config);
277 return converter.StartConversionForRequest(conversion_request, segments);
278 } else if (func == "convertwithnodeinfo" || func == "cn") {
279 CHECK_FIELDS_LENGTH(5);
280 Lattice::SetDebugDisplayNode(
281 NumberUtil::SimpleAtoi(fields[2]), // begin pos
282 NumberUtil::SimpleAtoi(fields[3]), // end pos
283 fields[4]);
284 const bool result = converter.StartConversion(segments, fields[1]);
285 Lattice::ResetDebugDisplayNode();
286 return result;
287 } else if (func == "reverseconversion" || func == "reverse" || func == "r") {
288 CHECK_FIELDS_LENGTH(2);
289 return converter.StartReverseConversion(segments, fields[1]);
290 } else if (func == "startprediction" || func == "predict" || func == "p") {
291 Table table;
292 Composer composer(&table, &request, &config);
293 if (fields.size() >= 2) {
294 composer.SetPreeditTextForTestOnly(fields[1]);
295 ConversionRequest conversion_request(&composer, &request, &config);
296 return converter.StartPredictionForRequest(conversion_request, segments);
297 } else {
298 ConversionRequest conversion_request(&composer, &request, &config);
299 return converter.StartPredictionForRequest(conversion_request, segments);
300 }
301 } else if (func == "startsuggestion" || func == "suggest") {
302 Table table;
303 Composer composer(&table, &request, &config);
304 if (fields.size() >= 2) {
305 composer.SetPreeditTextForTestOnly(fields[1]);
306 ConversionRequest conversion_request(&composer, &request, &config);
307 return converter.StartSuggestionForRequest(conversion_request, segments);
308 } else {
309 ConversionRequest conversion_request(&composer, &request, &config);
310 return converter.StartSuggestionForRequest(conversion_request, segments);
311 }
312 } else if (func == "finishconversion" || func == "finish") {
313 Table table;
314 Composer composer(&table, &request, &config);
315 ConversionRequest conversion_request(&composer, &request, &config);
316 return converter.FinishConversion(conversion_request, segments);
317 } else if (func == "resetconversion" || func == "reset") {
318 return converter.ResetConversion(segments);
319 } else if (func == "cancelconversion" || func == "cancel") {
320 return converter.CancelConversion(segments);
321 } else if (func == "commitsegmentvalue" || func == "commit" || func == "c") {
322 CHECK_FIELDS_LENGTH(3);
323 return converter.CommitSegmentValue(segments,
324 NumberUtil::SimpleAtoi(fields[1]),
325 NumberUtil::SimpleAtoi(fields[2]));
326 } else if (func == "commitallandfinish") {
327 for (int i = 0; i < segments->conversion_segments_size(); ++i) {
328 if (segments->conversion_segment(i).segment_type() !=
329 Segment::FIXED_VALUE) {
330 if (!(converter.CommitSegmentValue(segments, i, 0))) return false;
331 }
332 }
333 Table table;
334 Composer composer(&table, &request, &config);
335 ConversionRequest conversion_request(&composer, &request, &config);
336 return converter.FinishConversion(conversion_request, segments);
337 } else if (func == "focussegmentvalue" || func == "focus") {
338 CHECK_FIELDS_LENGTH(3);
339 return converter.FocusSegmentValue(segments,
340 NumberUtil::SimpleAtoi(fields[1]),
341 NumberUtil::SimpleAtoi(fields[2]));
342 } else if (func == "commitfirstsegment") {
343 CHECK_FIELDS_LENGTH(2);
344 std::vector<size_t> singleton_vector;
345 singleton_vector.push_back(NumberUtil::SimpleAtoi(fields[1]));
346 return converter.CommitSegments(segments, singleton_vector);
347 } else if (func == "freesegmentvalue" || func == "free") {
348 CHECK_FIELDS_LENGTH(2);
349 return converter.FreeSegmentValue(segments,
350 NumberUtil::SimpleAtoi(fields[1]));
351 } else if (func == "resizesegment" || func == "resize") {
352 const ConversionRequest request;
353 if (fields.size() == 3) {
354 return converter.ResizeSegment(segments,
355 request,
356 NumberUtil::SimpleAtoi(fields[1]),
357 NumberUtil::SimpleAtoi(fields[2]));
358 } else if (fields.size() > 3) {
359 std::vector<uint8> new_arrays;
360 for (size_t i = 3; i < fields.size(); ++i) {
361 new_arrays.push_back(
362 static_cast<uint8>(NumberUtil::SimpleAtoi(fields[i])));
363 }
364 return converter.ResizeSegment(segments,
365 request,
366 NumberUtil::SimpleAtoi(fields[1]),
367 NumberUtil::SimpleAtoi(fields[2]),
368 &new_arrays[0],
369 new_arrays.size());
370 }
371 } else if (func == "disableuserhistory") {
372 segments->set_user_history_enabled(false);
373 } else if (func == "enableuserhistory") {
374 segments->set_user_history_enabled(true);
375 } else {
376 LOG(WARNING) << "Unknown command: " << func;
377 return false;
378 }
379
380 #undef CHECK_FIELDS_LENGTH
381 return true;
382 }
383
SelectDataFileFromName(const string & mozc_runfiles_dir,const string & engine_name)384 std::pair<string, string> SelectDataFileFromName(
385 const string &mozc_runfiles_dir, const string &engine_name) {
386 struct {
387 const char *engine_name;
388 const char *path;
389 const char *magic;
390 } kNameAndPath[] = {
391 {"default", "data_manager/oss/mozc.data", "\xEFMOZC\r\n"},
392 {"oss", "data_manager/oss/mozc.data", "\xEFMOZC\r\n"},
393 {"mock", "data_manager/testing/mock_mozc.data", "MOCK"},
394 };
395 for (const auto &entry : kNameAndPath) {
396 if (engine_name == entry.engine_name) {
397 return std::pair<string, string>(
398 FileUtil::JoinPath(mozc_runfiles_dir, entry.path),
399 entry.magic);
400 }
401 }
402 return std::pair<string, string>("", "");
403 }
404
// Maps an --engine shortcut name to the path of its id.def file under
// |mozc_runfiles_dir|.  Returns an empty string when |engine_name| is not a
// known shortcut.
string SelectIdDefFromName(const string &mozc_runfiles_dir,
                           const string &engine_name) {
  struct IdDefEntry {
    const char *engine_name;
    const char *path;
  };
  const IdDefEntry kKnownEngines[] = {
      {"default", "data/dictionary_oss/id.def"},
      {"oss", "data/dictionary_oss/id.def"},
      {"mock", "data/test/dictionary/id.def"},
  };

  // Stays empty unless a table entry matches.
  string selected;
  for (const IdDefEntry &known : kKnownEngines) {
    if (engine_name == known.engine_name) {
      selected = FileUtil::JoinPath(mozc_runfiles_dir, known.path);
      break;
    }
  }
  return selected;
}
422
423 } // namespace
424 } // namespace mozc
425
main(int argc,char ** argv)426 int main(int argc, char **argv) {
427 mozc::InitMozc(argv[0], &argc, &argv, false);
428
429 if (!FLAGS_user_profile_dir.empty()) {
430 mozc::SystemUtil::SetUserProfileDirectory(FLAGS_user_profile_dir);
431 }
432
433 string mozc_runfiles_dir = ".";
434 if (FLAGS_engine_data.empty()) {
435 const auto path_and_magic = mozc::SelectDataFileFromName(mozc_runfiles_dir,
436 FLAGS_engine);
437 FLAGS_engine_data = path_and_magic.first;
438 FLAGS_magic = path_and_magic.second;
439 }
440 CHECK(!FLAGS_engine_data.empty())
441 << "--engine_data or --engine is invalid: "
442 << "--engine_data=" << FLAGS_engine_data << " "
443 << "--engine=" << FLAGS_engine;
444
445 if (FLAGS_id_def.empty()) {
446 FLAGS_id_def = mozc::SelectIdDefFromName(mozc_runfiles_dir, FLAGS_engine);
447 }
448
449 std::cout << "Engine type: " << FLAGS_engine_type
450 << "\nData file: " << FLAGS_engine_data
451 << "\nid.def: " << FLAGS_id_def << std::endl;
452
453 std::unique_ptr<mozc::DataManager> data_manager(new mozc::DataManager);
454 const auto status = data_manager->InitFromFile(FLAGS_engine_data,
455 FLAGS_magic);
456 CHECK_EQ(status, mozc::DataManager::Status::OK);
457
458 mozc::commands::Request request;
459 std::unique_ptr<mozc::EngineInterface> engine;
460 if (FLAGS_engine_type == "desktop") {
461 engine = mozc::Engine::CreateDesktopEngine(std::move(data_manager));
462 } else if (FLAGS_engine_type == "mobile") {
463 engine = mozc::Engine::CreateMobileEngine(std::move(data_manager));
464 mozc::commands::RequestForUnitTest::FillMobileRequest(&request);
465 } else {
466 LOG(FATAL) << "Invalid type: --engine_type=" << FLAGS_engine_type;
467 return 0;
468 }
469
470 mozc::ConverterInterface *converter = engine->GetConverter();
471 CHECK(converter);
472
473 mozc::Segments segments;
474 string line;
475
476 while (!getline(std::cin, line).fail()) {
477 if (mozc::ExecCommand(*converter, &segments, line, request)) {
478 if (FLAGS_output_debug_string) {
479 mozc::PrintSegments(segments, &std::cout);
480 }
481 } else {
482 std::cout << "ExecCommand() return false" << std::endl;
483 }
484 }
485 return 0;
486 }
487