1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "composer/internal/typing_model.h"
31 
32 #include <limits>
33 #include <memory>
34 
35 #include "base/port.h"
36 #include "base/string_piece.h"
37 
38 namespace mozc {
39 namespace composer {
40 
41 const uint8 TypingModel::kNoData = std::numeric_limits<uint8>::max();
42 const int TypingModel::kInfinity = (2 << 20);  // approximately equals 1e+6
43 
TypingModel(const char * characters,size_t characters_size,const uint8 * cost_table,size_t cost_table_size,const int32 * mapping_table)44 TypingModel::TypingModel(const char *characters,
45                          size_t characters_size,
46                          const uint8 *cost_table,
47                          size_t cost_table_size,
48                          const int32 *mapping_table) :
49     character_to_radix_table_(
50         new unsigned char[std::numeric_limits<unsigned char>::max()]),
51     characters_size_(characters_size),
52     cost_table_(cost_table),
53     cost_table_size_(cost_table_size),
54     mapping_table_(mapping_table) {
55   for (size_t i = 0; i < characters_size; ++i) {
56     character_to_radix_table_[characters[i]] = i + 1;
57   }
58 }
59 
60 TypingModel::~TypingModel() = default;
61 
GetCost(StringPiece key) const62 int TypingModel::GetCost(StringPiece key) const {
63   size_t index = GetIndex(key);
64   if (index >= cost_table_size_) {
65     return kInfinity;
66   }
67   uint8 cost_index = cost_table_[index];
68   return cost_index == kNoData ? kInfinity : mapping_table_[cost_index];
69 }
70 
GetIndex(StringPiece key) const71 size_t TypingModel::GetIndex(StringPiece key) const {
72   const unsigned int radix = characters_size_ + 1;
73   size_t index = 0;
74   for (size_t i = 0; i < key.length(); ++i) {
75     index = index * radix + character_to_radix_table_[key[i]];
76   }
77   return index;
78 }
79 
80 // static
CreateTypingModel(const mozc::commands::Request::SpecialRomanjiTable & special_romanji_table,const DataManagerInterface & data_manager)81 std::unique_ptr<const TypingModel> TypingModel::CreateTypingModel(
82     const mozc::commands::Request::SpecialRomanjiTable &special_romanji_table,
83     const DataManagerInterface &data_manager) {
84   const char *key = nullptr;
85   switch (special_romanji_table) {
86     case mozc::commands::Request::TWELVE_KEYS_TO_HIRAGANA:
87       key = "typing_model_12keys-hiragana.tsv";
88       break;
89     case mozc::commands::Request::FLICK_TO_HIRAGANA:
90       key = "typing_model_flick-hiragana.tsv";
91       break;
92     case mozc::commands::Request::TOGGLE_FLICK_TO_HIRAGANA:
93       key = "typing_model_toggle_flick-hiragana.tsv";
94       break;
95     case mozc::commands::Request::QWERTY_MOBILE_TO_HIRAGANA:
96       key = "typing_model_qwerty_mobile-hiragana.tsv";
97       break;
98     case mozc::commands::Request::GODAN_TO_HIRAGANA:
99       key = "typing_model_godan-hiragana.tsv";
100       break;
101     default:
102       return nullptr;
103   }
104 
105   const StringPiece data = data_manager.GetTypingModel(key);
106   if (data.empty()) {
107     return nullptr;
108   }
109   // Parse the binary image of typing model.  See gen_typing_model.py for file
110   // format.
111   const uint32 characters_size =
112       *reinterpret_cast<const uint32*>(data.data());
113   const char *characters = data.data() + 4;
114 
115   size_t offset = 4 + characters_size;
116   if (offset % 4 != 0) {
117     offset += 4 - offset % 4;
118   }
119   const uint32 cost_table_size =
120       *reinterpret_cast<const uint32*>(data.data() + offset);
121   const uint8 *cost_table =
122       reinterpret_cast<const uint8*>(data.data() + offset + 4);
123 
124   offset += 4 + cost_table_size;
125   if (offset % 4 != 0) {
126     offset += 4 - offset % 4;
127   }
128   const int32 *mapping_table =
129       reinterpret_cast<const int32*>(data.data() + offset);
130 
131   return std::unique_ptr<const TypingModel>(
132       new TypingModel(characters, characters_size, cost_table, cost_table_size,
133                       mapping_table));
134 }
135 
136 }  // namespace composer
137 }  // namespace mozc
138