1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "converter/connector.h"
31 
32 #include <algorithm>
33 
34 #include "base/logging.h"
35 #include "base/port.h"
36 #include "base/stl_util.h"
37 #include "data_manager/data_manager_interface.h"
38 #include "storage/louds/simple_succinct_bit_vector_index.h"
39 
40 using mozc::storage::louds::SimpleSuccinctBitVectorIndex;
41 
42 namespace mozc {
43 namespace {
44 
45 const uint32 kInvalidCacheKey = 0xFFFFFFFF;
46 const uint16 kConnectorMagicNumber = 0xCDAB;
47 const uint8 kInvalid1ByteCostValue = 255;
48 
GetHashValue(uint16 rid,uint16 lid,uint32 hash_mask)49 inline uint32 GetHashValue(uint16 rid, uint16 lid, uint32 hash_mask) {
50   return (3 * static_cast<uint32>(rid) + lid) & hash_mask;
51   // Note: The above value is equivalent to
52   //   return (3 * rid + lid) % cache_size_
53   // because cache_size_ is the power of 2.
54   // Multiplying '3' makes the conversion speed faster.
55   // The result hash value becomes reasonalbly random.
56 }
57 
EncodeKey(uint16 rid,uint16 lid)58 inline uint32 EncodeKey(uint16 rid, uint16 lid) {
59   return (static_cast<uint32>(rid) << 16) | lid;
60 }
61 
62 }  // namespace
63 
64 class Connector::Row {
65  public:
Row()66   Row() : chunk_bits_index_(sizeof(uint32)),
67           compact_bits_index_(sizeof(uint32)) {}
68 
Init(const uint8 * chunk_bits,size_t chunk_bits_size,const uint8 * compact_bits,size_t compact_bits_size,const uint8 * values,bool use_1byte_value)69   void Init(const uint8 *chunk_bits, size_t chunk_bits_size,
70             const uint8 *compact_bits, size_t compact_bits_size,
71             const uint8 *values, bool use_1byte_value) {
72     chunk_bits_index_.Init(chunk_bits, chunk_bits_size);
73     compact_bits_index_.Init(compact_bits, compact_bits_size);
74     values_ = values;
75     use_1byte_value_ = use_1byte_value;
76   }
77 
78   // Returns true if the value is found in the row and then store the found
79   // value into |value|. Otherwise returns false.
GetValue(uint16 index,uint16 * value) const80   bool GetValue(uint16 index, uint16 *value) const {
81     int chunk_bit_position = index / 8;
82     if (!chunk_bits_index_.Get(chunk_bit_position)) {
83       return false;
84     }
85     int compact_bit_position =
86         chunk_bits_index_.Rank1(chunk_bit_position) * 8 + index % 8;
87     if (!compact_bits_index_.Get(compact_bit_position)) {
88       return false;
89     }
90     int value_position = compact_bits_index_.Rank1(compact_bit_position);
91     if (use_1byte_value_) {
92       *value = values_[value_position];
93       if (*value == kInvalid1ByteCostValue) {
94         *value = kInvalidCost;
95       }
96     } else {
97       *value = reinterpret_cast<const uint16 *>(values_)[value_position];
98     }
99 
100     return true;
101   }
102 
103  private:
104   SimpleSuccinctBitVectorIndex chunk_bits_index_;
105   SimpleSuccinctBitVectorIndex compact_bits_index_;
106   const uint8 *values_;
107   bool use_1byte_value_;
108 
109   DISALLOW_COPY_AND_ASSIGN(Row);
110 };
111 
CreateFromDataManager(const DataManagerInterface & data_manager)112 Connector *Connector::CreateFromDataManager(
113     const DataManagerInterface &data_manager) {
114 #ifdef OS_ANDROID
115   const int kCacheSize = 256;
116 #else
117   const int kCacheSize = 1024;
118 #endif  // OS_ANDROID
119   const char *connection_data = nullptr;
120   size_t connection_data_size = 0;
121   data_manager.GetConnectorData(&connection_data, &connection_data_size);
122   return new Connector(connection_data, connection_data_size, kCacheSize);
123 }
124 
Connector(const char * connection_data,size_t connection_size,int cache_size)125 Connector::Connector(const char *connection_data,
126                      size_t connection_size,
127                      int cache_size)
128     : default_cost_(nullptr),
129       cache_size_(cache_size),
130       cache_hash_mask_(cache_size - 1),
131       cache_key_(new uint32[cache_size]),
132       cache_value_(new int[cache_size]) {
133   const uint16 *ptr = reinterpret_cast<const uint16 *>(connection_data);
134   CHECK_EQ(kConnectorMagicNumber, ptr[0]);
135   resolution_ = ptr[1];
136   const uint16 rsize = ptr[2];
137   const uint16 lsize = ptr[3];
138   CHECK_EQ(rsize, lsize) << "The connector matrix should be square.";
139   default_cost_ = ptr + 4;
140 
141   // Calculate the row's beginning position. Note that it should be aligned to
142   // 32-bits boundary.
143   size_t offset = 8 + (rsize + (rsize & 1)) * 2;
144 
145   // The number of valid bits in a chunk. Each bit is bitwise-or of consecutive
146   // 8-bits.
147   const size_t num_chunk_bits = (lsize + 7) / 8;
148 
149   // Then calculate the actual size of chunk in bytes, which is aligned to
150   // 32-bits boundary.
151   const size_t chunk_bits_size = (num_chunk_bits + 31) / 32 * 4;
152 
153   const bool use_1byte_value = resolution_ != 1;
154 
155   rows_.reserve(rsize);
156   for (size_t i = 0; i < rsize; ++i) {
157     const uint16 *size_data =
158         reinterpret_cast<const uint16 *>(connection_data + offset);
159     Row *row = new Row;
160     const uint16 compact_bits_size = size_data[0];
161     CHECK_EQ(compact_bits_size % 4, 0) << compact_bits_size;
162     const uint16 values_size = size_data[1];
163     CHECK_EQ(values_size % 4, 0) << values_size;
164 
165     const uint8 *chunk_bits =
166         reinterpret_cast<const uint8 *>(connection_data + offset + 4);
167     const uint8 *compact_bits = chunk_bits + chunk_bits_size;
168     const uint8 *values = compact_bits + compact_bits_size;
169     row->Init(chunk_bits, chunk_bits_size, compact_bits, compact_bits_size,
170               values, use_1byte_value);
171     rows_.push_back(row);
172 
173     offset += 4 + chunk_bits_size + compact_bits_size + values_size;
174   }
175 
176   // Check if the cache_size is the power of 2 and clear cache.
177   DCHECK_EQ(0, cache_size & (cache_size - 1));
178   ClearCache();
179 }
180 
~Connector()181 Connector::~Connector() {
182   STLDeleteElements(&rows_);
183 }
184 
185 
GetTransitionCost(uint16 rid,uint16 lid) const186 int Connector::GetTransitionCost(uint16 rid, uint16 lid) const {
187   const uint32 index = EncodeKey(rid, lid);
188   const uint32 bucket = GetHashValue(rid, lid, cache_hash_mask_);
189   if (cache_key_[bucket] == index) {
190     return cache_value_[bucket];
191   }
192   const int value = LookupCost(rid, lid);
193   cache_key_[bucket] = index;
194   cache_value_[bucket] = value;
195   return value;
196 }
197 
GetResolution() const198 int Connector::GetResolution() const {
199   return resolution_;
200 }
201 
ClearCache()202 void Connector::ClearCache() {
203   std::fill(cache_key_.get(), cache_key_.get() + cache_size_, kInvalidCacheKey);
204 }
205 
LookupCost(uint16 rid,uint16 lid) const206 int Connector::LookupCost(uint16 rid, uint16 lid) const {
207   uint16 value;
208   if (!rows_[rid]->GetValue(lid, &value)) {
209     return default_cost_[rid];
210   }
211   return value * resolution_;
212 }
213 
214 }  // namespace mozc
215