1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "converter/lattice.h"
31
32 #include <algorithm>
33 #include <sstream>
34 #include <string>
35 #include <vector>
36
37 #include "base/logging.h"
38 #include "base/port.h"
39 #include "base/singleton.h"
40 #include "base/util.h"
41 #include "converter/node.h"
42 #include "converter/node_allocator.h"
43
44 namespace mozc {
45 namespace {
46
InitBOSNode(Lattice * lattice,uint16 length)47 Node *InitBOSNode(Lattice *lattice, uint16 length) {
48 Node *bos_node = lattice->NewNode();
49 DCHECK(bos_node);
50 bos_node->rid = 0; // 0 is reserved for EOS/BOS
51 bos_node->lid = 0;
52 bos_node->key.clear();
53 bos_node->value = "BOS";
54 bos_node->node_type = Node::BOS_NODE;
55 bos_node->wcost = 0;
56 bos_node->cost = 0;
57 bos_node->begin_pos = length;
58 bos_node->end_pos = length;
59 bos_node->enext = NULL;
60 return bos_node;
61 }
62
InitEOSNode(Lattice * lattice,uint16 length)63 Node *InitEOSNode(Lattice *lattice, uint16 length) {
64 Node *eos_node = lattice->NewNode();
65 DCHECK(eos_node);
66 eos_node->rid = 0; // 0 is reserved for EOS/BOS
67 eos_node->lid = 0;
68 eos_node->key.clear();
69 eos_node->value = "EOS";
70 eos_node->node_type = Node::EOS_NODE;
71 eos_node->wcost = 0;
72 eos_node->cost = 0;
73 eos_node->begin_pos = length;
74 eos_node->end_pos = length;
75 eos_node->bnext = NULL;
76 return eos_node;
77 }
78
PathContainsString(const Node * node,size_t begin_pos,size_t end_pos,const string & str)79 bool PathContainsString(const Node *node, size_t begin_pos, size_t end_pos,
80 const string &str) {
81 CHECK(node);
82 for (; node->prev != NULL; node = node->prev) {
83 if (node->begin_pos == begin_pos && node->end_pos == end_pos &&
84 node->value == str) {
85 return true;
86 }
87 }
88 return false;
89 }
90
GetDebugStringForNode(const Node * node,const Node * prev_node)91 string GetDebugStringForNode(const Node *node, const Node *prev_node) {
92 CHECK(node);
93 std::stringstream os;
94 os << "[con:" << node->cost - (prev_node ? prev_node->cost : 0) -
95 node->wcost << "]";
96 os << "[lid:" << node->lid << "]";
97 os << "\"" << node->value << "\"";
98 os << "[wcost:" << node->wcost << "]";
99 os << "[cost:" << node->cost << "]";
100 os << "[rid:" << node->rid << "]";
101 return os.str();
102 }
103
GetDebugStringForPath(const Node * end_node)104 string GetDebugStringForPath(const Node *end_node) {
105 CHECK(end_node);
106 std::stringstream os;
107 std::vector<const Node *> node_vector;
108
109 for (const Node *node = end_node; node; node = node->prev) {
110 node_vector.push_back(node);
111 }
112 const Node *prev_node = NULL;
113
114 for (int i = static_cast<int>(node_vector.size()) - 1; i >= 0; --i) {
115 const Node *node = node_vector[i];
116 os << GetDebugStringForNode(node, prev_node);
117 prev_node = node;
118 }
119 return os.str();
120 }
121
GetCommonPrefix(const string & str1,const string & str2)122 string GetCommonPrefix(const string &str1, const string &str2) {
123 std::vector<string> split1, split2;
124 Util::SplitStringToUtf8Chars(str1, &split1);
125 Util::SplitStringToUtf8Chars(str2, &split2);
126 string common_prefix = "";
127 for (int i = 0; i < std::min(split1.size(), split2.size()); ++i) {
128 if (split1[i] == split2[i]) {
129 common_prefix += split1[i];
130 } else {
131 break;
132 }
133 }
134 return common_prefix;
135 }
136
137 } // namespace
138
139 struct LatticeDisplayNodeInfo {
140 size_t display_node_begin_pos_;
141 size_t display_node_end_pos_;
142 string display_node_str_;
143 };
144
Lattice()145 Lattice::Lattice() : history_end_pos_(0), node_allocator_(new NodeAllocator) {}
146
~Lattice()147 Lattice::~Lattice() {}
148
node_allocator() const149 NodeAllocator *Lattice::node_allocator() const {
150 return node_allocator_.get();
151 }
152
NewNode()153 Node *Lattice::NewNode() {
154 return node_allocator_->NewNode();
155 }
156
begin_nodes(size_t pos) const157 Node *Lattice::begin_nodes(size_t pos) const {
158 return begin_nodes_[pos];
159 }
160
end_nodes(size_t pos) const161 Node *Lattice::end_nodes(size_t pos) const {
162 return end_nodes_[pos];
163 }
164
SetKey(StringPiece key)165 void Lattice::SetKey(StringPiece key) {
166 Clear();
167 key_.assign(key.data(), key.size());
168 const size_t size = key.size();
169 begin_nodes_.resize(size + 4);
170 end_nodes_.resize(size + 4);
171 cache_info_.resize(size + 4);
172
173 std::fill(begin_nodes_.begin(), begin_nodes_.end(),
174 static_cast<Node *>(NULL));
175 std::fill(end_nodes_.begin(), end_nodes_.end(), static_cast<Node *>(NULL));
176 std::fill(cache_info_.begin(), cache_info_.end(), 0);
177
178 end_nodes_[0] = InitBOSNode(this,
179 static_cast<uint16>(0));
180 begin_nodes_[key_.size()] =
181 InitEOSNode(this, static_cast<uint16>(key_.size()));
182 }
183
bos_nodes() const184 Node *Lattice::bos_nodes() const {
185 return end_nodes_[0];
186 }
187
eos_nodes() const188 Node *Lattice::eos_nodes() const {
189 return begin_nodes_[key_.size()];
190 }
191
Insert(size_t pos,Node * node)192 void Lattice::Insert(size_t pos, Node *node) {
193 for (Node *rnode = node; rnode != NULL; rnode = rnode->bnext) {
194 const size_t end_pos = std::min(rnode->key.size() + pos, key_.size());
195 rnode->begin_pos = static_cast<uint16>(pos);
196 rnode->end_pos = static_cast<uint16>(end_pos);
197 rnode->prev = NULL;
198 rnode->next = NULL;
199 rnode->cost = 0;
200 rnode->enext = end_nodes_[end_pos];
201 end_nodes_[end_pos] = rnode;
202 }
203
204 if (begin_nodes_[pos] == NULL) {
205 begin_nodes_[pos] = node;
206 } else {
207 for (Node *rnode = node; rnode != NULL; rnode = rnode->bnext) {
208 if (rnode->bnext == NULL) {
209 rnode->bnext = begin_nodes_[pos];
210 begin_nodes_[pos] = node;
211 break;
212 }
213 }
214 }
215 }
216
key() const217 const string &Lattice::key() const {
218 return key_;
219 }
220
has_lattice() const221 bool Lattice::has_lattice() const {
222 return !begin_nodes_.empty();
223 }
224
Clear()225 void Lattice::Clear() {
226 key_.clear();
227 begin_nodes_.clear();
228 end_nodes_.clear();
229 node_allocator_->Free();
230 cache_info_.clear();
231 history_end_pos_ = 0;
232 }
233
SetDebugDisplayNode(size_t begin_pos,size_t end_pos,const string & str)234 void Lattice::SetDebugDisplayNode(size_t begin_pos, size_t end_pos,
235 const string &str) {
236 LatticeDisplayNodeInfo *info = Singleton<LatticeDisplayNodeInfo>::get();
237 info->display_node_begin_pos_ = begin_pos;
238 info->display_node_end_pos_ = end_pos;
239 info->display_node_str_ = str;
240 }
241
ResetDebugDisplayNode()242 void Lattice::ResetDebugDisplayNode() {
243 LatticeDisplayNodeInfo *info = Singleton<LatticeDisplayNodeInfo>::get();
244 info->display_node_str_.clear();
245 }
246
set_history_end_pos(size_t pos)247 void Lattice::set_history_end_pos(size_t pos) {
248 history_end_pos_ = pos;
249 }
250
history_end_pos() const251 size_t Lattice::history_end_pos() const {
252 return history_end_pos_;
253 }
254
UpdateKey(const string & new_key)255 void Lattice::UpdateKey(const string &new_key) {
256 const string old_key = key_;
257 const string common_prefix = GetCommonPrefix(new_key, old_key);
258
259 // if the length of common prefix is too short, call SetKey
260 if (common_prefix.size() <= old_key.size() / 2) {
261 SetKey(new_key);
262 return;
263 }
264
265 // if node_allocator has many nodes, then clean up
266 const size_t size_threshold = node_allocator_->max_nodes_size();
267 if (node_allocator_->node_count() > size_threshold) {
268 SetKey(new_key);
269 return;
270 }
271
272 // erase the suffix of old_key so that the key becomes common_prefix
273 ShrinkKey(common_prefix.size());
274 // add a suffix so that the key becomes new_key
275 AddSuffix(new_key.substr(common_prefix.size()));
276 }
277
AddSuffix(const string & suffix_key)278 void Lattice::AddSuffix(const string &suffix_key) {
279 if (suffix_key.empty()) {
280 return;
281 }
282 const size_t old_size = key_.size();
283 const size_t new_size = key_.size() + suffix_key.size();
284
285 // update begin_nodes and end_nodes
286 begin_nodes_.resize(new_size + 4);
287 end_nodes_.resize(new_size + 4);
288
289 std::fill(begin_nodes_.begin() + old_size, begin_nodes_.end(),
290 static_cast<Node *>(NULL));
291 std::fill(end_nodes_.begin() + old_size + 1, end_nodes_.end(),
292 static_cast<Node *>(NULL));
293
294 end_nodes_[0] = InitBOSNode(this,
295 static_cast<uint16>(0));
296 begin_nodes_[new_size] =
297 InitEOSNode(this, static_cast<uint16>(new_size));
298
299 // update cache_info
300 cache_info_.resize(new_size + 4, 0);
301
302 // update key
303 key_ += suffix_key;
304 }
305
ShrinkKey(const size_t new_len)306 void Lattice::ShrinkKey(const size_t new_len) {
307 const size_t old_len = key_.size();
308 CHECK_LE(new_len, old_len);
309 if (new_len == old_len) {
310 return;
311 }
312
313 // erase nodes whose end position exceeds new_len
314 for (size_t i = 0; i < new_len; ++i) {
315 Node *begin = begin_nodes_[i];
316 if (begin == NULL) {
317 continue;
318 }
319
320 for (Node *prev = begin, *curr = begin->bnext; curr != NULL; ) {
321 CHECK(prev);
322 if (curr->end_pos > new_len) {
323 prev->bnext = curr->bnext;
324 curr = curr->bnext;
325 } else {
326 prev = prev->bnext;
327 curr = curr->bnext;
328 }
329 }
330 if (begin->end_pos > new_len) {
331 begin_nodes_[i] = begin->bnext;
332 }
333 }
334
335 // update begin_nodes and end_nodes
336 for (size_t i = new_len; i <= old_len; ++i) {
337 begin_nodes_[i] = NULL;
338 }
339 for (size_t i = new_len + 1; i <= old_len; ++i) {
340 end_nodes_[i] = NULL;
341 }
342 begin_nodes_[new_len] =
343 InitEOSNode(this, static_cast<uint16>(new_len));
344
345 // update cache_info
346 for (size_t i = 0; i < new_len; ++i) {
347 cache_info_[i] = std::min(cache_info_[i], new_len - i);
348 }
349 std::fill(cache_info_.begin() + new_len, cache_info_.end(), 0);
350
351 // update key
352 key_.erase(new_len);
353 }
354
cache_info(const size_t pos) const355 size_t Lattice::cache_info(const size_t pos) const {
356 CHECK_LE(pos, key_.size());
357 return cache_info_[pos];
358 }
359
SetCacheInfo(const size_t pos,const size_t len)360 void Lattice::SetCacheInfo(const size_t pos, const size_t len) {
361 CHECK_LE(pos, key_.size());
362 cache_info_[pos] = len;
363 }
364
ResetNodeCost()365 void Lattice::ResetNodeCost() {
366 for (size_t i = 0; i <= key_.size(); ++i) {
367 if (begin_nodes_[i] != NULL) {
368 Node *prev = NULL;
369 for (Node *node = begin_nodes_[i]; node != NULL; node = node->bnext) {
370 // do not process BOS / EOS nodes
371 if (node->node_type == Node::BOS_NODE ||
372 node->node_type == Node::EOS_NODE) {
373 continue;
374 }
375 // if the node has ENABLE_CACHE attribute, then revert its wcost.
376 // Otherwise, erase the node from the lattice.
377 if (node->attributes & Node::ENABLE_CACHE) {
378 node->wcost = node->raw_wcost;
379 } else {
380 if (node == begin_nodes_[i]) {
381 if (node->bnext == NULL) {
382 begin_nodes_[i] = NULL;
383 } else {
384 begin_nodes_[i] = node->bnext;
385 }
386 } else {
387 CHECK(prev);
388 CHECK_EQ(prev->bnext, node);
389 prev->next = node->bnext;
390 }
391 }
392 // traverse a next node
393 prev = node;
394 }
395 }
396
397 if (end_nodes_[i] != NULL) {
398 Node *prev = NULL;
399 for (Node *node = end_nodes_[i]; node != NULL; node = node->enext) {
400 if (node->node_type == Node::BOS_NODE ||
401 node->node_type == Node::EOS_NODE) {
402 continue;
403 }
404 if (node->attributes & Node::ENABLE_CACHE) {
405 node->wcost = node->raw_wcost;
406 } else {
407 if (node == end_nodes_[i]) {
408 if (node->enext == NULL) {
409 end_nodes_[i] = NULL;
410 } else {
411 end_nodes_[i] = node->enext;
412 }
413 } else {
414 CHECK(prev);
415 CHECK_EQ(prev->enext, node);
416 prev->next = node->enext;
417 }
418 }
419 prev = node;
420 }
421 }
422 }
423 }
424
DebugString() const425 string Lattice::DebugString() const {
426 std::stringstream os;
427 if (!has_lattice()) {
428 return "";
429 }
430
431 std::vector<const Node *> best_path_nodes;
432
433 const Node *node = eos_nodes();
434 // Print the best path
435 os << "Best path: ";
436 os << GetDebugStringForPath(node);
437 os << std::endl;
438
439 LatticeDisplayNodeInfo *info = Singleton<LatticeDisplayNodeInfo>::get();
440
441 if (info->display_node_str_.empty()) {
442 return os.str();
443 }
444
445 for (; node != NULL; node = node->prev) {
446 best_path_nodes.push_back(node);
447 }
448
449 // Print tha path that contains the designated node
450 for (std::vector<const Node *>::const_iterator it = best_path_nodes.begin();
451 it != best_path_nodes.end(); ++it) {
452 const Node *best_path_node = *it;
453 if (best_path_node->begin_pos < info->display_node_end_pos_) {
454 break;
455 }
456 for (const Node *prev_node = end_nodes(best_path_node->begin_pos);
457 prev_node; prev_node = prev_node->enext) {
458 if (!PathContainsString(prev_node,
459 info->display_node_begin_pos_,
460 info->display_node_end_pos_,
461 info->display_node_str_)) {
462 continue;
463 }
464 os << "The path " << GetDebugStringForPath(prev_node)
465 << " ( + connection cost + wcost: " << best_path_node->wcost << ")"
466 << std::endl
467 << "was defeated"
468 << " by the path " << std::endl
469 << GetDebugStringForPath(best_path_node->prev)
470 << " connecting to the node "
471 << GetDebugStringForNode(best_path_node, best_path_node->prev)
472 << std::endl;
473 }
474 }
475
476 return os.str();
477 }
478
479 } // namespace mozc
480