1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #ifndef MOZC_CONVERTER_NODE_H_
31 #define MOZC_CONVERTER_NODE_H_
32 
33 #include <string>
34 
35 #include "base/port.h"
36 #include "dictionary/dictionary_token.h"
37 
38 namespace mozc {
39 
40 struct Node {
41   enum NodeType {
42     NOR_NODE,  // normal node
43     BOS_NODE,  // BOS (beginning of sentence)
44     EOS_NODE,  // EOS (end of sentence)
45     CON_NODE,  // constrained node
46     HIS_NODE,  // history node
47   };
48 
49   enum Attribute {
50     DEFAULT_ATTRIBUTE = 0,
51     SYSTEM_DICTIONARY = 1 << 0,  // System dictionary (not used now)
52     USER_DICTIONARY = 1 << 1,  // User dictionary
53     NO_VARIANTS_EXPANSION = 1 << 2,  // No need to expand full/half
54     // Internally used in the converter
55     // TODO(toshiyuki): Delete this attribute.
56     WEAK_CONNECTED_OBSOLETE = 1 << 3,
57     STARTS_WITH_PARTICLE = 1 << 4,  // User input starts with particle
58     SPELLING_CORRECTION = 1 << 5,  // "Did you mean"
59     ENABLE_CACHE = 1 << 6,  // Cache the node in lattice
60     // Equal to that of Candidate.
61     // Life of suggestion candidates from realtime conversion is;
62     // 1. Created by ImmutableConverter as Candidate instance.
63     // 2. The Candidate instances are aggregated as Node instances
64     //    in DictionaryPredictor::AggregateRealtimeConversion.
65     // 3. The Node instances are converted into Candidate instances
66     //    in DictionaryPredictor::AddPredictionToCandidates.
67     // To propagate this information from Node to Candidate,
68     // Node should have the same information as Candidate.
69     PARTIALLY_KEY_CONSUMED = 1 << 7,
70   };
71 
72   // prev and next are linking pointers to connect minimum cost path
73   // in the lattice. In other words, we can think the doubly-linked list
74   // with prev/next represents the minimum cost path.
75   Node *prev;
76   Node *next;
77 
78   // bnext points to another Node instance which shares the same beginning
79   // position of the key.
80   // enext points to another Node instance which shares the same ending
81   // position of the key.
82   //
83   // Here illustrates the image:
84   //
85   // key:         | 0 | 1 | 2 | 3 | 4 | 5 | 6 | ... | N |
86   // begin_nodes: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | ... | N | (in lattice)
87   //                |   |   :   :   :   :   :         :
88   //                |   :
89   //                |   :          (nullptr)
90   //                |   :           ^
91   //                |   :           :
92   //                v   :           |
93   //               +-----------------+
94   //               | Node1(len4)     |
95   //               +-----------------+
96   //           bnext|   :           ^
97   //                v   :           |enext
98   //               +-----------------+
99   //               | Node2(len4)     | (nullptr)
100   //               +-----------------+  ^
101   //           bnext|   :           ^   :
102   //                |   :           |   :
103   //                v   :           :   |enext
104   //               +---------------------+
105   //               | Node3(len5)         |
106   //               +---------------------+ (nullptr)
107   //           bnext|   :           :   ^   ^
108   //                |   :           :   |   :
109   //                v   :           :   :   |enext
110   //               +-------------------------+
111   //               | Node4(len6)             |
112   //               +-------------------------+
113   //           bnext|   :           :   :   ^
114   //                :   :           :   :   |
115   //                v   :           :   :   :
116   //          (nullptr) |           :   :   :
117   //                    v           :   |enext
118   //                   +-----------------+  :
119   //                   | Node5(len4)     |  :
120   //                   +-----------------+  :
121   //               bnext|           :   ^   :
122   //                    v           :   |enext
123   //                   +-----------------+  :
124   //                   | Node6(len4)     |  :
125   //                   +-----------------+  :
126   //               bnext|           :   ^   :
127   //                    |           :   |   :
128   //                    v           :   :   |enext
129   //                   +---------------------+
130   //                   | Node7(len5)         |
131   //                   +---------------------+
132   //               bnext|           :   :   ^
133   //                    v           :   :   |enext
134   //                   +---------------------+
135   //                   | Node8(len5)         |
136   //                   +---------------------+
137   //               bnext|           :   :   ^
138   //                    :           :   :   |
139   //                    v           :   :   |
140   //               (nullptr)        :   :   |
141   //                                :   :   |
142   //                :   :   :   :   :   |   |         :
143   // end_nodes:   | 0 | 1 | 2 | 3 | 4 | 5 | 6 | ... | N |  (in lattice)
144   //
145   // Note:
146   // 1) Nodes 1, 2, 3 and 4 start with pos "0", so they are connected by
147   //    bnext. Same for Nodes 5, 6, 7 and 8.
148   // 2) Nodes 3, 5 and 6 end with pos "5", so they are connected by enext.
149   //    Same for Nodes 4, 7 and 8.
150   Node *bnext;
151   Node *enext;
152 
153   // if it is not nullptr, transition cost
154   // from constrained_prev to current node is defined,
155   // other transition is set to be infinite
156   Node *constrained_prev;
157 
158   uint16 rid;
159   uint16 lid;
160   uint16 begin_pos;
161   uint16 end_pos;
162 
163   // wcost: word cost for the node; it may be changed after lookup
164   // cost: the total cost between BOS and the node
165   // raw_wcost: raw word cost for the node; it is not changed after lookup.
166   //            It is used for the cache of lattice.
167   int32 wcost;
168   int32 cost;
169   int32 raw_wcost;
170 
171   NodeType node_type;
172   uint32 attributes;
173 
174   // key: The user input.
175   // actual_key: The actual search key that corresponds to the value.
176   //           Can differ from key when no modifier conversion is enabled.
177   // value: The surface form of the word.
178   string key;
179   string actual_key;
180   string value;
181 
NodeNode182   Node() {
183     Init();
184   }
185 
InitNode186   inline void Init() {
187     prev = nullptr;
188     next = nullptr;
189     bnext = nullptr;
190     enext = nullptr;
191     constrained_prev = nullptr;
192     rid = 0;
193     lid = 0;
194     begin_pos = 0;
195     end_pos = 0;
196     node_type = NOR_NODE;
197     wcost = 0;
198     cost = 0;
199     raw_wcost = 0;
200     attributes = 0;
201     key.clear();
202     actual_key.clear();
203     value.clear();
204   }
205 
InitFromTokenNode206   inline void InitFromToken(const dictionary::Token &token) {
207     prev = nullptr;
208     next = nullptr;
209     bnext = nullptr;
210     enext = nullptr;
211     constrained_prev = nullptr;
212     rid = token.rid;
213     lid = token.lid;
214     begin_pos = 0;
215     end_pos = 0;
216     node_type = NOR_NODE;
217     wcost = token.cost;
218     cost = 0;
219     raw_wcost = 0;
220     attributes = 0;
221     if (token.attributes & dictionary::Token::SPELLING_CORRECTION) {
222       attributes |= SPELLING_CORRECTION;
223     }
224     if (token.attributes & dictionary::Token::USER_DICTIONARY) {
225       attributes |= USER_DICTIONARY;
226       attributes |= NO_VARIANTS_EXPANSION;
227     }
228     key = token.key;
229     actual_key.clear();
230     value = token.value;
231   }
232 };
233 
234 }  // namespace mozc
235 
236 #endif  // MOZC_CONVERTER_NODE_H_
237