1 // Copyright 2010-2018, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 #ifndef MOZC_CONVERTER_NODE_H_ 31 #define MOZC_CONVERTER_NODE_H_ 32 33 #include <string> 34 35 #include "base/port.h" 36 #include "dictionary/dictionary_token.h" 37 38 namespace mozc { 39 40 struct Node { 41 enum NodeType { 42 NOR_NODE, // normal node 43 BOS_NODE, // BOS (beginning of sentence) 44 EOS_NODE, // EOS (end of sentence) 45 CON_NODE, // constrained node 46 HIS_NODE, // history node 47 }; 48 49 enum Attribute { 50 DEFAULT_ATTRIBUTE = 0, 51 SYSTEM_DICTIONARY = 1 << 0, // System dictionary (not used now) 52 USER_DICTIONARY = 1 << 1, // User dictionary 53 NO_VARIANTS_EXPANSION = 1 << 2, // No need to expand full/half 54 // Internally used in the converter 55 // TODO(toshiyuki): Delete this attribute. 56 WEAK_CONNECTED_OBSOLETE = 1 << 3, 57 STARTS_WITH_PARTICLE = 1 << 4, // User input starts with particle 58 SPELLING_CORRECTION = 1 << 5, // "Did you mean" 59 ENABLE_CACHE = 1 << 6, // Cache the node in lattice 60 // Equal to that of Candidate. 61 // Life of suggestion candidates from realtime conversion is; 62 // 1. Created by ImmutableConverter as Candidate instance. 63 // 2. The Candidate instances are aggregated as Node instances 64 // in DictionaryPredictor::AggregateRealtimeConversion. 65 // 3. The Node instances are converted into Candidate instances 66 // in DictionaryPredictor::AddPredictionToCandidates. 67 // To propagate this information from Node to Candidate, 68 // Node should have the same information as Candidate. 69 PARTIALLY_KEY_CONSUMED = 1 << 7, 70 }; 71 72 // prev and next are linking pointers to connect minimum cost path 73 // in the lattice. In other words, we can think the doubly-linked list 74 // with prev/next represents the minimum cost path. 75 Node *prev; 76 Node *next; 77 78 // bnext points to another Node instance which shares the same beginning 79 // position of the key. 80 // enext points to another Node instance which shares the same ending 81 // position of the key. 82 // 83 // Here illustrates the image: 84 // 85 // key: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | ... | N | 86 // begin_nodes: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | ... | N | (in lattice) 87 // | | : : : : : : 88 // | : 89 // | : (nullptr) 90 // | : ^ 91 // | : : 92 // v : | 93 // +-----------------+ 94 // | Node1(len4) | 95 // +-----------------+ 96 // bnext| : ^ 97 // v : |enext 98 // +-----------------+ 99 // | Node2(len4) | (nullptr) 100 // +-----------------+ ^ 101 // bnext| : ^ : 102 // | : | : 103 // v : : |enext 104 // +---------------------+ 105 // | Node3(len5) | 106 // +---------------------+ (nullptr) 107 // bnext| : : ^ ^ 108 // | : : | : 109 // v : : : |enext 110 // +-------------------------+ 111 // | Node4(len6) | 112 // +-------------------------+ 113 // bnext| : : : ^ 114 // : : : : | 115 // v : : : : 116 // (nullptr) | : : : 117 // v : |enext 118 // +-----------------+ : 119 // | Node5(len4) | : 120 // +-----------------+ : 121 // bnext| : ^ : 122 // v : |enext 123 // +-----------------+ : 124 // | Node6(len4) | : 125 // +-----------------+ : 126 // bnext| : ^ : 127 // | : | : 128 // v : : |enext 129 // +---------------------+ 130 // | Node7(len5) | 131 // +---------------------+ 132 // bnext| : : ^ 133 // v : : |enext 134 // +---------------------+ 135 // | Node8(len5) | 136 // +---------------------+ 137 // bnext| : : ^ 138 // : : : | 139 // v : : | 140 // (nullptr) : : | 141 // : : | 142 // : : : : : | | : 143 // end_nodes: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | ... | N | (in lattice) 144 // 145 // Note: 146 // 1) Nodes 1, 2, 3 and 4 start with pos "0", so they are connected by 147 // bnext. Same for Nodes 5, 6, 7 and 8. 148 // 2) Nodes 3, 5 and 6 end with pos "5", so they are connected by enext. 149 // Same for Nodes 4, 7 and 8. 150 Node *bnext; 151 Node *enext; 152 153 // if it is not nullptr, transition cost 154 // from constrained_prev to current node is defined, 155 // other transition is set to be infinite 156 Node *constrained_prev; 157 158 uint16 rid; 159 uint16 lid; 160 uint16 begin_pos; 161 uint16 end_pos; 162 163 // wcost: word cost for the node; it may be changed after lookup 164 // cost: the total cost between BOS and the node 165 // raw_wcost: raw word cost for the node; it is not changed after lookup. 166 // It is used for the cache of lattice. 167 int32 wcost; 168 int32 cost; 169 int32 raw_wcost; 170 171 NodeType node_type; 172 uint32 attributes; 173 174 // key: The user input. 175 // actual_key: The actual search key that corresponds to the value. 176 // Can differ from key when no modifier conversion is enabled. 177 // value: The surface form of the word. 178 string key; 179 string actual_key; 180 string value; 181 NodeNode182 Node() { 183 Init(); 184 } 185 InitNode186 inline void Init() { 187 prev = nullptr; 188 next = nullptr; 189 bnext = nullptr; 190 enext = nullptr; 191 constrained_prev = nullptr; 192 rid = 0; 193 lid = 0; 194 begin_pos = 0; 195 end_pos = 0; 196 node_type = NOR_NODE; 197 wcost = 0; 198 cost = 0; 199 raw_wcost = 0; 200 attributes = 0; 201 key.clear(); 202 actual_key.clear(); 203 value.clear(); 204 } 205 InitFromTokenNode206 inline void InitFromToken(const dictionary::Token &token) { 207 prev = nullptr; 208 next = nullptr; 209 bnext = nullptr; 210 enext = nullptr; 211 constrained_prev = nullptr; 212 rid = token.rid; 213 lid = token.lid; 214 begin_pos = 0; 215 end_pos = 0; 216 node_type = NOR_NODE; 217 wcost = token.cost; 218 cost = 0; 219 raw_wcost = 0; 220 attributes = 0; 221 if (token.attributes & dictionary::Token::SPELLING_CORRECTION) { 222 attributes |= SPELLING_CORRECTION; 223 } 224 if (token.attributes & dictionary::Token::USER_DICTIONARY) { 225 attributes |= USER_DICTIONARY; 226 attributes |= NO_VARIANTS_EXPANSION; 227 } 228 key = token.key; 229 actual_key.clear(); 230 value = token.value; 231 } 232 }; 233 234 } // namespace mozc 235 236 #endif // MOZC_CONVERTER_NODE_H_ 237