1 /***************************************************************************** 2 3 Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. 4 Copyright (c) 2016, 2020, MariaDB Corporation. 5 6 This program is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free Software 8 Foundation; version 2 of the License. 9 10 This program is distributed in the hope that it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License along with 15 this program; if not, write to the Free Software Foundation, Inc., 16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 17 18 *****************************************************************************/ 19 20 /******************************************************************//** 21 @file include/fts0ast.h 22 The FTS query parser (AST) abstract syntax tree routines 23 24 Created 2007/03/16/03 Sunny Bains 25 *******************************************************/ 26 27 #ifndef INNOBASE_FST0AST_H 28 #define INNOBASE_FST0AST_H 29 30 #include "mem0mem.h" 31 32 /* The type of AST Node */ 33 enum fts_ast_type_t { 34 FTS_AST_OPER, /*!< Operator */ 35 FTS_AST_NUMB, /*!< Number */ 36 FTS_AST_TERM, /*!< Term (or word) */ 37 FTS_AST_TEXT, /*!< Text string */ 38 FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser 39 The difference from text type 40 is that we tokenize text into 41 term list */ 42 FTS_AST_LIST, /*!< Expression list */ 43 FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ 44 }; 45 46 /* The FTS query operators that we support */ 47 enum fts_ast_oper_t { 48 FTS_NONE, /*!< No operator */ 49 50 FTS_IGNORE, /*!< Ignore rows that contain 51 this word */ 52 53 FTS_EXIST, /*!< Include rows that contain 54 this word */ 55 56 FTS_NEGATE, /*!< Include rows that contain 57 this word but rank them 58 lower*/ 59 60 FTS_INCR_RATING, /*!< Increase the rank for this 61 word*/ 62 63 FTS_DECR_RATING, /*!< Decrease the rank for this 64 word*/ 65 66 FTS_DISTANCE, /*!< Proximity distance */ 67 FTS_IGNORE_SKIP, /*!< Transient node operator 68 signifies that this is a 69 FTS_IGNORE node, and ignored in 70 the first pass of 71 fts_ast_visit() */ 72 FTS_EXIST_SKIP /*!< Transient node operator 73 signifies that this ia a 74 FTS_EXIST node, and ignored in 75 the first pass of 76 fts_ast_visit() */ 77 }; 78 79 /* Data types used by the FTS parser */ 80 struct fts_lexer_t; 81 struct fts_ast_node_t; 82 struct fts_ast_state_t; 83 struct fts_ast_string_t; 84 85 typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); 86 87 /******************************************************************** 88 Parse the string using the lexer setup within state.*/ 89 int 90 fts_parse( 91 /*======*/ 92 /* out: 0 on OK, 1 on error */ 93 fts_ast_state_t* state); /*!< in: ast state instance.*/ 94 95 /******************************************************************** 96 Create an AST operator node */ 97 extern 98 fts_ast_node_t* 99 fts_ast_create_node_oper( 100 /*=====================*/ 101 void* arg, /*!< in: ast state */ 102 fts_ast_oper_t oper); /*!< in: ast operator */ 103 /******************************************************************** 104 Create an AST term node, makes a copy of ptr */ 105 extern 106 fts_ast_node_t* 107 fts_ast_create_node_term( 108 /*=====================*/ 109 void* arg, /*!< in: ast state */ 110 const fts_ast_string_t* ptr); /*!< in: term string */ 111 /******************************************************************** 112 Create an AST text node */ 113 extern 114 fts_ast_node_t* 115 fts_ast_create_node_text( 116 /*=====================*/ 117 void* arg, /*!< in: ast state */ 118 const fts_ast_string_t* ptr); /*!< in: text string */ 119 /******************************************************************** 120 Create an AST expr list node */ 121 extern 122 fts_ast_node_t* 123 fts_ast_create_node_list( 124 /*=====================*/ 125 void* arg, /*!< in: ast state */ 126 fts_ast_node_t* expr); /*!< in: ast expr */ 127 /******************************************************************** 128 Create a sub-expression list node. This function takes ownership of 129 expr and is responsible for deleting it. */ 130 extern 131 fts_ast_node_t* 132 fts_ast_create_node_subexp_list( 133 /*============================*/ 134 /* out: new node */ 135 void* arg, /*!< in: ast state instance */ 136 fts_ast_node_t* expr); /*!< in: ast expr instance */ 137 /******************************************************************** 138 Set the wildcard attribute of a term.*/ 139 extern 140 void 141 fts_ast_term_set_wildcard( 142 /*======================*/ 143 fts_ast_node_t* node); /*!< in: term to change */ 144 /******************************************************************** 145 Set the proximity attribute of a text node. */ 146 void 147 fts_ast_text_set_distance( 148 /*======================*/ 149 fts_ast_node_t* node, /*!< in/out: text node */ 150 ulint distance); /*!< in: the text proximity 151 distance */ 152 /********************************************************************//** 153 Free a fts_ast_node_t instance. 154 @return next node to free */ 155 fts_ast_node_t* 156 fts_ast_free_node( 157 /*==============*/ 158 fts_ast_node_t* node); /*!< in: node to free */ 159 /******************************************************************** 160 Add a sub-expression to an AST*/ 161 extern 162 fts_ast_node_t* 163 fts_ast_add_node( 164 /*=============*/ 165 fts_ast_node_t* list, /*!< in: list node instance */ 166 fts_ast_node_t* node); /*!< in: (sub) expr to add */ 167 /******************************************************************** 168 Print the AST node recursively.*/ 169 extern 170 void 171 fts_ast_node_print( 172 /*===============*/ 173 fts_ast_node_t* node); /*!< in: ast node to print */ 174 /******************************************************************** 175 Free node and expr allocations.*/ 176 extern 177 void 178 fts_ast_state_free( 179 /*===============*/ 180 fts_ast_state_t*state); /*!< in: state instance 181 to free */ 182 /** Check only union operation involved in the node 183 @param[in] node ast node to check 184 @return true if the node contains only union else false. */ 185 bool 186 fts_ast_node_check_union( 187 fts_ast_node_t* node); 188 189 /******************************************************************//** 190 Traverse the AST - in-order traversal. 191 @return DB_SUCCESS if all went well */ 192 dberr_t 193 fts_ast_visit( 194 /*==========*/ 195 fts_ast_oper_t oper, /*!< in: FTS operator */ 196 fts_ast_node_t* node, /*!< in: instance to traverse*/ 197 fts_ast_callback visitor, /*!< in: callback */ 198 void* arg, /*!< in: callback arg */ 199 bool* has_ignore) /*!< out: whether we encounter 200 and ignored processing an 201 operator, currently we only 202 ignore FTS_IGNORE operator */ 203 MY_ATTRIBUTE((nonnull, warn_unused_result)); 204 /******************************************************************** 205 Create a lex instance.*/ 206 fts_lexer_t* 207 fts_lexer_create( 208 /*=============*/ 209 ibool boolean_mode, /*!< in: query type */ 210 const byte* query, /*!< in: query string */ 211 ulint query_len) /*!< in: query string len */ 212 MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); 213 /******************************************************************** 214 Free an fts_lexer_t instance.*/ 215 void 216 fts_lexer_free( 217 /*===========*/ 218 fts_lexer_t* fts_lexer) /*!< in: lexer instance to 219 free */ 220 MY_ATTRIBUTE((nonnull)); 221 222 /** 223 Create an ast string object, with NUL-terminator, so the string 224 has one more byte than len 225 @param[in] str pointer to string 226 @param[in] len length of the string 227 @return ast string with NUL-terminator */ 228 fts_ast_string_t* 229 fts_ast_string_create( 230 const byte* str, 231 ulint len); 232 233 /** 234 Free an ast string instance 235 @param[in,out] ast_str string to free */ 236 void 237 fts_ast_string_free( 238 fts_ast_string_t* ast_str); 239 240 /** 241 Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul 242 @param[in] str string to translate 243 @param[in] base the base 244 @return translated number */ 245 ulint 246 fts_ast_string_to_ul( 247 const fts_ast_string_t* ast_str, 248 int base); 249 250 /* String of length len. 251 We always store the string of length len with a terminating '\0', 252 regardless of there is any 0x00 in the string itself */ 253 struct fts_ast_string_t { 254 /*!< Pointer to string. */ 255 byte* str; 256 257 /*!< Length of the string. */ 258 ulint len; 259 }; 260 261 /* Query term type */ 262 struct fts_ast_term_t { 263 fts_ast_string_t* ptr; /*!< Pointer to term string.*/ 264 ibool wildcard; /*!< TRUE if wild card set.*/ 265 }; 266 267 /* Query text type */ 268 struct fts_ast_text_t { 269 fts_ast_string_t* ptr; /*!< Pointer to text string.*/ 270 ulint distance; /*!< > 0 if proximity distance 271 set */ 272 }; 273 274 /* The list of nodes in an expr list */ 275 struct fts_ast_list_t { 276 fts_ast_node_t* head; /*!< Children list head */ 277 fts_ast_node_t* tail; /*!< Children list tail */ 278 }; 279 280 /* FTS AST node to store the term, text, operator and sub-expressions.*/ 281 struct fts_ast_node_t { 282 fts_ast_type_t type; /*!< The type of node */ 283 fts_ast_text_t text; /*!< Text node */ 284 fts_ast_term_t term; /*!< Term node */ 285 fts_ast_oper_t oper; /*!< Operator value */ 286 fts_ast_list_t list; /*!< Expression list */ 287 fts_ast_node_t* next; /*!< Link for expr list */ 288 fts_ast_node_t* next_alloc; /*!< For tracking allocations */ 289 bool visited; /*!< whether this node is 290 already processed */ 291 /** current transaction */ 292 const trx_t* trx; 293 /* Used by plugin parser */ 294 fts_ast_node_t* up_node; /*!< Direct up node */ 295 bool go_up; /*!< Flag if go one level up */ 296 }; 297 298 /* To track state during parsing */ 299 struct fts_ast_state_t { 300 mem_heap_t* heap; /*!< Heap to use for alloc */ 301 fts_ast_node_t* root; /*!< If all goes OK, then this 302 will point to the root.*/ 303 304 fts_ast_list_t list; /*!< List of nodes allocated */ 305 306 fts_lexer_t* lexer; /*!< Lexer callback + arg */ 307 CHARSET_INFO* charset; /*!< charset used for 308 tokenization */ 309 /* Used by plugin parser */ 310 fts_ast_node_t* cur_node; /*!< Current node into which 311 we add new node */ 312 int depth; /*!< Depth of parsing state */ 313 }; 314 315 /******************************************************************//** 316 Create an AST term node, makes a copy of ptr for plugin parser 317 @return node */ 318 extern 319 fts_ast_node_t* 320 fts_ast_create_node_term_for_parser( 321 /*==========i=====================*/ 322 void* arg, /*!< in: ast state */ 323 const char* ptr, /*!< in: term string */ 324 const ulint len); /*!< in: term string length */ 325 326 /******************************************************************//** 327 Create an AST phrase list node for plugin parser 328 @return node */ 329 extern 330 fts_ast_node_t* 331 fts_ast_create_node_phrase_list( 332 /*============================*/ 333 void* arg); /*!< in: ast state */ 334 335 #ifdef UNIV_DEBUG 336 const char* 337 fts_ast_node_type_get(fts_ast_type_t type); 338 #endif /* UNIV_DEBUG */ 339 340 #endif /* INNOBASE_FSTS0AST_H */ 341