1 /***************************************************************************** 2 3 Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. 4 Copyright (c) 2016, 2020, MariaDB Corporation. 5 6 This program is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free Software 8 Foundation; version 2 of the License. 9 10 This program is distributed in the hope that it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License along with 15 this program; if not, write to the Free Software Foundation, Inc., 16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 17 18 *****************************************************************************/ 19 20 /******************************************************************//** 21 @file include/fts0ast.h 22 The FTS query parser (AST) abstract syntax tree routines 23 24 Created 2007/03/16/03 Sunny Bains 25 *******************************************************/ 26 27 #ifndef INNOBASE_FST0AST_H 28 #define INNOBASE_FST0AST_H 29 30 #include "mem0mem.h" 31 32 #ifdef UNIV_PFS_MEMORY 33 34 #define malloc(A) ut_malloc_nokey(A) 35 #define free(A) ut_free(A) 36 #define realloc(P, A) ut_realloc(P, A) 37 38 #endif /* UNIV_PFS_MEMORY */ 39 40 /* The type of AST Node */ 41 enum fts_ast_type_t { 42 FTS_AST_OPER, /*!< Operator */ 43 FTS_AST_NUMB, /*!< Number */ 44 FTS_AST_TERM, /*!< Term (or word) */ 45 FTS_AST_TEXT, /*!< Text string */ 46 FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser 47 The difference from text type 48 is that we tokenize text into 49 term list */ 50 FTS_AST_LIST, /*!< Expression list */ 51 FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ 52 }; 53 54 /* The FTS query operators that we support */ 55 enum fts_ast_oper_t { 56 FTS_NONE, /*!< No operator */ 57 58 FTS_IGNORE, /*!< Ignore rows that contain 59 this word */ 60 61 FTS_EXIST, /*!< Include rows that contain 62 this word */ 63 64 FTS_NEGATE, /*!< Include rows that contain 65 this word but rank them 66 lower*/ 67 68 FTS_INCR_RATING, /*!< Increase the rank for this 69 word*/ 70 71 FTS_DECR_RATING, /*!< Decrease the rank for this 72 word*/ 73 74 FTS_DISTANCE, /*!< Proximity distance */ 75 FTS_IGNORE_SKIP, /*!< Transient node operator 76 signifies that this is a 77 FTS_IGNORE node, and ignored in 78 the first pass of 79 fts_ast_visit() */ 80 FTS_EXIST_SKIP /*!< Transient node operator 81 signifies that this ia a 82 FTS_EXIST node, and ignored in 83 the first pass of 84 fts_ast_visit() */ 85 }; 86 87 /* Data types used by the FTS parser */ 88 struct fts_lexer_t; 89 struct fts_ast_node_t; 90 struct fts_ast_state_t; 91 struct fts_ast_string_t; 92 93 typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); 94 95 /******************************************************************** 96 Parse the string using the lexer setup within state.*/ 97 int 98 fts_parse( 99 /*======*/ 100 /* out: 0 on OK, 1 on error */ 101 fts_ast_state_t* state); /*!< in: ast state instance.*/ 102 103 /******************************************************************** 104 Create an AST operator node */ 105 extern 106 fts_ast_node_t* 107 fts_ast_create_node_oper( 108 /*=====================*/ 109 void* arg, /*!< in: ast state */ 110 fts_ast_oper_t oper); /*!< in: ast operator */ 111 /******************************************************************** 112 Create an AST term node, makes a copy of ptr */ 113 extern 114 fts_ast_node_t* 115 fts_ast_create_node_term( 116 /*=====================*/ 117 void* arg, /*!< in: ast state */ 118 const fts_ast_string_t* ptr); /*!< in: term string */ 119 /******************************************************************** 120 Create an AST text node */ 121 extern 122 fts_ast_node_t* 123 fts_ast_create_node_text( 124 /*=====================*/ 125 void* arg, /*!< in: ast state */ 126 const fts_ast_string_t* ptr); /*!< in: text string */ 127 /******************************************************************** 128 Create an AST expr list node */ 129 extern 130 fts_ast_node_t* 131 fts_ast_create_node_list( 132 /*=====================*/ 133 void* arg, /*!< in: ast state */ 134 fts_ast_node_t* expr); /*!< in: ast expr */ 135 /******************************************************************** 136 Create a sub-expression list node. This function takes ownership of 137 expr and is responsible for deleting it. */ 138 extern 139 fts_ast_node_t* 140 fts_ast_create_node_subexp_list( 141 /*============================*/ 142 /* out: new node */ 143 void* arg, /*!< in: ast state instance */ 144 fts_ast_node_t* expr); /*!< in: ast expr instance */ 145 /******************************************************************** 146 Set the wildcard attribute of a term.*/ 147 extern 148 void 149 fts_ast_term_set_wildcard( 150 /*======================*/ 151 fts_ast_node_t* node); /*!< in: term to change */ 152 /******************************************************************** 153 Set the proximity attribute of a text node. */ 154 void 155 fts_ast_text_set_distance( 156 /*======================*/ 157 fts_ast_node_t* node, /*!< in/out: text node */ 158 ulint distance); /*!< in: the text proximity 159 distance */ 160 /********************************************************************//** 161 Free a fts_ast_node_t instance. 162 @return next node to free */ 163 fts_ast_node_t* 164 fts_ast_free_node( 165 /*==============*/ 166 fts_ast_node_t* node); /*!< in: node to free */ 167 /******************************************************************** 168 Add a sub-expression to an AST*/ 169 extern 170 fts_ast_node_t* 171 fts_ast_add_node( 172 /*=============*/ 173 fts_ast_node_t* list, /*!< in: list node instance */ 174 fts_ast_node_t* node); /*!< in: (sub) expr to add */ 175 /******************************************************************** 176 Print the AST node recursively.*/ 177 extern 178 void 179 fts_ast_node_print( 180 /*===============*/ 181 fts_ast_node_t* node); /*!< in: ast node to print */ 182 /******************************************************************** 183 Free node and expr allocations.*/ 184 extern 185 void 186 fts_ast_state_free( 187 /*===============*/ 188 fts_ast_state_t*state); /*!< in: state instance 189 to free */ 190 /** Check only union operation involved in the node 191 @param[in] node ast node to check 192 @return true if the node contains only union else false. */ 193 bool 194 fts_ast_node_check_union( 195 fts_ast_node_t* node); 196 197 /******************************************************************//** 198 Traverse the AST - in-order traversal. 199 @return DB_SUCCESS if all went well */ 200 dberr_t 201 fts_ast_visit( 202 /*==========*/ 203 fts_ast_oper_t oper, /*!< in: FTS operator */ 204 fts_ast_node_t* node, /*!< in: instance to traverse*/ 205 fts_ast_callback visitor, /*!< in: callback */ 206 void* arg, /*!< in: callback arg */ 207 bool* has_ignore) /*!< out: whether we encounter 208 and ignored processing an 209 operator, currently we only 210 ignore FTS_IGNORE operator */ 211 MY_ATTRIBUTE((nonnull, warn_unused_result)); 212 /******************************************************************** 213 Create a lex instance.*/ 214 fts_lexer_t* 215 fts_lexer_create( 216 /*=============*/ 217 ibool boolean_mode, /*!< in: query type */ 218 const byte* query, /*!< in: query string */ 219 ulint query_len) /*!< in: query string len */ 220 MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); 221 /******************************************************************** 222 Free an fts_lexer_t instance.*/ 223 void 224 fts_lexer_free( 225 /*===========*/ 226 fts_lexer_t* fts_lexer) /*!< in: lexer instance to 227 free */ 228 MY_ATTRIBUTE((nonnull)); 229 230 /** 231 Create an ast string object, with NUL-terminator, so the string 232 has one more byte than len 233 @param[in] str pointer to string 234 @param[in] len length of the string 235 @return ast string with NUL-terminator */ 236 fts_ast_string_t* 237 fts_ast_string_create( 238 const byte* str, 239 ulint len); 240 241 /** 242 Free an ast string instance 243 @param[in,out] ast_str string to free */ 244 void 245 fts_ast_string_free( 246 fts_ast_string_t* ast_str); 247 248 /** 249 Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul 250 @param[in] str string to translate 251 @param[in] base the base 252 @return translated number */ 253 ulint 254 fts_ast_string_to_ul( 255 const fts_ast_string_t* ast_str, 256 int base); 257 258 /* String of length len. 259 We always store the string of length len with a terminating '\0', 260 regardless of there is any 0x00 in the string itself */ 261 struct fts_ast_string_t { 262 /*!< Pointer to string. */ 263 byte* str; 264 265 /*!< Length of the string. */ 266 ulint len; 267 }; 268 269 /* Query term type */ 270 struct fts_ast_term_t { 271 fts_ast_string_t* ptr; /*!< Pointer to term string.*/ 272 ibool wildcard; /*!< TRUE if wild card set.*/ 273 }; 274 275 /* Query text type */ 276 struct fts_ast_text_t { 277 fts_ast_string_t* ptr; /*!< Pointer to text string.*/ 278 ulint distance; /*!< > 0 if proximity distance 279 set */ 280 }; 281 282 /* The list of nodes in an expr list */ 283 struct fts_ast_list_t { 284 fts_ast_node_t* head; /*!< Children list head */ 285 fts_ast_node_t* tail; /*!< Children list tail */ 286 }; 287 288 /* FTS AST node to store the term, text, operator and sub-expressions.*/ 289 struct fts_ast_node_t { 290 fts_ast_type_t type; /*!< The type of node */ 291 fts_ast_text_t text; /*!< Text node */ 292 fts_ast_term_t term; /*!< Term node */ 293 fts_ast_oper_t oper; /*!< Operator value */ 294 fts_ast_list_t list; /*!< Expression list */ 295 fts_ast_node_t* next; /*!< Link for expr list */ 296 fts_ast_node_t* next_alloc; /*!< For tracking allocations */ 297 bool visited; /*!< whether this node is 298 already processed */ 299 /** current transaction */ 300 const trx_t* trx; 301 /* Used by plugin parser */ 302 fts_ast_node_t* up_node; /*!< Direct up node */ 303 bool go_up; /*!< Flag if go one level up */ 304 }; 305 306 /* To track state during parsing */ 307 struct fts_ast_state_t { 308 mem_heap_t* heap; /*!< Heap to use for alloc */ 309 fts_ast_node_t* root; /*!< If all goes OK, then this 310 will point to the root.*/ 311 312 fts_ast_list_t list; /*!< List of nodes allocated */ 313 314 fts_lexer_t* lexer; /*!< Lexer callback + arg */ 315 CHARSET_INFO* charset; /*!< charset used for 316 tokenization */ 317 /* Used by plugin parser */ 318 fts_ast_node_t* cur_node; /*!< Current node into which 319 we add new node */ 320 int depth; /*!< Depth of parsing state */ 321 }; 322 323 /******************************************************************//** 324 Create an AST term node, makes a copy of ptr for plugin parser 325 @return node */ 326 extern 327 fts_ast_node_t* 328 fts_ast_create_node_term_for_parser( 329 /*==========i=====================*/ 330 void* arg, /*!< in: ast state */ 331 const char* ptr, /*!< in: term string */ 332 const ulint len); /*!< in: term string length */ 333 334 /******************************************************************//** 335 Create an AST phrase list node for plugin parser 336 @return node */ 337 extern 338 fts_ast_node_t* 339 fts_ast_create_node_phrase_list( 340 /*============================*/ 341 void* arg); /*!< in: ast state */ 342 343 #ifdef UNIV_DEBUG 344 const char* 345 fts_ast_node_type_get(fts_ast_type_t type); 346 #endif /* UNIV_DEBUG */ 347 348 #endif /* INNOBASE_FSTS0AST_H */ 349