1 /***************************************************************************** 2 3 Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License, version 2.0, 7 as published by the Free Software Foundation. 8 9 This program is also distributed with certain software (including 10 but not limited to OpenSSL) that is licensed under separate terms, 11 as designated in a particular file or component or in included license 12 documentation. The authors of MySQL hereby grant you an additional 13 permission to link the program and your derivative works with the 14 separately licensed software that they have included with MySQL. 15 16 This program is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 GNU General Public License, version 2.0, for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 24 25 *****************************************************************************/ 26 27 /******************************************************************//** 28 @file include/fts0ast.h 29 The FTS query parser (AST) abstract syntax tree routines 30 31 Created 2007/03/16/03 Sunny Bains 32 *******************************************************/ 33 34 #ifndef INNOBASE_FST0AST_H 35 #define INNOBASE_FST0AST_H 36 37 #include "mem0mem.h" 38 #include "ha_prototypes.h" 39 40 /* The type of AST Node */ 41 enum fts_ast_type_t { 42 FTS_AST_OPER, /*!< Operator */ 43 FTS_AST_NUMB, /*!< Number */ 44 FTS_AST_TERM, /*!< Term (or word) */ 45 FTS_AST_TEXT, /*!< Text string */ 46 FTS_AST_LIST, /*!< Expression list */ 47 FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ 48 }; 49 50 /* The FTS query operators that we support */ 51 enum fts_ast_oper_t { 52 FTS_NONE, /*!< No operator */ 53 54 FTS_IGNORE, /*!< Ignore rows that contain 55 this word */ 56 57 FTS_EXIST, /*!< Include rows that contain 58 this word */ 59 60 FTS_NEGATE, /*!< Include rows that contain 61 this word but rank them 62 lower*/ 63 64 FTS_INCR_RATING, /*!< Increase the rank for this 65 word*/ 66 67 FTS_DECR_RATING, /*!< Decrease the rank for this 68 word*/ 69 70 FTS_DISTANCE, /*!< Proximity distance */ 71 FTS_IGNORE_SKIP, /*!< Transient node operator 72 signifies that this is a 73 FTS_IGNORE node, and ignored in 74 the first pass of 75 fts_ast_visit() */ 76 FTS_EXIST_SKIP /*!< Transient node operator 77 signifies that this ia a 78 FTS_EXIST node, and ignored in 79 the first pass of 80 fts_ast_visit() */ 81 }; 82 83 /* Data types used by the FTS parser */ 84 struct fts_lexer_t; 85 struct fts_ast_node_t; 86 struct fts_ast_state_t; 87 struct fts_ast_string_t; 88 89 typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); 90 91 /******************************************************************** 92 Parse the string using the lexer setup within state.*/ 93 int 94 fts_parse( 95 /*======*/ 96 /* out: 0 on OK, 1 on error */ 97 fts_ast_state_t* state); /*!< in: ast state instance.*/ 98 99 /******************************************************************** 100 Create an AST operator node */ 101 extern 102 fts_ast_node_t* 103 fts_ast_create_node_oper( 104 /*=====================*/ 105 void* arg, /*!< in: ast state */ 106 fts_ast_oper_t oper); /*!< in: ast operator */ 107 /******************************************************************** 108 Create an AST term node, makes a copy of ptr */ 109 extern 110 fts_ast_node_t* 111 fts_ast_create_node_term( 112 /*=====================*/ 113 void* arg, /*!< in: ast state */ 114 const fts_ast_string_t* ptr); /*!< in: term string */ 115 /******************************************************************** 116 Create an AST text node */ 117 extern 118 fts_ast_node_t* 119 fts_ast_create_node_text( 120 /*=====================*/ 121 void* arg, /*!< in: ast state */ 122 const fts_ast_string_t* ptr); /*!< in: text string */ 123 /******************************************************************** 124 Create an AST expr list node */ 125 extern 126 fts_ast_node_t* 127 fts_ast_create_node_list( 128 /*=====================*/ 129 void* arg, /*!< in: ast state */ 130 fts_ast_node_t* expr); /*!< in: ast expr */ 131 /******************************************************************** 132 Create a sub-expression list node. This function takes ownership of 133 expr and is responsible for deleting it. */ 134 extern 135 fts_ast_node_t* 136 fts_ast_create_node_subexp_list( 137 /*============================*/ 138 /* out: new node */ 139 void* arg, /*!< in: ast state instance */ 140 fts_ast_node_t* expr); /*!< in: ast expr instance */ 141 /******************************************************************** 142 Set the wildcard attribute of a term.*/ 143 extern 144 void 145 fts_ast_term_set_wildcard( 146 /*======================*/ 147 fts_ast_node_t* node); /*!< in: term to change */ 148 /******************************************************************** 149 Set the proximity attribute of a text node. */ 150 151 void 152 fts_ast_term_set_distance( 153 /*======================*/ 154 fts_ast_node_t* node, /*!< in/out: text node */ 155 ulint distance); /*!< in: the text proximity 156 distance */ 157 /********************************************************************//** 158 Free a fts_ast_node_t instance. 159 @return next node to free */ 160 UNIV_INTERN 161 fts_ast_node_t* 162 fts_ast_free_node( 163 /*==============*/ 164 fts_ast_node_t* node); /*!< in: node to free */ 165 /******************************************************************** 166 Add a sub-expression to an AST*/ 167 extern 168 fts_ast_node_t* 169 fts_ast_add_node( 170 /*=============*/ 171 fts_ast_node_t* list, /*!< in: list node instance */ 172 fts_ast_node_t* node); /*!< in: (sub) expr to add */ 173 /******************************************************************** 174 Print the AST node recursively.*/ 175 extern 176 void 177 fts_ast_node_print( 178 /*===============*/ 179 fts_ast_node_t* node); /*!< in: ast node to print */ 180 /******************************************************************** 181 For tracking node allocations, in case there is an during parsing.*/ 182 extern 183 void 184 fts_ast_state_add_node( 185 /*===================*/ 186 fts_ast_state_t*state, /*!< in: ast state instance */ 187 fts_ast_node_t* node); /*!< in: node to add to state */ 188 /******************************************************************** 189 Free node and expr allocations.*/ 190 extern 191 void 192 fts_ast_state_free( 193 /*===============*/ 194 fts_ast_state_t*state); /*!< in: state instance 195 to free */ 196 /******************************************************************//** 197 Traverse the AST - in-order traversal. 198 @return DB_SUCCESS if all went well */ 199 UNIV_INTERN 200 dberr_t 201 fts_ast_visit( 202 /*==========*/ 203 fts_ast_oper_t oper, /*!< in: FTS operator */ 204 fts_ast_node_t* node, /*!< in: instance to traverse*/ 205 fts_ast_callback visitor, /*!< in: callback */ 206 void* arg, /*!< in: callback arg */ 207 bool* has_ignore) /*!< out: whether we encounter 208 and ignored processing an 209 operator, currently we only 210 ignore FTS_IGNORE operator */ 211 MY_ATTRIBUTE((nonnull, warn_unused_result)); 212 /*****************************************************************//** 213 Process (nested) sub-expression, create a new result set to store the 214 sub-expression result by processing nodes under current sub-expression 215 list. Merge the sub-expression result with that of parent expression list. 216 @return DB_SUCCESS if all went well */ 217 UNIV_INTERN 218 dberr_t 219 fts_ast_visit_sub_exp( 220 /*==================*/ 221 fts_ast_node_t* node, /*!< in: instance to traverse*/ 222 fts_ast_callback visitor, /*!< in: callback */ 223 void* arg) /*!< in: callback arg */ 224 MY_ATTRIBUTE((nonnull, warn_unused_result)); 225 /******************************************************************** 226 Create a lex instance.*/ 227 UNIV_INTERN 228 fts_lexer_t* 229 fts_lexer_create( 230 /*=============*/ 231 ibool boolean_mode, /*!< in: query type */ 232 const byte* query, /*!< in: query string */ 233 ulint query_len) /*!< in: query string len */ 234 MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); 235 /******************************************************************** 236 Free an fts_lexer_t instance.*/ 237 UNIV_INTERN 238 void 239 fts_lexer_free( 240 /*===========*/ 241 fts_lexer_t* fts_lexer) /*!< in: lexer instance to 242 free */ 243 MY_ATTRIBUTE((nonnull)); 244 245 /** 246 Create an ast string object, with NUL-terminator, so the string 247 has one more byte than len 248 @param[in] str pointer to string 249 @param[in] len length of the string 250 @return ast string with NUL-terminator */ 251 UNIV_INTERN 252 fts_ast_string_t* 253 fts_ast_string_create( 254 const byte* str, 255 ulint len); 256 257 /** 258 Free an ast string instance 259 @param[in,out] ast_str string to free */ 260 UNIV_INTERN 261 void 262 fts_ast_string_free( 263 fts_ast_string_t* ast_str); 264 265 /** 266 Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul 267 @param[in] str string to translate 268 @param[in] base the base 269 @return translated number */ 270 UNIV_INTERN 271 ulint 272 fts_ast_string_to_ul( 273 const fts_ast_string_t* ast_str, 274 int base); 275 276 /** 277 Print the ast string 278 @param[in] str string to print */ 279 UNIV_INTERN 280 void 281 fts_ast_string_print( 282 const fts_ast_string_t* ast_str); 283 284 /* String of length len. 285 We always store the string of length len with a terminating '\0', 286 regardless of there is any 0x00 in the string itself */ 287 struct fts_ast_string_t { 288 /*!< Pointer to string. */ 289 byte* str; 290 291 /*!< Length of the string. */ 292 ulint len; 293 }; 294 295 /* Query term type */ 296 struct fts_ast_term_t { 297 fts_ast_string_t* ptr; /*!< Pointer to term string.*/ 298 ibool wildcard; /*!< TRUE if wild card set.*/ 299 }; 300 301 /* Query text type */ 302 struct fts_ast_text_t { 303 fts_ast_string_t* ptr; /*!< Pointer to text string.*/ 304 ulint distance; /*!< > 0 if proximity distance 305 set */ 306 }; 307 308 /* The list of nodes in an expr list */ 309 struct fts_ast_list_t { 310 fts_ast_node_t* head; /*!< Children list head */ 311 fts_ast_node_t* tail; /*!< Children list tail */ 312 }; 313 314 /* FTS AST node to store the term, text, operator and sub-expressions.*/ 315 struct fts_ast_node_t { 316 fts_ast_type_t type; /*!< The type of node */ 317 fts_ast_text_t text; /*!< Text node */ 318 fts_ast_term_t term; /*!< Term node */ 319 fts_ast_oper_t oper; /*!< Operator value */ 320 fts_ast_list_t list; /*!< Expression list */ 321 fts_ast_node_t* next; /*!< Link for expr list */ 322 fts_ast_node_t* next_alloc; /*!< For tracking allocations */ 323 bool visited; /*!< whether this node is 324 already processed */ 325 }; 326 327 /* To track state during parsing */ 328 struct fts_ast_state_t { 329 mem_heap_t* heap; /*!< Heap to use for alloc */ 330 fts_ast_node_t* root; /*!< If all goes OK, then this 331 will point to the root.*/ 332 333 fts_ast_list_t list; /*!< List of nodes allocated */ 334 335 fts_lexer_t* lexer; /*!< Lexer callback + arg */ 336 CHARSET_INFO* charset; /*!< charset used for 337 tokenization */ 338 }; 339 340 #ifdef UNIV_DEBUG 341 const char* 342 fts_ast_oper_name_get(fts_ast_oper_t oper); 343 const char* 344 fts_ast_node_type_get(fts_ast_type_t type); 345 #endif /* UNIV_DEBUG */ 346 347 #endif /* INNOBASE_FSTS0AST_H */ 348