1 /***************************************************************************** 2 3 Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. 4 5 This program is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License, version 2.0, as published by the 7 Free Software Foundation. 8 9 This program is also distributed with certain software (including but not 10 limited to OpenSSL) that is licensed under separate terms, as designated in a 11 particular file or component or in included license documentation. The authors 12 of MySQL hereby grant you an additional permission to link the program and 13 your derivative works with the separately licensed software that they have 14 included with MySQL. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 19 for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25 *****************************************************************************/ 26 27 /** @file include/fts0ast.h 28 The FTS query parser (AST) abstract syntax tree routines 29 30 Created 2007/03/16/03 Sunny Bains 31 *******************************************************/ 32 33 #ifndef INNOBASE_FST0AST_H 34 #define INNOBASE_FST0AST_H 35 36 #include "ha_prototypes.h" 37 #include "mem0mem.h" 38 39 #ifdef UNIV_PFS_MEMORY 40 41 #define malloc(A) ut_malloc_nokey(A) 42 #define free(A) ut_free(A) 43 #define realloc(P, A) ut_realloc(P, A) 44 45 #endif /* UNIV_PFS_MEMORY */ 46 47 /* The type of AST Node */ 48 enum fts_ast_type_t { 49 FTS_AST_OPER, /*!< Operator */ 50 FTS_AST_NUMB, /*!< Number */ 51 FTS_AST_TERM, /*!< Term (or word) */ 52 FTS_AST_TEXT, /*!< Text string */ 53 FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser 54 The difference from text type 55 is that we tokenize text into 56 term list */ 57 FTS_AST_LIST, /*!< Expression list */ 58 FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ 59 }; 60 61 /* The FTS query operators that we support */ 62 enum fts_ast_oper_t { 63 FTS_NONE, /*!< No operator */ 64 65 FTS_IGNORE, /*!< Ignore rows that contain 66 this word */ 67 68 FTS_EXIST, /*!< Include rows that contain 69 this word */ 70 71 FTS_NEGATE, /*!< Include rows that contain 72 this word but rank them 73 lower*/ 74 75 FTS_INCR_RATING, /*!< Increase the rank for this 76 word*/ 77 78 FTS_DECR_RATING, /*!< Decrease the rank for this 79 word*/ 80 81 FTS_DISTANCE, /*!< Proximity distance */ 82 FTS_IGNORE_SKIP, /*!< Transient node operator 83 signifies that this is a 84 FTS_IGNORE node, and ignored in 85 the first pass of 86 fts_ast_visit() */ 87 FTS_EXIST_SKIP /*!< Transient node operator 88 signifies that this ia a 89 FTS_EXIST node, and ignored in 90 the first pass of 91 fts_ast_visit() */ 92 }; 93 94 /* Data types used by the FTS parser */ 95 struct fts_lexer_t; 96 struct fts_ast_node_t; 97 struct fts_ast_state_t; 98 struct fts_ast_string_t; 99 100 typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t *, void *); 101 102 /******************************************************************** 103 Parse the string using the lexer setup within state.*/ 104 int fts_parse( 105 /* out: 0 on OK, 1 on error */ 106 fts_ast_state_t *state); /*!< in: ast state instance.*/ 107 108 /******************************************************************** 109 Create an AST operator node */ 110 extern fts_ast_node_t *fts_ast_create_node_oper( 111 void *arg, /*!< in: ast state */ 112 fts_ast_oper_t oper); /*!< in: ast operator */ 113 /******************************************************************** 114 Create an AST term node, makes a copy of ptr */ 115 extern fts_ast_node_t *fts_ast_create_node_term( 116 void *arg, /*!< in: ast state */ 117 const fts_ast_string_t *ptr); /*!< in: term string */ 118 /******************************************************************** 119 Create an AST text node */ 120 extern fts_ast_node_t *fts_ast_create_node_text( 121 void *arg, /*!< in: ast state */ 122 const fts_ast_string_t *ptr); /*!< in: text string */ 123 /******************************************************************** 124 Create an AST expr list node */ 125 extern fts_ast_node_t *fts_ast_create_node_list( 126 void *arg, /*!< in: ast state */ 127 fts_ast_node_t *expr); /*!< in: ast expr */ 128 /******************************************************************** 129 Create a sub-expression list node. This function takes ownership of 130 expr and is responsible for deleting it. */ 131 extern fts_ast_node_t *fts_ast_create_node_subexp_list( 132 /* out: new node */ 133 void *arg, /*!< in: ast state instance */ 134 fts_ast_node_t *expr); /*!< in: ast expr instance */ 135 /******************************************************************** 136 Set the wildcard attribute of a term.*/ 137 extern void fts_ast_term_set_wildcard( 138 fts_ast_node_t *node); /*!< in: term to change */ 139 /******************************************************************** 140 Set the proximity attribute of a text node. */ 141 void fts_ast_text_set_distance(fts_ast_node_t *node, /*!< in/out: text node */ 142 ulint distance); /*!< in: the text proximity 143 distance */ 144 /** Free a fts_ast_node_t instance. 145 @return next node to free */ 146 fts_ast_node_t *fts_ast_free_node( 147 fts_ast_node_t *node); /*!< in: node to free */ 148 /******************************************************************** 149 Add a sub-expression to an AST*/ 150 extern fts_ast_node_t *fts_ast_add_node( 151 fts_ast_node_t *list, /*!< in: list node instance */ 152 fts_ast_node_t *node); /*!< in: (sub) expr to add */ 153 /******************************************************************** 154 Print the AST node recursively.*/ 155 extern void fts_ast_node_print( 156 fts_ast_node_t *node); /*!< in: ast node to print */ 157 /******************************************************************** 158 Free node and expr allocations.*/ 159 extern void fts_ast_state_free(fts_ast_state_t *state); /*!< in: state instance 160 to free */ 161 /** Check only union operation involved in the node 162 @param[in] node ast node to check 163 @return true if the node contains only union else false. */ 164 bool fts_ast_node_check_union(fts_ast_node_t *node); 165 166 /** Traverse the AST - in-order traversal. 167 @return DB_SUCCESS if all went well */ 168 dberr_t fts_ast_visit(fts_ast_oper_t oper, /*!< in: FTS operator */ 169 fts_ast_node_t *node, /*!< in: instance to traverse*/ 170 fts_ast_callback visitor, /*!< in: callback */ 171 void *arg, /*!< in: callback arg */ 172 bool *has_ignore) /*!< out: whether we encounter 173 and ignored processing an 174 operator, currently we only 175 ignore FTS_IGNORE operator */ 176 MY_ATTRIBUTE((warn_unused_result)); 177 /******************************************************************** 178 Create a lex instance.*/ 179 fts_lexer_t *fts_lexer_create(ibool boolean_mode, /*!< in: query type */ 180 const byte *query, /*!< in: query string */ 181 ulint query_len) /*!< in: query string len */ 182 MY_ATTRIBUTE((malloc, warn_unused_result)); 183 /******************************************************************** 184 Free an fts_lexer_t instance.*/ 185 void fts_lexer_free(fts_lexer_t *fts_lexer); /*!< in: lexer instance to 186 free */ 187 188 /** 189 Create an ast string object, with NUL-terminator, so the string 190 has one more byte than len 191 @param[in] str pointer to string 192 @param[in] len length of the string 193 @return ast string with NUL-terminator */ 194 fts_ast_string_t *fts_ast_string_create(const byte *str, ulint len); 195 196 /** 197 Free an ast string instance 198 @param[in,out] ast_str string to free */ 199 void fts_ast_string_free(fts_ast_string_t *ast_str); 200 201 /** 202 Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul 203 @param[in] ast_str string to translate 204 @param[in] base the base 205 @return translated number */ 206 ulint fts_ast_string_to_ul(const fts_ast_string_t *ast_str, int base); 207 208 /* String of length len. 209 We always store the string of length len with a terminating '\0', 210 regardless of there is any 0x00 in the string itself */ 211 struct fts_ast_string_t { 212 /*!< Pointer to string. */ 213 byte *str; 214 215 /*!< Length of the string. */ 216 ulint len; 217 }; 218 219 /* Query term type */ 220 struct fts_ast_term_t { 221 fts_ast_string_t *ptr; /*!< Pointer to term string.*/ 222 ibool wildcard; /*!< TRUE if wild card set.*/ 223 }; 224 225 /* Query text type */ 226 struct fts_ast_text_t { 227 fts_ast_string_t *ptr; /*!< Pointer to text string.*/ 228 ulint distance; /*!< > 0 if proximity distance 229 set */ 230 }; 231 232 /* The list of nodes in an expr list */ 233 struct fts_ast_list_t { 234 fts_ast_node_t *head; /*!< Children list head */ 235 fts_ast_node_t *tail; /*!< Children list tail */ 236 }; 237 238 /* FTS AST node to store the term, text, operator and sub-expressions.*/ 239 struct fts_ast_node_t { 240 fts_ast_type_t type; /*!< The type of node */ 241 fts_ast_text_t text; /*!< Text node */ 242 fts_ast_term_t term; /*!< Term node */ 243 fts_ast_oper_t oper; /*!< Operator value */ 244 fts_ast_list_t list; /*!< Expression list */ 245 fts_ast_node_t *next; /*!< Link for expr list */ 246 fts_ast_node_t *next_alloc; /*!< For tracking allocations */ 247 bool visited; /*!< whether this node is 248 already processed */ 249 trx_t *trx; 250 /* Used by plugin parser */ 251 fts_ast_node_t *up_node; /*!< Direct up node */ 252 bool go_up; /*!< Flag if go one level up */ 253 }; 254 255 /* To track state during parsing */ 256 struct fts_ast_state_t { 257 mem_heap_t *heap; /*!< Heap to use for alloc */ 258 fts_ast_node_t *root; /*!< If all goes OK, then this 259 will point to the root.*/ 260 261 fts_ast_list_t list; /*!< List of nodes allocated */ 262 263 fts_lexer_t *lexer; /*!< Lexer callback + arg */ 264 CHARSET_INFO *charset; /*!< charset used for 265 tokenization */ 266 /* Used by plugin parser */ 267 fts_ast_node_t *cur_node; /*!< Current node into which 268 we add new node */ 269 int depth; /*!< Depth of parsing state */ 270 }; 271 272 /** Create an AST term node, makes a copy of ptr for plugin parser 273 @return node */ 274 extern fts_ast_node_t *fts_ast_create_node_term_for_parser( 275 /*==========i=====================*/ 276 void *arg, /*!< in: ast state */ 277 const char *ptr, /*!< in: term string */ 278 const ulint len); /*!< in: term string length */ 279 280 /** Create an AST phrase list node for plugin parser 281 @return node */ 282 extern fts_ast_node_t *fts_ast_create_node_phrase_list( 283 void *arg); /*!< in: ast state */ 284 285 #ifdef UNIV_DEBUG 286 const char *fts_ast_node_type_get(fts_ast_type_t type); 287 #endif /* UNIV_DEBUG */ 288 289 #endif /* INNOBASE_FSTS0AST_H */ 290