1 /*****************************************************************************
2 
3 Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file include/fts0ast.h
28  The FTS query parser (AST) abstract syntax tree routines
29 
30  Created 2007/03/16/03 Sunny Bains
31  *******************************************************/
32 
33 #ifndef INNOBASE_FST0AST_H
34 #define INNOBASE_FST0AST_H
35 
36 #include "ha_prototypes.h"
37 #include "mem0mem.h"
38 
39 #ifdef UNIV_PFS_MEMORY
40 
41 #define malloc(A) ut_malloc_nokey(A)
42 #define free(A) ut_free(A)
43 #define realloc(P, A) ut_realloc(P, A)
44 
45 #endif /* UNIV_PFS_MEMORY */
46 
47 /* The type of AST Node */
48 enum fts_ast_type_t {
49   FTS_AST_OPER,               /*!< Operator */
50   FTS_AST_NUMB,               /*!< Number */
51   FTS_AST_TERM,               /*!< Term (or word) */
52   FTS_AST_TEXT,               /*!< Text string */
53   FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser
54                               The difference from text type
55                               is that we tokenize text into
56                               term list */
57   FTS_AST_LIST,               /*!< Expression list */
58   FTS_AST_SUBEXP_LIST         /*!< Sub-Expression list */
59 };
60 
61 /* The FTS query operators that we support */
62 enum fts_ast_oper_t {
63   FTS_NONE, /*!< No operator */
64 
65   FTS_IGNORE, /*!< Ignore rows that contain
66               this word */
67 
68   FTS_EXIST, /*!< Include rows that contain
69              this word */
70 
71   FTS_NEGATE, /*!< Include rows that contain
72               this word but rank them
73               lower*/
74 
75   FTS_INCR_RATING, /*!< Increase the rank for this
76                    word*/
77 
78   FTS_DECR_RATING, /*!< Decrease the rank for this
79                    word*/
80 
81   FTS_DISTANCE,    /*!< Proximity distance */
82   FTS_IGNORE_SKIP, /*!< Transient node operator
83                    signifies that this is a
84                    FTS_IGNORE node, and ignored in
85                    the first pass of
86                    fts_ast_visit() */
87   FTS_EXIST_SKIP   /*!< Transient node operator
88                    signifies that this ia a
89                    FTS_EXIST node, and ignored in
90                    the first pass of
91                    fts_ast_visit() */
92 };
93 
94 /* Data types used by the FTS parser */
95 struct fts_lexer_t;
96 struct fts_ast_node_t;
97 struct fts_ast_state_t;
98 struct fts_ast_string_t;
99 
100 typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t *, void *);
101 
102 /********************************************************************
103 Parse the string using the lexer setup within state.*/
104 int fts_parse(
105     /* out: 0 on OK, 1 on error */
106     fts_ast_state_t *state); /*!< in: ast state instance.*/
107 
108 /********************************************************************
109 Create an AST operator node */
110 extern fts_ast_node_t *fts_ast_create_node_oper(
111     void *arg,            /*!< in: ast state */
112     fts_ast_oper_t oper); /*!< in: ast operator */
113 /********************************************************************
114 Create an AST term node, makes a copy of ptr */
115 extern fts_ast_node_t *fts_ast_create_node_term(
116     void *arg,                    /*!< in: ast state */
117     const fts_ast_string_t *ptr); /*!< in: term string */
118 /********************************************************************
119 Create an AST text node */
120 extern fts_ast_node_t *fts_ast_create_node_text(
121     void *arg,                    /*!< in: ast state */
122     const fts_ast_string_t *ptr); /*!< in: text string */
123 /********************************************************************
124 Create an AST expr list node */
125 extern fts_ast_node_t *fts_ast_create_node_list(
126     void *arg,             /*!< in: ast state */
127     fts_ast_node_t *expr); /*!< in: ast expr */
128 /********************************************************************
129 Create a sub-expression list node. This function takes ownership of
130 expr and is responsible for deleting it. */
131 extern fts_ast_node_t *fts_ast_create_node_subexp_list(
132     /* out: new node */
133     void *arg,             /*!< in: ast state instance */
134     fts_ast_node_t *expr); /*!< in: ast expr instance */
135 /********************************************************************
136 Set the wildcard attribute of a term.*/
137 extern void fts_ast_term_set_wildcard(
138     fts_ast_node_t *node); /*!< in: term to change */
139 /********************************************************************
140 Set the proximity attribute of a text node. */
141 void fts_ast_text_set_distance(fts_ast_node_t *node, /*!< in/out: text node */
142                                ulint distance);      /*!< in: the text proximity
143                                                      distance */
144 /** Free a fts_ast_node_t instance.
145  @return next node to free */
146 fts_ast_node_t *fts_ast_free_node(
147     fts_ast_node_t *node); /*!< in: node to free */
148 /********************************************************************
149 Add a sub-expression to an AST*/
150 extern fts_ast_node_t *fts_ast_add_node(
151     fts_ast_node_t *list,  /*!< in: list node instance */
152     fts_ast_node_t *node); /*!< in: (sub) expr to add */
153 /********************************************************************
154 Print the AST node recursively.*/
155 extern void fts_ast_node_print(
156     fts_ast_node_t *node); /*!< in: ast node to print */
157 /********************************************************************
158 Free node and expr allocations.*/
159 extern void fts_ast_state_free(fts_ast_state_t *state); /*!< in: state instance
160                                                         to free */
161 /** Check only union operation involved in the node
162 @param[in]	node	ast node to check
163 @return true if the node contains only union else false. */
164 bool fts_ast_node_check_union(fts_ast_node_t *node);
165 
166 /** Traverse the AST - in-order traversal.
167  @return DB_SUCCESS if all went well */
168 dberr_t fts_ast_visit(fts_ast_oper_t oper,      /*!< in: FTS operator */
169                       fts_ast_node_t *node,     /*!< in: instance to traverse*/
170                       fts_ast_callback visitor, /*!< in: callback */
171                       void *arg,                /*!< in: callback arg */
172                       bool *has_ignore)         /*!< out: whether we encounter
173                                                 and ignored processing an
174                                                 operator, currently we only
175                                                 ignore FTS_IGNORE operator */
176     MY_ATTRIBUTE((warn_unused_result));
177 /********************************************************************
178 Create a lex instance.*/
179 fts_lexer_t *fts_lexer_create(ibool boolean_mode, /*!< in: query type */
180                               const byte *query,  /*!< in: query string */
181                               ulint query_len)    /*!< in: query string len */
182     MY_ATTRIBUTE((malloc, warn_unused_result));
183 /********************************************************************
184 Free an fts_lexer_t instance.*/
185 void fts_lexer_free(fts_lexer_t *fts_lexer); /*!< in: lexer instance to
186                                              free */
187 
188 /**
189 Create an ast string object, with NUL-terminator, so the string
190 has one more byte than len
191 @param[in] str		pointer to string
192 @param[in] len		length of the string
193 @return ast string with NUL-terminator */
194 fts_ast_string_t *fts_ast_string_create(const byte *str, ulint len);
195 
196 /**
197 Free an ast string instance
198 @param[in,out] ast_str	string to free */
199 void fts_ast_string_free(fts_ast_string_t *ast_str);
200 
201 /**
202 Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
203 @param[in] ast_str		string to translate
204 @param[in] base		the base
205 @return translated number */
206 ulint fts_ast_string_to_ul(const fts_ast_string_t *ast_str, int base);
207 
208 /* String of length len.
209 We always store the string of length len with a terminating '\0',
210 regardless of there is any 0x00 in the string itself */
211 struct fts_ast_string_t {
212   /*!< Pointer to string. */
213   byte *str;
214 
215   /*!< Length of the string. */
216   ulint len;
217 };
218 
219 /* Query term type */
220 struct fts_ast_term_t {
221   fts_ast_string_t *ptr; /*!< Pointer to term string.*/
222   ibool wildcard;        /*!< TRUE if wild card set.*/
223 };
224 
225 /* Query text type */
226 struct fts_ast_text_t {
227   fts_ast_string_t *ptr; /*!< Pointer to text string.*/
228   ulint distance;        /*!< > 0 if proximity distance
229                          set */
230 };
231 
232 /* The list of nodes in an expr list */
233 struct fts_ast_list_t {
234   fts_ast_node_t *head; /*!< Children list head */
235   fts_ast_node_t *tail; /*!< Children list tail */
236 };
237 
238 /* FTS AST node to store the term, text, operator and sub-expressions.*/
239 struct fts_ast_node_t {
240   fts_ast_type_t type;        /*!< The type of node */
241   fts_ast_text_t text;        /*!< Text node */
242   fts_ast_term_t term;        /*!< Term node */
243   fts_ast_oper_t oper;        /*!< Operator value */
244   fts_ast_list_t list;        /*!< Expression list */
245   fts_ast_node_t *next;       /*!< Link for expr list */
246   fts_ast_node_t *next_alloc; /*!< For tracking allocations */
247   bool visited;               /*!< whether this node is
248                               already processed */
249   trx_t *trx;
250   /* Used by plugin parser */
251   fts_ast_node_t *up_node; /*!< Direct up node */
252   bool go_up;              /*!< Flag if go one level up */
253 };
254 
255 /* To track state during parsing */
256 struct fts_ast_state_t {
257   mem_heap_t *heap;     /*!< Heap to use for alloc */
258   fts_ast_node_t *root; /*!< If all goes OK, then this
259                         will point to the root.*/
260 
261   fts_ast_list_t list; /*!< List of nodes allocated */
262 
263   fts_lexer_t *lexer;    /*!< Lexer callback + arg */
264   CHARSET_INFO *charset; /*!< charset used for
265                          tokenization */
266   /* Used by plugin parser */
267   fts_ast_node_t *cur_node; /*!< Current node into which
268                              we add new node */
269   int depth;                /*!< Depth of parsing state */
270 };
271 
272 /** Create an AST term node, makes a copy of ptr for plugin parser
273  @return node */
274 extern fts_ast_node_t *fts_ast_create_node_term_for_parser(
275     /*==========i=====================*/
276     void *arg,        /*!< in: ast state */
277     const char *ptr,  /*!< in: term string */
278     const ulint len); /*!< in: term string length */
279 
280 /** Create an AST phrase list node for plugin parser
281  @return node */
282 extern fts_ast_node_t *fts_ast_create_node_phrase_list(
283     void *arg); /*!< in: ast state */
284 
285 #ifdef UNIV_DEBUG
286 const char *fts_ast_node_type_get(fts_ast_type_t type);
287 #endif /* UNIV_DEBUG */
288 
289 #endif /* INNOBASE_FSTS0AST_H */
290