1 /*****************************************************************************
2 
3 Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2016, 2020, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /******************************************************************//**
21 @file include/fts0ast.h
22 The FTS query parser (AST) abstract syntax tree routines
23 
24 Created 2007/03/16/03 Sunny Bains
25 *******************************************************/
26 
27 #ifndef INNOBASE_FST0AST_H
28 #define INNOBASE_FST0AST_H
29 
30 #include "mem0mem.h"
31 
32 /* The type of AST Node */
33 enum fts_ast_type_t {
34 	FTS_AST_OPER,				/*!< Operator */
35 	FTS_AST_NUMB,				/*!< Number */
36 	FTS_AST_TERM,				/*!< Term (or word) */
37 	FTS_AST_TEXT,				/*!< Text string */
38 	FTS_AST_PARSER_PHRASE_LIST,		/*!< Phase for plugin parser
39 						The difference from text type
40 						is that we tokenize text into
41 						term list */
42 	FTS_AST_LIST,				/*!< Expression list */
43 	FTS_AST_SUBEXP_LIST			/*!< Sub-Expression list */
44 };
45 
46 /* The FTS query operators that we support */
47 enum fts_ast_oper_t {
48 	FTS_NONE,				/*!< No operator */
49 
50 	FTS_IGNORE,				/*!< Ignore rows that contain
51 						this word */
52 
53 	FTS_EXIST,				/*!< Include rows that contain
54 						this word */
55 
56 	FTS_NEGATE,				/*!< Include rows that contain
57 						this word but rank them
58 						lower*/
59 
60 	FTS_INCR_RATING,			/*!< Increase the rank for this
61 						word*/
62 
63 	FTS_DECR_RATING,			/*!< Decrease the rank for this
64 						word*/
65 
66 	FTS_DISTANCE,				/*!< Proximity distance */
67 	FTS_IGNORE_SKIP,			/*!< Transient node operator
68 						signifies that this is a
69 						FTS_IGNORE node, and ignored in
70 						the first pass of
71 						fts_ast_visit() */
72 	FTS_EXIST_SKIP				/*!< Transient node operator
73 						signifies that this ia a
74 						FTS_EXIST node, and ignored in
75 						the first pass of
76 						fts_ast_visit() */
77 };
78 
79 /* Data types used by the FTS parser */
80 struct fts_lexer_t;
81 struct fts_ast_node_t;
82 struct fts_ast_state_t;
83 struct fts_ast_string_t;
84 
85 typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
86 
87 /********************************************************************
88 Parse the string using the lexer setup within state.*/
89 int
90 fts_parse(
91 /*======*/
92 						/* out: 0 on OK, 1 on error */
93 	fts_ast_state_t* state);		/*!< in: ast state instance.*/
94 
95 /********************************************************************
96 Create an AST operator node */
97 extern
98 fts_ast_node_t*
99 fts_ast_create_node_oper(
100 /*=====================*/
101 	void*		arg,			/*!< in: ast state */
102 	fts_ast_oper_t	oper);			/*!< in: ast operator */
103 /********************************************************************
104 Create an AST term node, makes a copy of ptr */
105 extern
106 fts_ast_node_t*
107 fts_ast_create_node_term(
108 /*=====================*/
109 	void*			arg,		/*!< in: ast state */
110 	const fts_ast_string_t*	ptr);		/*!< in: term string */
111 /********************************************************************
112 Create an AST text node */
113 extern
114 fts_ast_node_t*
115 fts_ast_create_node_text(
116 /*=====================*/
117 	void*			arg,		/*!< in: ast state */
118 	const fts_ast_string_t*	ptr);		/*!< in: text string */
119 /********************************************************************
120 Create an AST expr list node */
121 extern
122 fts_ast_node_t*
123 fts_ast_create_node_list(
124 /*=====================*/
125 	void*		arg,			/*!< in: ast state */
126 	fts_ast_node_t*	expr);			/*!< in: ast expr */
127 /********************************************************************
128 Create a sub-expression list node. This function takes ownership of
129 expr and is responsible for deleting it. */
130 extern
131 fts_ast_node_t*
132 fts_ast_create_node_subexp_list(
133 /*============================*/
134 						/* out: new node */
135 	void*		arg,			/*!< in: ast state instance */
136 	fts_ast_node_t*	expr);			/*!< in: ast expr instance */
137 /********************************************************************
138 Set the wildcard attribute of a term.*/
139 extern
140 void
141 fts_ast_term_set_wildcard(
142 /*======================*/
143 	fts_ast_node_t*	node);			/*!< in: term to change */
144 /********************************************************************
145 Set the proximity attribute of a text node. */
146 void
147 fts_ast_text_set_distance(
148 /*======================*/
149 	fts_ast_node_t*	node,			/*!< in/out: text node */
150 	ulint		distance);		/*!< in: the text proximity
151 						distance */
152 /********************************************************************//**
153 Free a fts_ast_node_t instance.
154 @return next node to free */
155 fts_ast_node_t*
156 fts_ast_free_node(
157 /*==============*/
158 	fts_ast_node_t*	node);			/*!< in: node to free */
159 /********************************************************************
160 Add a sub-expression to an AST*/
161 extern
162 fts_ast_node_t*
163 fts_ast_add_node(
164 /*=============*/
165 	fts_ast_node_t*	list,			/*!< in: list node instance */
166 	fts_ast_node_t*	node);			/*!< in: (sub) expr to add */
167 /********************************************************************
168 Print the AST node recursively.*/
169 extern
170 void
171 fts_ast_node_print(
172 /*===============*/
173 	fts_ast_node_t*	node);			/*!< in: ast node to print */
174 /********************************************************************
175 Free node and expr allocations.*/
176 extern
177 void
178 fts_ast_state_free(
179 /*===============*/
180 	fts_ast_state_t*state);			/*!< in: state instance
181 						to free */
182 /** Check only union operation involved in the node
183 @param[in]	node	ast node to check
184 @return true if the node contains only union else false. */
185 bool
186 fts_ast_node_check_union(
187 	fts_ast_node_t*	node);
188 
189 /******************************************************************//**
190 Traverse the AST - in-order traversal.
191 @return DB_SUCCESS if all went well */
192 dberr_t
193 fts_ast_visit(
194 /*==========*/
195 	fts_ast_oper_t		oper,		/*!< in: FTS operator */
196 	fts_ast_node_t*		node,		/*!< in: instance to traverse*/
197 	fts_ast_callback	visitor,	/*!< in: callback */
198 	void*			arg,		/*!< in: callback arg */
199 	bool*			has_ignore)	/*!< out: whether we encounter
200 						and ignored processing an
201 						operator, currently we only
202 						ignore FTS_IGNORE operator */
203 	MY_ATTRIBUTE((nonnull, warn_unused_result));
204 /********************************************************************
205 Create a lex instance.*/
206 fts_lexer_t*
207 fts_lexer_create(
208 /*=============*/
209 	ibool		boolean_mode,		/*!< in: query type */
210 	const byte*	query,			/*!< in: query string */
211 	ulint		query_len)		/*!< in: query string len */
212 	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
213 /********************************************************************
214 Free an fts_lexer_t instance.*/
215 void
216 fts_lexer_free(
217 /*===========*/
218 	fts_lexer_t*	fts_lexer)		/*!< in: lexer instance to
219 						free */
220 	MY_ATTRIBUTE((nonnull));
221 
222 /**
223 Create an ast string object, with NUL-terminator, so the string
224 has one more byte than len
225 @param[in] str		pointer to string
226 @param[in] len		length of the string
227 @return ast string with NUL-terminator */
228 fts_ast_string_t*
229 fts_ast_string_create(
230 	const byte*	str,
231 	ulint		len);
232 
233 /**
234 Free an ast string instance
235 @param[in,out] ast_str		string to free */
236 void
237 fts_ast_string_free(
238 	fts_ast_string_t*	ast_str);
239 
240 /**
241 Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
242 @param[in] str		string to translate
243 @param[in] base		the base
244 @return translated number */
245 ulint
246 fts_ast_string_to_ul(
247 	const fts_ast_string_t*	ast_str,
248 	int			base);
249 
250 /* String of length len.
251 We always store the string of length len with a terminating '\0',
252 regardless of there is any 0x00 in the string itself */
253 struct fts_ast_string_t {
254 	/*!< Pointer to string. */
255 	byte*		str;
256 
257 	/*!< Length of the string. */
258 	ulint		len;
259 };
260 
261 /* Query term type */
262 struct fts_ast_term_t {
263 	fts_ast_string_t*	ptr;		/*!< Pointer to term string.*/
264 	ibool			wildcard;	/*!< TRUE if wild card set.*/
265 };
266 
267 /* Query text type */
268 struct fts_ast_text_t {
269 	fts_ast_string_t*	ptr;		/*!< Pointer to text string.*/
270 	ulint			distance;	/*!< > 0 if proximity distance
271 						set */
272 };
273 
274 /* The list of nodes in an expr list */
275 struct fts_ast_list_t {
276 	fts_ast_node_t*	head;			/*!< Children list head */
277 	fts_ast_node_t*	tail;			/*!< Children list tail */
278 };
279 
280 /* FTS AST node to store the term, text, operator and sub-expressions.*/
281 struct fts_ast_node_t {
282 	fts_ast_type_t	type;			/*!< The type of node */
283 	fts_ast_text_t	text;			/*!< Text node */
284 	fts_ast_term_t	term;			/*!< Term node */
285 	fts_ast_oper_t	oper;			/*!< Operator value */
286 	fts_ast_list_t	list;			/*!< Expression list */
287 	fts_ast_node_t*	next;			/*!< Link for expr list */
288 	fts_ast_node_t*	next_alloc;		/*!< For tracking allocations */
289 	bool		visited;		/*!< whether this node is
290 						already processed */
291 	/** current transaction */
292 	const trx_t*	trx;
293 	/* Used by plugin parser */
294 	fts_ast_node_t* up_node;		/*!< Direct up node */
295 	bool		go_up;			/*!< Flag if go one level up */
296 };
297 
298 /* To track state during parsing */
299 struct fts_ast_state_t {
300 	mem_heap_t*	heap;			/*!< Heap to use for alloc */
301 	fts_ast_node_t*	root;			/*!< If all goes OK, then this
302 						will point to the root.*/
303 
304 	fts_ast_list_t	list;			/*!< List of nodes allocated */
305 
306 	fts_lexer_t*	lexer;			/*!< Lexer callback + arg */
307 	CHARSET_INFO*	charset;		/*!< charset used for
308 						tokenization */
309 	/* Used by plugin parser */
310 	fts_ast_node_t*	cur_node;		/*!< Current node into which
311 						 we add new node */
312 	int		depth;			/*!< Depth of parsing state */
313 };
314 
315 /******************************************************************//**
316 Create an AST term node, makes a copy of ptr for plugin parser
317 @return node */
318 extern
319 fts_ast_node_t*
320 fts_ast_create_node_term_for_parser(
321 /*==========i=====================*/
322 	void*		arg,			/*!< in: ast state */
323 	const char*	ptr,			/*!< in: term string */
324 	const ulint	len);			/*!< in: term string length */
325 
326 /******************************************************************//**
327 Create an AST phrase list node for plugin parser
328 @return node */
329 extern
330 fts_ast_node_t*
331 fts_ast_create_node_phrase_list(
332 /*============================*/
333 	void*		arg);			/*!< in: ast state */
334 
335 #ifdef UNIV_DEBUG
336 const char*
337 fts_ast_node_type_get(fts_ast_type_t	type);
338 #endif /* UNIV_DEBUG */
339 
340 #endif /* INNOBASE_FSTS0AST_H */
341