1 /*****************************************************************************
2 
3 Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2016, 2020, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /******************************************************************//**
21 @file include/fts0ast.h
22 The FTS query parser (AST) abstract syntax tree routines
23 
24 Created 2007/03/16/03 Sunny Bains
25 *******************************************************/
26 
27 #ifndef INNOBASE_FST0AST_H
28 #define INNOBASE_FST0AST_H
29 
30 #include "mem0mem.h"
31 
32 #ifdef UNIV_PFS_MEMORY
33 
34 #define malloc(A)	ut_malloc_nokey(A)
35 #define free(A)		ut_free(A)
36 #define realloc(P, A)	ut_realloc(P, A)
37 
38 #endif /* UNIV_PFS_MEMORY */
39 
40 /* The type of AST Node */
41 enum fts_ast_type_t {
42 	FTS_AST_OPER,				/*!< Operator */
43 	FTS_AST_NUMB,				/*!< Number */
44 	FTS_AST_TERM,				/*!< Term (or word) */
45 	FTS_AST_TEXT,				/*!< Text string */
46 	FTS_AST_PARSER_PHRASE_LIST,		/*!< Phase for plugin parser
47 						The difference from text type
48 						is that we tokenize text into
49 						term list */
50 	FTS_AST_LIST,				/*!< Expression list */
51 	FTS_AST_SUBEXP_LIST			/*!< Sub-Expression list */
52 };
53 
54 /* The FTS query operators that we support */
55 enum fts_ast_oper_t {
56 	FTS_NONE,				/*!< No operator */
57 
58 	FTS_IGNORE,				/*!< Ignore rows that contain
59 						this word */
60 
61 	FTS_EXIST,				/*!< Include rows that contain
62 						this word */
63 
64 	FTS_NEGATE,				/*!< Include rows that contain
65 						this word but rank them
66 						lower*/
67 
68 	FTS_INCR_RATING,			/*!< Increase the rank for this
69 						word*/
70 
71 	FTS_DECR_RATING,			/*!< Decrease the rank for this
72 						word*/
73 
74 	FTS_DISTANCE,				/*!< Proximity distance */
75 	FTS_IGNORE_SKIP,			/*!< Transient node operator
76 						signifies that this is a
77 						FTS_IGNORE node, and ignored in
78 						the first pass of
79 						fts_ast_visit() */
80 	FTS_EXIST_SKIP				/*!< Transient node operator
81 						signifies that this ia a
82 						FTS_EXIST node, and ignored in
83 						the first pass of
84 						fts_ast_visit() */
85 };
86 
87 /* Data types used by the FTS parser */
88 struct fts_lexer_t;
89 struct fts_ast_node_t;
90 struct fts_ast_state_t;
91 struct fts_ast_string_t;
92 
93 typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
94 
95 /********************************************************************
96 Parse the string using the lexer setup within state.*/
97 int
98 fts_parse(
99 /*======*/
100 						/* out: 0 on OK, 1 on error */
101 	fts_ast_state_t* state);		/*!< in: ast state instance.*/
102 
103 /********************************************************************
104 Create an AST operator node */
105 extern
106 fts_ast_node_t*
107 fts_ast_create_node_oper(
108 /*=====================*/
109 	void*		arg,			/*!< in: ast state */
110 	fts_ast_oper_t	oper);			/*!< in: ast operator */
111 /********************************************************************
112 Create an AST term node, makes a copy of ptr */
113 extern
114 fts_ast_node_t*
115 fts_ast_create_node_term(
116 /*=====================*/
117 	void*			arg,		/*!< in: ast state */
118 	const fts_ast_string_t*	ptr);		/*!< in: term string */
119 /********************************************************************
120 Create an AST text node */
121 extern
122 fts_ast_node_t*
123 fts_ast_create_node_text(
124 /*=====================*/
125 	void*			arg,		/*!< in: ast state */
126 	const fts_ast_string_t*	ptr);		/*!< in: text string */
127 /********************************************************************
128 Create an AST expr list node */
129 extern
130 fts_ast_node_t*
131 fts_ast_create_node_list(
132 /*=====================*/
133 	void*		arg,			/*!< in: ast state */
134 	fts_ast_node_t*	expr);			/*!< in: ast expr */
135 /********************************************************************
136 Create a sub-expression list node. This function takes ownership of
137 expr and is responsible for deleting it. */
138 extern
139 fts_ast_node_t*
140 fts_ast_create_node_subexp_list(
141 /*============================*/
142 						/* out: new node */
143 	void*		arg,			/*!< in: ast state instance */
144 	fts_ast_node_t*	expr);			/*!< in: ast expr instance */
145 /********************************************************************
146 Set the wildcard attribute of a term.*/
147 extern
148 void
149 fts_ast_term_set_wildcard(
150 /*======================*/
151 	fts_ast_node_t*	node);			/*!< in: term to change */
152 /********************************************************************
153 Set the proximity attribute of a text node. */
154 void
155 fts_ast_text_set_distance(
156 /*======================*/
157 	fts_ast_node_t*	node,			/*!< in/out: text node */
158 	ulint		distance);		/*!< in: the text proximity
159 						distance */
160 /********************************************************************//**
161 Free a fts_ast_node_t instance.
162 @return next node to free */
163 fts_ast_node_t*
164 fts_ast_free_node(
165 /*==============*/
166 	fts_ast_node_t*	node);			/*!< in: node to free */
167 /********************************************************************
168 Add a sub-expression to an AST*/
169 extern
170 fts_ast_node_t*
171 fts_ast_add_node(
172 /*=============*/
173 	fts_ast_node_t*	list,			/*!< in: list node instance */
174 	fts_ast_node_t*	node);			/*!< in: (sub) expr to add */
175 /********************************************************************
176 Print the AST node recursively.*/
177 extern
178 void
179 fts_ast_node_print(
180 /*===============*/
181 	fts_ast_node_t*	node);			/*!< in: ast node to print */
182 /********************************************************************
183 Free node and expr allocations.*/
184 extern
185 void
186 fts_ast_state_free(
187 /*===============*/
188 	fts_ast_state_t*state);			/*!< in: state instance
189 						to free */
190 /** Check only union operation involved in the node
191 @param[in]	node	ast node to check
192 @return true if the node contains only union else false. */
193 bool
194 fts_ast_node_check_union(
195 	fts_ast_node_t*	node);
196 
197 /******************************************************************//**
198 Traverse the AST - in-order traversal.
199 @return DB_SUCCESS if all went well */
200 dberr_t
201 fts_ast_visit(
202 /*==========*/
203 	fts_ast_oper_t		oper,		/*!< in: FTS operator */
204 	fts_ast_node_t*		node,		/*!< in: instance to traverse*/
205 	fts_ast_callback	visitor,	/*!< in: callback */
206 	void*			arg,		/*!< in: callback arg */
207 	bool*			has_ignore)	/*!< out: whether we encounter
208 						and ignored processing an
209 						operator, currently we only
210 						ignore FTS_IGNORE operator */
211 	MY_ATTRIBUTE((nonnull, warn_unused_result));
212 /********************************************************************
213 Create a lex instance.*/
214 fts_lexer_t*
215 fts_lexer_create(
216 /*=============*/
217 	ibool		boolean_mode,		/*!< in: query type */
218 	const byte*	query,			/*!< in: query string */
219 	ulint		query_len)		/*!< in: query string len */
220 	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
221 /********************************************************************
222 Free an fts_lexer_t instance.*/
223 void
224 fts_lexer_free(
225 /*===========*/
226 	fts_lexer_t*	fts_lexer)		/*!< in: lexer instance to
227 						free */
228 	MY_ATTRIBUTE((nonnull));
229 
230 /**
231 Create an ast string object, with NUL-terminator, so the string
232 has one more byte than len
233 @param[in] str		pointer to string
234 @param[in] len		length of the string
235 @return ast string with NUL-terminator */
236 fts_ast_string_t*
237 fts_ast_string_create(
238 	const byte*	str,
239 	ulint		len);
240 
241 /**
242 Free an ast string instance
243 @param[in,out] ast_str		string to free */
244 void
245 fts_ast_string_free(
246 	fts_ast_string_t*	ast_str);
247 
248 /**
249 Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
250 @param[in] str		string to translate
251 @param[in] base		the base
252 @return translated number */
253 ulint
254 fts_ast_string_to_ul(
255 	const fts_ast_string_t*	ast_str,
256 	int			base);
257 
258 /* String of length len.
259 We always store the string of length len with a terminating '\0',
260 regardless of there is any 0x00 in the string itself */
261 struct fts_ast_string_t {
262 	/*!< Pointer to string. */
263 	byte*		str;
264 
265 	/*!< Length of the string. */
266 	ulint		len;
267 };
268 
269 /* Query term type */
270 struct fts_ast_term_t {
271 	fts_ast_string_t*	ptr;		/*!< Pointer to term string.*/
272 	ibool			wildcard;	/*!< TRUE if wild card set.*/
273 };
274 
275 /* Query text type */
276 struct fts_ast_text_t {
277 	fts_ast_string_t*	ptr;		/*!< Pointer to text string.*/
278 	ulint			distance;	/*!< > 0 if proximity distance
279 						set */
280 };
281 
282 /* The list of nodes in an expr list */
283 struct fts_ast_list_t {
284 	fts_ast_node_t*	head;			/*!< Children list head */
285 	fts_ast_node_t*	tail;			/*!< Children list tail */
286 };
287 
288 /* FTS AST node to store the term, text, operator and sub-expressions.*/
289 struct fts_ast_node_t {
290 	fts_ast_type_t	type;			/*!< The type of node */
291 	fts_ast_text_t	text;			/*!< Text node */
292 	fts_ast_term_t	term;			/*!< Term node */
293 	fts_ast_oper_t	oper;			/*!< Operator value */
294 	fts_ast_list_t	list;			/*!< Expression list */
295 	fts_ast_node_t*	next;			/*!< Link for expr list */
296 	fts_ast_node_t*	next_alloc;		/*!< For tracking allocations */
297 	bool		visited;		/*!< whether this node is
298 						already processed */
299 	/** current transaction */
300 	const trx_t*	trx;
301 	/* Used by plugin parser */
302 	fts_ast_node_t* up_node;		/*!< Direct up node */
303 	bool		go_up;			/*!< Flag if go one level up */
304 };
305 
306 /* To track state during parsing */
307 struct fts_ast_state_t {
308 	mem_heap_t*	heap;			/*!< Heap to use for alloc */
309 	fts_ast_node_t*	root;			/*!< If all goes OK, then this
310 						will point to the root.*/
311 
312 	fts_ast_list_t	list;			/*!< List of nodes allocated */
313 
314 	fts_lexer_t*	lexer;			/*!< Lexer callback + arg */
315 	CHARSET_INFO*	charset;		/*!< charset used for
316 						tokenization */
317 	/* Used by plugin parser */
318 	fts_ast_node_t*	cur_node;		/*!< Current node into which
319 						 we add new node */
320 	int		depth;			/*!< Depth of parsing state */
321 };
322 
323 /******************************************************************//**
324 Create an AST term node, makes a copy of ptr for plugin parser
325 @return node */
326 extern
327 fts_ast_node_t*
328 fts_ast_create_node_term_for_parser(
329 /*==========i=====================*/
330 	void*		arg,			/*!< in: ast state */
331 	const char*	ptr,			/*!< in: term string */
332 	const ulint	len);			/*!< in: term string length */
333 
334 /******************************************************************//**
335 Create an AST phrase list node for plugin parser
336 @return node */
337 extern
338 fts_ast_node_t*
339 fts_ast_create_node_phrase_list(
340 /*============================*/
341 	void*		arg);			/*!< in: ast state */
342 
343 #ifdef UNIV_DEBUG
344 const char*
345 fts_ast_node_type_get(fts_ast_type_t	type);
346 #endif /* UNIV_DEBUG */
347 
348 #endif /* INNOBASE_FSTS0AST_H */
349