1 /*****************************************************************************
2 
3 Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /******************************************************************//**
28 @file include/fts0ast.h
29 The FTS query parser (AST) abstract syntax tree routines
30 
31 Created 2007/03/16/03 Sunny Bains
32 *******************************************************/
33 
34 #ifndef INNOBASE_FST0AST_H
35 #define INNOBASE_FST0AST_H
36 
37 #include "mem0mem.h"
38 #include "ha_prototypes.h"
39 
40 /* The type of AST Node */
41 enum fts_ast_type_t {
42 	FTS_AST_OPER,				/*!< Operator */
43 	FTS_AST_NUMB,				/*!< Number */
44 	FTS_AST_TERM,				/*!< Term (or word) */
45 	FTS_AST_TEXT,				/*!< Text string */
46 	FTS_AST_LIST,				/*!< Expression list */
47 	FTS_AST_SUBEXP_LIST			/*!< Sub-Expression list */
48 };
49 
50 /* The FTS query operators that we support */
51 enum fts_ast_oper_t {
52 	FTS_NONE,				/*!< No operator */
53 
54 	FTS_IGNORE,				/*!< Ignore rows that contain
55 						this word */
56 
57 	FTS_EXIST,				/*!< Include rows that contain
58 						this word */
59 
60 	FTS_NEGATE,				/*!< Include rows that contain
61 						this word but rank them
62 						lower*/
63 
64 	FTS_INCR_RATING,			/*!< Increase the rank for this
65 						word*/
66 
67 	FTS_DECR_RATING,			/*!< Decrease the rank for this
68 						word*/
69 
70 	FTS_DISTANCE,				/*!< Proximity distance */
71 	FTS_IGNORE_SKIP,			/*!< Transient node operator
72 						signifies that this is a
73 						FTS_IGNORE node, and ignored in
74 						the first pass of
75 						fts_ast_visit() */
76 	FTS_EXIST_SKIP				/*!< Transient node operator
77 						signifies that this ia a
78 						FTS_EXIST node, and ignored in
79 						the first pass of
80 						fts_ast_visit() */
81 };
82 
83 /* Data types used by the FTS parser */
84 struct fts_lexer_t;
85 struct fts_ast_node_t;
86 struct fts_ast_state_t;
87 struct fts_ast_string_t;
88 
89 typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
90 
91 /********************************************************************
92 Parse the string using the lexer setup within state.*/
93 int
94 fts_parse(
95 /*======*/
96 						/* out: 0 on OK, 1 on error */
97 	fts_ast_state_t* state);		/*!< in: ast state instance.*/
98 
99 /********************************************************************
100 Create an AST operator node */
101 extern
102 fts_ast_node_t*
103 fts_ast_create_node_oper(
104 /*=====================*/
105 	void*		arg,			/*!< in: ast state */
106 	fts_ast_oper_t	oper);			/*!< in: ast operator */
107 /********************************************************************
108 Create an AST term node, makes a copy of ptr */
109 extern
110 fts_ast_node_t*
111 fts_ast_create_node_term(
112 /*=====================*/
113 	void*			arg,		/*!< in: ast state */
114 	const fts_ast_string_t*	ptr);		/*!< in: term string */
115 /********************************************************************
116 Create an AST text node */
117 extern
118 fts_ast_node_t*
119 fts_ast_create_node_text(
120 /*=====================*/
121 	void*			arg,		/*!< in: ast state */
122 	const fts_ast_string_t*	ptr);		/*!< in: text string */
123 /********************************************************************
124 Create an AST expr list node */
125 extern
126 fts_ast_node_t*
127 fts_ast_create_node_list(
128 /*=====================*/
129 	void*		arg,			/*!< in: ast state */
130 	fts_ast_node_t*	expr);			/*!< in: ast expr */
131 /********************************************************************
132 Create a sub-expression list node. This function takes ownership of
133 expr and is responsible for deleting it. */
134 extern
135 fts_ast_node_t*
136 fts_ast_create_node_subexp_list(
137 /*============================*/
138 						/* out: new node */
139 	void*		arg,			/*!< in: ast state instance */
140 	fts_ast_node_t*	expr);			/*!< in: ast expr instance */
141 /********************************************************************
142 Set the wildcard attribute of a term.*/
143 extern
144 void
145 fts_ast_term_set_wildcard(
146 /*======================*/
147 	fts_ast_node_t*	node);			/*!< in: term to change */
148 /********************************************************************
149 Set the proximity attribute of a text node. */
150 
151 void
152 fts_ast_term_set_distance(
153 /*======================*/
154 	fts_ast_node_t*	node,			/*!< in/out: text node */
155 	ulint		distance);		/*!< in: the text proximity
156 						distance */
157 /********************************************************************//**
158 Free a fts_ast_node_t instance.
159 @return next node to free */
160 UNIV_INTERN
161 fts_ast_node_t*
162 fts_ast_free_node(
163 /*==============*/
164 	fts_ast_node_t*	node);			/*!< in: node to free */
165 /********************************************************************
166 Add a sub-expression to an AST*/
167 extern
168 fts_ast_node_t*
169 fts_ast_add_node(
170 /*=============*/
171 	fts_ast_node_t*	list,			/*!< in: list node instance */
172 	fts_ast_node_t*	node);			/*!< in: (sub) expr to add */
173 /********************************************************************
174 Print the AST node recursively.*/
175 extern
176 void
177 fts_ast_node_print(
178 /*===============*/
179 	fts_ast_node_t*	node);			/*!< in: ast node to print */
180 /********************************************************************
181 For tracking node allocations, in case there is an during parsing.*/
182 extern
183 void
184 fts_ast_state_add_node(
185 /*===================*/
186 	fts_ast_state_t*state,			/*!< in: ast state instance */
187 	fts_ast_node_t*	node);			/*!< in: node to add to state */
188 /********************************************************************
189 Free node and expr allocations.*/
190 extern
191 void
192 fts_ast_state_free(
193 /*===============*/
194 	fts_ast_state_t*state);			/*!< in: state instance
195 						to free */
196 /******************************************************************//**
197 Traverse the AST - in-order traversal.
198 @return DB_SUCCESS if all went well */
199 UNIV_INTERN
200 dberr_t
201 fts_ast_visit(
202 /*==========*/
203 	fts_ast_oper_t		oper,		/*!< in: FTS operator */
204 	fts_ast_node_t*		node,		/*!< in: instance to traverse*/
205 	fts_ast_callback	visitor,	/*!< in: callback */
206 	void*			arg,		/*!< in: callback arg */
207 	bool*			has_ignore)	/*!< out: whether we encounter
208 						and ignored processing an
209 						operator, currently we only
210 						ignore FTS_IGNORE operator */
211 	MY_ATTRIBUTE((nonnull, warn_unused_result));
212 /*****************************************************************//**
213 Process (nested) sub-expression, create a new result set to store the
214 sub-expression result by processing nodes under current sub-expression
215 list. Merge the sub-expression result with that of parent expression list.
216 @return DB_SUCCESS if all went well */
217 UNIV_INTERN
218 dberr_t
219 fts_ast_visit_sub_exp(
220 /*==================*/
221 	fts_ast_node_t*		node,		/*!< in: instance to traverse*/
222 	fts_ast_callback	visitor,	/*!< in: callback */
223 	void*			arg)		/*!< in: callback arg */
224 	MY_ATTRIBUTE((nonnull, warn_unused_result));
225 /********************************************************************
226 Create a lex instance.*/
227 UNIV_INTERN
228 fts_lexer_t*
229 fts_lexer_create(
230 /*=============*/
231 	ibool		boolean_mode,		/*!< in: query type */
232 	const byte*	query,			/*!< in: query string */
233 	ulint		query_len)		/*!< in: query string len */
234 	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
235 /********************************************************************
236 Free an fts_lexer_t instance.*/
237 UNIV_INTERN
238 void
239 fts_lexer_free(
240 /*===========*/
241 	fts_lexer_t*	fts_lexer)		/*!< in: lexer instance to
242 						free */
243 	MY_ATTRIBUTE((nonnull));
244 
245 /**
246 Create an ast string object, with NUL-terminator, so the string
247 has one more byte than len
248 @param[in] str		pointer to string
249 @param[in] len		length of the string
250 @return ast string with NUL-terminator */
251 UNIV_INTERN
252 fts_ast_string_t*
253 fts_ast_string_create(
254 	const byte*	str,
255 	ulint		len);
256 
257 /**
258 Free an ast string instance
259 @param[in,out] ast_str		string to free */
260 UNIV_INTERN
261 void
262 fts_ast_string_free(
263 	fts_ast_string_t*	ast_str);
264 
265 /**
266 Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
267 @param[in] str		string to translate
268 @param[in] base		the base
269 @return translated number */
270 UNIV_INTERN
271 ulint
272 fts_ast_string_to_ul(
273 	const fts_ast_string_t*	ast_str,
274 	int			base);
275 
276 /**
277 Print the ast string
278 @param[in] str		string to print */
279 UNIV_INTERN
280 void
281 fts_ast_string_print(
282 	const fts_ast_string_t*	ast_str);
283 
284 /* String of length len.
285 We always store the string of length len with a terminating '\0',
286 regardless of there is any 0x00 in the string itself */
287 struct fts_ast_string_t {
288 	/*!< Pointer to string. */
289 	byte*		str;
290 
291 	/*!< Length of the string. */
292 	ulint		len;
293 };
294 
295 /* Query term type */
296 struct fts_ast_term_t {
297 	fts_ast_string_t*	ptr;		/*!< Pointer to term string.*/
298 	ibool			wildcard;	/*!< TRUE if wild card set.*/
299 };
300 
301 /* Query text type */
302 struct fts_ast_text_t {
303 	fts_ast_string_t*	ptr;		/*!< Pointer to text string.*/
304 	ulint			distance;	/*!< > 0 if proximity distance
305 						set */
306 };
307 
308 /* The list of nodes in an expr list */
309 struct fts_ast_list_t {
310 	fts_ast_node_t*	head;			/*!< Children list head */
311 	fts_ast_node_t*	tail;			/*!< Children list tail */
312 };
313 
314 /* FTS AST node to store the term, text, operator and sub-expressions.*/
315 struct fts_ast_node_t {
316 	fts_ast_type_t	type;			/*!< The type of node */
317 	fts_ast_text_t	text;			/*!< Text node */
318 	fts_ast_term_t	term;			/*!< Term node */
319 	fts_ast_oper_t	oper;			/*!< Operator value */
320 	fts_ast_list_t	list;			/*!< Expression list */
321 	fts_ast_node_t*	next;			/*!< Link for expr list */
322 	fts_ast_node_t*	next_alloc;		/*!< For tracking allocations */
323 	bool		visited;		/*!< whether this node is
324 						already processed */
325 };
326 
327 /* To track state during parsing */
328 struct fts_ast_state_t {
329 	mem_heap_t*	heap;			/*!< Heap to use for alloc */
330 	fts_ast_node_t*	root;			/*!< If all goes OK, then this
331 						will point to the root.*/
332 
333 	fts_ast_list_t	list;			/*!< List of nodes allocated */
334 
335 	fts_lexer_t*	lexer;			/*!< Lexer callback + arg */
336 	CHARSET_INFO*	charset;		/*!< charset used for
337 						tokenization */
338 };
339 
340 #ifdef UNIV_DEBUG
341 const char*
342 fts_ast_oper_name_get(fts_ast_oper_t	oper);
343 const char*
344 fts_ast_node_type_get(fts_ast_type_t	type);
345 #endif /* UNIV_DEBUG */
346 
347 #endif /* INNOBASE_FSTS0AST_H */
348