1 /*****************************************************************************
2 
3 Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
16 
17 *****************************************************************************/
18 
19 /******************************************************************//**
20 @file fts/fts0plugin.cc
21 Full Text Search plugin support.
22 
23 Created 2013/06/04 Shaohua Wang
24 ***********************************************************************/
25 
26 #include "fts0ast.h"
27 #include "fts0plugin.h"
28 #include "fts0tokenize.h"
29 
30 #include "ft_global.h"
31 
32 /******************************************************************//**
33 FTS default parser init
34 @return 0 */
fts_default_parser_init(MYSQL_FTPARSER_PARAM *)35 static int fts_default_parser_init(MYSQL_FTPARSER_PARAM*) { return 0; }
36 
37 /******************************************************************//**
38 FTS default parser deinit
39 @return 0 */
fts_default_parser_deinit(MYSQL_FTPARSER_PARAM *)40 static int fts_default_parser_deinit(MYSQL_FTPARSER_PARAM*) { return 0; }
41 
42 /******************************************************************//**
43 FTS default parser parse from ft_static.c in MYISAM.
44 @return 0 if parse successfully, or return non-zero */
45 static
46 int
fts_default_parser_parse(MYSQL_FTPARSER_PARAM * param)47 fts_default_parser_parse(
48 /*=====================*/
49 	MYSQL_FTPARSER_PARAM *param)	/*!< in: plugin parser param */
50 {
51 	return(param->mysql_parse(param, param->doc, param->length));
52 }
53 
54 /* FTS default parser from ft_static.c in MYISAM. */
55 struct st_mysql_ftparser fts_default_parser =
56 {
57 	MYSQL_FTPARSER_INTERFACE_VERSION,
58 	fts_default_parser_parse,
59 	fts_default_parser_init,
60 	fts_default_parser_deinit
61 };
62 
63 /******************************************************************//**
64 Get a operator node from token boolean info
65 @return node */
66 static
67 fts_ast_node_t*
fts_query_get_oper_node(MYSQL_FTPARSER_BOOLEAN_INFO * info,fts_ast_state_t * state)68 fts_query_get_oper_node(
69 /*====================*/
70 	MYSQL_FTPARSER_BOOLEAN_INFO*	info,	/*!< in: token info */
71 	fts_ast_state_t*		state)	/*!< in/out: query parse state*/
72 {
73 	fts_ast_node_t*	oper_node = NULL;
74 
75 	if (info->yesno > 0) {
76 		oper_node = fts_ast_create_node_oper(state, FTS_EXIST);
77 	} else if (info->yesno < 0) {
78 		oper_node = fts_ast_create_node_oper(state, FTS_IGNORE);
79 	} else if (info->weight_adjust > 0) {
80 		oper_node = fts_ast_create_node_oper(state, FTS_INCR_RATING);
81 	} else if (info->weight_adjust < 0) {
82 		oper_node = fts_ast_create_node_oper(state, FTS_DECR_RATING);
83 	} else if (info->wasign > 0) {
84 		oper_node = fts_ast_create_node_oper(state, FTS_NEGATE);
85 	}
86 
87 	return(oper_node);
88 }
89 
90 /******************************************************************//**
91 FTS plugin parser 'myql_add_word' callback function for query parse.
92 Refer to 'st_mysql_ftparser_param' for more detail.
93 Note:
94 a. Parse logic refers to 'ftb_query_add_word' from ft_boolean_search.c in MYISAM;
95 b. Parse node or tree refers to fts0pars.y.
96 @return 0 if add successfully, or return non-zero. */
97 static
98 int
fts_query_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * info)99 fts_query_add_word_for_parser(
100 /*==========================*/
101 	MYSQL_FTPARSER_PARAM*	param,		/*!< in: parser param */
102 	const char*			word,		/*!< in: token */
103 	int			word_len,	/*!< in: token length */
104 	MYSQL_FTPARSER_BOOLEAN_INFO*	info)	/*!< in: token info */
105 {
106 	fts_ast_state_t* state =
107 		static_cast<fts_ast_state_t*>(param->mysql_ftparam);
108 	fts_ast_node_t*	cur_node = state->cur_node;
109 	fts_ast_node_t*	oper_node = NULL;
110 	fts_ast_node_t*	term_node = NULL;
111 	fts_ast_node_t*	node = NULL;
112 
113 	switch (info->type) {
114 	case FT_TOKEN_STOPWORD:
115 		/* We only handler stopword in phrase */
116 		if (cur_node->type != FTS_AST_PARSER_PHRASE_LIST) {
117 			break;
118 		}
119 		/* fall through */
120 
121 	case FT_TOKEN_WORD:
122 		term_node = fts_ast_create_node_term_for_parser(
123 			state, word, ulint(word_len));
124 
125 		if (info->trunc) {
126 			fts_ast_term_set_wildcard(term_node);
127 		}
128 
129 		if (cur_node->type == FTS_AST_PARSER_PHRASE_LIST) {
130 			/* Ignore operator inside phrase */
131 			fts_ast_add_node(cur_node, term_node);
132 		} else {
133 			ut_ad(cur_node->type == FTS_AST_LIST
134 			      || cur_node->type == FTS_AST_SUBEXP_LIST);
135 			oper_node = fts_query_get_oper_node(info, state);
136 
137 			if (oper_node) {
138 				node = fts_ast_create_node_list(state, oper_node);
139 				fts_ast_add_node(node, term_node);
140 				fts_ast_add_node(cur_node, node);
141 			} else {
142 				fts_ast_add_node(cur_node, term_node);
143 			}
144 		}
145 
146 		break;
147 
148 	case FT_TOKEN_LEFT_PAREN:
149 		/* Check parse error */
150 		if (cur_node->type != FTS_AST_LIST
151 		    && cur_node->type != FTS_AST_SUBEXP_LIST) {
152 			return(1);
153 		}
154 
155 		/* Set operator */
156                 oper_node = fts_query_get_oper_node(info, state);
157 		if (oper_node != NULL) {
158 			node = fts_ast_create_node_list(state, oper_node);
159 			fts_ast_add_node(cur_node, node);
160 			node->go_up = true;
161 			node->up_node = cur_node;
162 			cur_node = node;
163 		}
164 
165 		if (info->quot) {
166 			/* Phrase node */
167 			node = fts_ast_create_node_phrase_list(state);
168 		} else {
169 			/* Subexp list node */
170 			node = fts_ast_create_node_subexp_list(state, NULL);
171 		}
172 
173 		fts_ast_add_node(cur_node, node);
174 
175 		node->up_node = cur_node;
176 		state->cur_node = node;
177 		state->depth += 1;
178 
179 		break;
180 
181 	case FT_TOKEN_RIGHT_PAREN:
182 		info->quot = 0;
183 
184 		if (cur_node->up_node != NULL) {
185 			cur_node = cur_node->up_node;
186 
187 			if (cur_node->go_up) {
188 				ut_a(cur_node->up_node
189 				     && !(cur_node->up_node->go_up));
190 				cur_node = cur_node->up_node;
191 			}
192 		}
193 
194 		state->cur_node = cur_node;
195 
196 		if (state->depth > 0) {
197 			state->depth--;
198 		} else {
199 			/* Parentheses mismatch */
200 			return(1);
201 		}
202 
203 		break;
204 
205 	case FT_TOKEN_EOF:
206 	default:
207 		break;
208 	}
209 
210 	return(0);
211 }
212 
213 /******************************************************************//**
214 FTS plugin parser 'myql_parser' callback function for query parse.
215 Refer to 'st_mysql_ftparser_param' for more detail.
216 @return 0 if parse successfully */
217 static
218 int
fts_parse_query_internal(MYSQL_FTPARSER_PARAM * param,const char * query,int len)219 fts_parse_query_internal(
220 /*=====================*/
221 	MYSQL_FTPARSER_PARAM*	param,	/*!< in: parser param */
222 	const char*			query,	/*!< in: query string */
223 	int			len)	/*!< in: query length */
224 {
225 	MYSQL_FTPARSER_BOOLEAN_INFO	info;
226 	const CHARSET_INFO*		cs = param->cs;
227 	uchar**	start = (uchar**)(&query);
228 	uchar*	end = (uchar*)(query + len);
229 	FT_WORD	w = {NULL, 0, 0};
230 
231 	info.prev = ' ';
232 	info.quot = 0;
233 	memset(&w, 0, sizeof(w));
234 	/* Note: We don't handle simple parser mode here,
235 	but user supplied plugin parser should handler it. */
236 	while (fts_get_word(cs, start, end, &w, &info)) {
237 		int ret = param->mysql_add_word(
238 				param,
239 				reinterpret_cast<char*>(w.pos),
240 				int(w.len), &info);
241 		if (ret) {
242 			return(ret);
243 		}
244 	}
245 
246 	return(0);
247 }
248 
249 /******************************************************************//**
250 fts parse query by plugin parser.
251 @return 0 if parse successfully, or return non-zero. */
252 int
fts_parse_by_parser(ibool mode,uchar * query_str,ulint query_len,st_mysql_ftparser * parser,fts_ast_state_t * state)253 fts_parse_by_parser(
254 /*================*/
255 	ibool			mode,		/*!< in: parse boolean mode */
256 	uchar*			query_str,	/*!< in: query string */
257 	ulint			query_len,	/*!< in: query string length */
258 	st_mysql_ftparser*	parser,		/*!< in: fts plugin parser */
259 	fts_ast_state_t*	state)		/*!< in/out: parser state */
260 {
261 	MYSQL_FTPARSER_PARAM	param;
262 	int	ret;
263 
264 	ut_ad(parser);
265 
266 	/* Initial parser param */
267 	param.mysql_parse = fts_parse_query_internal;
268 	param.mysql_add_word = fts_query_add_word_for_parser;
269 	param.mysql_ftparam = static_cast<void*>(state);
270 	param.cs = state->charset;
271 	param.doc = reinterpret_cast<char*>(query_str);
272 	param.length = static_cast<int>(query_len);
273 	param.flags = 0;
274 	param.mode = mode ?
275 		     MYSQL_FTPARSER_FULL_BOOLEAN_INFO :
276 		     MYSQL_FTPARSER_SIMPLE_MODE;
277 
278 	PARSER_INIT(parser, &param);
279 	ret = parser->parse(&param);
280 	PARSER_DEINIT(parser, &param);
281 
282 	return(ret | state->depth);
283 }
284