1 /* Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
24
25 #define FT_CORE
26 #include "ftdefs.h"
27
28 /* search with natural language queries */
29
30 typedef struct ft_doc_rec
31 {
32 my_off_t dpos;
33 double weight;
34 } FT_DOC;
35
36 struct st_ft_info
37 {
38 struct _ft_vft *please;
39 MI_INFO *info;
40 int ndocs;
41 int curdoc;
42 FT_DOC doc[1];
43 };
44
45 typedef struct st_all_in_one
46 {
47 MI_INFO *info;
48 uint keynr;
49 const CHARSET_INFO *charset;
50 uchar *keybuff;
51 TREE dtree;
52 } ALL_IN_ONE;
53
54 typedef struct st_ft_superdoc
55 {
56 FT_DOC doc;
57 FT_WORD *word_ptr;
58 double tmp_weight;
59 } FT_SUPERDOC;
60
FT_SUPERDOC_cmp(void * cmp_arg MY_ATTRIBUTE ((unused)),FT_SUPERDOC * p1,FT_SUPERDOC * p2)61 static int FT_SUPERDOC_cmp(void* cmp_arg MY_ATTRIBUTE((unused)),
62 FT_SUPERDOC *p1, FT_SUPERDOC *p2)
63 {
64 if (p1->doc.dpos < p2->doc.dpos)
65 return -1;
66 if (p1->doc.dpos == p2->doc.dpos)
67 return 0;
68 return 1;
69 }
70
walk_and_match(FT_WORD * word,uint32 count,ALL_IN_ONE * aio)71 static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
72 {
73 int UNINIT_VAR(subkeys), r;
74 uint keylen, doc_cnt;
75 FT_SUPERDOC sdoc, *sptr;
76 TREE_ELEMENT *selem;
77 double gweight=1;
78 MI_INFO *info=aio->info;
79 MYISAM_SHARE *share= info->s;
80 uchar *keybuff=aio->keybuff;
81 MI_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr;
82 my_off_t key_root;
83 uint extra= HA_FT_WLEN + info->s->rec_reflength;
84 #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
85 float tmp_weight;
86 #else
87 #error
88 #endif
89
90 DBUG_ENTER("walk_and_match");
91
92 word->weight=LWS_FOR_QUERY;
93
94 keylen=_ft_make_key(info,aio->keynr,keybuff,word,0);
95 keylen-=HA_FT_WLEN;
96 doc_cnt=0;
97
98 if (share->concurrent_insert)
99 mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
100
101 key_root= share->state.key_root[aio->keynr];
102
103 /* Skip rows inserted by current inserted */
104 for (r=_mi_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ;
105 !r &&
106 (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
107 info->lastpos >= info->state->data_file_length ;
108 r= _mi_search_next(info, keyinfo, info->lastkey,
109 info->lastkey_length, SEARCH_BIGGER, key_root))
110 ;
111
112 if (share->concurrent_insert)
113 mysql_rwlock_unlock(&share->key_root_lock[aio->keynr]);
114
115 info->update|= HA_STATE_AKTIV; /* for _mi_test_if_changed() */
116
117 /* The following should be safe, even if we compare doubles */
118 while (!r && gweight)
119 {
120
121 if (keylen &&
122 ha_compare_text(aio->charset,info->lastkey+1,
123 info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0))
124 break;
125
126 if (subkeys<0)
127 {
128 if (doc_cnt)
129 DBUG_RETURN(1); /* index is corrupted */
130 /*
131 TODO here: unsafe optimization, should this word
132 be skipped (based on subkeys) ?
133 */
134 keybuff+=keylen;
135 keyinfo=& info->s->ft2_keyinfo;
136 key_root=info->lastpos;
137 keylen=0;
138 if (share->concurrent_insert)
139 mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
140 r=_mi_search_first(info, keyinfo, key_root);
141 goto do_skip;
142 }
143 #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
144 ft_floatXget(tmp_weight, info->lastkey+info->lastkey_length-extra);
145 #else
146 #error
147 #endif
148 /* The following should be safe, even if we compare doubles */
149 if (tmp_weight==0)
150 DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */
151
152 sdoc.doc.dpos=info->lastpos;
153
154 /* saving document matched into dtree */
155 if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg)))
156 DBUG_RETURN(1);
157
158 sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem);
159
160 if (selem->count==1) /* document's first match */
161 sptr->doc.weight=0;
162 else
163 sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight;
164
165 sptr->word_ptr=word;
166 sptr->tmp_weight=tmp_weight;
167
168 doc_cnt++;
169
170 gweight=word->weight*GWS_IN_USE;
171 if (gweight < 0 || doc_cnt > 2000000)
172 gweight=0;
173
174 if (share->concurrent_insert)
175 mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
176
177 if (_mi_test_if_changed(info) == 0)
178 r=_mi_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
179 SEARCH_BIGGER, key_root);
180 else
181 r=_mi_search(info, keyinfo, info->lastkey, info->lastkey_length,
182 SEARCH_BIGGER, key_root);
183 do_skip:
184 while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
185 !r && info->lastpos >= info->state->data_file_length)
186 r= _mi_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
187 SEARCH_BIGGER, key_root);
188
189 if (share->concurrent_insert)
190 mysql_rwlock_unlock(&share->key_root_lock[aio->keynr]);
191 }
192 word->weight=gweight;
193
194 DBUG_RETURN(0);
195 }
196
197
walk_and_copy(FT_SUPERDOC * from,uint32 count MY_ATTRIBUTE ((unused)),FT_DOC ** to)198 static int walk_and_copy(FT_SUPERDOC *from,
199 uint32 count MY_ATTRIBUTE((unused)), FT_DOC **to)
200 {
201 DBUG_ENTER("walk_and_copy");
202 from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
203 (*to)->dpos=from->doc.dpos;
204 (*to)->weight=from->doc.weight;
205 (*to)++;
206 DBUG_RETURN(0);
207 }
208
walk_and_push(FT_SUPERDOC * from,uint32 count MY_ATTRIBUTE ((unused)),QUEUE * best)209 static int walk_and_push(FT_SUPERDOC *from,
210 uint32 count MY_ATTRIBUTE((unused)), QUEUE *best)
211 {
212 DBUG_ENTER("walk_and_copy");
213 from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
214 set_if_smaller(best->elements, ft_query_expansion_limit-1);
215 queue_insert(best, (uchar *)& from->doc);
216 DBUG_RETURN(0);
217 }
218
219
FT_DOC_cmp(void * unused MY_ATTRIBUTE ((unused)),FT_DOC * a,FT_DOC * b)220 static int FT_DOC_cmp(void *unused MY_ATTRIBUTE((unused)),
221 FT_DOC *a, FT_DOC *b)
222 {
223 double c= b->weight - a->weight;
224 return ((c < 0) ? -1 : (c > 0) ? 1 : 0);
225 }
226
227
ft_init_nlq_search(MI_INFO * info,uint keynr,uchar * query,uint query_len,uint flags,uchar * record)228 FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, uchar *query,
229 uint query_len, uint flags, uchar *record)
230 {
231 TREE wtree;
232 ALL_IN_ONE aio;
233 FT_DOC *dptr;
234 FT_INFO *dlist=NULL;
235 my_off_t saved_lastpos=info->lastpos;
236 struct st_mysql_ftparser *parser;
237 MYSQL_FTPARSER_PARAM *ftparser_param;
238 DBUG_ENTER("ft_init_nlq_search");
239
240 /* black magic ON */
241 if ((int) (keynr = _mi_check_index(info,keynr)) < 0)
242 DBUG_RETURN(NULL);
243 if (_mi_readinfo(info,F_RDLCK,1))
244 DBUG_RETURN(NULL);
245 /* black magic OFF */
246
247 aio.info=info;
248 aio.keynr=keynr;
249 aio.charset=info->s->keyinfo[keynr].seg->charset;
250 aio.keybuff=info->lastkey+info->s->base.max_key_length;
251 parser= info->s->keyinfo[keynr].parser;
252 if (! (ftparser_param= ftparser_call_initializer(info, keynr, 0)))
253 goto err;
254
255 memset(&wtree, 0, sizeof(wtree));
256
257 init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
258 NULL, NULL);
259
260 ft_parse_init(&wtree, aio.charset);
261 ftparser_param->flags= 0;
262 if (ft_parse(&wtree, query, query_len, parser, ftparser_param,
263 &wtree.mem_root))
264 goto err;
265
266 if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
267 left_root_right))
268 goto err;
269
270 if (flags & FT_EXPAND && ft_query_expansion_limit)
271 {
272 QUEUE best;
273 init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp,
274 0);
275 tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
276 &best, left_root_right);
277 while (best.elements)
278 {
279 my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
280 if (!(*info->read_record)(info,docid,record))
281 {
282 info->update|= HA_STATE_AKTIV;
283 ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY;
284 if (unlikely(_mi_ft_parse(&wtree, info, keynr, record, ftparser_param,
285 &wtree.mem_root)))
286 {
287 delete_queue(&best);
288 goto err;
289 }
290 }
291 }
292 delete_queue(&best);
293 reset_tree(&aio.dtree);
294 if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
295 left_root_right))
296 goto err;
297
298 }
299
300 /*
301 If ndocs == 0, this will not allocate RAM for FT_INFO.doc[],
302 so if ndocs == 0, FT_INFO.doc[] must not be accessed.
303 */
304 dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
305 sizeof(FT_DOC)*
306 (int)(aio.dtree.elements_in_tree-1),
307 MYF(0));
308 if (!dlist)
309 goto err;
310
311 dlist->please= (struct _ft_vft *) & _ft_vft_nlq;
312 dlist->ndocs=aio.dtree.elements_in_tree;
313 dlist->curdoc=-1;
314 dlist->info=aio.info;
315 dptr=dlist->doc;
316
317 tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy,
318 &dptr, left_root_right);
319
320 if (flags & FT_SORTED)
321 my_qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp,
322 0);
323
324 err:
325 delete_tree(&aio.dtree);
326 delete_tree(&wtree);
327 info->lastpos=saved_lastpos;
328 DBUG_RETURN(dlist);
329 }
330
331
ft_nlq_read_next(FT_INFO * handler,char * record)332 int ft_nlq_read_next(FT_INFO *handler, char *record)
333 {
334 MI_INFO *info= (MI_INFO *) handler->info;
335
336 if (++handler->curdoc >= handler->ndocs)
337 {
338 --handler->curdoc;
339 return HA_ERR_END_OF_FILE;
340 }
341
342 info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
343
344 info->lastpos=handler->doc[handler->curdoc].dpos;
345 if (!(*info->read_record)(info,info->lastpos,(uchar*) record))
346 {
347 info->update|= HA_STATE_AKTIV; /* Record is read */
348 return 0;
349 }
350 return my_errno;
351 }
352
353
ft_nlq_find_relevance(FT_INFO * handler,uchar * record MY_ATTRIBUTE ((unused)),uint length MY_ATTRIBUTE ((unused)))354 float ft_nlq_find_relevance(FT_INFO *handler,
355 uchar *record MY_ATTRIBUTE((unused)),
356 uint length MY_ATTRIBUTE((unused)))
357 {
358 int a,b,c;
359 FT_DOC *docs=handler->doc;
360 my_off_t docid=handler->info->lastpos;
361
362 if (docid == HA_POS_ERROR)
363 return -5.0;
364
365 /* Assuming docs[] is sorted by dpos... */
366
367 for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
368 {
369 if (docs[c].dpos > docid)
370 b=c;
371 else
372 a=c;
373 }
374 /* bounds check to avoid accessing unallocated handler->doc */
375 if (a < handler->ndocs && docs[a].dpos == docid)
376 return (float) docs[a].weight;
377 else
378 return 0.0;
379 }
380
381
ft_nlq_close_search(FT_INFO * handler)382 void ft_nlq_close_search(FT_INFO *handler)
383 {
384 my_free(handler);
385 }
386
387
ft_nlq_get_relevance(FT_INFO * handler)388 float ft_nlq_get_relevance(FT_INFO *handler)
389 {
390 return (float) handler->doc[handler->curdoc].weight;
391 }
392
393
ft_nlq_reinit_search(FT_INFO * handler)394 void ft_nlq_reinit_search(FT_INFO *handler)
395 {
396 handler->curdoc=-1;
397 }
398
399