1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15
16 /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
17
18 /* TODO: add caching - pre-read several index entries at once */
19
20 /*
21 Added optimization for full-text queries with plus-words. It was
22 implemented by sharing maximal document id (max_docid) variable
23 inside plus subtree. max_docid could be used by any word in plus
24 subtree, but it could be updated by plus-word only.
25
26 Fulltext "smarter index merge" optimization assumes that rows
27 it gets are ordered by doc_id. That is not the case when we
28 search for a word with truncation operator. It may return
29 rows in random order. Thus we may not use "smarter index merge"
30 optimization with "trunc-words".
31
32 The idea is: there is no need to search for docid smaller than
33 biggest docid inside current plus subtree or any upper plus subtree.
34
35 Examples:
36 +word1 word2
37 share same max_docid
38 max_docid updated by word1
39 +word1 +(word2 word3)
40 share same max_docid
41 max_docid updated by word1
42 +(word1 -word2) +(+word3 word4)
43 share same max_docid
44 max_docid updated by word3
45 +word1 word2 (+word3 word4 (+word5 word6))
46 three subexpressions (including the top-level one),
47 every one has its own max_docid, updated by its plus word.
48 but for the search word6 uses
49 MY_MAX(word1.max_docid, word3.max_docid, word5.max_docid),
50 while word4 uses, accordingly,
51 MY_MAX(word1.max_docid, word3.max_docid).
52 */
53
54 #define FT_CORE
55 #include "ma_ftdefs.h"
56
57 /* search with boolean queries */
58
59 static double _wghts[11]=
60 {
61 0.131687242798354,
62 0.197530864197531,
63 0.296296296296296,
64 0.444444444444444,
65 0.666666666666667,
66 1.000000000000000,
67 1.500000000000000,
68 2.250000000000000,
69 3.375000000000000,
70 5.062500000000000,
71 7.593750000000000};
72 static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */
73
74 static double _nwghts[11]=
75 {
76 -0.065843621399177,
77 -0.098765432098766,
78 -0.148148148148148,
79 -0.222222222222222,
80 -0.333333333333334,
81 -0.500000000000000,
82 -0.750000000000000,
83 -1.125000000000000,
84 -1.687500000000000,
85 -2.531250000000000,
86 -3.796875000000000};
87 static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
88
89 #define FTB_FLAG_TRUNC 1
90 /* At most one of the following flags can be set */
91 #define FTB_FLAG_YES 2
92 #define FTB_FLAG_NO 4
93 #define FTB_FLAG_WONLY 8
94
95 typedef struct st_ftb_expr FTB_EXPR;
96 struct st_ftb_expr
97 {
98 FTB_EXPR *up;
99 uint flags;
100 /* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
101 my_off_t docid[2];
102 my_off_t max_docid;
103 float weight;
104 float cur_weight;
105 LIST *phrase; /* phrase words */
106 LIST *document; /* for phrase search */
107 uint yesses; /* number of "yes" words matched */
108 uint nos; /* number of "no" words matched */
109 uint ythresh; /* number of "yes" words in expr */
110 uint yweaks; /* number of "yes" words for scan only */
111 };
112
113 typedef struct st_ftb_word
114 {
115 FTB_EXPR *up;
116 uint flags;
117 /* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
118 my_off_t docid[2]; /* for index search and for scan */
119 my_off_t key_root;
120 FTB_EXPR *max_docid_expr;
121 MARIA_KEYDEF *keyinfo;
122 struct st_ftb_word *prev;
123 float weight;
124 uint ndepth;
125 uint len;
126 uchar off;
127 uchar word[1];
128 } FTB_WORD;
129
130 typedef struct st_ft_info
131 {
132 struct _ft_vft *please;
133 MARIA_HA *info;
134 CHARSET_INFO *charset;
135 FTB_EXPR *root;
136 FTB_WORD **list;
137 FTB_WORD *last_word;
138 MEM_ROOT mem_root;
139 QUEUE queue;
140 TREE no_dupes;
141 my_off_t lastpos;
142 uint keynr;
143 uchar with_scan;
144 enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
145 } FTB;
146
FTB_WORD_cmp(my_off_t * v,FTB_WORD * a,FTB_WORD * b)147 static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
148 {
149 int i;
150
151 /* if a==curdoc, take it as a < b */
152 if (v && a->docid[0] == *v)
153 return -1;
154
155 /* ORDER BY docid, ndepth DESC */
156 i=CMP_NUM(a->docid[0], b->docid[0]);
157 if (!i)
158 i=CMP_NUM(b->ndepth,a->ndepth);
159 return i;
160 }
161
FTB_WORD_cmp_list(CHARSET_INFO * cs,FTB_WORD ** a,FTB_WORD ** b)162 static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
163 {
164 /* ORDER BY word, ndepth */
165 int i= ha_compare_text(cs, (uchar*) (*a)->word + 1,(*a)->len - 1,
166 (uchar*) (*b)->word + 1,(*b)->len - 1, 0);
167 if (!i)
168 i=CMP_NUM((*a)->ndepth, (*b)->ndepth);
169 return i;
170 }
171
172
173 typedef struct st_my_ftb_param
174 {
175 FTB *ftb;
176 FTB_EXPR *ftbe;
177 uchar *up_quot;
178 uint depth;
179 } MY_FTB_PARAM;
180
181
ftb_query_add_word(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * info)182 static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
183 const char *word, int word_len,
184 MYSQL_FTPARSER_BOOLEAN_INFO *info)
185 {
186 MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
187 FTB_WORD *ftbw;
188 FTB_EXPR *ftbe, *tmp_expr;
189 FT_WORD *phrase_word;
190 LIST *tmp_element;
191 int r= info->weight_adjust;
192 float weight= (float)
193 (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
194
195 switch (info->type) {
196 case FT_TOKEN_WORD:
197 ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
198 sizeof(FTB_WORD) + HA_MAX_KEY_BUFF);
199 ftbw->len= word_len + 1;
200 ftbw->flags= 0;
201 ftbw->off= 0;
202 if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
203 if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
204 if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
205 ftbw->weight= weight;
206 ftbw->up= ftb_param->ftbe;
207 ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
208 ftbw->ndepth= (info->yesno < 0) + ftb_param->depth;
209 ftbw->key_root= HA_OFFSET_ERROR;
210 memcpy(ftbw->word + 1, word, word_len);
211 ftbw->word[0]= word_len;
212 if (info->yesno > 0) ftbw->up->ythresh++;
213 ftb_param->ftb->queue.max_elements++;
214 ftbw->prev= ftb_param->ftb->last_word;
215 ftb_param->ftb->last_word= ftbw;
216 ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
217 for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
218 if (! (tmp_expr->flags & FTB_FLAG_YES))
219 break;
220 ftbw->max_docid_expr= tmp_expr;
221 /* fall through */
222 case FT_TOKEN_STOPWORD:
223 if (! ftb_param->up_quot) break;
224 phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
225 tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
226 phrase_word->pos= (uchar*)word;
227 phrase_word->len= word_len;
228 tmp_element->data= (void *)phrase_word;
229 ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
230 /* Allocate document list at this point.
231 It allows to avoid huge amount of allocs/frees for each row.*/
232 tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
233 tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
234 ftb_param->ftbe->document=
235 list_add(ftb_param->ftbe->document, tmp_element);
236 break;
237 case FT_TOKEN_LEFT_PAREN:
238 ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR));
239 ftbe->flags= 0;
240 if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
241 if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
242 ftbe->weight= weight;
243 ftbe->up= ftb_param->ftbe;
244 ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
245 ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
246 ftbe->phrase= NULL;
247 ftbe->document= 0;
248 if (info->quot) ftb_param->ftb->with_scan|= 2;
249 if (info->yesno > 0) ftbe->up->ythresh++;
250 ftb_param->ftbe= ftbe;
251 ftb_param->depth++;
252 ftb_param->up_quot= (uchar*)info->quot;
253 break;
254 case FT_TOKEN_RIGHT_PAREN:
255 if (ftb_param->ftbe->document)
256 {
257 /* Circuit document list */
258 for (tmp_element= ftb_param->ftbe->document;
259 tmp_element->next; tmp_element= tmp_element->next) /* no-op */;
260 tmp_element->next= ftb_param->ftbe->document;
261 ftb_param->ftbe->document->prev= tmp_element;
262 }
263 info->quot= 0;
264 if (ftb_param->ftbe->up)
265 {
266 DBUG_ASSERT(ftb_param->depth);
267 ftb_param->ftbe= ftb_param->ftbe->up;
268 ftb_param->depth--;
269 ftb_param->up_quot= 0;
270 }
271 break;
272 case FT_TOKEN_EOF:
273 default:
274 break;
275 }
276 return(0);
277 }
278
279
ftb_parse_query_internal(MYSQL_FTPARSER_PARAM * param,const char * query,int len)280 static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param,
281 const char *query, int len)
282 {
283 MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
284 MYSQL_FTPARSER_BOOLEAN_INFO info;
285 CHARSET_INFO *cs= ftb_param->ftb->charset;
286 const uchar **start= (const uchar**) &query;
287 uchar *end= (uchar*) query + len;
288 FT_WORD w;
289
290 info.prev= ' ';
291 info.quot= 0;
292 while (maria_ft_get_word(cs, start, end, &w, &info))
293 param->mysql_add_word(param, (char*)w.pos, w.len, &info);
294 return(0);
295 }
296
297
_ftb_parse_query(FTB * ftb,uchar * query,uint len,struct st_mysql_ftparser * parser)298 static int _ftb_parse_query(FTB *ftb, uchar *query, uint len,
299 struct st_mysql_ftparser *parser)
300 {
301 MYSQL_FTPARSER_PARAM *param;
302 MY_FTB_PARAM ftb_param;
303 DBUG_ENTER("_ftb_parse_query");
304 DBUG_ASSERT(parser);
305
306 if (ftb->state != UNINITIALIZED)
307 DBUG_RETURN(0);
308 if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
309 DBUG_RETURN(1);
310
311 ftb_param.ftb= ftb;
312 ftb_param.depth= 0;
313 ftb_param.ftbe= ftb->root;
314 ftb_param.up_quot= 0;
315
316 param->mysql_parse= ftb_parse_query_internal;
317 param->mysql_add_word= ftb_query_add_word;
318 param->mysql_ftparam= (void *)&ftb_param;
319 param->cs= ftb->charset;
320 param->doc= (char*)query;
321 param->length= len;
322 param->flags= 0;
323 param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
324 DBUG_RETURN(parser->parse(param));
325 }
326
327
_ftb_no_dupes_cmp(void * not_used,const void * a,const void * b)328 static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
329 const void *a,const void *b)
330 {
331 return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
332 }
333
334
335 /* returns 1 if the search was finished (must-word wasn't found) */
336
_ft2_search_no_lock(FTB * ftb,FTB_WORD * ftbw,my_bool init_search)337 static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
338 {
339 int r;
340 int subkeys=1;
341 my_bool can_go_down;
342 MARIA_HA *info=ftb->info;
343 uint UNINIT_VAR(off), extra=HA_FT_WLEN+info->s->base.rec_reflength;
344 uchar *lastkey_buf= ftbw->word+ftbw->off;
345 MARIA_KEY key;
346
347 if (ftbw->flags & FTB_FLAG_TRUNC)
348 lastkey_buf+=ftbw->len;
349
350 if (init_search)
351 {
352 ftbw->key_root=info->s->state.key_root[ftb->keynr];
353 ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
354 info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo;
355 info->lastinx= ~0; /* Safety */
356 key.data= ftbw->word;
357 key.data_length= ftbw->len;
358 key.ref_length= 0;
359 key.flag= 0;
360
361 r= _ma_search(info, &key, SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
362 }
363 else
364 {
365 uint sflag= SEARCH_BIGGER;
366 my_off_t max_docid=0;
367 FTB_EXPR *tmp;
368
369 for (tmp= ftbw->max_docid_expr; tmp; tmp= tmp->up)
370 set_if_bigger(max_docid, tmp->max_docid);
371
372 if (ftbw->docid[0] < max_docid)
373 {
374 sflag|= SEARCH_SAME;
375 _ma_dpointer(info->s, (uchar*) (ftbw->word + ftbw->len + HA_FT_WLEN),
376 max_docid);
377 }
378
379 info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo;
380 info->lastinx= ~0; /* Safety */
381 key.data= lastkey_buf;
382 key.data_length= USE_WHOLE_KEY;
383 key.ref_length= 0;
384 key.flag= 0;
385
386 r= _ma_search(info, &key, sflag, ftbw->key_root);
387 }
388
389 can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)));
390 /* Skip rows inserted by concurrent insert */
391 while (!r)
392 {
393 if (can_go_down)
394 {
395 /* going down ? */
396 off= info->last_key.data_length + info->last_key.ref_length - extra;
397 subkeys=ft_sintXkorr(info->last_key.data + off);
398 }
399 if (subkeys<0 || info->cur_row.lastpos < info->state->data_file_length)
400 break;
401 r= _ma_search_next(info, &info->last_key, SEARCH_BIGGER, ftbw->key_root);
402 }
403
404 if (!r && !ftbw->off)
405 {
406 r= ha_compare_text(ftb->charset,
407 info->last_key.data+1,
408 info->last_key.data_length + info->last_key.ref_length-
409 extra-1,
410 (uchar*) ftbw->word+1,
411 ftbw->len-1,
412 (my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
413 }
414
415 if (r) /* not found */
416 {
417 if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
418 {
419 ftbw->docid[0]=HA_OFFSET_ERROR;
420 if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
421 {
422 /*
423 This word MUST BE present in every document returned,
424 so we can stop the search right now
425 */
426 ftb->state=INDEX_DONE;
427 return 1; /* search is done */
428 }
429 else
430 return 0;
431 }
432
433 /* going up to the first-level tree to continue search there */
434 _ma_dpointer(info->s, (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
435 ftbw->key_root=info->s->state.key_root[ftb->keynr];
436 ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
437 ftbw->off=0;
438 return _ft2_search_no_lock(ftb, ftbw, 0);
439 }
440
441 /* matching key found */
442 memcpy(lastkey_buf, info->last_key.data,
443 info->last_key.data_length + info->last_key.ref_length);
444 if (lastkey_buf == ftbw->word)
445 ftbw->len= info->last_key.data_length + info->last_key.ref_length - extra;
446
447 /* going down ? */
448 if (subkeys<0)
449 {
450 /*
451 yep, going down, to the second-level tree
452 TODO here: subkey-based optimization
453 */
454 ftbw->off=off;
455 ftbw->key_root= info->cur_row.lastpos;
456 ftbw->keyinfo= info->last_key.keyinfo= & info->s->ft2_keyinfo;
457 r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root);
458 DBUG_ASSERT(r==0); /* found something */
459 memcpy(lastkey_buf+off, info->last_key.data,
460 info->last_key.data_length + info->last_key.ref_length);
461 }
462 ftbw->docid[0]= info->cur_row.lastpos;
463 if (ftbw->flags & FTB_FLAG_YES && !(ftbw->flags & FTB_FLAG_TRUNC))
464 ftbw->max_docid_expr->max_docid= info->cur_row.lastpos;
465 return 0;
466 }
467
_ft2_search(FTB * ftb,FTB_WORD * ftbw,my_bool init_search)468 static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
469 {
470 int r;
471 MARIA_SHARE *share= ftb->info->s;
472 if (share->lock_key_trees)
473 mysql_rwlock_rdlock(&share->keyinfo[ftb->keynr].root_lock);
474 r= _ft2_search_no_lock(ftb, ftbw, init_search);
475 if (share->lock_key_trees)
476 mysql_rwlock_unlock(&share->keyinfo[ftb->keynr].root_lock);
477 return r;
478 }
479
480
_ftb_init_index_search(FT_INFO * ftb)481 static void _ftb_init_index_search(FT_INFO *ftb)
482 {
483 int i;
484 FTB_WORD *ftbw;
485
486 if (ftb->state == UNINITIALIZED || ftb->keynr == NO_SUCH_KEY)
487 return;
488 ftb->state=INDEX_SEARCH;
489
490 for (i= queue_last_element(&ftb->queue);
491 (int) i >= (int) queue_first_element(&ftb->queue);
492 i--)
493 {
494 ftbw=(FTB_WORD *)(queue_element(&ftb->queue, i));
495
496 if (ftbw->flags & FTB_FLAG_TRUNC)
497 {
498 /*
499 special treatment for truncation operator
500 1. there are some (besides this) +words
501 | no need to search in the index, it can never ADD new rows
502 | to the result, and to remove half-matched rows we do scan anyway
503 2. -trunc*
504 | same as 1.
505 3. in 1 and 2, +/- need not be on the same expr. level,
506 but can be on any upper level, as in +word +(trunc1* trunc2*)
507 4. otherwise
508 | We have to index-search for this prefix.
509 | It may cause duplicates, as in the index (sorted by <word,docid>)
510 | <aaaa,row1>
511 | <aabb,row2>
512 | <aacc,row1>
513 | Searching for "aa*" will find row1 twice...
514 */
515 FTB_EXPR *ftbe;
516 for (ftbe=(FTB_EXPR*)ftbw;
517 ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
518 ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
519 {
520 if (ftbe->flags & FTB_FLAG_NO || /* 2 */
521 ftbe->up->ythresh - ftbe->up->yweaks >
522 (uint) MY_TEST(ftbe->flags & FTB_FLAG_YES)) /* 1 */
523 {
524 FTB_EXPR *top_ftbe=ftbe->up;
525 ftbw->docid[0]=HA_OFFSET_ERROR;
526 for (ftbe=(FTB_EXPR *)ftbw;
527 ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
528 ftbe=ftbe->up)
529 ftbe->up->yweaks++;
530 ftbe=0;
531 break;
532 }
533 }
534 if (!ftbe)
535 continue;
536 /* 4 */
537 if (!is_tree_inited(& ftb->no_dupes))
538 init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t),
539 _ftb_no_dupes_cmp,0,0,0);
540 else
541 reset_tree(& ftb->no_dupes);
542 }
543
544 ftbw->off=0; /* in case of reinit */
545 if (_ft2_search(ftb, ftbw, 1))
546 return;
547 }
548 queue_fix(& ftb->queue);
549 }
550
551
maria_ft_init_boolean_search(MARIA_HA * info,uint keynr,uchar * query,uint query_len,CHARSET_INFO * cs)552 FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr,
553 uchar *query, uint query_len,
554 CHARSET_INFO *cs)
555 {
556 FTB *ftb;
557 FTB_EXPR *ftbe;
558 FTB_WORD *ftbw;
559
560 if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
561 return 0;
562 ftb->please= (struct _ft_vft *) & _ma_ft_vft_boolean;
563 ftb->state=UNINITIALIZED;
564 ftb->info=info;
565 ftb->keynr=keynr;
566 ftb->charset=cs;
567 DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset);
568 ftb->with_scan=0;
569 ftb->lastpos=HA_OFFSET_ERROR;
570 bzero(& ftb->no_dupes, sizeof(TREE));
571 ftb->last_word= 0;
572
573 init_alloc_root(&ftb->mem_root, "fulltext", 1024, 1024, 0);
574 ftb->queue.max_elements= 0;
575 if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR))))
576 goto err;
577 ftbe->weight=1;
578 ftbe->flags=FTB_FLAG_YES;
579 ftbe->nos=1;
580 ftbe->up=0;
581 ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
582 ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
583 ftbe->phrase= NULL;
584 ftbe->document= 0;
585 ftb->root=ftbe;
586 if (unlikely(_ftb_parse_query(ftb, query, query_len,
587 keynr == NO_SUCH_KEY ? &ft_default_parser :
588 info->s->keyinfo[keynr].parser)))
589 goto err;
590 /*
591 Hack: instead of init_queue, we'll use reinit queue to be able
592 to alloc queue with alloc_root()
593 */
594 if (! (ftb->queue.root= (uchar **)alloc_root(&ftb->mem_root,
595 (ftb->queue.max_elements + 1) *
596 sizeof(void *))))
597 goto err;
598 reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
599 (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0, 0, 0);
600 for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
601 queue_insert(&ftb->queue, (uchar *)ftbw);
602 ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
603 sizeof(FTB_WORD *)*ftb->queue.elements);
604 memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
605 my_qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
606 (qsort2_cmp)FTB_WORD_cmp_list, (void*) ftb->charset);
607 if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
608 ftb->state=READY;
609 return ftb;
610 err:
611 free_root(& ftb->mem_root, MYF(0));
612 my_free(ftb);
613 return 0;
614 }
615
616
617 typedef struct st_my_ftb_phrase_param
618 {
619 LIST *phrase;
620 LIST *document;
621 CHARSET_INFO *cs;
622 uint phrase_length;
623 uint document_length;
624 uint match;
625 } MY_FTB_PHRASE_PARAM;
626
627
ftb_phrase_add_word(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)628 static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param,
629 const char *word, int word_len,
630 MYSQL_FTPARSER_BOOLEAN_INFO
631 *boolean_info __attribute__((unused)))
632 {
633 MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
634 FT_WORD *w= (FT_WORD *)phrase_param->document->data;
635 LIST *phrase, *document;
636 w->pos= (uchar*)word;
637 w->len= word_len;
638 phrase_param->document= phrase_param->document->prev;
639 if (phrase_param->phrase_length > phrase_param->document_length)
640 {
641 phrase_param->document_length++;
642 return 0;
643 }
644 /* TODO: rewrite phrase search to avoid
645 comparing the same word twice. */
646 for (phrase= phrase_param->phrase, document= phrase_param->document->next;
647 phrase; phrase= phrase->next, document= document->next)
648 {
649 FT_WORD *phrase_word= (FT_WORD *)phrase->data;
650 FT_WORD *document_word= (FT_WORD *)document->data;
651 if (my_strnncoll(phrase_param->cs, (uchar*) phrase_word->pos,
652 phrase_word->len,
653 (uchar*) document_word->pos, document_word->len))
654 return 0;
655 }
656 phrase_param->match++;
657 return 0;
658 }
659
660
ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM * param,const char * document,int len)661 static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param,
662 const char *document, int len)
663 {
664 FT_WORD word;
665 MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
666 const uchar *docend= (uchar*)document + len;
667 while (maria_ft_simple_get_word(phrase_param->cs, (uchar**)&document,
668 docend, &word, FALSE))
669 {
670 param->mysql_add_word(param, (char*)word.pos, word.len, 0);
671 if (phrase_param->match)
672 break;
673 }
674 return 0;
675 }
676
677
678 /*
679 Checks if given buffer matches phrase list.
680
681 SYNOPSIS
682 _ftb_check_phrase()
683 s0 start of buffer
684 e0 end of buffer
685 phrase broken into list phrase
686 cs charset info
687
688 RETURN VALUE
689 1 is returned if phrase found, 0 else.
690 -1 is returned if error occurs.
691 */
692
_ftb_check_phrase(FTB * ftb,const uchar * document,uint len,FTB_EXPR * ftbe,struct st_mysql_ftparser * parser)693 static int _ftb_check_phrase(FTB *ftb, const uchar *document, uint len,
694 FTB_EXPR *ftbe, struct st_mysql_ftparser *parser)
695 {
696 MY_FTB_PHRASE_PARAM ftb_param;
697 MYSQL_FTPARSER_PARAM *param;
698 DBUG_ENTER("_ftb_check_phrase");
699 DBUG_ASSERT(parser);
700
701 if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 1)))
702 DBUG_RETURN(0);
703 ftb_param.phrase= ftbe->phrase;
704 ftb_param.document= ftbe->document;
705 ftb_param.cs= ftb->charset;
706 ftb_param.phrase_length= list_length(ftbe->phrase);
707 ftb_param.document_length= 1;
708 ftb_param.match= 0;
709
710 param->mysql_parse= ftb_check_phrase_internal;
711 param->mysql_add_word= ftb_phrase_add_word;
712 param->mysql_ftparam= (void *)&ftb_param;
713 param->cs= ftb->charset;
714 param->doc= (char *)document;
715 param->length= len;
716 param->flags= 0;
717 param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
718 if (unlikely(parser->parse(param)))
719 return -1;
720 DBUG_RETURN(ftb_param.match ? 1 : 0);
721 }
722
723
_ftb_climb_the_tree(FTB * ftb,FTB_WORD * ftbw,FT_SEG_ITERATOR * ftsi_orig)724 static int _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
725 {
726 FT_SEG_ITERATOR ftsi;
727 FTB_EXPR *ftbe;
728 float weight=ftbw->weight;
729 int yn_flag= ftbw->flags, ythresh, mode=(ftsi_orig != 0);
730 my_off_t curdoc=ftbw->docid[mode];
731 struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
732 &ft_default_parser :
733 ftb->info->s->keyinfo[ftb->keynr].parser;
734
735 for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
736 {
737 ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks);
738 if (ftbe->docid[mode] != curdoc)
739 {
740 ftbe->cur_weight=0;
741 ftbe->yesses=ftbe->nos=0;
742 ftbe->docid[mode]=curdoc;
743 }
744 if (ftbe->nos)
745 break;
746 if (yn_flag & FTB_FLAG_YES)
747 {
748 weight /= ftbe->ythresh;
749 ftbe->cur_weight += weight;
750 if ((int) ++ftbe->yesses == ythresh)
751 {
752 yn_flag=ftbe->flags;
753 weight=ftbe->cur_weight*ftbe->weight;
754 if (mode && ftbe->phrase)
755 {
756 int found= 0;
757
758 memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
759 while (_ma_ft_segiterator(&ftsi) && !found)
760 {
761 if (!ftsi.pos)
762 continue;
763 found= _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, ftbe, parser);
764 if (unlikely(found < 0))
765 return 1;
766 }
767 if (!found)
768 break;
769 } /* ftbe->quot */
770 }
771 else
772 break;
773 }
774 else
775 if (yn_flag & FTB_FLAG_NO)
776 {
777 /*
778 NOTE: special sort function of queue assures that all
779 (yn_flag & FTB_FLAG_NO) != 0
780 events for every particular subexpression will
781 "auto-magically" happen BEFORE all the
782 (yn_flag & FTB_FLAG_YES) != 0 events. So no
783 already matched expression can become not-matched again.
784 */
785 ++ftbe->nos;
786 break;
787 }
788 else
789 {
790 if (ftbe->ythresh)
791 weight/=3;
792 ftbe->cur_weight += weight;
793 if ((int) ftbe->yesses < ythresh)
794 break;
795 if (!(yn_flag & FTB_FLAG_WONLY))
796 yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
797 weight*= ftbe->weight;
798 }
799 }
800 return 0;
801 }
802
803
maria_ft_boolean_read_next(FT_INFO * ftb,char * record)804 int maria_ft_boolean_read_next(FT_INFO *ftb, char *record)
805 {
806 FTB_EXPR *ftbe;
807 FTB_WORD *ftbw;
808 MARIA_HA *info=ftb->info;
809 my_off_t curdoc;
810
811 if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
812 return -1;
813
814 /* black magic ON */
815 if ((int) _ma_check_index(info, ftb->keynr) < 0)
816 return my_errno;
817 if (_ma_readinfo(info, F_RDLCK, 1))
818 return my_errno;
819 /* black magic OFF */
820
821 if (!ftb->queue.elements)
822 return my_errno=HA_ERR_END_OF_FILE;
823
824 /* Attention!!! Address of a local variable is used here! See err: label */
825 ftb->queue.first_cmp_arg=(void *)&curdoc;
826
827 while (ftb->state == INDEX_SEARCH &&
828 (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) !=
829 HA_OFFSET_ERROR)
830 {
831 while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
832 {
833 if (unlikely(_ftb_climb_the_tree(ftb, ftbw, 0)))
834 {
835 my_errno= HA_ERR_OUT_OF_MEM;
836 goto err;
837 }
838
839 /* update queue */
840 _ft2_search(ftb, ftbw, 0);
841 queue_replace_top(&ftb->queue);
842 }
843
844 ftbe=ftb->root;
845 if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 &&
846 ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
847 {
848 /* curdoc matched ! */
849 if (is_tree_inited(&ftb->no_dupes) &&
850 tree_insert(&ftb->no_dupes, &curdoc, 0,
851 ftb->no_dupes.custom_arg)->count >1)
852 /* but it managed already to get past this line once */
853 continue;
854
855 info->cur_row.lastpos= curdoc;
856 /* Clear all states, except that the table was updated */
857 info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
858
859 if (!(*info->read_record)(info, (uchar *) record, curdoc))
860 {
861 info->update|= HA_STATE_AKTIV; /* Record is read */
862 if (ftb->with_scan &&
863 maria_ft_boolean_find_relevance(ftb, (uchar*)record, 0)==0)
864 continue; /* no match */
865 my_errno=0;
866 goto err;
867 }
868 goto err;
869 }
870 }
871 ftb->state=INDEX_DONE;
872 my_errno=HA_ERR_END_OF_FILE;
873 err:
874 ftb->queue.first_cmp_arg=(void *)0;
875 return my_errno;
876 }
877
878
879 typedef struct st_my_ftb_find_param
880 {
881 FT_INFO *ftb;
882 FT_SEG_ITERATOR *ftsi;
883 } MY_FTB_FIND_PARAM;
884
885
ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM * param,const char * word,int len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)886 static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
887 const char *word, int len,
888 MYSQL_FTPARSER_BOOLEAN_INFO
889 *boolean_info __attribute__((unused)))
890 {
891 MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
892 FT_INFO *ftb= ftb_param->ftb;
893 FTB_WORD *ftbw;
894 int a, b, c;
895 /*
896 Find right-most element in the array of query words matching this
897 word from a document.
898 */
899 for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
900 {
901 ftbw= ftb->list[c];
902 if (ha_compare_text(ftb->charset, (uchar*)word, len,
903 (uchar*)ftbw->word+1, ftbw->len-1,
904 (my_bool)(ftbw->flags&FTB_FLAG_TRUNC)) < 0)
905 b= c;
906 else
907 a= c;
908 }
909 /*
910 If there were no words with truncation operator, we iterate to the
911 beginning of an array until array element is equal to the word from
912 a document. This is done mainly because the same word may be
913 mentioned twice (or more) in the query.
914
915 In case query has words with truncation operator we must iterate
916 to the beginning of the array. There may be non-matching query words
917 between matching word with truncation operator and the right-most
918 matching element. E.g., if we're looking for 'aaa15' in an array of
919 'aaa1* aaa14 aaa15 aaa16'.
920
921 Worse of that there still may be match even if the binary search
922 above didn't find matching element. E.g., if we're looking for
923 'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will
924 stop at 'aaa16'.
925 */
926 for (; c >= 0; c--)
927 {
928 ftbw= ftb->list[c];
929 if (ha_compare_text(ftb->charset, (uchar*)word, len,
930 (uchar*)ftbw->word + 1,ftbw->len - 1,
931 (my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
932 {
933 if (ftb->with_scan & FTB_FLAG_TRUNC)
934 continue;
935 else
936 break;
937 }
938 if (ftbw->docid[1] == ftb->info->cur_row.lastpos)
939 continue;
940 ftbw->docid[1]= ftb->info->cur_row.lastpos;
941 if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
942 return 1;
943 }
944 return(0);
945 }
946
947
ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM * param,const char * doc,int len)948 static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
949 const char *doc, int len)
950 {
951 MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
952 FT_INFO *ftb= ftb_param->ftb;
953 uchar *end= (uchar*) doc + len;
954 FT_WORD w;
955 while (maria_ft_simple_get_word(ftb->charset, (uchar**)&doc, end, &w, TRUE))
956 param->mysql_add_word(param, (char*)w.pos, w.len, 0);
957 return(0);
958 }
959
960
maria_ft_boolean_find_relevance(FT_INFO * ftb,uchar * record,uint length)961 float maria_ft_boolean_find_relevance(FT_INFO *ftb, uchar *record, uint length)
962 {
963 FTB_EXPR *ftbe;
964 FT_SEG_ITERATOR ftsi, ftsi2;
965 MARIA_RECORD_POS docid= ftb->info->cur_row.lastpos;
966 MY_FTB_FIND_PARAM ftb_param;
967 MYSQL_FTPARSER_PARAM *param;
968 struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
969 &ft_default_parser :
970 ftb->info->s->keyinfo[ftb->keynr].parser;
971
972 if (docid == HA_OFFSET_ERROR)
973 return -2.0;
974 if (!ftb->queue.elements)
975 return 0;
976 if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
977 return 0;
978
979 if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
980 {
981 FTB_EXPR *x;
982 uint i;
983
984 for (i=0; i < ftb->queue.elements; i++)
985 {
986 ftb->list[i]->docid[1]=HA_OFFSET_ERROR;
987 for (x=ftb->list[i]->up; x; x=x->up)
988 x->docid[1]=HA_OFFSET_ERROR;
989 }
990 }
991
992 ftb->lastpos=docid;
993
994 if (ftb->keynr==NO_SUCH_KEY)
995 _ma_ft_segiterator_dummy_init(record, length, &ftsi);
996 else
997 _ma_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
998 memcpy(&ftsi2, &ftsi, sizeof(ftsi));
999
1000 ftb_param.ftb= ftb;
1001 ftb_param.ftsi= &ftsi2;
1002 param->mysql_parse= ftb_find_relevance_parse;
1003 param->mysql_add_word= ftb_find_relevance_add_word;
1004 param->mysql_ftparam= (void *)&ftb_param;
1005 param->flags= 0;
1006 param->cs= ftb->charset;
1007 param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
1008
1009 while (_ma_ft_segiterator(&ftsi))
1010 {
1011 if (!ftsi.pos)
1012 continue;
1013 param->doc= (char *)ftsi.pos;
1014 param->length= ftsi.len;
1015 if (unlikely(parser->parse(param)))
1016 return 0;
1017 }
1018 ftbe=ftb->root;
1019 if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
1020 ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
1021 { /* row matched ! */
1022 return ftbe->cur_weight;
1023 }
1024 else
1025 { /* match failed ! */
1026 return 0.0;
1027 }
1028 }
1029
1030
maria_ft_boolean_close_search(FT_INFO * ftb)1031 void maria_ft_boolean_close_search(FT_INFO *ftb)
1032 {
1033 if (is_tree_inited(& ftb->no_dupes))
1034 {
1035 delete_tree(&ftb->no_dupes, 0);
1036 }
1037 free_root(& ftb->mem_root, MYF(0));
1038 my_free(ftb);
1039 }
1040
1041
maria_ft_boolean_get_relevance(FT_INFO * ftb)1042 float maria_ft_boolean_get_relevance(FT_INFO *ftb)
1043 {
1044 return ftb->root->cur_weight;
1045 }
1046
1047
maria_ft_boolean_reinit_search(FT_INFO * ftb)1048 void maria_ft_boolean_reinit_search(FT_INFO *ftb)
1049 {
1050 _ftb_init_index_search(ftb);
1051 }
1052