1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15 
16 /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
17 
18 /*  TODO: add caching - pre-read several index entries at once */
19 
20 /*
21   Added optimization for full-text queries with plus-words. It was
22   implemented by sharing maximal document id (max_docid) variable
23   inside plus subtree. max_docid could be used by any word in plus
24   subtree, but it could be updated by plus-word only.
25 
26   Fulltext "smarter index merge" optimization assumes that rows
27   it gets are ordered by doc_id. That is not the case when we
28   search for a word with truncation operator. It may return
29   rows in random order. Thus we may not use "smarter index merge"
30   optimization with "trunc-words".
31 
32   The idea is: there is no need to search for docid smaller than
33   biggest docid inside current plus subtree or any upper plus subtree.
34 
35   Examples:
36   +word1 word2
37     share same max_docid
38     max_docid updated by word1
39   +word1 +(word2 word3)
40     share same max_docid
41     max_docid updated by word1
42   +(word1 -word2) +(+word3 word4)
43     share same max_docid
44     max_docid updated by word3
45    +word1 word2 (+word3 word4 (+word5 word6))
46     three subexpressions (including the top-level one),
47     every one has its own max_docid, updated by its plus word.
48     but for the search word6 uses
49     MY_MAX(word1.max_docid, word3.max_docid, word5.max_docid),
50     while word4 uses, accordingly,
51     MY_MAX(word1.max_docid, word3.max_docid).
52 */
53 
54 #define FT_CORE
55 #include "ma_ftdefs.h"
56 
57 /* search with boolean queries */
58 
59 static double _wghts[11]=
60 {
61   0.131687242798354,
62   0.197530864197531,
63   0.296296296296296,
64   0.444444444444444,
65   0.666666666666667,
66   1.000000000000000,
67   1.500000000000000,
68   2.250000000000000,
69   3.375000000000000,
70   5.062500000000000,
71   7.593750000000000};
72 static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */
73 
74 static double _nwghts[11]=
75 {
76  -0.065843621399177,
77  -0.098765432098766,
78  -0.148148148148148,
79  -0.222222222222222,
80  -0.333333333333334,
81  -0.500000000000000,
82  -0.750000000000000,
83  -1.125000000000000,
84  -1.687500000000000,
85  -2.531250000000000,
86  -3.796875000000000};
87 static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
88 
89 #define FTB_FLAG_TRUNC 1
90 /* At most one of the following flags can be set */
91 #define FTB_FLAG_YES   2
92 #define FTB_FLAG_NO    4
93 #define FTB_FLAG_WONLY 8
94 
95 typedef struct st_ftb_expr FTB_EXPR;
96 struct st_ftb_expr
97 {
98   FTB_EXPR *up;
99   uint      flags;
100 /* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
101   my_off_t  docid[2];
102   my_off_t  max_docid;
103   float     weight;
104   float     cur_weight;
105   LIST     *phrase;               /* phrase words */
106   LIST     *document;             /* for phrase search */
107   uint      yesses;               /* number of "yes" words matched */
108   uint      nos;                  /* number of "no"  words matched */
109   uint      ythresh;              /* number of "yes" words in expr */
110   uint      yweaks;               /* number of "yes" words for scan only */
111 };
112 
113 typedef struct st_ftb_word
114 {
115   FTB_EXPR  *up;
116   uint       flags;
117 /* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
118   my_off_t   docid[2];             /* for index search and for scan */
119   my_off_t   key_root;
120   FTB_EXPR  *max_docid_expr;
121   MARIA_KEYDEF *keyinfo;
122   struct st_ftb_word *prev;
123   float      weight;
124   uint       ndepth;
125   uint       len;
126   uchar      off;
127   uchar      word[1];
128 } FTB_WORD;
129 
130 typedef struct st_ft_info
131 {
132   struct _ft_vft *please;
133   MARIA_HA   *info;
134   CHARSET_INFO *charset;
135   FTB_EXPR  *root;
136   FTB_WORD **list;
137   FTB_WORD  *last_word;
138   MEM_ROOT   mem_root;
139   QUEUE      queue;
140   TREE       no_dupes;
141   my_off_t   lastpos;
142   uint       keynr;
143   uchar      with_scan;
144   enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
145 } FTB;
146 
FTB_WORD_cmp(my_off_t * v,FTB_WORD * a,FTB_WORD * b)147 static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
148 {
149   int i;
150 
151   /* if a==curdoc, take it as  a < b */
152   if (v && a->docid[0] == *v)
153     return -1;
154 
155   /* ORDER BY docid, ndepth DESC */
156   i=CMP_NUM(a->docid[0], b->docid[0]);
157   if (!i)
158     i=CMP_NUM(b->ndepth,a->ndepth);
159   return i;
160 }
161 
FTB_WORD_cmp_list(CHARSET_INFO * cs,FTB_WORD ** a,FTB_WORD ** b)162 static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
163 {
164   /* ORDER BY word, ndepth */
165   int i= ha_compare_text(cs, (uchar*) (*a)->word + 1,(*a)->len - 1,
166                              (uchar*) (*b)->word + 1,(*b)->len - 1, 0);
167   if (!i)
168     i=CMP_NUM((*a)->ndepth, (*b)->ndepth);
169   return i;
170 }
171 
172 
173 typedef struct st_my_ftb_param
174 {
175   FTB *ftb;
176   FTB_EXPR *ftbe;
177   uchar *up_quot;
178   uint depth;
179 } MY_FTB_PARAM;
180 
181 
ftb_query_add_word(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * info)182 static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
183                               const char *word, int word_len,
184                               MYSQL_FTPARSER_BOOLEAN_INFO *info)
185 {
186   MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
187   FTB_WORD *ftbw;
188   FTB_EXPR *ftbe, *tmp_expr;
189   FT_WORD *phrase_word;
190   LIST *tmp_element;
191   int r= info->weight_adjust;
192   float weight= (float)
193         (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
194 
195   switch (info->type) {
196     case FT_TOKEN_WORD:
197       ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
198                                    sizeof(FTB_WORD) + HA_MAX_KEY_BUFF);
199       ftbw->len= word_len + 1;
200       ftbw->flags= 0;
201       ftbw->off= 0;
202       if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
203       if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
204       if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
205       ftbw->weight= weight;
206       ftbw->up= ftb_param->ftbe;
207       ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
208       ftbw->ndepth= (info->yesno < 0) + ftb_param->depth;
209       ftbw->key_root= HA_OFFSET_ERROR;
210       memcpy(ftbw->word + 1, word, word_len);
211       ftbw->word[0]= word_len;
212       if (info->yesno > 0) ftbw->up->ythresh++;
213       ftb_param->ftb->queue.max_elements++;
214       ftbw->prev= ftb_param->ftb->last_word;
215       ftb_param->ftb->last_word= ftbw;
216       ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
217       for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
218         if (! (tmp_expr->flags & FTB_FLAG_YES))
219           break;
220       ftbw->max_docid_expr= tmp_expr;
221       /* fall through */
222     case FT_TOKEN_STOPWORD:
223       if (! ftb_param->up_quot) break;
224       phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
225       tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
226       phrase_word->pos= (uchar*)word;
227       phrase_word->len= word_len;
228       tmp_element->data= (void *)phrase_word;
229       ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
230       /* Allocate document list at this point.
231          It allows to avoid huge amount of allocs/frees for each row.*/
232       tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
233       tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
234       ftb_param->ftbe->document=
235         list_add(ftb_param->ftbe->document, tmp_element);
236       break;
237     case FT_TOKEN_LEFT_PAREN:
238       ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR));
239       ftbe->flags= 0;
240       if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
241       if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
242       ftbe->weight= weight;
243       ftbe->up= ftb_param->ftbe;
244       ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
245       ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
246       ftbe->phrase= NULL;
247       ftbe->document= 0;
248       if (info->quot) ftb_param->ftb->with_scan|= 2;
249       if (info->yesno > 0) ftbe->up->ythresh++;
250       ftb_param->ftbe= ftbe;
251       ftb_param->depth++;
252       ftb_param->up_quot= (uchar*)info->quot;
253       break;
254     case FT_TOKEN_RIGHT_PAREN:
255       if (ftb_param->ftbe->document)
256       {
257         /* Circuit document list */
258         for (tmp_element= ftb_param->ftbe->document;
259              tmp_element->next; tmp_element= tmp_element->next) /* no-op */;
260         tmp_element->next= ftb_param->ftbe->document;
261         ftb_param->ftbe->document->prev= tmp_element;
262       }
263       info->quot= 0;
264       if (ftb_param->ftbe->up)
265       {
266         DBUG_ASSERT(ftb_param->depth);
267         ftb_param->ftbe= ftb_param->ftbe->up;
268         ftb_param->depth--;
269         ftb_param->up_quot= 0;
270       }
271       break;
272     case FT_TOKEN_EOF:
273     default:
274       break;
275   }
276   return(0);
277 }
278 
279 
ftb_parse_query_internal(MYSQL_FTPARSER_PARAM * param,const char * query,int len)280 static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param,
281                                     const char *query, int len)
282 {
283   MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
284   MYSQL_FTPARSER_BOOLEAN_INFO info;
285   CHARSET_INFO *cs= ftb_param->ftb->charset;
286   const uchar **start= (const uchar**) &query;
287   uchar *end= (uchar*) query + len;
288   FT_WORD w;
289 
290   info.prev= ' ';
291   info.quot= 0;
292   while (maria_ft_get_word(cs, start, end, &w, &info))
293     param->mysql_add_word(param, (char*)w.pos, w.len, &info);
294   return(0);
295 }
296 
297 
_ftb_parse_query(FTB * ftb,uchar * query,uint len,struct st_mysql_ftparser * parser)298 static int _ftb_parse_query(FTB *ftb, uchar *query, uint len,
299                             struct st_mysql_ftparser *parser)
300 {
301   MYSQL_FTPARSER_PARAM *param;
302   MY_FTB_PARAM ftb_param;
303   DBUG_ENTER("_ftb_parse_query");
304   DBUG_ASSERT(parser);
305 
306   if (ftb->state != UNINITIALIZED)
307     DBUG_RETURN(0);
308   if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
309     DBUG_RETURN(1);
310 
311   ftb_param.ftb= ftb;
312   ftb_param.depth= 0;
313   ftb_param.ftbe= ftb->root;
314   ftb_param.up_quot= 0;
315 
316   param->mysql_parse= ftb_parse_query_internal;
317   param->mysql_add_word= ftb_query_add_word;
318   param->mysql_ftparam= (void *)&ftb_param;
319   param->cs= ftb->charset;
320   param->doc= (char*)query;
321   param->length= len;
322   param->flags= 0;
323   param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
324   DBUG_RETURN(parser->parse(param));
325 }
326 
327 
_ftb_no_dupes_cmp(void * not_used,const void * a,const void * b)328 static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
329                              const void *a,const void *b)
330 {
331   return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
332 }
333 
334 
335 /* returns 1 if the search was finished (must-word wasn't found) */
336 
_ft2_search_no_lock(FTB * ftb,FTB_WORD * ftbw,my_bool init_search)337 static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
338 {
339   int r;
340   int subkeys=1;
341   my_bool can_go_down;
342   MARIA_HA *info=ftb->info;
343   uint UNINIT_VAR(off), extra=HA_FT_WLEN+info->s->base.rec_reflength;
344   uchar *lastkey_buf= ftbw->word+ftbw->off;
345   MARIA_KEY key;
346 
347   if (ftbw->flags & FTB_FLAG_TRUNC)
348     lastkey_buf+=ftbw->len;
349 
350   if (init_search)
351   {
352     ftbw->key_root=info->s->state.key_root[ftb->keynr];
353     ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
354     info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo;
355     info->lastinx= ~0;                            /* Safety */
356     key.data= ftbw->word;
357     key.data_length= ftbw->len;
358     key.ref_length= 0;
359     key.flag= 0;
360 
361     r= _ma_search(info, &key, SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
362   }
363   else
364   {
365     uint sflag= SEARCH_BIGGER;
366     my_off_t max_docid=0;
367     FTB_EXPR *tmp;
368 
369     for (tmp= ftbw->max_docid_expr; tmp; tmp= tmp->up)
370       set_if_bigger(max_docid, tmp->max_docid);
371 
372     if (ftbw->docid[0] < max_docid)
373     {
374       sflag|= SEARCH_SAME;
375       _ma_dpointer(info->s, (uchar*) (ftbw->word + ftbw->len + HA_FT_WLEN),
376                    max_docid);
377     }
378 
379     info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo;
380     info->lastinx= ~0;                            /* Safety */
381     key.data= lastkey_buf;
382     key.data_length= USE_WHOLE_KEY;
383     key.ref_length= 0;
384     key.flag= 0;
385 
386     r= _ma_search(info, &key, sflag, ftbw->key_root);
387   }
388 
389   can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)));
390   /* Skip rows inserted by concurrent insert */
391   while (!r)
392   {
393     if (can_go_down)
394     {
395       /* going down ? */
396       off= info->last_key.data_length + info->last_key.ref_length - extra;
397       subkeys=ft_sintXkorr(info->last_key.data + off);
398     }
399     if (subkeys<0 || info->cur_row.lastpos < info->state->data_file_length)
400       break;
401     r= _ma_search_next(info, &info->last_key, SEARCH_BIGGER, ftbw->key_root);
402   }
403 
404   if (!r && !ftbw->off)
405   {
406     r= ha_compare_text(ftb->charset,
407                        info->last_key.data+1,
408                        info->last_key.data_length + info->last_key.ref_length-
409                        extra-1,
410                        (uchar*) ftbw->word+1,
411                        ftbw->len-1,
412                        (my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
413   }
414 
415   if (r) /* not found */
416   {
417     if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
418     {
419       ftbw->docid[0]=HA_OFFSET_ERROR;
420       if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
421       {
422         /*
423           This word MUST BE present in every document returned,
424           so we can stop the search right now
425         */
426         ftb->state=INDEX_DONE;
427         return 1; /* search is done */
428       }
429       else
430         return 0;
431     }
432 
433     /* going up to the first-level tree to continue search there */
434     _ma_dpointer(info->s, (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
435     ftbw->key_root=info->s->state.key_root[ftb->keynr];
436     ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
437     ftbw->off=0;
438     return _ft2_search_no_lock(ftb, ftbw, 0);
439   }
440 
441   /* matching key found */
442   memcpy(lastkey_buf, info->last_key.data,
443          info->last_key.data_length + info->last_key.ref_length);
444   if (lastkey_buf == ftbw->word)
445     ftbw->len= info->last_key.data_length + info->last_key.ref_length - extra;
446 
447   /* going down ? */
448   if (subkeys<0)
449   {
450     /*
451       yep, going down, to the second-level tree
452       TODO here: subkey-based optimization
453     */
454     ftbw->off=off;
455     ftbw->key_root= info->cur_row.lastpos;
456     ftbw->keyinfo= info->last_key.keyinfo= & info->s->ft2_keyinfo;
457     r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root);
458     DBUG_ASSERT(r==0);  /* found something */
459     memcpy(lastkey_buf+off, info->last_key.data,
460            info->last_key.data_length + info->last_key.ref_length);
461   }
462   ftbw->docid[0]= info->cur_row.lastpos;
463   if (ftbw->flags & FTB_FLAG_YES && !(ftbw->flags & FTB_FLAG_TRUNC))
464     ftbw->max_docid_expr->max_docid= info->cur_row.lastpos;
465   return 0;
466 }
467 
_ft2_search(FTB * ftb,FTB_WORD * ftbw,my_bool init_search)468 static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
469 {
470   int r;
471   MARIA_SHARE *share= ftb->info->s;
472   if (share->lock_key_trees)
473     mysql_rwlock_rdlock(&share->keyinfo[ftb->keynr].root_lock);
474   r= _ft2_search_no_lock(ftb, ftbw, init_search);
475   if (share->lock_key_trees)
476     mysql_rwlock_unlock(&share->keyinfo[ftb->keynr].root_lock);
477   return r;
478 }
479 
480 
_ftb_init_index_search(FT_INFO * ftb)481 static void _ftb_init_index_search(FT_INFO *ftb)
482 {
483   int i;
484   FTB_WORD   *ftbw;
485 
486   if (ftb->state == UNINITIALIZED || ftb->keynr == NO_SUCH_KEY)
487     return;
488   ftb->state=INDEX_SEARCH;
489 
490   for (i= queue_last_element(&ftb->queue);
491        (int) i >= (int) queue_first_element(&ftb->queue);
492        i--)
493   {
494     ftbw=(FTB_WORD *)(queue_element(&ftb->queue, i));
495 
496     if (ftbw->flags & FTB_FLAG_TRUNC)
497     {
498       /*
499         special treatment for truncation operator
500         1. there are some (besides this) +words
501            | no need to search in the index, it can never ADD new rows
502            | to the result, and to remove half-matched rows we do scan anyway
503         2. -trunc*
504            | same as 1.
505         3. in 1 and 2, +/- need not be on the same expr. level,
506            but can be on any upper level, as in +word +(trunc1* trunc2*)
507         4. otherwise
508            | We have to index-search for this prefix.
509            | It may cause duplicates, as in the index (sorted by <word,docid>)
510            |   <aaaa,row1>
511            |   <aabb,row2>
512            |   <aacc,row1>
513            | Searching for "aa*" will find row1 twice...
514       */
515       FTB_EXPR *ftbe;
516       for (ftbe=(FTB_EXPR*)ftbw;
517            ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
518            ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
519       {
520         if (ftbe->flags & FTB_FLAG_NO ||                     /* 2 */
521             ftbe->up->ythresh - ftbe->up->yweaks >
522             (uint) MY_TEST(ftbe->flags & FTB_FLAG_YES))      /* 1 */
523         {
524           FTB_EXPR *top_ftbe=ftbe->up;
525           ftbw->docid[0]=HA_OFFSET_ERROR;
526           for (ftbe=(FTB_EXPR *)ftbw;
527                ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
528                ftbe=ftbe->up)
529               ftbe->up->yweaks++;
530           ftbe=0;
531           break;
532         }
533       }
534       if (!ftbe)
535         continue;
536       /* 4 */
537       if (!is_tree_inited(& ftb->no_dupes))
538         init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t),
539                   _ftb_no_dupes_cmp,0,0,0);
540       else
541         reset_tree(& ftb->no_dupes);
542     }
543 
544     ftbw->off=0; /* in case of reinit */
545     if (_ft2_search(ftb, ftbw, 1))
546       return;
547   }
548   queue_fix(& ftb->queue);
549 }
550 
551 
maria_ft_init_boolean_search(MARIA_HA * info,uint keynr,uchar * query,uint query_len,CHARSET_INFO * cs)552 FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr,
553                                        uchar *query, uint query_len,
554                                        CHARSET_INFO *cs)
555 {
556   FTB       *ftb;
557   FTB_EXPR  *ftbe;
558   FTB_WORD  *ftbw;
559 
560   if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
561     return 0;
562   ftb->please= (struct _ft_vft *) & _ma_ft_vft_boolean;
563   ftb->state=UNINITIALIZED;
564   ftb->info=info;
565   ftb->keynr=keynr;
566   ftb->charset=cs;
567   DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset);
568   ftb->with_scan=0;
569   ftb->lastpos=HA_OFFSET_ERROR;
570   bzero(& ftb->no_dupes, sizeof(TREE));
571   ftb->last_word= 0;
572 
573   init_alloc_root(&ftb->mem_root, "fulltext", 1024, 1024, 0);
574   ftb->queue.max_elements= 0;
575   if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR))))
576     goto err;
577   ftbe->weight=1;
578   ftbe->flags=FTB_FLAG_YES;
579   ftbe->nos=1;
580   ftbe->up=0;
581   ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
582   ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
583   ftbe->phrase= NULL;
584   ftbe->document= 0;
585   ftb->root=ftbe;
586   if (unlikely(_ftb_parse_query(ftb, query, query_len,
587                                 keynr == NO_SUCH_KEY ? &ft_default_parser :
588                                 info->s->keyinfo[keynr].parser)))
589     goto err;
590   /*
591     Hack: instead of init_queue, we'll use reinit queue to be able
592     to alloc queue with alloc_root()
593   */
594   if (! (ftb->queue.root= (uchar **)alloc_root(&ftb->mem_root,
595                                               (ftb->queue.max_elements + 1) *
596                                               sizeof(void *))))
597     goto err;
598   reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
599                (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0, 0, 0);
600   for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
601     queue_insert(&ftb->queue, (uchar *)ftbw);
602   ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
603                                      sizeof(FTB_WORD *)*ftb->queue.elements);
604   memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
605   my_qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
606             (qsort2_cmp)FTB_WORD_cmp_list, (void*) ftb->charset);
607   if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
608   ftb->state=READY;
609   return ftb;
610 err:
611   free_root(& ftb->mem_root, MYF(0));
612   my_free(ftb);
613   return 0;
614 }
615 
616 
617 typedef struct st_my_ftb_phrase_param
618 {
619   LIST *phrase;
620   LIST *document;
621   CHARSET_INFO *cs;
622   uint phrase_length;
623   uint document_length;
624   uint match;
625 } MY_FTB_PHRASE_PARAM;
626 
627 
ftb_phrase_add_word(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)628 static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param,
629                                const char *word, int word_len,
630                                MYSQL_FTPARSER_BOOLEAN_INFO
631                                *boolean_info __attribute__((unused)))
632 {
633   MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
634   FT_WORD *w= (FT_WORD *)phrase_param->document->data;
635   LIST *phrase, *document;
636   w->pos= (uchar*)word;
637   w->len= word_len;
638   phrase_param->document= phrase_param->document->prev;
639   if (phrase_param->phrase_length > phrase_param->document_length)
640   {
641     phrase_param->document_length++;
642     return 0;
643   }
644   /* TODO: rewrite phrase search to avoid
645      comparing the same word twice. */
646   for (phrase= phrase_param->phrase, document= phrase_param->document->next;
647        phrase; phrase= phrase->next, document= document->next)
648   {
649     FT_WORD *phrase_word= (FT_WORD *)phrase->data;
650     FT_WORD *document_word= (FT_WORD *)document->data;
651     if (my_strnncoll(phrase_param->cs, (uchar*) phrase_word->pos,
652                      phrase_word->len,
653                      (uchar*) document_word->pos, document_word->len))
654       return 0;
655   }
656   phrase_param->match++;
657   return 0;
658 }
659 
660 
ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM * param,const char * document,int len)661 static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param,
662                                      const char *document, int len)
663 {
664   FT_WORD word;
665   MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
666   const uchar *docend= (uchar*)document + len;
667   while (maria_ft_simple_get_word(phrase_param->cs, (uchar**)&document,
668                                   docend, &word, FALSE))
669   {
670     param->mysql_add_word(param, (char*)word.pos, word.len, 0);
671     if (phrase_param->match)
672       break;
673   }
674   return 0;
675 }
676 
677 
678 /*
679   Checks if given buffer matches phrase list.
680 
681   SYNOPSIS
682     _ftb_check_phrase()
683     s0     start of buffer
684     e0     end of buffer
685     phrase broken into list phrase
686     cs     charset info
687 
688   RETURN VALUE
689     1 is returned if phrase found, 0 else.
690     -1 is returned if error occurs.
691 */
692 
_ftb_check_phrase(FTB * ftb,const uchar * document,uint len,FTB_EXPR * ftbe,struct st_mysql_ftparser * parser)693 static int _ftb_check_phrase(FTB *ftb, const uchar *document, uint len,
694                              FTB_EXPR *ftbe, struct st_mysql_ftparser *parser)
695 {
696   MY_FTB_PHRASE_PARAM ftb_param;
697   MYSQL_FTPARSER_PARAM *param;
698   DBUG_ENTER("_ftb_check_phrase");
699   DBUG_ASSERT(parser);
700 
701   if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 1)))
702     DBUG_RETURN(0);
703   ftb_param.phrase= ftbe->phrase;
704   ftb_param.document= ftbe->document;
705   ftb_param.cs= ftb->charset;
706   ftb_param.phrase_length= list_length(ftbe->phrase);
707   ftb_param.document_length= 1;
708   ftb_param.match= 0;
709 
710   param->mysql_parse= ftb_check_phrase_internal;
711   param->mysql_add_word= ftb_phrase_add_word;
712   param->mysql_ftparam= (void *)&ftb_param;
713   param->cs= ftb->charset;
714   param->doc= (char *)document;
715   param->length= len;
716   param->flags= 0;
717   param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
718   if (unlikely(parser->parse(param)))
719     return -1;
720   DBUG_RETURN(ftb_param.match ? 1 : 0);
721 }
722 
723 
_ftb_climb_the_tree(FTB * ftb,FTB_WORD * ftbw,FT_SEG_ITERATOR * ftsi_orig)724 static int _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
725 {
726   FT_SEG_ITERATOR ftsi;
727   FTB_EXPR *ftbe;
728   float weight=ftbw->weight;
729   int  yn_flag= ftbw->flags, ythresh, mode=(ftsi_orig != 0);
730   my_off_t curdoc=ftbw->docid[mode];
731   struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
732                                     &ft_default_parser :
733                                     ftb->info->s->keyinfo[ftb->keynr].parser;
734 
735   for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
736   {
737     ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks);
738     if (ftbe->docid[mode] != curdoc)
739     {
740       ftbe->cur_weight=0;
741       ftbe->yesses=ftbe->nos=0;
742       ftbe->docid[mode]=curdoc;
743     }
744     if (ftbe->nos)
745       break;
746     if (yn_flag & FTB_FLAG_YES)
747     {
748       weight /= ftbe->ythresh;
749       ftbe->cur_weight += weight;
750       if ((int) ++ftbe->yesses == ythresh)
751       {
752         yn_flag=ftbe->flags;
753         weight=ftbe->cur_weight*ftbe->weight;
754         if (mode && ftbe->phrase)
755         {
756           int found= 0;
757 
758           memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
759           while (_ma_ft_segiterator(&ftsi) && !found)
760           {
761             if (!ftsi.pos)
762               continue;
763             found= _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, ftbe, parser);
764             if (unlikely(found < 0))
765               return 1;
766           }
767           if (!found)
768             break;
769         } /* ftbe->quot */
770       }
771       else
772         break;
773     }
774     else
775     if (yn_flag & FTB_FLAG_NO)
776     {
777       /*
778         NOTE: special sort function of queue assures that all
779         (yn_flag & FTB_FLAG_NO) != 0
780         events for every particular subexpression will
781         "auto-magically" happen BEFORE all the
782         (yn_flag & FTB_FLAG_YES) != 0 events. So no
783         already matched expression can become not-matched again.
784       */
785       ++ftbe->nos;
786       break;
787     }
788     else
789     {
790       if (ftbe->ythresh)
791         weight/=3;
792       ftbe->cur_weight +=  weight;
793       if ((int) ftbe->yesses < ythresh)
794         break;
795       if (!(yn_flag & FTB_FLAG_WONLY))
796         yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
797       weight*= ftbe->weight;
798     }
799   }
800   return 0;
801 }
802 
803 
maria_ft_boolean_read_next(FT_INFO * ftb,char * record)804 int maria_ft_boolean_read_next(FT_INFO *ftb, char *record)
805 {
806   FTB_EXPR  *ftbe;
807   FTB_WORD  *ftbw;
808   MARIA_HA   *info=ftb->info;
809   my_off_t   curdoc;
810 
811   if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
812     return -1;
813 
814   /* black magic ON */
815   if ((int) _ma_check_index(info, ftb->keynr) < 0)
816     return my_errno;
817   if (_ma_readinfo(info, F_RDLCK, 1))
818     return my_errno;
819   /* black magic OFF */
820 
821   if (!ftb->queue.elements)
822     return my_errno=HA_ERR_END_OF_FILE;
823 
824   /* Attention!!! Address of a local variable is used here! See err: label */
825   ftb->queue.first_cmp_arg=(void *)&curdoc;
826 
827   while (ftb->state == INDEX_SEARCH &&
828          (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) !=
829          HA_OFFSET_ERROR)
830   {
831     while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
832     {
833       if (unlikely(_ftb_climb_the_tree(ftb, ftbw, 0)))
834       {
835         my_errno= HA_ERR_OUT_OF_MEM;
836         goto err;
837       }
838 
839       /* update queue */
840       _ft2_search(ftb, ftbw, 0);
841       queue_replace_top(&ftb->queue);
842     }
843 
844     ftbe=ftb->root;
845     if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 &&
846         ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
847     {
848       /* curdoc matched ! */
849       if (is_tree_inited(&ftb->no_dupes) &&
850           tree_insert(&ftb->no_dupes, &curdoc, 0,
851                       ftb->no_dupes.custom_arg)->count >1)
852         /* but it managed already to get past this line once */
853         continue;
854 
855       info->cur_row.lastpos= curdoc;
856       /* Clear all states, except that the table was updated */
857       info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
858 
859       if (!(*info->read_record)(info, (uchar *) record, curdoc))
860       {
861         info->update|= HA_STATE_AKTIV;          /* Record is read */
862         if (ftb->with_scan &&
863             maria_ft_boolean_find_relevance(ftb, (uchar*)record, 0)==0)
864             continue; /* no match */
865         my_errno=0;
866         goto err;
867       }
868       goto err;
869     }
870   }
871   ftb->state=INDEX_DONE;
872   my_errno=HA_ERR_END_OF_FILE;
873 err:
874   ftb->queue.first_cmp_arg=(void *)0;
875   return my_errno;
876 }
877 
878 
879 typedef struct st_my_ftb_find_param
880 {
881   FT_INFO *ftb;
882   FT_SEG_ITERATOR *ftsi;
883 } MY_FTB_FIND_PARAM;
884 
885 
ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM * param,const char * word,int len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)886 static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
887                                        const char *word, int len,
888                                        MYSQL_FTPARSER_BOOLEAN_INFO
889                                        *boolean_info __attribute__((unused)))
890 {
891   MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
892   FT_INFO *ftb= ftb_param->ftb;
893   FTB_WORD *ftbw;
894   int a, b, c;
895   /*
896     Find right-most element in the array of query words matching this
897     word from a document.
898   */
899   for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
900   {
901     ftbw= ftb->list[c];
902     if (ha_compare_text(ftb->charset, (uchar*)word, len,
903                         (uchar*)ftbw->word+1, ftbw->len-1,
904                         (my_bool)(ftbw->flags&FTB_FLAG_TRUNC)) < 0)
905       b= c;
906     else
907       a= c;
908   }
909   /*
910     If there were no words with truncation operator, we iterate to the
911     beginning of an array until array element is equal to the word from
912     a document. This is done mainly because the same word may be
913     mentioned twice (or more) in the query.
914 
915     In case query has words with truncation operator we must iterate
916     to the beginning of the array. There may be non-matching query words
917     between matching word with truncation operator and the right-most
918     matching element. E.g., if we're looking for 'aaa15' in an array of
919     'aaa1* aaa14 aaa15 aaa16'.
920 
921     Worse of that there still may be match even if the binary search
922     above didn't find matching element. E.g., if we're looking for
923     'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will
924     stop at 'aaa16'.
925   */
926   for (; c >= 0; c--)
927   {
928     ftbw= ftb->list[c];
929     if (ha_compare_text(ftb->charset, (uchar*)word, len,
930                         (uchar*)ftbw->word + 1,ftbw->len - 1,
931                         (my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
932     {
933       if (ftb->with_scan & FTB_FLAG_TRUNC)
934         continue;
935       else
936         break;
937     }
938     if (ftbw->docid[1] == ftb->info->cur_row.lastpos)
939       continue;
940     ftbw->docid[1]= ftb->info->cur_row.lastpos;
941     if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
942       return 1;
943   }
944   return(0);
945 }
946 
947 
ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM * param,const char * doc,int len)948 static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
949                                     const char *doc, int len)
950 {
951   MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
952   FT_INFO *ftb= ftb_param->ftb;
953   uchar *end= (uchar*) doc + len;
954   FT_WORD w;
955   while (maria_ft_simple_get_word(ftb->charset, (uchar**)&doc, end, &w, TRUE))
956     param->mysql_add_word(param, (char*)w.pos, w.len, 0);
957   return(0);
958 }
959 
960 
maria_ft_boolean_find_relevance(FT_INFO * ftb,uchar * record,uint length)961 float maria_ft_boolean_find_relevance(FT_INFO *ftb, uchar *record, uint length)
962 {
963   FTB_EXPR *ftbe;
964   FT_SEG_ITERATOR ftsi, ftsi2;
965   MARIA_RECORD_POS docid= ftb->info->cur_row.lastpos;
966   MY_FTB_FIND_PARAM ftb_param;
967   MYSQL_FTPARSER_PARAM *param;
968   struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
969                                     &ft_default_parser :
970                                     ftb->info->s->keyinfo[ftb->keynr].parser;
971 
972   if (docid == HA_OFFSET_ERROR)
973     return -2.0;
974   if (!ftb->queue.elements)
975     return 0;
976   if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
977     return 0;
978 
979   if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
980   {
981     FTB_EXPR *x;
982     uint i;
983 
984     for (i=0; i < ftb->queue.elements; i++)
985     {
986       ftb->list[i]->docid[1]=HA_OFFSET_ERROR;
987       for (x=ftb->list[i]->up; x; x=x->up)
988         x->docid[1]=HA_OFFSET_ERROR;
989     }
990   }
991 
992   ftb->lastpos=docid;
993 
994   if (ftb->keynr==NO_SUCH_KEY)
995     _ma_ft_segiterator_dummy_init(record, length, &ftsi);
996   else
997     _ma_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
998   memcpy(&ftsi2, &ftsi, sizeof(ftsi));
999 
1000   ftb_param.ftb= ftb;
1001   ftb_param.ftsi= &ftsi2;
1002   param->mysql_parse= ftb_find_relevance_parse;
1003   param->mysql_add_word= ftb_find_relevance_add_word;
1004   param->mysql_ftparam= (void *)&ftb_param;
1005   param->flags= 0;
1006   param->cs= ftb->charset;
1007   param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
1008 
1009   while (_ma_ft_segiterator(&ftsi))
1010   {
1011     if (!ftsi.pos)
1012       continue;
1013     param->doc= (char *)ftsi.pos;
1014     param->length= ftsi.len;
1015     if (unlikely(parser->parse(param)))
1016       return 0;
1017   }
1018   ftbe=ftb->root;
1019   if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
1020       ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
1021   { /* row matched ! */
1022     return ftbe->cur_weight;
1023   }
1024   else
1025   { /* match failed ! */
1026     return 0.0;
1027   }
1028 }
1029 
1030 
maria_ft_boolean_close_search(FT_INFO * ftb)1031 void maria_ft_boolean_close_search(FT_INFO *ftb)
1032 {
1033   if (is_tree_inited(& ftb->no_dupes))
1034   {
1035     delete_tree(&ftb->no_dupes, 0);
1036   }
1037   free_root(& ftb->mem_root, MYF(0));
1038   my_free(ftb);
1039 }
1040 
1041 
maria_ft_boolean_get_relevance(FT_INFO * ftb)1042 float maria_ft_boolean_get_relevance(FT_INFO *ftb)
1043 {
1044   return ftb->root->cur_weight;
1045 }
1046 
1047 
maria_ft_boolean_reinit_search(FT_INFO * ftb)1048 void maria_ft_boolean_reinit_search(FT_INFO *ftb)
1049 {
1050   _ftb_init_index_search(ftb);
1051 }
1052