1 /* Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
24 
25 #define FT_CORE
26 #include "ftdefs.h"
27 
28 /* search with natural language queries */
29 
30 typedef struct ft_doc_rec
31 {
32   my_off_t  dpos;
33   double    weight;
34 } FT_DOC;
35 
36 struct st_ft_info
37 {
38   struct _ft_vft *please;
39   MI_INFO  *info;
40   int       ndocs;
41   int       curdoc;
42   FT_DOC    doc[1];
43 };
44 
45 typedef struct st_all_in_one
46 {
47   MI_INFO    *info;
48   uint	      keynr;
49   const CHARSET_INFO *charset;
50   uchar      *keybuff;
51   TREE	      dtree;
52 } ALL_IN_ONE;
53 
54 typedef struct st_ft_superdoc
55 {
56     FT_DOC   doc;
57     FT_WORD *word_ptr;
58     double   tmp_weight;
59 } FT_SUPERDOC;
60 
FT_SUPERDOC_cmp(void * cmp_arg MY_ATTRIBUTE ((unused)),FT_SUPERDOC * p1,FT_SUPERDOC * p2)61 static int FT_SUPERDOC_cmp(void* cmp_arg MY_ATTRIBUTE((unused)),
62 			   FT_SUPERDOC *p1, FT_SUPERDOC *p2)
63 {
64   if (p1->doc.dpos < p2->doc.dpos)
65     return -1;
66   if (p1->doc.dpos == p2->doc.dpos)
67     return 0;
68   return 1;
69 }
70 
walk_and_match(FT_WORD * word,uint32 count,ALL_IN_ONE * aio)71 static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
72 {
73   int	       UNINIT_VAR(subkeys), r;
74   uint	       keylen, doc_cnt;
75   FT_SUPERDOC  sdoc, *sptr;
76   TREE_ELEMENT *selem;
77   double       gweight=1;
78   MI_INFO      *info=aio->info;
79   MYISAM_SHARE *share= info->s;
80   uchar        *keybuff=aio->keybuff;
81   MI_KEYDEF    *keyinfo=info->s->keyinfo+aio->keynr;
82   my_off_t     key_root;
83   uint         extra= HA_FT_WLEN + info->s->rec_reflength;
84 #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
85   float tmp_weight;
86 #else
87 #error
88 #endif
89 
90   DBUG_ENTER("walk_and_match");
91 
92   word->weight=LWS_FOR_QUERY;
93 
94   keylen=_ft_make_key(info,aio->keynr,keybuff,word,0);
95   keylen-=HA_FT_WLEN;
96   doc_cnt=0;
97 
98   if (share->concurrent_insert)
99     mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
100 
101   key_root= share->state.key_root[aio->keynr];
102 
103   /* Skip rows inserted by current inserted */
104   for (r=_mi_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ;
105        !r &&
106          (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
107          info->lastpos >= info->state->data_file_length ;
108        r= _mi_search_next(info, keyinfo, info->lastkey,
109                           info->lastkey_length, SEARCH_BIGGER, key_root))
110     ;
111 
112   if (share->concurrent_insert)
113     mysql_rwlock_unlock(&share->key_root_lock[aio->keynr]);
114 
115   info->update|= HA_STATE_AKTIV;              /* for _mi_test_if_changed() */
116 
117   /* The following should be safe, even if we compare doubles */
118   while (!r && gweight)
119   {
120 
121     if (keylen &&
122         ha_compare_text(aio->charset,info->lastkey+1,
123                         info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0))
124      break;
125 
126     if (subkeys<0)
127     {
128       if (doc_cnt)
129         DBUG_RETURN(1); /* index is corrupted */
130       /*
131         TODO here: unsafe optimization, should this word
132         be skipped (based on subkeys) ?
133       */
134       keybuff+=keylen;
135       keyinfo=& info->s->ft2_keyinfo;
136       key_root=info->lastpos;
137       keylen=0;
138       if (share->concurrent_insert)
139         mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
140       r=_mi_search_first(info, keyinfo, key_root);
141       goto do_skip;
142     }
143 #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
144     ft_floatXget(tmp_weight, info->lastkey+info->lastkey_length-extra);
145 #else
146 #error
147 #endif
148   /* The following should be safe, even if we compare doubles */
149     if (tmp_weight==0)
150       DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */
151 
152     sdoc.doc.dpos=info->lastpos;
153 
154     /* saving document matched into dtree */
155     if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg)))
156       DBUG_RETURN(1);
157 
158     sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem);
159 
160     if (selem->count==1) /* document's first match */
161       sptr->doc.weight=0;
162     else
163       sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight;
164 
165     sptr->word_ptr=word;
166     sptr->tmp_weight=tmp_weight;
167 
168     doc_cnt++;
169 
170     gweight=word->weight*GWS_IN_USE;
171     if (gweight < 0 || doc_cnt > 2000000)
172       gweight=0;
173 
174     if (share->concurrent_insert)
175       mysql_rwlock_rdlock(&share->key_root_lock[aio->keynr]);
176 
177     if (_mi_test_if_changed(info) == 0)
178 	r=_mi_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
179                           SEARCH_BIGGER, key_root);
180     else
181 	r=_mi_search(info, keyinfo, info->lastkey, info->lastkey_length,
182                      SEARCH_BIGGER, key_root);
183 do_skip:
184     while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
185            !r && info->lastpos >= info->state->data_file_length)
186       r= _mi_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
187                          SEARCH_BIGGER, key_root);
188 
189     if (share->concurrent_insert)
190       mysql_rwlock_unlock(&share->key_root_lock[aio->keynr]);
191   }
192   word->weight=gweight;
193 
194   DBUG_RETURN(0);
195 }
196 
197 
walk_and_copy(FT_SUPERDOC * from,uint32 count MY_ATTRIBUTE ((unused)),FT_DOC ** to)198 static int walk_and_copy(FT_SUPERDOC *from,
199 			 uint32 count MY_ATTRIBUTE((unused)), FT_DOC **to)
200 {
201   DBUG_ENTER("walk_and_copy");
202   from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
203   (*to)->dpos=from->doc.dpos;
204   (*to)->weight=from->doc.weight;
205   (*to)++;
206   DBUG_RETURN(0);
207 }
208 
walk_and_push(FT_SUPERDOC * from,uint32 count MY_ATTRIBUTE ((unused)),QUEUE * best)209 static int walk_and_push(FT_SUPERDOC *from,
210 			 uint32 count MY_ATTRIBUTE((unused)), QUEUE *best)
211 {
212   DBUG_ENTER("walk_and_copy");
213   from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
214   set_if_smaller(best->elements, ft_query_expansion_limit-1);
215   queue_insert(best, (uchar *)& from->doc);
216   DBUG_RETURN(0);
217 }
218 
219 
FT_DOC_cmp(void * unused MY_ATTRIBUTE ((unused)),FT_DOC * a,FT_DOC * b)220 static int FT_DOC_cmp(void *unused MY_ATTRIBUTE((unused)),
221                       FT_DOC *a, FT_DOC *b)
222 {
223   double c= b->weight - a->weight;
224   return ((c < 0) ? -1 : (c > 0) ? 1 : 0);
225 }
226 
227 
ft_init_nlq_search(MI_INFO * info,uint keynr,uchar * query,uint query_len,uint flags,uchar * record)228 FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, uchar *query,
229 			    uint query_len, uint flags, uchar *record)
230 {
231   TREE	      wtree;
232   ALL_IN_ONE  aio;
233   FT_DOC     *dptr;
234   FT_INFO    *dlist=NULL;
235   my_off_t    saved_lastpos=info->lastpos;
236   struct st_mysql_ftparser *parser;
237   MYSQL_FTPARSER_PARAM *ftparser_param;
238   DBUG_ENTER("ft_init_nlq_search");
239 
240 /* black magic ON */
241   if ((int) (keynr = _mi_check_index(info,keynr)) < 0)
242     DBUG_RETURN(NULL);
243   if (_mi_readinfo(info,F_RDLCK,1))
244     DBUG_RETURN(NULL);
245 /* black magic OFF */
246 
247   aio.info=info;
248   aio.keynr=keynr;
249   aio.charset=info->s->keyinfo[keynr].seg->charset;
250   aio.keybuff=info->lastkey+info->s->base.max_key_length;
251   parser= info->s->keyinfo[keynr].parser;
252   if (! (ftparser_param= ftparser_call_initializer(info, keynr, 0)))
253     goto err;
254 
255   memset(&wtree, 0, sizeof(wtree));
256 
257   init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
258             NULL, NULL);
259 
260   ft_parse_init(&wtree, aio.charset);
261   ftparser_param->flags= 0;
262   if (ft_parse(&wtree, query, query_len, parser, ftparser_param,
263                &wtree.mem_root))
264     goto err;
265 
266   if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
267 		left_root_right))
268     goto err;
269 
270   if (flags & FT_EXPAND && ft_query_expansion_limit)
271   {
272     QUEUE best;
273     init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp,
274 	       0);
275     tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
276               &best, left_root_right);
277     while (best.elements)
278     {
279       my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
280       if (!(*info->read_record)(info,docid,record))
281       {
282         info->update|= HA_STATE_AKTIV;
283         ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY;
284         if (unlikely(_mi_ft_parse(&wtree, info, keynr, record, ftparser_param,
285                                   &wtree.mem_root)))
286         {
287           delete_queue(&best);
288           goto err;
289         }
290       }
291     }
292     delete_queue(&best);
293     reset_tree(&aio.dtree);
294     if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
295                   left_root_right))
296       goto err;
297 
298   }
299 
300   /*
301     If ndocs == 0, this will not allocate RAM for FT_INFO.doc[],
302     so if ndocs == 0, FT_INFO.doc[] must not be accessed.
303    */
304   dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
305 			     sizeof(FT_DOC)*
306 			     (int)(aio.dtree.elements_in_tree-1),
307 			     MYF(0));
308   if (!dlist)
309     goto err;
310 
311   dlist->please= (struct _ft_vft *) & _ft_vft_nlq;
312   dlist->ndocs=aio.dtree.elements_in_tree;
313   dlist->curdoc=-1;
314   dlist->info=aio.info;
315   dptr=dlist->doc;
316 
317   tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy,
318 	    &dptr, left_root_right);
319 
320   if (flags & FT_SORTED)
321     my_qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp,
322               0);
323 
324 err:
325   delete_tree(&aio.dtree);
326   delete_tree(&wtree);
327   info->lastpos=saved_lastpos;
328   DBUG_RETURN(dlist);
329 }
330 
331 
ft_nlq_read_next(FT_INFO * handler,char * record)332 int ft_nlq_read_next(FT_INFO *handler, char *record)
333 {
334   MI_INFO *info= (MI_INFO *) handler->info;
335 
336   if (++handler->curdoc >= handler->ndocs)
337   {
338     --handler->curdoc;
339     return HA_ERR_END_OF_FILE;
340   }
341 
342   info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
343 
344   info->lastpos=handler->doc[handler->curdoc].dpos;
345   if (!(*info->read_record)(info,info->lastpos,(uchar*) record))
346   {
347     info->update|= HA_STATE_AKTIV;		/* Record is read */
348     return 0;
349   }
350   return my_errno;
351 }
352 
353 
ft_nlq_find_relevance(FT_INFO * handler,uchar * record MY_ATTRIBUTE ((unused)),uint length MY_ATTRIBUTE ((unused)))354 float ft_nlq_find_relevance(FT_INFO *handler,
355 			    uchar *record MY_ATTRIBUTE((unused)),
356 			    uint length MY_ATTRIBUTE((unused)))
357 {
358   int a,b,c;
359   FT_DOC  *docs=handler->doc;
360   my_off_t docid=handler->info->lastpos;
361 
362   if (docid == HA_POS_ERROR)
363     return -5.0;
364 
365   /* Assuming docs[] is sorted by dpos... */
366 
367   for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
368   {
369     if (docs[c].dpos > docid)
370       b=c;
371     else
372       a=c;
373   }
374   /* bounds check to avoid accessing unallocated handler->doc  */
375   if (a < handler->ndocs && docs[a].dpos == docid)
376     return (float) docs[a].weight;
377   else
378     return 0.0;
379 }
380 
381 
ft_nlq_close_search(FT_INFO * handler)382 void ft_nlq_close_search(FT_INFO *handler)
383 {
384   my_free(handler);
385 }
386 
387 
ft_nlq_get_relevance(FT_INFO * handler)388 float ft_nlq_get_relevance(FT_INFO *handler)
389 {
390   return (float) handler->doc[handler->curdoc].weight;
391 }
392 
393 
ft_nlq_reinit_search(FT_INFO * handler)394 void ft_nlq_reinit_search(FT_INFO *handler)
395 {
396   handler->curdoc=-1;
397 }
398 
399