1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file fts/fts0fts.cc
28  Full Text Search interface
29  ***********************************************************************/
30 
31 #include <current_thd.h>
32 #include <sys/types.h>
33 #include <new>
34 
35 #include "btr0pcur.h"
36 #include "dict0priv.h"
37 #include "dict0stats.h"
38 #include "dict0types.h"
39 #include "fts0fts.h"
40 #include "fts0plugin.h"
41 #include "fts0priv.h"
42 #include "fts0types.h"
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "ha_prototypes.h"
46 #include "lob0lob.h"
47 
48 #include "my_dbug.h"
49 
50 #include "dict0dd.h"
51 #include "lob0lob.h"
52 #include "row0mysql.h"
53 #include "row0sel.h"
54 #include "row0upd.h"
55 #include "sync0sync.h"
56 #include "trx0roll.h"
57 #include "ut0new.h"
58 
59 static const ulint FTS_MAX_ID_LEN = 32;
60 
61 /** Column name from the FTS config table */
62 #define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
63 
64 /** Check whether an aux table name refers to an obsolete table
65 by looking for one of the obsolete table keywords in the name */
66 #define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \
67   (strstr((table_name), "DOC_ID") != NULL ||  \
68    strstr((table_name), "ADDED") != NULL ||   \
69    strstr((table_name), "STOPWORDS") != NULL)
70 
71 /** This is maximum FTS cache for each table and would be
72 a configurable variable */
73 ulong fts_max_cache_size;
74 
75 /** Whether the total memory used for FTS cache is exhausted, and we will
76 need a sync to free some memory */
77 bool fts_need_sync = false;
78 
79 /** Variable specifying the total memory allocated for FTS cache */
80 ulong fts_max_total_cache_size;
81 
82 /** This is FTS result cache limit for each query and would be
83 a configurable variable */
84 ulong fts_result_cache_limit;
85 
86 /** Variable specifying the maximum FTS token size */
87 ulong fts_max_token_size;
88 
89 /** Variable specifying the minimum FTS token size */
90 ulong fts_min_token_size;
91 
92 // FIXME: testing
93 static ib_time_t elapsed_time = 0;
94 static ulint n_nodes = 0;
95 
96 #ifdef FTS_CACHE_SIZE_DEBUG
97 /** The cache size permissible lower limit (1M) */
98 static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
99 
100 /** The cache size permissible upper limit (1G) */
101 static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
102 #endif
103 
104 /** Time to sleep after DEADLOCK error before retrying operation. */
105 static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
106 
107 /** variable to record innodb_fts_internal_tbl_name for information
108 schema table INNODB_FTS_INSERTED etc. */
109 char *fts_internal_tbl_name = nullptr;
110 
111 /** InnoDB default stopword list:
112 There are different versions of stopwords, the stop words listed
113 below comes from "Google Stopword" list. Reference:
114 http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
115 The final version of InnoDB default stopword list is still pending
116 for decision */
117 const char *fts_default_stopword[] = {
118     "a",    "about", "an",  "are",  "as",   "at",    "be",   "by",
119     "com",  "de",    "en",  "for",  "from", "how",   "i",    "in",
120     "is",   "it",    "la",  "of",   "on",   "or",    "that", "the",
121     "this", "to",    "was", "what", "when", "where", "who",  "will",
122     "with", "und",   "the", "www",  nullptr};
123 
124 /** FTS auxiliary table prefix that are common to all FT indexes.*/
125 const char *FTS_PREFIX = "fts_";
126 
127 /** FTS auxiliary table prefix that are common to all FT indexes.*/
128 const char *FTS_PREFIX_5_7 = "FTS_";
129 
130 /** FTS auxiliary table suffixes that are common to all FT indexes. */
131 const char *fts_common_tables[] = {"being_deleted", "being_deleted_cache",
132                                    "config",        "deleted",
133                                    "deleted_cache", nullptr};
134 
135 const char *FTS_SUFFIX_BEING_DELETED = fts_common_tables[0];
136 const char *FTS_SUFFIX_BEING_DELETED_CACHE = fts_common_tables[1];
137 const char *FTS_SUFFIX_CONFIG = fts_common_tables[2];
138 const char *FTS_SUFFIX_DELETED = fts_common_tables[3];
139 const char *FTS_SUFFIX_DELETED_CACHE = fts_common_tables[4];
140 
141 /** FTS auxiliary table suffixes that are common to all FT indexes. */
142 const char *fts_common_tables_5_7[] = {"BEING_DELETED", "BEING_DELETED_CACHE",
143                                        "CONFIG",        "DELETED",
144                                        "DELETED_CACHE", nullptr};
145 
146 const char *FTS_SUFFIX_CONFIG_5_7 = fts_common_tables_5_7[2];
147 
148 /** FTS auxiliary INDEX split intervals. */
149 const fts_index_selector_t fts_index_selector[] = {
150     {9, "index_1"},  {65, "index_2"}, {70, "index_3"}, {75, "index_4"},
151     {80, "index_5"}, {85, "index_6"}, {0, nullptr}};
152 
153 /** FTS auxiliary INDEX split intervals. */
154 const fts_index_selector_t fts_index_selector_5_7[] = {
155     {9, "INDEX_1"},  {65, "INDEX_2"}, {70, "INDEX_3"}, {75, "INDEX_4"},
156     {80, "INDEX_5"}, {85, "INDEX_6"}, {0, nullptr}};
157 
158 /** Default config values for FTS indexes on a table. */
159 static const char *fts_config_table_insert_values_sql =
160     "BEGIN\n"
161     "\n"
162     "INSERT INTO $config_table VALUES('" FTS_MAX_CACHE_SIZE_IN_MB
163     "', '256');\n"
164     ""
165     "INSERT INTO $config_table VALUES('" FTS_OPTIMIZE_LIMIT_IN_SECS
166     "', '180');\n"
167     ""
168     "INSERT INTO $config_table VALUES ('" FTS_SYNCED_DOC_ID
169     "', '0');\n"
170     ""
171     "INSERT INTO $config_table VALUES ('" FTS_TOTAL_DELETED_COUNT
172     "', '0');\n"
173     "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
174     "INSERT INTO $config_table VALUES ('" FTS_TABLE_STATE "', '0');\n";
175 
176 /** FTS tokenize parameter for plugin parser */
177 struct fts_tokenize_param_t {
178   fts_doc_t *result_doc; /*!< Result doc for tokens */
179   ulint add_pos;         /*!< Added position for tokens */
180 };
181 
182 /** Run SYNC on the table, i.e., write out data from the cache to the
183 FTS auxiliary INDEX table and clear the cache at the end.
184 @param[in,out]	sync		sync state
185 @param[in]	unlock_cache	whether unlock cache lock when write node
186 @param[in]	wait		whether wait when a sync is in progress
187 @param[in]      has_dict        whether has dict operation lock
188 @return DB_SUCCESS if all OK */
189 static dberr_t fts_sync(fts_sync_t *sync, bool unlock_cache, bool wait,
190                         bool has_dict);
191 
192 /** Release all resources held by the words rb tree e.g., the node ilist. */
193 static void fts_words_free(ib_rbt_t *words); /*!< in: rb tree of words */
194 #ifdef FTS_CACHE_SIZE_DEBUG
195 /** Read the max cache size parameter from the config table. */
196 static void fts_update_max_cache_size(fts_sync_t *sync); /*!< in: sync state */
197 #endif
198 
199 /** This function fetches the document just inserted right before
200 we commit the transaction, and tokenize the inserted text data
201 and insert into FTS auxiliary table and its cache.
202 @param[in]	ftt		FTS transaction table
203 @param[in]	doc_id		doc id
204 @param[in]	fts_indexes	affected FTS indexes
205 @return true if successful */
206 static ulint fts_add_doc_by_id(fts_trx_table_t *ftt, doc_id_t doc_id,
207                                ib_vector_t *fts_indexes MY_ATTRIBUTE((unused)));
208 
209 /** Update the last document id. This function could create a new
210  transaction to update the last document id.
211  @return DB_SUCCESS if OK */
212 static dberr_t fts_update_sync_doc_id(
213     const dict_table_t *table, /*!< in: table */
214     const char *table_name,    /*!< in: table name, or NULL */
215     doc_id_t doc_id,           /*!< in: last document id */
216     trx_t *trx);               /*!< in: update trx, or NULL */
217 
218 /** Tokenize a document.
219 @param[in,out]	doc	document to tokenize
220 @param[out]	result	tokenization result
221 @param[in]	parser	pluggable parser */
222 static void fts_tokenize_document(fts_doc_t *doc, fts_doc_t *result,
223                                   st_mysql_ftparser *parser);
224 
225 /** Continue to tokenize a document.
226 @param[in,out]	doc	document to tokenize
227 @param[in]	add_pos	add this position to all tokens from this tokenization
228 @param[out]	result	tokenization result
229 @param[in]	parser	pluggable parser */
230 static void fts_tokenize_document_next(fts_doc_t *doc, ulint add_pos,
231                                        fts_doc_t *result,
232                                        st_mysql_ftparser *parser);
233 
234 /** Create the vector of fts_get_doc_t instances.
235 @param[in,out]	cache	fts cache
236 @return	vector of fts_get_doc_t instances */
237 static ib_vector_t *fts_get_docs_create(fts_cache_t *cache);
238 
239 /** Free the FTS cache.
240 @param[in,out]	cache to be freed */
fts_cache_destroy(fts_cache_t * cache)241 static void fts_cache_destroy(fts_cache_t *cache) {
242   rw_lock_free(&cache->lock);
243   rw_lock_free(&cache->init_lock);
244   mutex_free(&cache->optimize_lock);
245   mutex_free(&cache->deleted_lock);
246   mutex_free(&cache->doc_id_lock);
247   os_event_destroy(cache->sync->event);
248 
249   if (cache->stopword_info.cached_stopword) {
250     rbt_free(cache->stopword_info.cached_stopword);
251   }
252 
253   if (cache->sync_heap->arg) {
254     mem_heap_free(static_cast<mem_heap_t *>(cache->sync_heap->arg));
255   }
256 
257   mem_heap_free(cache->cache_heap);
258 }
259 
260 /** Get a character set based on precise type.
261 @param prtype precise type
262 @return the corresponding character set */
263 UNIV_INLINE
fts_get_charset(ulint prtype)264 CHARSET_INFO *fts_get_charset(ulint prtype) {
265 #ifdef UNIV_DEBUG
266   switch (prtype & DATA_MYSQL_TYPE_MASK) {
267     case MYSQL_TYPE_BIT:
268     case MYSQL_TYPE_STRING:
269     case MYSQL_TYPE_VAR_STRING:
270     case MYSQL_TYPE_TINY_BLOB:
271     case MYSQL_TYPE_MEDIUM_BLOB:
272     case MYSQL_TYPE_BLOB:
273     case MYSQL_TYPE_LONG_BLOB:
274     case MYSQL_TYPE_VARCHAR:
275       break;
276     default:
277       ut_error;
278   }
279 #endif /* UNIV_DEBUG */
280 
281   uint cs_num = (uint)dtype_get_charset_coll(prtype);
282 
283   if (CHARSET_INFO *cs = get_charset(cs_num, MYF(MY_WME))) {
284     return (cs);
285   }
286 
287   ib::fatal(ER_IB_MSG_461) << "Unable to find charset-collation " << cs_num;
288   return (nullptr);
289 }
290 
291 /** This function loads the default InnoDB stopword list */
fts_load_default_stopword(fts_stopword_t * stopword_info)292 static void fts_load_default_stopword(
293     fts_stopword_t *stopword_info) /*!< in: stopword info */
294 {
295   fts_string_t str;
296   mem_heap_t *heap;
297   ib_alloc_t *allocator;
298   ib_rbt_t *stop_words;
299 
300   allocator = stopword_info->heap;
301   heap = static_cast<mem_heap_t *>(allocator->arg);
302 
303   if (!stopword_info->cached_stopword) {
304     stopword_info->cached_stopword =
305         rbt_create_arg_cmp(sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
306                            &my_charset_latin1);
307   }
308 
309   stop_words = stopword_info->cached_stopword;
310 
311   str.f_n_char = 0;
312 
313   for (ulint i = 0; fts_default_stopword[i]; ++i) {
314     char *word;
315     fts_tokenizer_word_t new_word;
316 
317     /* We are going to duplicate the value below. */
318     word = const_cast<char *>(fts_default_stopword[i]);
319 
320     new_word.nodes = ib_vector_create(allocator, sizeof(fts_node_t), 4);
321 
322     str.f_len = ut_strlen(word);
323     str.f_str = reinterpret_cast<byte *>(word);
324 
325     fts_string_dup(&new_word.text, &str, heap);
326 
327     rbt_insert(stop_words, &new_word, &new_word);
328   }
329 
330   stopword_info->status = STOPWORD_FROM_DEFAULT;
331 }
332 
333 /** Callback function to read a single stopword value.
334  @return Always return true */
fts_read_stopword(void * row,void * user_arg)335 static ibool fts_read_stopword(
336     void *row,      /*!< in: sel_node_t* */
337     void *user_arg) /*!< in: pointer to ib_vector_t */
338 {
339   ib_alloc_t *allocator;
340   fts_stopword_t *stopword_info;
341   sel_node_t *sel_node;
342   que_node_t *exp;
343   ib_rbt_t *stop_words;
344   dfield_t *dfield;
345   fts_string_t str;
346   mem_heap_t *heap;
347   ib_rbt_bound_t parent;
348 
349   sel_node = static_cast<sel_node_t *>(row);
350   stopword_info = static_cast<fts_stopword_t *>(user_arg);
351 
352   stop_words = stopword_info->cached_stopword;
353   allocator = static_cast<ib_alloc_t *>(stopword_info->heap);
354   heap = static_cast<mem_heap_t *>(allocator->arg);
355 
356   exp = sel_node->select_list;
357 
358   /* We only need to read the first column */
359   dfield = que_node_get_val(exp);
360 
361   str.f_n_char = 0;
362   str.f_str = static_cast<byte *>(dfield_get_data(dfield));
363   str.f_len = dfield_get_len(dfield);
364 
365   /* Only create new node if it is a value not already existed */
366   if (str.f_len != UNIV_SQL_NULL &&
367       rbt_search(stop_words, &parent, &str) != 0) {
368     fts_tokenizer_word_t new_word;
369 
370     new_word.nodes = ib_vector_create(allocator, sizeof(fts_node_t), 4);
371 
372     new_word.text.f_str =
373         static_cast<byte *>(mem_heap_alloc(heap, str.f_len + 1));
374 
375     memcpy(new_word.text.f_str, str.f_str, str.f_len);
376 
377     new_word.text.f_n_char = 0;
378     new_word.text.f_len = str.f_len;
379     new_word.text.f_str[str.f_len] = 0;
380 
381     rbt_insert(stop_words, &new_word, &new_word);
382   }
383 
384   return (TRUE);
385 }
386 
387 /** Load user defined stopword from designated user table
388  @return true if load operation is successful */
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)389 static ibool fts_load_user_stopword(
390     fts_t *fts,                      /*!< in: FTS struct */
391     const char *stopword_table_name, /*!< in: Stopword table
392                                      name */
393     fts_stopword_t *stopword_info)   /*!< in: Stopword info */
394 {
395   pars_info_t *info;
396   que_t *graph;
397   dberr_t error = DB_SUCCESS;
398   ibool ret = TRUE;
399   trx_t *trx;
400 
401   trx = trx_allocate_for_background();
402   trx->op_info = "Load user stopword table into FTS cache";
403 
404   /* Validate the user table existence and in the right
405   format */
406   stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
407   if (!stopword_info->charset) {
408     ret = FALSE;
409     goto cleanup;
410   } else if (!stopword_info->cached_stopword) {
411     /* Create the stopword RB tree with the stopword column
412     charset. All comparison will use this charset */
413     stopword_info->cached_stopword =
414         rbt_create_arg_cmp(sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
415                            stopword_info->charset);
416   }
417 
418   info = pars_info_create();
419 
420   pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
421 
422   pars_info_bind_function(info, "my_func", fts_read_stopword, stopword_info);
423 
424   graph = fts_parse_sql(nullptr, info,
425                         "DECLARE FUNCTION my_func;\n"
426                         "DECLARE CURSOR c IS"
427                         " SELECT value"
428                         " FROM $table_stopword;\n"
429                         "BEGIN\n"
430                         "\n"
431                         "OPEN c;\n"
432                         "WHILE 1 = 1 LOOP\n"
433                         "  FETCH c INTO my_func();\n"
434                         "  IF c % NOTFOUND THEN\n"
435                         "    EXIT;\n"
436                         "  END IF;\n"
437                         "END LOOP;\n"
438                         "CLOSE c;");
439 
440   for (;;) {
441     error = fts_eval_sql(trx, graph);
442 
443     if (error == DB_SUCCESS) {
444       fts_sql_commit(trx);
445       stopword_info->status = STOPWORD_USER_TABLE;
446       break;
447     } else {
448       fts_sql_rollback(trx);
449 
450       if (error == DB_LOCK_WAIT_TIMEOUT) {
451         ib::warn(ER_IB_MSG_462) << "Lock wait timeout reading user"
452                                    " stopword table. Retrying!";
453 
454         trx->error_state = DB_SUCCESS;
455       } else {
456         ib::error(ER_IB_MSG_463) << "Error '" << ut_strerr(error)
457                                  << "' while reading user stopword"
458                                     " table.";
459         ret = FALSE;
460         break;
461       }
462     }
463   }
464 
465   que_graph_free(graph);
466 
467 cleanup:
468   trx_free_for_background(trx);
469   return (ret);
470 }
471 
472 /** Initialize the index cache. */
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)473 static void fts_index_cache_init(
474     ib_alloc_t *allocator,          /*!< in: the allocator to use */
475     fts_index_cache_t *index_cache) /*!< in: index cache */
476 {
477   ulint i;
478 
479   ut_a(index_cache->words == nullptr);
480 
481   index_cache->words =
482       rbt_create_arg_cmp(sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
483                          index_cache->charset);
484 
485   ut_a(index_cache->doc_stats == nullptr);
486 
487   index_cache->doc_stats =
488       ib_vector_create(allocator, sizeof(fts_doc_stats_t), 4);
489 
490   for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
491     ut_a(index_cache->ins_graph[i] == nullptr);
492     ut_a(index_cache->sel_graph[i] == nullptr);
493   }
494 }
495 
496 /** Initialize FTS cache. */
fts_cache_init(fts_cache_t * cache)497 void fts_cache_init(fts_cache_t *cache) /*!< in: cache to initialize */
498 {
499   ulint i;
500 
501   /* Just to make sure */
502   ut_a(cache->sync_heap->arg == nullptr);
503 
504   cache->sync_heap->arg = mem_heap_create(1024);
505 
506   cache->total_size = 0;
507 
508   mutex_enter((ib_mutex_t *)&cache->deleted_lock);
509   cache->deleted_doc_ids =
510       ib_vector_create(cache->sync_heap, sizeof(fts_update_t), 4);
511   mutex_exit((ib_mutex_t *)&cache->deleted_lock);
512 
513   /* Reset the cache data for all the FTS indexes. */
514   for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
515     fts_index_cache_t *index_cache;
516 
517     index_cache =
518         static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
519 
520     fts_index_cache_init(cache->sync_heap, index_cache);
521   }
522 }
523 
524 /** Create a FTS cache. */
fts_cache_create(dict_table_t * table)525 fts_cache_t *fts_cache_create(
526     dict_table_t *table) /*!< in: table owns the FTS cache */
527 {
528   mem_heap_t *heap;
529   fts_cache_t *cache;
530 
531   heap = static_cast<mem_heap_t *>(mem_heap_create(512));
532 
533   cache = static_cast<fts_cache_t *>(mem_heap_zalloc(heap, sizeof(*cache)));
534 
535   cache->cache_heap = heap;
536 
537   rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
538 
539   rw_lock_create(fts_cache_init_rw_lock_key, &cache->init_lock,
540                  SYNC_FTS_CACHE_INIT);
541 
542   mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
543 
544   mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
545 
546   mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
547 
548   /* This is the heap used to create the cache itself. */
549   cache->self_heap = ib_heap_allocator_create(heap);
550 
551   /* This is a transient heap, used for storing sync data. */
552   cache->sync_heap = ib_heap_allocator_create(heap);
553   cache->sync_heap->arg = nullptr;
554 
555   cache->sync =
556       static_cast<fts_sync_t *>(mem_heap_zalloc(heap, sizeof(fts_sync_t)));
557 
558   cache->sync->table = table;
559   cache->sync->event = os_event_create();
560 
561   /* Create the index cache vector that will hold the inverted indexes. */
562   cache->indexes =
563       ib_vector_create(cache->self_heap, sizeof(fts_index_cache_t), 2);
564 
565   fts_cache_init(cache);
566 
567   cache->stopword_info.cached_stopword = nullptr;
568   cache->stopword_info.charset = nullptr;
569 
570   cache->stopword_info.heap = cache->self_heap;
571 
572   cache->stopword_info.status = STOPWORD_NOT_INIT;
573 
574   return (cache);
575 }
576 
577 /** Add a newly create index into FTS cache */
fts_add_index(dict_index_t * index,dict_table_t * table)578 void fts_add_index(dict_index_t *index, /*!< FTS index to be added */
579                    dict_table_t *table) /*!< table */
580 {
581   fts_t *fts = table->fts;
582   fts_cache_t *cache;
583   fts_index_cache_t *index_cache;
584 
585   ut_ad(fts);
586   cache = table->fts->cache;
587 
588   rw_lock_x_lock(&cache->init_lock);
589 
590   ib_vector_push(fts->indexes, &index);
591 
592   index_cache = fts_find_index_cache(cache, index);
593 
594   if (!index_cache) {
595     /* Add new index cache structure */
596     index_cache = fts_cache_index_cache_create(table, index);
597   }
598 
599   rw_lock_x_unlock(&cache->init_lock);
600 }
601 
602 /** recalibrate get_doc structure after index_cache in cache->indexes changed */
fts_reset_get_doc(fts_cache_t * cache)603 static void fts_reset_get_doc(fts_cache_t *cache) /*!< in: FTS index cache */
604 {
605   fts_get_doc_t *get_doc;
606   ulint i;
607 
608   ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
609 
610   ib_vector_reset(cache->get_docs);
611 
612   for (i = 0; i < ib_vector_size(cache->indexes); i++) {
613     fts_index_cache_t *ind_cache;
614 
615     ind_cache =
616         static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
617 
618     get_doc =
619         static_cast<fts_get_doc_t *>(ib_vector_push(cache->get_docs, nullptr));
620 
621     memset(get_doc, 0x0, sizeof(*get_doc));
622 
623     get_doc->index_cache = ind_cache;
624   }
625 
626   ut_ad(ib_vector_size(cache->get_docs) == ib_vector_size(cache->indexes));
627 }
628 
629 /** Check an index is in the table->indexes list
630  @return true if it exists */
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)631 static ibool fts_in_dict_index(
632     dict_table_t *table,       /*!< in: Table */
633     dict_index_t *index_check) /*!< in: index to be checked */
634 {
635   dict_index_t *index;
636 
637   for (index = table->first_index(); index != nullptr; index = index->next()) {
638     if (index == index_check) {
639       return (TRUE);
640     }
641   }
642 
643   return (FALSE);
644 }
645 
646 /** Check an index is in the fts->cache->indexes list
647  @return true if it exists */
fts_in_index_cache(dict_table_t * table,dict_index_t * index)648 static ibool fts_in_index_cache(
649     dict_table_t *table, /*!< in: Table */
650     dict_index_t *index) /*!< in: index to be checked */
651 {
652   ulint i;
653 
654   for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
655     fts_index_cache_t *index_cache;
656 
657     index_cache = static_cast<fts_index_cache_t *>(
658         ib_vector_get(table->fts->cache->indexes, i));
659 
660     if (index_cache->index == index) {
661       return (TRUE);
662     }
663   }
664 
665   return (FALSE);
666 }
667 
668 /** Check indexes in the fts->indexes is also present in index cache and
669  table->indexes list
670  @return true if all indexes match */
fts_check_cached_index(dict_table_t * table)671 ibool fts_check_cached_index(
672     dict_table_t *table) /*!< in: Table where indexes are dropped */
673 {
674   ulint i;
675 
676   if (!table->fts || !table->fts->cache) {
677     return (TRUE);
678   }
679 
680   ut_a(ib_vector_size(table->fts->indexes) ==
681        ib_vector_size(table->fts->cache->indexes));
682 
683   for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
684     dict_index_t *index;
685 
686     index = static_cast<dict_index_t *>(ib_vector_getp(table->fts->indexes, i));
687 
688     if (!fts_in_index_cache(table, index)) {
689       return (FALSE);
690     }
691 
692     if (!fts_in_dict_index(table, index)) {
693       return (FALSE);
694     }
695   }
696 
697   return (TRUE);
698 }
699 
700 /** Drop auxiliary tables related to an FTS index
701 @param[in]	table		Table where indexes are dropped
702 @param[in]	index		Index to be dropped
703 @param[in]	trx		Transaction for the drop
704 @param[in,out]	aux_vec		Aux table name vector
705 @return DB_SUCCESS or error number */
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx,aux_name_vec_t * aux_vec)706 dberr_t fts_drop_index(dict_table_t *table, dict_index_t *index, trx_t *trx,
707                        aux_name_vec_t *aux_vec) {
708   ib_vector_t *indexes = table->fts->indexes;
709   dberr_t err = DB_SUCCESS;
710 
711   ut_a(indexes);
712 
713   if ((ib_vector_size(indexes) == 1 &&
714        (index ==
715         static_cast<dict_index_t *>(ib_vector_getp(table->fts->indexes, 0)))) ||
716       ib_vector_is_empty(indexes)) {
717     doc_id_t current_doc_id;
718     doc_id_t first_doc_id;
719 
720     /* If we are dropping the only FTS index of the table,
721     remove it from optimize thread */
722     fts_optimize_remove_table(table);
723 
724     DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
725 
726     /* If Doc ID column is not added internally by FTS index,
727     we can drop all FTS auxiliary tables. Otherwise, we will
728     need to keep some common table such as CONFIG table, so
729     as to keep track of incrementing Doc IDs */
730     if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
731       err = fts_drop_tables(trx, table, aux_vec);
732 
733       fts_free(table);
734 
735       return (err);
736     } else {
737       if (!(index->type & DICT_CORRUPT)) {
738         err = fts_empty_common_tables(trx, table);
739         ut_ad(err == DB_SUCCESS);
740       }
741     }
742 
743     current_doc_id = table->fts->cache->next_doc_id;
744     first_doc_id = table->fts->cache->first_doc_id;
745     fts_cache_clear(table->fts->cache);
746     fts_cache_destroy(table->fts->cache);
747     table->fts->cache = fts_cache_create(table);
748     table->fts->cache->next_doc_id = current_doc_id;
749     table->fts->cache->first_doc_id = first_doc_id;
750   } else {
751     fts_cache_t *cache = table->fts->cache;
752     fts_index_cache_t *index_cache;
753 
754     rw_lock_x_lock(&cache->init_lock);
755 
756     index_cache = fts_find_index_cache(cache, index);
757 
758     if (index_cache != nullptr) {
759       if (index_cache->words) {
760         fts_words_free(index_cache->words);
761         rbt_free(index_cache->words);
762       }
763 
764       ib_vector_remove(cache->indexes, *(void **)index_cache);
765     }
766 
767     if (cache->get_docs) {
768       fts_reset_get_doc(cache);
769     }
770 
771     rw_lock_x_unlock(&cache->init_lock);
772   }
773 
774   err = fts_drop_index_tables(trx, index, aux_vec);
775 
776   ib_vector_remove(indexes, (const void *)index);
777 
778   return (err);
779 }
780 
781 /** Create an FTS index cache. */
fts_index_get_charset(dict_index_t * index)782 CHARSET_INFO *fts_index_get_charset(dict_index_t *index) /*!< in: FTS index */
783 {
784   CHARSET_INFO *charset = nullptr;
785   dict_field_t *field;
786   ulint prtype;
787 
788   field = index->get_field(0);
789   prtype = field->col->prtype;
790 
791   charset = fts_get_charset(prtype);
792 
793 #ifdef FTS_DEBUG
794   /* Set up charset info for this index. Please note all
795   field of the FTS index should have the same charset */
796   for (i = 1; i < index->n_fields; i++) {
797     CHARSET_INFO *fld_charset;
798 
799     field = index->get_field(i);
800     prtype = field->col->prtype;
801 
802     fld_charset = fts_get_charset(prtype);
803 
804     /* All FTS columns should have the same charset */
805     if (charset) {
806       ut_a(charset == fld_charset);
807     } else {
808       charset = fld_charset;
809     }
810   }
811 #endif
812 
813   return (charset);
814 }
815 /** Create an FTS index cache.
816  @return Index Cache */
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)817 fts_index_cache_t *fts_cache_index_cache_create(
818     dict_table_t *table, /*!< in: table with FTS index */
819     dict_index_t *index) /*!< in: FTS index */
820 {
821   ulint n_bytes;
822   fts_index_cache_t *index_cache;
823   fts_cache_t *cache = table->fts->cache;
824 
825   ut_a(cache != nullptr);
826 
827   ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
828 
829   /* Must not already exist in the cache vector. */
830   ut_a(fts_find_index_cache(cache, index) == nullptr);
831 
832   index_cache =
833       static_cast<fts_index_cache_t *>(ib_vector_push(cache->indexes, nullptr));
834 
835   memset(index_cache, 0x0, sizeof(*index_cache));
836 
837   index_cache->index = index;
838 
839   index_cache->charset = fts_index_get_charset(index);
840 
841   n_bytes = sizeof(que_t *) * FTS_NUM_AUX_INDEX;
842 
843   index_cache->ins_graph = static_cast<que_t **>(mem_heap_zalloc(
844       static_cast<mem_heap_t *>(cache->self_heap->arg), n_bytes));
845 
846   index_cache->sel_graph = static_cast<que_t **>(mem_heap_zalloc(
847       static_cast<mem_heap_t *>(cache->self_heap->arg), n_bytes));
848 
849   fts_index_cache_init(cache->sync_heap, index_cache);
850 
851   if (cache->get_docs) {
852     fts_reset_get_doc(cache);
853   }
854 
855   return (index_cache);
856 }
857 
858 /** Remove a FTS index cache
859 @param[in]	table	table with FTS index
860 @param[in]	index	FTS index */
fts_cache_index_cache_remove(dict_table_t * table,dict_index_t * index)861 void fts_cache_index_cache_remove(dict_table_t *table, dict_index_t *index) {
862   ut_ad(table->fts != nullptr);
863   ut_ad(index->type & DICT_FTS);
864 
865   fts_index_cache_t *index_cache;
866 
867   rw_lock_x_lock(&table->fts->cache->init_lock);
868 
869   index_cache = static_cast<fts_index_cache_t *>(
870       fts_find_index_cache(table->fts->cache, index));
871 
872   if (index_cache->words != nullptr) {
873     rbt_free(index_cache->words);
874     index_cache->words = nullptr;
875   }
876 
877   ib_vector_remove(table->fts->cache->indexes,
878                    *reinterpret_cast<void **>(index_cache));
879 
880   rw_lock_x_unlock(&table->fts->cache->init_lock);
881 }
882 
883 /** Release all resources help by the words rb tree e.g., the node ilist. */
fts_words_free(ib_rbt_t * words)884 static void fts_words_free(ib_rbt_t *words) /*!< in: rb tree of words */
885 {
886   const ib_rbt_node_t *rbt_node;
887 
888   /* Free the resources held by a word. */
889   for (rbt_node = rbt_first(words); rbt_node != nullptr;
890        rbt_node = rbt_first(words)) {
891     ulint i;
892     fts_tokenizer_word_t *word;
893 
894     word = rbt_value(fts_tokenizer_word_t, rbt_node);
895 
896     /* Free the ilists of this word. */
897     for (i = 0; i < ib_vector_size(word->nodes); ++i) {
898       fts_node_t *fts_node =
899           static_cast<fts_node_t *>(ib_vector_get(word->nodes, i));
900 
901       ut_free(fts_node->ilist);
902       fts_node->ilist = nullptr;
903     }
904 
905     /* NOTE: We are responsible for free'ing the node */
906     ut_free(rbt_remove_node(words, rbt_node));
907   }
908 }
909 
910 /** Clear cache.
911 @param[in,out]	cache	fts cache */
fts_cache_clear(fts_cache_t * cache)912 void fts_cache_clear(fts_cache_t *cache) {
913   ulint i;
914 
915   for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
916     ulint j;
917     fts_index_cache_t *index_cache;
918 
919     index_cache =
920         static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
921 
922     fts_words_free(index_cache->words);
923 
924     rbt_free(index_cache->words);
925 
926     index_cache->words = nullptr;
927 
928     for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
929       if (index_cache->ins_graph[j] != nullptr) {
930         que_graph_free(index_cache->ins_graph[j]);
931 
932         index_cache->ins_graph[j] = nullptr;
933       }
934 
935       if (index_cache->sel_graph[j] != nullptr) {
936         que_graph_free(index_cache->sel_graph[j]);
937 
938         index_cache->sel_graph[j] = nullptr;
939       }
940     }
941 
942     index_cache->doc_stats = nullptr;
943   }
944 
945   mem_heap_free(static_cast<mem_heap_t *>(cache->sync_heap->arg));
946   cache->sync_heap->arg = nullptr;
947 
948   fts_need_sync = false;
949 
950   cache->total_size = 0;
951 
952   mutex_enter((ib_mutex_t *)&cache->deleted_lock);
953   cache->deleted_doc_ids = nullptr;
954   mutex_exit((ib_mutex_t *)&cache->deleted_lock);
955 }
956 
957 /** Search the index specific cache for a particular FTS index.
958  @return the index cache else NULL */
959 UNIV_INLINE
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)960 fts_index_cache_t *fts_get_index_cache(
961     fts_cache_t *cache,        /*!< in: cache to search */
962     const dict_index_t *index) /*!< in: index to search for */
963 {
964   ulint i;
965 
966   ut_ad(rw_lock_own((rw_lock_t *)&cache->lock, RW_LOCK_X) ||
967         rw_lock_own((rw_lock_t *)&cache->init_lock, RW_LOCK_X));
968 
969   for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
970     fts_index_cache_t *index_cache;
971 
972     index_cache =
973         static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
974 
975     if (index_cache->index == index) {
976       return (index_cache);
977     }
978   }
979 
980   return (nullptr);
981 }
982 
#ifdef FTS_DEBUG
/** Search the index cache for a get_doc structure.
The caller must own cache->init_lock in X mode (checked in debug builds).
@param[in]	cache	cache to search
@param[in]	index	index to search for
@return the fts_get_doc_t item else NULL */
static fts_get_doc_t *fts_get_index_get_doc(fts_cache_t *cache,
                                            const dict_index_t *index) {
  ut_ad(rw_lock_own((rw_lock_t *)&cache->init_lock, RW_LOCK_X));

  fts_get_doc_t *match = nullptr;

  /* Linear scan; stop at the first get_doc bound to the index. */
  for (ulint pos = 0;
       match == nullptr && pos < ib_vector_size(cache->get_docs); ++pos) {
    fts_get_doc_t *candidate =
        static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, pos));

    if (candidate->index_cache->index == index) {
      match = candidate;
    }
  }

  return (match);
}
#endif
1007 
1008 /** Find an existing word, or if not found, create one and return it.
1009  @return specified word token */
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1010 static fts_tokenizer_word_t *fts_tokenizer_word_get(
1011     fts_cache_t *cache,             /*!< in: cache */
1012     fts_index_cache_t *index_cache, /*!< in: index cache */
1013     fts_string_t *text)             /*!< in: node text */
1014 {
1015   fts_tokenizer_word_t *word;
1016   ib_rbt_bound_t parent;
1017 
1018   ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1019 
1020   /* If it is a stopword, do not index it */
1021   if (!fts_check_token(text, cache->stopword_info.cached_stopword,
1022                        index_cache->index->is_ngram, index_cache->charset)) {
1023     return (nullptr);
1024   }
1025 
1026   /* Check if we found a match, if not then add word to tree. */
1027   if (rbt_search(index_cache->words, &parent, text) != 0) {
1028     mem_heap_t *heap;
1029     fts_tokenizer_word_t new_word;
1030 
1031     heap = static_cast<mem_heap_t *>(cache->sync_heap->arg);
1032 
1033     new_word.nodes = ib_vector_create(cache->sync_heap, sizeof(fts_node_t), 4);
1034 
1035     fts_string_dup(&new_word.text, text, heap);
1036 
1037     parent.last = rbt_add_node(index_cache->words, &parent, &new_word);
1038 
1039     /* Take into account the RB tree memory use and the vector. */
1040     cache->total_size += sizeof(new_word) + sizeof(ib_rbt_node_t) +
1041                          text->f_len + (sizeof(fts_node_t) * 4) +
1042                          sizeof(*new_word.nodes);
1043 
1044     ut_ad(rbt_validate(index_cache->words));
1045   }
1046 
1047   word = rbt_value(fts_tokenizer_word_t, parent.last);
1048 
1049   return (word);
1050 }
1051 
1052 /** Add the given doc_id/word positions to the given node's ilist. */
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1053 void fts_cache_node_add_positions(
1054     fts_cache_t *cache,     /*!< in: cache */
1055     fts_node_t *node,       /*!< in: word node */
1056     doc_id_t doc_id,        /*!< in: doc id */
1057     ib_vector_t *positions) /*!< in: fts_token_t::positions */
1058 {
1059   ulint i;
1060   byte *ptr;
1061   byte *ilist;
1062   ulint enc_len;
1063   ulint last_pos;
1064   byte *ptr_start;
1065   ulint doc_id_delta;
1066 
1067 #ifdef UNIV_DEBUG
1068   if (cache) {
1069     ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1070   }
1071 #endif /* UNIV_DEBUG */
1072 
1073   ut_ad(doc_id >= node->last_doc_id);
1074 
1075   /* Calculate the space required to store the ilist. */
1076   doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1077   enc_len = fts_get_encoded_len(doc_id_delta);
1078 
1079   last_pos = 0;
1080   for (i = 0; i < ib_vector_size(positions); i++) {
1081     ulint pos = *(static_cast<ulint *>(ib_vector_get(positions, i)));
1082 
1083     ut_ad(last_pos == 0 || pos > last_pos);
1084 
1085     enc_len += fts_get_encoded_len(pos - last_pos);
1086     last_pos = pos;
1087   }
1088 
1089   /* The 0x00 byte at the end of the token positions list. */
1090   enc_len++;
1091 
1092   if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1093     /* No need to allocate more space, we can fit in the new
1094     data at the end of the old one. */
1095     ilist = nullptr;
1096     ptr = node->ilist + node->ilist_size;
1097   } else {
1098     ulint new_size = node->ilist_size + enc_len;
1099 
1100     /* Over-reserve space by a fixed size for small lengths and
1101     by 20% for lengths >= 48 bytes. */
1102     if (new_size < 16) {
1103       new_size = 16;
1104     } else if (new_size < 32) {
1105       new_size = 32;
1106     } else if (new_size < 48) {
1107       new_size = 48;
1108     } else {
1109       new_size = (ulint)(1.2 * new_size);
1110     }
1111 
1112     ilist = static_cast<byte *>(ut_malloc_nokey(new_size));
1113     ptr = ilist + node->ilist_size;
1114 
1115     node->ilist_size_alloc = new_size;
1116   }
1117 
1118   ptr_start = ptr;
1119 
1120   /* Encode the new fragment. */
1121   ptr += fts_encode_int(doc_id_delta, ptr);
1122 
1123   last_pos = 0;
1124   for (i = 0; i < ib_vector_size(positions); i++) {
1125     ulint pos = *(static_cast<ulint *>(ib_vector_get(positions, i)));
1126 
1127     ptr += fts_encode_int(pos - last_pos, ptr);
1128     last_pos = pos;
1129   }
1130 
1131   *ptr++ = 0;
1132 
1133   ut_a(enc_len == (ulint)(ptr - ptr_start));
1134 
1135   if (ilist) {
1136     /* Copy old ilist to the start of the new one and switch the
1137     new one into place in the node. */
1138     if (node->ilist_size > 0) {
1139       memcpy(ilist, node->ilist, node->ilist_size);
1140       ut_free(node->ilist);
1141     }
1142 
1143     node->ilist = ilist;
1144   }
1145 
1146   node->ilist_size += enc_len;
1147 
1148   if (cache) {
1149     cache->total_size += enc_len;
1150   }
1151 
1152   if (node->first_doc_id == FTS_NULL_DOC_ID) {
1153     node->first_doc_id = doc_id;
1154   }
1155 
1156   node->last_doc_id = doc_id;
1157   ++node->doc_count;
1158 }
1159 
1160 /** Add document to the cache. */
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1161 static void fts_cache_add_doc(
1162     fts_cache_t *cache,             /*!< in: cache */
1163     fts_index_cache_t *index_cache, /*!< in: index cache */
1164     doc_id_t doc_id,                /*!< in: doc id to add */
1165     ib_rbt_t *tokens)               /*!< in: document tokens */
1166 {
1167   const ib_rbt_node_t *node;
1168   ulint n_words;
1169   fts_doc_stats_t *doc_stats;
1170 
1171   if (!tokens) {
1172     return;
1173   }
1174 
1175   ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1176 
1177   n_words = rbt_size(tokens);
1178 
1179   for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1180     fts_tokenizer_word_t *word;
1181     fts_node_t *fts_node = nullptr;
1182     fts_token_t *token = rbt_value(fts_token_t, node);
1183 
1184     /* Find and/or add token to the cache. */
1185     word = fts_tokenizer_word_get(cache, index_cache, &token->text);
1186 
1187     if (!word) {
1188       ut_free(rbt_remove_node(tokens, node));
1189       continue;
1190     }
1191 
1192     if (ib_vector_size(word->nodes) > 0) {
1193       fts_node = static_cast<fts_node_t *>(ib_vector_last(word->nodes));
1194     }
1195 
1196     if (fts_node == nullptr || fts_node->synced ||
1197         fts_node->ilist_size > FTS_ILIST_MAX_SIZE ||
1198         doc_id < fts_node->last_doc_id) {
1199       fts_node =
1200           static_cast<fts_node_t *>(ib_vector_push(word->nodes, nullptr));
1201 
1202       memset(fts_node, 0x0, sizeof(*fts_node));
1203 
1204       cache->total_size += sizeof(*fts_node);
1205     }
1206 
1207     fts_cache_node_add_positions(cache, fts_node, doc_id, token->positions);
1208 
1209     ut_free(rbt_remove_node(tokens, node));
1210   }
1211 
1212   ut_a(rbt_empty(tokens));
1213 
1214   /* Add to doc ids processed so far. */
1215   doc_stats = static_cast<fts_doc_stats_t *>(
1216       ib_vector_push(index_cache->doc_stats, nullptr));
1217 
1218   doc_stats->doc_id = doc_id;
1219   doc_stats->word_count = n_words;
1220 
1221   /* Add the doc stats memory usage too. */
1222   cache->total_size += sizeof(*doc_stats);
1223 
1224   if (doc_id > cache->sync->max_doc_id) {
1225     cache->sync->max_doc_id = doc_id;
1226   }
1227 }
1228 
1229 /** Drop FTS AUX table DD table objects in vector
1230 @param[in]	aux_vec		aux table name vector
1231 @param[in]	file_per_table	whether file per table
1232 @return true on success, false on failure. */
fts_drop_dd_tables(const aux_name_vec_t * aux_vec,bool file_per_table)1233 bool fts_drop_dd_tables(const aux_name_vec_t *aux_vec, bool file_per_table) {
1234   bool ret = true;
1235 
1236   if (aux_vec == nullptr || aux_vec->aux_name.size() == 0) {
1237     return (true);
1238   }
1239 
1240   for (ulint i = 0; i < aux_vec->aux_name.size(); i++) {
1241     bool retval;
1242 
1243     retval = dd_drop_fts_table(aux_vec->aux_name[i], file_per_table);
1244 
1245     if (!retval) {
1246       ret = false;
1247     }
1248   }
1249 
1250   return (ret);
1251 }
1252 
1253 /** Free FTS AUX table names in vector
1254 @param[in]	aux_vec		aux table name vector */
fts_free_aux_names(aux_name_vec_t * aux_vec)1255 void fts_free_aux_names(aux_name_vec_t *aux_vec) {
1256   if (aux_vec == nullptr || aux_vec->aux_name.size() == 0) {
1257     return;
1258   }
1259 
1260   while (aux_vec->aux_name.size() > 0) {
1261     char *name = aux_vec->aux_name.back();
1262     ut_free(name);
1263     aux_vec->aux_name.pop_back();
1264   }
1265 
1266   ut_ad(aux_vec->aux_name.size() == 0);
1267 }
1268 
1269 /** Drops a table. If the table can't be found we return a SUCCESS code.
1270 @param[in,out]	trx		transaction
1271 @param[in]	table_name	table to drop
1272 @param[in,out]	aux_vec		fts aux table name vector
1273 @return DB_SUCCESS or error code */
fts_drop_table(trx_t * trx,const char * table_name,aux_name_vec_t * aux_vec)1274 static dberr_t fts_drop_table(trx_t *trx, const char *table_name,
1275                               aux_name_vec_t *aux_vec) {
1276   dict_table_t *table;
1277   dberr_t error = DB_SUCCESS;
1278   THD *thd = current_thd;
1279   MDL_ticket *mdl = nullptr;
1280 
1281   /* Check that the table exists in our data dictionary.
1282   Similar to regular drop table case, we will open table with
1283   DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1284   table = dd_table_open_on_name(
1285       thd, &mdl, table_name, true,
1286       static_cast<dict_err_ignore_t>(DICT_ERR_IGNORE_INDEX_ROOT |
1287                                      DICT_ERR_IGNORE_CORRUPT));
1288 
1289   if (table != nullptr) {
1290     char table_name2[MAX_FULL_NAME_LEN];
1291 
1292     strcpy(table_name2, table_name);
1293 
1294     bool file_per_table = dict_table_is_file_per_table(table);
1295 
1296     dd_table_close(table, thd, &mdl, true);
1297 
1298     /* Pass nonatomic=false (dont allow data dict unlock),
1299     because the transaction may hold locks on SYS_* tables from
1300     previous calls to fts_drop_table(). */
1301     error = row_drop_table_for_mysql(table_name, trx, false, nullptr);
1302 
1303     if (error != DB_SUCCESS) {
1304       ib::error(ER_IB_MSG_464) << "Unable to drop FTS index aux table "
1305                                << table_name << ": " << ut_strerr(error);
1306       return (error);
1307     }
1308 
1309     if (aux_vec == nullptr) {
1310       mutex_exit(&dict_sys->mutex);
1311 
1312       if (!dd_drop_fts_table(table_name2, file_per_table)) {
1313         error = DB_FAIL;
1314       }
1315 
1316       mutex_enter(&dict_sys->mutex);
1317     } else {
1318       aux_vec->aux_name.push_back(mem_strdup(table_name2));
1319     }
1320 
1321   } else {
1322     error = DB_FAIL;
1323   }
1324 
1325   return (error);
1326 }
1327 
1328 /** Rename a single auxiliary table due to database name change.
1329  @return DB_SUCCESS or error code */
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx,bool replay)1330 static MY_ATTRIBUTE((warn_unused_result)) dberr_t fts_rename_one_aux_table(
1331     const char *new_name,           /*!< in: new parent tbl name */
1332     const char *fts_table_old_name, /*!< in: old aux tbl name */
1333     trx_t *trx,                     /*!< in: transaction */
1334     bool replay)                    /*!< Whether in replay stage */
1335 {
1336   char fts_table_new_name[MAX_TABLE_NAME_LEN];
1337   ulint new_db_name_len = dict_get_db_name_len(new_name);
1338   ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1339   ulint table_new_name_len =
1340       strlen(fts_table_old_name) + new_db_name_len - old_db_name_len;
1341 
1342   /* Check if the new and old database names are the same, if so,
1343   nothing to do */
1344   ut_ad((new_db_name_len != old_db_name_len) ||
1345         strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1346 
1347   /* Get the database name from "new_name", and table name
1348   from the fts_table_old_name */
1349   strncpy(fts_table_new_name, new_name, new_db_name_len);
1350   strncpy(fts_table_new_name + new_db_name_len, strchr(fts_table_old_name, '/'),
1351           table_new_name_len - new_db_name_len);
1352   fts_table_new_name[table_new_name_len] = 0;
1353 
1354   dberr_t error;
1355   error = row_rename_table_for_mysql(fts_table_old_name, fts_table_new_name,
1356                                      nullptr, trx, replay);
1357 
1358   if (error == DB_SUCCESS) {
1359     /* Update dd tablespace filename. */
1360     dict_table_t *table;
1361     table = dict_table_check_if_in_cache_low(fts_table_new_name);
1362     ut_ad(table != nullptr);
1363 
1364     /* Release dict_sys->mutex to avoid mutex reentrant. */
1365     table->acquire();
1366     mutex_exit(&dict_sys->mutex);
1367 
1368     if (!replay && !dd_rename_fts_table(table, fts_table_old_name)) {
1369       ut_ad(0);
1370     }
1371 
1372     mutex_enter(&dict_sys->mutex);
1373     table->release();
1374   }
1375 
1376   return (error);
1377 }
1378 
1379 /** Rename auxiliary tables for all fts index for a table. This(rename)
1380  is due to database name change
1381  @return DB_SUCCESS or error code */
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx,bool replay)1382 dberr_t fts_rename_aux_tables(dict_table_t *table,  /*!< in: user Table */
1383                               const char *new_name, /*!< in: new table name */
1384                               trx_t *trx,           /*!< in: transaction */
1385                               bool replay)          /*!< in: Whether in replay
1386                                                         stage */
1387 {
1388   ulint i;
1389   fts_table_t fts_table;
1390 
1391   FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1392 
1393   /* Rename common auxiliary tables */
1394   for (i = 0; fts_common_tables[i] != nullptr; ++i) {
1395     char old_table_name[MAX_FULL_NAME_LEN];
1396     dberr_t err = DB_SUCCESS;
1397 
1398     fts_table.suffix = fts_common_tables[i];
1399 
1400     fts_get_table_name(&fts_table, old_table_name);
1401 
1402     err = fts_rename_one_aux_table(new_name, old_table_name, trx, replay);
1403 
1404     if (err != DB_SUCCESS) {
1405       return (err);
1406     }
1407   }
1408 
1409   fts_t *fts = table->fts;
1410 
1411   /* Rename index specific auxiliary tables */
1412   for (i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
1413        ++i) {
1414     dict_index_t *index;
1415 
1416     index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
1417 
1418     FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
1419 
1420     for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1421       dberr_t err;
1422       char old_table_name[MAX_FULL_NAME_LEN];
1423 
1424       fts_table.suffix = fts_get_suffix(j);
1425 
1426       fts_get_table_name(&fts_table, old_table_name);
1427 
1428       err = fts_rename_one_aux_table(new_name, old_table_name, trx, replay);
1429 
1430       DBUG_EXECUTE_IF("fts_rename_failure", err = DB_DEADLOCK;);
1431 
1432       if (err != DB_SUCCESS) {
1433         return (err);
1434       }
1435     }
1436   }
1437 
1438   return (DB_SUCCESS);
1439 }
1440 
1441 /** Drops the common ancillary tables needed for supporting an FTS index
1442 on the given table. row_mysql_lock_data_dictionary must have been called
1443 before this.
1444 @param[in,out]	trx		transaction
1445 @param[in,out]	fts_table	table with fts index
1446 @param[in,out]	aux_vec		fts table name vector
1447 @return DB_SUCCESS or error code */
1448 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table,aux_name_vec_t * aux_vec)1449     fts_drop_common_tables(trx_t *trx, fts_table_t *fts_table,
1450                            aux_name_vec_t *aux_vec) {
1451   ulint i;
1452   dberr_t error = DB_SUCCESS;
1453 
1454   for (i = 0; fts_common_tables[i] != nullptr; ++i) {
1455     dberr_t err;
1456     char table_name[MAX_FULL_NAME_LEN];
1457 
1458     fts_table->suffix = fts_common_tables[i];
1459 
1460     fts_get_table_name(fts_table, table_name);
1461 
1462     err = fts_drop_table(trx, table_name, aux_vec);
1463 
1464     /* We only return the status of the last error. */
1465     if (err != DB_SUCCESS && err != DB_FAIL) {
1466       error = err;
1467     }
1468   }
1469 
1470   return (error);
1471 }
1472 
1473 /** Since we do a horizontal split on the index table, we need to drop
1474 all the split tables.
1475 @param[in]	trx		transaction
1476 @param[in]	index		fts index
1477 @param[out]	aux_vec		dropped table names vector
1478 @return DB_SUCCESS or error code */
fts_drop_index_tables(trx_t * trx,dict_index_t * index,aux_name_vec_t * aux_vec)1479 dberr_t fts_drop_index_tables(trx_t *trx, dict_index_t *index,
1480                               aux_name_vec_t *aux_vec) {
1481   ulint i;
1482   fts_table_t fts_table;
1483   dberr_t error = DB_SUCCESS;
1484 
1485   FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
1486 
1487   for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1488     dberr_t err;
1489     char table_name[MAX_FULL_NAME_LEN];
1490 
1491     fts_table.suffix = fts_get_suffix(i);
1492 
1493     fts_get_table_name(&fts_table, table_name);
1494 
1495     err = fts_drop_table(trx, table_name, aux_vec);
1496 
1497     /* We only return the status of the last error. */
1498     if (err != DB_SUCCESS && err != DB_FAIL) {
1499       error = err;
1500     }
1501   }
1502 
1503   return (error);
1504 }
1505 
1506 /** Write the default settings to the config table.
1507 @param[in]	fts_table	fts table
1508 @return	DB_SUCCESS or error code. */
fts_init_config_table(fts_table_t * fts_table)1509 static dberr_t fts_init_config_table(fts_table_t *fts_table) {
1510   pars_info_t *info;
1511   que_t *graph;
1512   char table_name[MAX_FULL_NAME_LEN];
1513   dberr_t error = DB_SUCCESS;
1514   trx_t *trx;
1515 
1516   ut_ad(!mutex_own(&dict_sys->mutex));
1517 
1518   info = pars_info_create();
1519 
1520   fts_table->suffix = FTS_SUFFIX_CONFIG;
1521   fts_get_table_name(fts_table, table_name);
1522   pars_info_bind_id(info, true, "config_table", table_name);
1523   trx = trx_allocate_for_background();
1524 
1525   graph = fts_parse_sql(fts_table, info, fts_config_table_insert_values_sql);
1526 
1527   error = fts_eval_sql(trx, graph);
1528 
1529   que_graph_free(graph);
1530 
1531   if (error == DB_SUCCESS) {
1532     fts_sql_commit(trx);
1533   } else {
1534     fts_sql_rollback(trx);
1535   }
1536 
1537   trx_free_for_background(trx);
1538 
1539   return (error);
1540 }
1541 
1542 /** Empty a common talbes.
1543 @param[in,out]	trx		transaction
1544 @param[in]	fts_table	fts table
1545 @return	DB_SUCCESS or error code. */
fts_empty_table(trx_t * trx,fts_table_t * fts_table)1546 static dberr_t fts_empty_table(trx_t *trx, fts_table_t *fts_table) {
1547   pars_info_t *info;
1548   que_t *graph;
1549   char table_name[MAX_FULL_NAME_LEN];
1550   dberr_t error = DB_SUCCESS;
1551 
1552   info = pars_info_create();
1553 
1554   fts_get_table_name(fts_table, table_name);
1555   pars_info_bind_id(info, true, "table_name", table_name);
1556 
1557   ut_ad(mutex_own(&dict_sys->mutex));
1558 
1559   mutex_exit(&dict_sys->mutex);
1560 
1561   graph = fts_parse_sql(fts_table, info, "BEGIN DELETE FROM $table_name;");
1562 
1563   error = fts_eval_sql(trx, graph);
1564 
1565   que_graph_free(graph);
1566 
1567   mutex_enter(&dict_sys->mutex);
1568 
1569   return (error);
1570 }
1571 
1572 /** Empty all common talbes.
1573 @param[in,out]	trx	transaction
1574 @param[in]	table	dict table
1575 @return	DB_SUCCESS or error code. */
fts_empty_common_tables(trx_t * trx,dict_table_t * table)1576 dberr_t fts_empty_common_tables(trx_t *trx, dict_table_t *table) {
1577   ulint i;
1578   fts_table_t fts_table;
1579   dberr_t error = DB_SUCCESS;
1580 
1581   FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1582 
1583   for (i = 0; fts_common_tables[i] != nullptr; ++i) {
1584     dberr_t err;
1585 
1586     fts_table.suffix = fts_common_tables[i];
1587 
1588     /* "config" table should not be emptied, as it has the
1589     last used DOC ID info */
1590     if (i == 2) {
1591       ut_ad(ut_strcmp(fts_table.suffix, "config") == 0);
1592       continue;
1593     }
1594 
1595     err = fts_empty_table(trx, &fts_table);
1596 
1597     if (err != DB_SUCCESS) {
1598       error = err;
1599     }
1600   }
1601 
1602   return (error);
1603 }
1604 
1605 /** Drops FTS ancillary tables needed for supporting an FTS index
1606 on the given table. row_mysql_lock_data_dictionary must have been called
1607 before this.
1608 @param[in,out]	trx	transaction
1609 @param[in]	fts	fts instance
1610 @param[in,out]	aux_vec	fts aux table name vector
1611 @return DB_SUCCESS or error code */
1612 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts,aux_name_vec_t * aux_vec)1613     fts_drop_all_index_tables(trx_t *trx, fts_t *fts, aux_name_vec_t *aux_vec) {
1614   dberr_t error = DB_SUCCESS;
1615 
1616   for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
1617        ++i) {
1618     dberr_t err;
1619     dict_index_t *index;
1620 
1621     index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
1622 
1623     err = fts_drop_index_tables(trx, index, aux_vec);
1624 
1625     if (err != DB_SUCCESS) {
1626       error = err;
1627     }
1628   }
1629 
1630   return (error);
1631 }
1632 
1633 /** Drops the ancillary tables needed for supporting an FTS index on a
1634 given table. row_mysql_lock_data_dictionary must have been called before
1635 this.
1636 @param[in,out]	trx	transaction
1637 @param[in]	table	table has the fts index
1638 @param[in,out]	aux_vec	fts aux table name vector
1639 @return DB_SUCCESS or error code */
fts_drop_tables(trx_t * trx,dict_table_t * table,aux_name_vec_t * aux_vec)1640 dberr_t fts_drop_tables(trx_t *trx, dict_table_t *table,
1641                         aux_name_vec_t *aux_vec) {
1642   dberr_t error;
1643   fts_table_t fts_table;
1644 
1645   FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1646 
1647   error = fts_drop_common_tables(trx, &fts_table, aux_vec);
1648 
1649   if (error == DB_SUCCESS) {
1650     error = fts_drop_all_index_tables(trx, table->fts, aux_vec);
1651   }
1652 
1653   return (error);
1654 }
1655 
1656 /** Lock all FTS AUX COMMON tables (for dropping table)
1657 @param[in]	thd	thread locking the AUX table
1658 @param[in,out]	fts_table	table with fts index
1659 @return DB_SUCCESS or error code */
1660 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_lock_common_tables(THD * thd,fts_table_t * fts_table)1661     fts_lock_common_tables(THD *thd, fts_table_t *fts_table) {
1662   for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
1663     fts_table->suffix = fts_common_tables[i];
1664 
1665     char table_name[MAX_FULL_NAME_LEN];
1666     fts_get_table_name(fts_table, table_name);
1667 
1668     std::string db_n;
1669     std::string table_n;
1670     dict_name::get_table(table_name, db_n, table_n);
1671 
1672     MDL_ticket *exclusiv_mdl = nullptr;
1673     if (dd::acquire_exclusive_table_mdl(thd, db_n.c_str(), table_n.c_str(),
1674                                         false, &exclusiv_mdl)) {
1675       return (DB_ERROR);
1676     }
1677   }
1678 
1679   return (DB_SUCCESS);
1680 }
1681 
1682 /** Lock all FTS INDEX AUX tables (for dropping table)
1683 @param[in]	thd	thread locking the AUX table
1684 @param[in]	index	fts index
1685 @return DB_SUCCESS or error code */
fts_lock_index_tables(THD * thd,dict_index_t * index)1686 dberr_t fts_lock_index_tables(THD *thd, dict_index_t *index) {
1687   ulint i;
1688   fts_table_t fts_table;
1689 
1690   FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
1691 
1692   for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1693     fts_table.suffix = fts_get_suffix(i);
1694 
1695     char table_name[MAX_FULL_NAME_LEN];
1696     fts_get_table_name(&fts_table, table_name);
1697 
1698     std::string db_n;
1699     std::string table_n;
1700     dict_name::get_table(table_name, db_n, table_n);
1701 
1702     MDL_ticket *exclusiv_mdl = nullptr;
1703     if (dd::acquire_exclusive_table_mdl(thd, db_n.c_str(), table_n.c_str(),
1704                                         false, &exclusiv_mdl)) {
1705       return (DB_ERROR);
1706     }
1707   }
1708   return (DB_SUCCESS);
1709 }
1710 
1711 /** Lock all FTS index AUX tables (for dropping table)
1712 @param[in]	thd	thread locking the AUX table
1713 @param[in]	fts	fts instance
1714 @return DB_SUCCESS or error code */
1715 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_lock_all_index_tables(THD * thd,fts_t * fts)1716     fts_lock_all_index_tables(THD *thd, fts_t *fts) {
1717   dberr_t error = DB_SUCCESS;
1718 
1719   for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
1720        ++i) {
1721     dberr_t err;
1722     dict_index_t *index;
1723 
1724     index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
1725 
1726     err = fts_lock_index_tables(thd, index);
1727 
1728     if (err != DB_SUCCESS) {
1729       error = err;
1730     }
1731   }
1732 
1733   return (error);
1734 }
1735 
1736 /** Lock all FTS AUX tables (for dropping table)
1737 @param[in]	thd	thread locking the AUX table
1738 @param[in]	table	table has the fts index
1739 @return DB_SUCCESS or error code */
fts_lock_all_aux_tables(THD * thd,dict_table_t * table)1740 dberr_t fts_lock_all_aux_tables(THD *thd, dict_table_t *table) {
1741   dberr_t error;
1742   fts_table_t fts_table;
1743 
1744   FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1745 
1746   error = fts_lock_common_tables(thd, &fts_table);
1747 
1748   if (error == DB_SUCCESS) {
1749     error = fts_lock_all_index_tables(thd, table->fts);
1750   }
1751 
1752   return (error);
1753 }
1754 
1755 /** Extract only the required flags from table->flags2 for FTS Aux
1756 tables.
1757 @param[in]	flags2	Table flags2
1758 @return extracted flags2 for FTS aux tables */
fts_get_table_flags2_for_aux_tables(uint32_t flags2)1759 static inline uint32_t fts_get_table_flags2_for_aux_tables(uint32_t flags2) {
1760   /* Extract the file_per_table flag, temporary file flag and encryption flag
1761   from the main FTS table flags2 */
1762   return ((flags2 & DICT_TF2_USE_FILE_PER_TABLE) |
1763           (flags2 & DICT_TF2_ENCRYPTION_FILE_PER_TABLE) |
1764           (flags2 & DICT_TF2_TEMPORARY) | DICT_TF2_AUX);
1765 }
1766 
1767 /** Create dict_table_t object for FTS Aux tables.
1768 @param[in]	aux_table_name	FTS Aux table name
1769 @param[in]	table		table object of FTS Index
1770 @param[in]	n_cols		number of columns for FTS Aux table
1771 @return table object for FTS Aux table */
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1772 static dict_table_t *fts_create_in_mem_aux_table(const char *aux_table_name,
1773                                                  const dict_table_t *table,
1774                                                  ulint n_cols) {
1775   dict_table_t *new_table = dict_mem_table_create(
1776       aux_table_name, table->space, n_cols, 0, 0, table->flags,
1777       fts_get_table_flags2_for_aux_tables(table->flags2));
1778 
1779   if (DICT_TF_HAS_SHARED_SPACE(table->flags)) {
1780     ut_ad(table->space == fil_space_get_id_by_name(table->tablespace()));
1781     new_table->tablespace = mem_heap_strdup(new_table->heap, table->tablespace);
1782   }
1783 
1784   if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1785     ut_ad(table->data_dir_path != nullptr);
1786     new_table->data_dir_path =
1787         mem_heap_strdup(new_table->heap, table->data_dir_path);
1788   }
1789 
1790   return (new_table);
1791 }
1792 
/** Function to create one FTS common table.
1794 @param[in,out]	trx		InnoDB transaction
1795 @param[in]	table		Table that has FTS Index
1796 @param[in]	fts_table_name	FTS AUX table name
1797 @param[in]	fts_suffix	FTS AUX table suffix
1798 @param[in]	heap		heap
1799 @return table object if created, else NULL */
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1800 static dict_table_t *fts_create_one_common_table(trx_t *trx,
1801                                                  const dict_table_t *table,
1802                                                  const char *fts_table_name,
1803                                                  const char *fts_suffix,
1804                                                  mem_heap_t *heap) {
1805   dict_table_t *new_table = nullptr;
1806   dberr_t error;
1807   bool is_config = fts_suffix == FTS_SUFFIX_CONFIG;
1808 
1809   if (!is_config) {
1810     new_table = fts_create_in_mem_aux_table(fts_table_name, table,
1811                                             FTS_DELETED_TABLE_NUM_COLS);
1812 
1813     dict_mem_table_add_col(new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1814                            FTS_DELETED_TABLE_COL_LEN, true);
1815   } else {
1816     /* Config table has different schema. */
1817     new_table = fts_create_in_mem_aux_table(fts_table_name, table,
1818                                             FTS_CONFIG_TABLE_NUM_COLS);
1819 
1820     dict_mem_table_add_col(new_table, heap, "key", DATA_VARCHAR, 0,
1821                            FTS_CONFIG_TABLE_KEY_COL_LEN, true);
1822 
1823     dict_mem_table_add_col(new_table, heap, "value", DATA_VARCHAR,
1824                            DATA_NOT_NULL, FTS_CONFIG_TABLE_VALUE_COL_LEN, true);
1825   }
1826 
1827   error = row_create_table_for_mysql(new_table, nullptr, trx);
1828 
1829   if (error == DB_SUCCESS) {
1830     dict_index_t *index = dict_mem_index_create(
1831         fts_table_name, "FTS_COMMON_TABLE_IND", new_table->space,
1832         DICT_UNIQUE | DICT_CLUSTERED, 1);
1833 
1834     if (!is_config) {
1835       index->add_field("doc_id", 0, true);
1836     } else {
1837       index->add_field("key", 0, true);
1838     }
1839 
1840     /* We save and restore trx->dict_operation because
1841     row_create_index_for_mysql() changes the operation to
1842     TRX_DICT_OP_TABLE. */
1843     trx_dict_op_t op = trx_get_dict_operation(trx);
1844 
1845     error = row_create_index_for_mysql(index, trx, nullptr, nullptr);
1846 
1847     trx->dict_operation = op;
1848   }
1849 
1850   if (error != DB_SUCCESS) {
1851     trx->error_state = error;
1852     new_table = nullptr;
1853     ib::warn(ER_IB_MSG_465)
1854         << "Failed to create FTS common table " << fts_table_name;
1855   }
1856 
1857   return (new_table);
1858 }
1859 
1860 /** Check if common tables already exist
1861 @param[in]	table	table with fts index
1862 @return true on success, false on failure */
fts_check_common_tables_exist(const dict_table_t * table)1863 bool fts_check_common_tables_exist(const dict_table_t *table) {
1864   fts_table_t fts_table;
1865   char fts_name[MAX_FULL_NAME_LEN];
1866 
1867   /* TODO: set a new flag for the situation table has hidden
1868   FTS_DOC_ID but no FTS indexes. */
1869   FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1870   fts_table.suffix = FTS_SUFFIX_CONFIG;
1871   fts_get_table_name(&fts_table, fts_name);
1872 
1873   dict_table_t *config_table;
1874   THD *thd = current_thd;
1875   MDL_ticket *mdl = reinterpret_cast<MDL_ticket *>(-1);
1876 
1877   /* Check that the table exists in our data dictionary */
1878   config_table = dd_table_open_on_name(
1879       thd, &mdl, fts_name, false,
1880       static_cast<dict_err_ignore_t>(DICT_ERR_IGNORE_INDEX_ROOT |
1881                                      DICT_ERR_IGNORE_CORRUPT));
1882 
1883   bool exist = false;
1884   if (config_table != nullptr) {
1885     dd_table_close(config_table, thd, &mdl, false);
1886     exist = true;
1887   }
1888 
1889   return (exist);
1890 }
1891 
/** Creates the common auxiliary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
NOTE(review): the sentence above asks for the data dictionary lock, while
the first assertion in the body requires that dict_sys->mutex is NOT owned
by the caller - confirm which precondition is the current one.
The following tables are created.
CREATE TABLE $FTS_PREFIX_DELETED
        (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_DELETED_CACHE
        (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_BEING_DELETED
        (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
        (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_CONFIG
        (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
After the tables exist, the default settings are written to the config
table and, unless skip_doc_id_index is set, a unique index named
FTS_DOC_ID_INDEX_NAME on column FTS_DOC_ID_COL_NAME is created on the
table called "name".
@param[in,out]	trx			transaction
@param[in]	table			table with FTS index
@param[in]	name			table name normalized
@param[in]	skip_doc_id_index	Skip index on doc id
@return DB_SUCCESS on success; DB_ERROR if an auxiliary table could not be
created; otherwise the error returned by fts_init_config_table() or
row_create_index_for_mysql() */
dberr_t fts_create_common_tables(trx_t *trx, const dict_table_t *table,
                                 const char *name, bool skip_doc_id_index) {
  /* index and op are declared up front, before the first goto, and are
  only assigned later, so no goto below jumps over an initialization. */
  dberr_t error;
  fts_table_t fts_table;
  /* One name buffer per entry of fts_common_tables (the array size also
  counts the terminating nullptr entry).
  NOTE(review): presumably each name has to stay valid after its loop
  iteration because a callee keeps the pointer - confirm. */
  char full_name[sizeof(fts_common_tables) / sizeof(char *)][MAX_FULL_NAME_LEN];
  dict_index_t *index = nullptr;
  trx_dict_op_t op;

  ut_ad(!mutex_own(&dict_sys->mutex));
  ut_ad(!fts_check_common_tables_exist(table));

  /* Scratch heap handed to the table-creation helper; released at
  func_exit on every path. */
  mem_heap_t *heap = mem_heap_create(1024);

  FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);

  /* Create the FTS tables that are common to an FTS index. */
  for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
    fts_table.suffix = fts_common_tables[i];
    fts_get_table_name(&fts_table, full_name[i]);
    dict_table_t *common_table = fts_create_one_common_table(
        trx, table, full_name[i], fts_table.suffix, heap);

    /* The helper has already stored its error in trx->error_state and
    logged a warning; here the failure is reported as DB_ERROR. */
    if (common_table == nullptr) {
      error = DB_ERROR;
      goto func_exit;
    }

    /* Debug-only fault injection, active when the keyword
    "ib_fts_aux_table_error" is set. */
    DBUG_EXECUTE_IF(
        "ib_fts_aux_table_error",
        /* Return error after creating FTS_AUX_CONFIG table. */
        if (i == 4) {
          error = DB_ERROR;
          goto func_exit;
        });
  }

  /* Write the default settings to the config table. */
  error = fts_init_config_table(&fts_table);

  /* Stop here on failure, or when the caller does not want the index on
  the doc id column. */
  if (error != DB_SUCCESS || skip_doc_id_index) {
    goto func_exit;
  }

  index = dict_mem_index_create(name, FTS_DOC_ID_INDEX_NAME, table->space,
                                DICT_UNIQUE, 1);
  index->add_field(FTS_DOC_ID_COL_NAME, 0, true);

  /* Save trx->dict_operation around row_create_index_for_mysql() and
  restore it afterwards. */
  op = trx_get_dict_operation(trx);

  error = row_create_index_for_mysql(index, trx, nullptr, nullptr);

  trx->dict_operation = op;

func_exit:
  mem_heap_free(heap);

  return (error);
}
1969 
1970 /** Creates one FTS auxiliary index table for an FTS index.
1971 @param[in,out]	trx		transaction
1972 @param[in]	index		the index instance
1973 @param[in]	fts_table	fts_table structure
1974 @param[in]	heap		memory heap
1975 @return DB_SUCCESS or error code */
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)1976 static dict_table_t *fts_create_one_index_table(trx_t *trx,
1977                                                 const dict_index_t *index,
1978                                                 fts_table_t *fts_table,
1979                                                 mem_heap_t *heap) {
1980   dict_field_t *field;
1981   dict_table_t *new_table = nullptr;
1982   char table_name[MAX_FULL_NAME_LEN];
1983   dberr_t error;
1984   CHARSET_INFO *charset;
1985 
1986   ut_ad(index->type & DICT_FTS);
1987 
1988   fts_get_table_name(fts_table, table_name);
1989 
1990   new_table = fts_create_in_mem_aux_table(table_name, fts_table->table,
1991                                           FTS_AUX_INDEX_TABLE_NUM_COLS);
1992 
1993   field = index->get_field(0);
1994   charset = fts_get_charset(field->col->prtype);
1995 
1996   dict_mem_table_add_col(
1997       new_table, heap, "word",
1998       charset == &my_charset_latin1 ? DATA_VARCHAR : DATA_VARMYSQL,
1999       field->col->prtype, FTS_INDEX_WORD_LEN, true);
2000 
2001   dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
2002                          DATA_NOT_NULL | DATA_UNSIGNED,
2003                          FTS_INDEX_FIRST_DOC_ID_LEN, true);
2004 
2005   dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2006                          DATA_NOT_NULL | DATA_UNSIGNED,
2007                          FTS_INDEX_LAST_DOC_ID_LEN, true);
2008 
2009   dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2010                          DATA_NOT_NULL | DATA_UNSIGNED, FTS_INDEX_DOC_COUNT_LEN,
2011                          true);
2012 
2013   /* The precise type calculation is as follows:
2014   least signficiant byte: MySQL type code (not applicable for sys cols)
2015   second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2016   third least  : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2017 
2018   dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB,
2019                          (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2020                          FTS_INDEX_ILIST_LEN, true);
2021 
2022   error = row_create_table_for_mysql(new_table, nullptr, trx);
2023 
2024   if (error == DB_SUCCESS) {
2025     dict_index_t *index = dict_mem_index_create(
2026         table_name, "FTS_INDEX_TABLE_IND", new_table->space,
2027         DICT_UNIQUE | DICT_CLUSTERED, 2);
2028     index->add_field("word", 0, true);
2029     index->add_field("first_doc_id", 0, true);
2030 
2031     trx_dict_op_t op = trx_get_dict_operation(trx);
2032 
2033     error = row_create_index_for_mysql(index, trx, nullptr, nullptr);
2034 
2035     trx->dict_operation = op;
2036   }
2037 
2038   if (error != DB_SUCCESS) {
2039     trx->error_state = error;
2040     new_table = nullptr;
2041     ib::warn(ER_IB_MSG_466)
2042         << "Failed to create FTS index table " << table_name;
2043   }
2044 
2045   return (new_table);
2046 }
2047 
2048 /** Freeze all auiliary tables to be not evictable if exist, with dict_mutex
2049 held
2050 @param[in]	table		InnoDB table object */
fts_freeze_aux_tables(const dict_table_t * table)2051 void fts_freeze_aux_tables(const dict_table_t *table) {
2052   fts_table_t fts_table;
2053   char table_name[MAX_FULL_NAME_LEN];
2054 
2055   FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
2056 
2057   for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
2058     fts_table.suffix = fts_common_tables[i];
2059     fts_get_table_name(&fts_table, table_name);
2060 
2061     dict_table_t *common;
2062     common = dd_table_open_on_name_in_mem(table_name, true);
2063     if (common != nullptr && common->can_be_evicted) {
2064       dict_table_prevent_eviction(common);
2065     }
2066 
2067     if (common != nullptr) {
2068       dd_table_close(common, nullptr, nullptr, true);
2069     }
2070   }
2071 
2072   fts_t *fts = table->fts;
2073   if (fts == nullptr) {
2074     return;
2075   }
2076 
2077   for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
2078        ++i) {
2079     dict_index_t *index;
2080     index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
2081 
2082     FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
2083 
2084     for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
2085       fts_table.suffix = fts_get_suffix(j);
2086       fts_get_table_name(&fts_table, table_name);
2087 
2088       dict_table_t *index_table;
2089       index_table = dd_table_open_on_name_in_mem(table_name, true);
2090       if (index_table != nullptr && index_table->can_be_evicted) {
2091         dict_table_prevent_eviction(index_table);
2092       }
2093 
2094       if (index_table != nullptr) {
2095         dd_table_close(index_table, nullptr, nullptr, true);
2096       }
2097     }
2098   }
2099 }
2100 
2101 /** Allow all the auxiliary tables of specified base table to be evictable
2102 if they exist, if not exist just ignore
2103 @param[in]	table		InnoDB table object
2104 @param[in]	dict_locked	True if we have dict_sys mutex */
fts_detach_aux_tables(const dict_table_t * table,bool dict_locked)2105 void fts_detach_aux_tables(const dict_table_t *table, bool dict_locked) {
2106   fts_table_t fts_table;
2107   char table_name[MAX_FULL_NAME_LEN];
2108 
2109   if (!dict_locked) {
2110     mutex_enter(&dict_sys->mutex);
2111   }
2112 
2113   FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
2114 
2115   for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
2116     fts_table.suffix = fts_common_tables[i];
2117     fts_get_table_name(&fts_table, table_name);
2118 
2119     dict_table_t *common;
2120     common = dd_table_open_on_name_in_mem(table_name, true);
2121     if (common != nullptr && !common->can_be_evicted) {
2122       dict_table_allow_eviction(common);
2123     }
2124 
2125     if (common != nullptr) {
2126       dd_table_close(common, nullptr, nullptr, true);
2127     }
2128   }
2129 
2130   fts_t *fts = table->fts;
2131   if (fts == nullptr) {
2132     if (!dict_locked) {
2133       mutex_exit(&dict_sys->mutex);
2134     }
2135 
2136     return;
2137   }
2138 
2139   for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
2140        ++i) {
2141     dict_index_t *index;
2142     index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
2143 
2144     FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
2145 
2146     for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
2147       fts_table.suffix = fts_get_suffix(j);
2148       fts_get_table_name(&fts_table, table_name);
2149 
2150       dict_table_t *index_table;
2151       index_table = dd_table_open_on_name_in_mem(table_name, true);
2152       if (index_table != nullptr && !index_table->can_be_evicted) {
2153         dict_table_allow_eviction(index_table);
2154       }
2155 
2156       if (index_table != nullptr) {
2157         dd_table_close(index_table, nullptr, nullptr, true);
2158       }
2159     }
2160   }
2161 
2162   if (!dict_locked) {
2163     mutex_exit(&dict_sys->mutex);
2164   }
2165 }
2166 
2167 /** Update DD system table for auxiliary common tables for an FTS index.
2168 @param[in]	table		dict table instance
2169 @return true on success, false on failure */
fts_create_common_dd_tables(const dict_table_t * table)2170 bool fts_create_common_dd_tables(const dict_table_t *table) {
2171   fts_table_t fts_table;
2172   bool ret = true;
2173 
2174   FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
2175 
2176   /* Create the FTS tables that are common to an FTS index. */
2177   for (ulint i = 0; fts_common_tables[i] != nullptr && ret; ++i) {
2178     char table_name[MAX_FULL_NAME_LEN];
2179 
2180     fts_table.suffix = fts_common_tables[i];
2181     fts_get_table_name(&fts_table, table_name);
2182 
2183     dict_table_t *common_table;
2184     common_table = dd_table_open_on_name_in_mem(table_name, false);
2185     ut_ad(common_table != nullptr);
2186 
2187     bool is_config = fts_table.suffix == FTS_SUFFIX_CONFIG;
2188     ret = dd_create_fts_common_table(table, common_table, is_config);
2189 
2190     dd_table_close(common_table, nullptr, nullptr, false);
2191   }
2192 
2193   return (ret);
2194 }
2195 
2196 /** Update DD system table for auxiliary index tables for an FTS index.
2197 @param[in]	index		the index instance
2198 @return DB_SUCCESS or error code */
fts_create_one_index_dd_tables(const dict_index_t * index)2199 static dberr_t fts_create_one_index_dd_tables(const dict_index_t *index) {
2200   ulint i;
2201   fts_table_t fts_table;
2202   dberr_t error = DB_SUCCESS;
2203   char *parent_name = index->table->name.m_name;
2204 
2205   fts_table.type = FTS_INDEX_TABLE;
2206   fts_table.index_id = index->id;
2207   fts_table.table_id = index->table->id;
2208   fts_table.parent = parent_name;
2209   fts_table.table = index->table;
2210 
2211   for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2212     dict_table_t *new_table;
2213     char table_name[MAX_FULL_NAME_LEN];
2214     CHARSET_INFO *charset;
2215     dict_field_t *field;
2216 
2217     ut_ad(index->type & DICT_FTS);
2218 
2219     field = index->get_field(0);
2220     charset = fts_get_charset(field->col->prtype);
2221 
2222     fts_table.suffix = fts_get_suffix(i);
2223     fts_get_table_name(&fts_table, table_name);
2224 
2225     new_table = dd_table_open_on_name_in_mem(table_name, false);
2226     ut_ad(new_table != nullptr);
2227 
2228     if (!dd_create_fts_index_table(fts_table.table, new_table, charset)) {
2229       ib::warn(ER_IB_MSG_467)
2230           << "Failed to create FTS index dd table " << table_name;
2231       error = DB_FAIL;
2232     }
2233 
2234     dd_table_close(new_table, nullptr, nullptr, false);
2235   }
2236 
2237   return (error);
2238 }
2239 
2240 /** Check if a table has FTS index needs to have its auxiliary index
2241 tables' metadata updated in DD
2242 @param[in,out]	table		table to check
2243 @return DB_SUCCESS or error code */
fts_create_index_dd_tables(dict_table_t * table)2244 dberr_t fts_create_index_dd_tables(dict_table_t *table) {
2245   dberr_t error = DB_SUCCESS;
2246 
2247   for (dict_index_t *index = table->first_index();
2248        index != nullptr && error == DB_SUCCESS; index = index->next()) {
2249     if ((index->type & DICT_FTS) && index->fill_dd) {
2250       error = fts_create_one_index_dd_tables(index);
2251       index->fill_dd = false;
2252     }
2253 
2254     ut_ad(!index->fill_dd);
2255   }
2256 
2257   return (error);
2258 }
2259 
/** Create auxiliary index tables for an FTS index.
Creates FTS_NUM_AUX_INDEX auxiliary tables, one per suffix returned by
fts_get_suffix(). When all of them were created, index->fill_dd is set.
@param[in,out]	trx		transaction
@param[in]	index		the index instance
@param[in]	table_name	table name
@param[in]	table_id	the table id
@return DB_SUCCESS, or DB_FAIL if an auxiliary table could not be created */
dberr_t fts_create_index_tables_low(trx_t *trx, dict_index_t *index,
                                    const char *table_name,
                                    table_id_t table_id) {
  ulint i;
  fts_table_t fts_table;
  dberr_t error = DB_SUCCESS;
  /* Scratch heap handed to the table-creation helper; released before
  returning. */
  mem_heap_t *heap = mem_heap_create(1024);

  /* Describe the auxiliary tables of this index; only the suffix changes
  from one loop iteration to the next. */
  fts_table.type = FTS_INDEX_TABLE;
  fts_table.index_id = index->id;
  fts_table.table_id = table_id;
  fts_table.parent = table_name;
  fts_table.table = index->table;

  /* The "error == DB_SUCCESS" part of the loop condition matters.
  NOTE(review): if DBUG_EXECUTE_IF wraps its code argument in a
  do { } while (0), the "break" inside it only leaves that wrapper, and it
  is this condition that actually ends the loop - confirm against the
  macro definition before simplifying the condition. */
  for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
    dict_table_t *new_table;

    /* Create the FTS auxiliary tables that are specific
    to an FTS index. We need to preserve the table_id %s
    which fts_parse_sql() will fill in for us.
    NOTE(review): nothing in this function calls fts_parse_sql(); the
    sentence above may be stale - confirm. */
    fts_table.suffix = fts_get_suffix(i);

    new_table = fts_create_one_index_table(trx, index, &fts_table, heap);

    /* The helper has already stored its error in trx->error_state and
    logged a warning; here the failure is reported as DB_FAIL. */
    if (new_table == nullptr) {
      error = DB_FAIL;
      break;
    }

    /* Debug-only fault injection, active when the keyword
    "ib_fts_index_table_error" is set. */
    DBUG_EXECUTE_IF(
        "ib_fts_index_table_error",
        /* Return error after creating FTS_INDEX_5
        aux table. */
        if (i == 4) {
          error = DB_FAIL;
          break;
        });
  }

  /* Flag the index so that fts_create_index_dd_tables() will update the
  DD metadata of its auxiliary tables. */
  if (error == DB_SUCCESS) {
    index->fill_dd = true;
  }

  mem_heap_free(heap);

  return (error);
}
2313 
2314 /** Creates the column specific ancillary tables needed for supporting an
2315 FTS index on the given table. row_mysql_lock_data_dictionary must have
2316 been called before this.
2317 
2318 All FTS AUX Index tables have the following schema.
2319 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2320         word		VARCHAR(FTS_MAX_WORD_LEN),
2321         first_doc_id	INT NOT NULL,
2322         last_doc_id	UNSIGNED NOT NULL,
2323         doc_count	UNSIGNED INT NOT NULL,
2324         ilist		VARBINARY NOT NULL,
2325         UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2326 @param[in,out]	trx	transaction
2327 @param[in]	index	index instance
2328 @return DB_SUCCESS or error code */
fts_create_index_tables(trx_t * trx,dict_index_t * index)2329 dberr_t fts_create_index_tables(trx_t *trx, dict_index_t *index) {
2330   dberr_t err;
2331   dict_table_t *table;
2332 
2333   ut_ad(!mutex_own(&dict_sys->mutex));
2334 
2335   table = dd_table_open_on_name_in_mem(index->table_name, false);
2336   ut_a(table != nullptr);
2337   ut_d(mutex_enter(&dict_sys->mutex));
2338   ut_ad(table->get_ref_count() > 1);
2339   ut_d(mutex_exit(&dict_sys->mutex));
2340 
2341   err = fts_create_index_tables_low(trx, index, table->name.m_name, table->id);
2342 
2343   dd_table_close(table, nullptr, nullptr, false);
2344 
2345   return (err);
2346 }
#if 0
/******************************************************************//**
Return string representation of state.
This function is currently compiled out by the surrounding "#if 0".
@return string representation of state; "UNKNOWN" for any value that is
not one of the listed states */
static
const char*
fts_get_state_str(
    fts_row_state state) /*!< in: state */
{
	/* The parameter is named "state" so that it matches the switch
	below; with any other name the function would not compile once
	enabled. */
	switch (state) {
	case FTS_INSERT:
		return("INSERT");

	case FTS_MODIFY:
		return("MODIFY");

	case FTS_DELETE:
		return("DELETE");

	case FTS_NOTHING:
		return("NOTHING");

	case FTS_INVALID:
		return("INVALID");

	default:
		return("UNKNOWN");
	}
}
#endif
2377 
2378 /** Calculate the new state of a row given the existing state and a new event.
2379  @return new state of row */
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2380 static fts_row_state fts_trx_row_get_new_state(
2381     fts_row_state old_state, /*!< in: existing state of row */
2382     fts_row_state event)     /*!< in: new event */
2383 {
2384   /* The rules for transforming states:
2385 
2386   I = inserted
2387   M = modified
2388   D = deleted
2389   N = nothing
2390 
2391   M+D -> D:
2392 
2393   If the row existed before the transaction started and it is modified
2394   during the transaction, followed by a deletion of the row, only the
2395   deletion will be signaled.
2396 
2397   M+ -> M:
2398 
2399   If the row existed before the transaction started and it is modified
2400   more than once during the transaction, only the last modification
2401   will be signaled.
2402 
2403   IM*D -> N:
2404 
2405   If a new row is added during the transaction (and possibly modified
2406   after its initial insertion) but it is deleted before the end of the
2407   transaction, nothing will be signaled.
2408 
2409   IM* -> I:
2410 
2411   If a new row is added during the transaction and modified after its
2412   initial insertion, only the addition will be signaled.
2413 
2414   M*DI -> M:
2415 
2416   If the row existed before the transaction started and it is deleted,
2417   then re-inserted, only a modification will be signaled. Note that
2418   this case is only possible if the table is using the row's primary
2419   key for FTS row ids, since those can be re-inserted by the user,
2420   which is not true for InnoDB generated row ids.
2421 
2422   It is easily seen that the above rules decompose such that we do not
2423   need to store the row's entire history of events. Instead, we can
2424   store just one state for the row and update that when new events
2425   arrive. Then we can implement the above rules as a two-dimensional
2426   look-up table, and get checking of invalid combinations "for free"
2427   in the process. */
2428 
2429   /* The lookup table for transforming states. old_state is the
2430   Y-axis, event is the X-axis. */
2431   static const fts_row_state table[4][4] = {
2432       /*    I            M            D            N */
2433       /* I */ {FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID},
2434       /* M */ {FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID},
2435       /* D */ {FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID},
2436       /* N */ {FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID}};
2437 
2438   fts_row_state result;
2439 
2440   ut_a(old_state < FTS_INVALID);
2441   ut_a(event < FTS_INVALID);
2442 
2443   result = table[(int)old_state][(int)event];
2444   ut_a(result != FTS_INVALID);
2445 
2446   return (result);
2447 }
2448 
2449 /** Create a savepoint instance.
2450  @return savepoint instance */
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2451 static fts_savepoint_t *fts_savepoint_create(
2452     ib_vector_t *savepoints, /*!< out: InnoDB transaction */
2453     const char *name,        /*!< in: savepoint name */
2454     mem_heap_t *heap)        /*!< in: heap */
2455 {
2456   fts_savepoint_t *savepoint;
2457 
2458   savepoint =
2459       static_cast<fts_savepoint_t *>(ib_vector_push(savepoints, nullptr));
2460 
2461   memset(savepoint, 0x0, sizeof(*savepoint));
2462 
2463   if (name) {
2464     savepoint->name = mem_heap_strdup(heap, name);
2465   }
2466 
2467   savepoint->tables = rbt_create(sizeof(fts_trx_table_t *), fts_trx_table_cmp);
2468 
2469   return (savepoint);
2470 }
2471 
2472 /** Create an FTS trx.
2473 @param[in,out]	trx	InnoDB Transaction
2474 @return FTS transaction. */
fts_trx_create(trx_t * trx)2475 fts_trx_t *fts_trx_create(trx_t *trx) {
2476   fts_trx_t *ftt;
2477   ib_alloc_t *heap_alloc;
2478   mem_heap_t *heap = mem_heap_create(1024);
2479   trx_named_savept_t *savep;
2480 
2481   ut_a(trx->fts_trx == nullptr);
2482 
2483   ftt = static_cast<fts_trx_t *>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2484   ftt->trx = trx;
2485   ftt->heap = heap;
2486 
2487   heap_alloc = ib_heap_allocator_create(heap);
2488 
2489   ftt->savepoints = static_cast<ib_vector_t *>(
2490       ib_vector_create(heap_alloc, sizeof(fts_savepoint_t), 4));
2491 
2492   ftt->last_stmt = static_cast<ib_vector_t *>(
2493       ib_vector_create(heap_alloc, sizeof(fts_savepoint_t), 4));
2494 
2495   /* Default instance has no name and no heap. */
2496   fts_savepoint_create(ftt->savepoints, nullptr, nullptr);
2497   fts_savepoint_create(ftt->last_stmt, nullptr, nullptr);
2498 
2499   /* Copy savepoints that already set before. */
2500   for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints); savep != nullptr;
2501        savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2502     fts_savepoint_take(trx, ftt, savep->name);
2503   }
2504 
2505   return (ftt);
2506 }
2507 
2508 /** Create an FTS trx table.
2509  @return FTS trx table */
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2510 static fts_trx_table_t *fts_trx_table_create(
2511     fts_trx_t *fts_trx,  /*!< in: FTS trx */
2512     dict_table_t *table) /*!< in: table */
2513 {
2514   fts_trx_table_t *ftt;
2515 
2516   ftt = static_cast<fts_trx_table_t *>(
2517       mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2518 
2519   memset(ftt, 0x0, sizeof(*ftt));
2520 
2521   ftt->table = table;
2522   ftt->fts_trx = fts_trx;
2523 
2524   ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2525 
2526   return (ftt);
2527 }
2528 
2529 /** Clone an FTS trx table.
2530  @return FTS trx table */
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2531 static fts_trx_table_t *fts_trx_table_clone(
2532     const fts_trx_table_t *ftt_src) /*!< in: FTS trx */
2533 {
2534   fts_trx_table_t *ftt;
2535 
2536   ftt = static_cast<fts_trx_table_t *>(
2537       mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2538 
2539   memset(ftt, 0x0, sizeof(*ftt));
2540 
2541   ftt->table = ftt_src->table;
2542   ftt->fts_trx = ftt_src->fts_trx;
2543 
2544   ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2545 
2546   /* Copy the rb tree values to the new savepoint. */
2547   rbt_merge_uniq(ftt->rows, ftt_src->rows);
2548 
2549   /* These are only added on commit. At this stage we only have
2550   the updated row state. */
2551   ut_a(ftt_src->added_doc_ids == nullptr);
2552 
2553   return (ftt);
2554 }
2555 
2556 /** Initialize the FTS trx instance.
2557  @return FTS trx instance */
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2558 static fts_trx_table_t *fts_trx_init(
2559     trx_t *trx,              /*!< in: transaction */
2560     dict_table_t *table,     /*!< in: FTS table instance */
2561     ib_vector_t *savepoints) /*!< in: Savepoints */
2562 {
2563   fts_trx_table_t *ftt;
2564   ib_rbt_bound_t parent;
2565   ib_rbt_t *tables;
2566   fts_savepoint_t *savepoint;
2567 
2568   savepoint = static_cast<fts_savepoint_t *>(ib_vector_last(savepoints));
2569 
2570   tables = savepoint->tables;
2571   rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, nullptr);
2572 
2573   if (parent.result == 0) {
2574     fts_trx_table_t **fttp;
2575 
2576     fttp = rbt_value(fts_trx_table_t *, parent.last);
2577     ftt = *fttp;
2578   } else {
2579     ftt = fts_trx_table_create(trx->fts_trx, table);
2580     rbt_add_node(tables, &parent, &ftt);
2581   }
2582 
2583   ut_a(ftt->table == table);
2584 
2585   return (ftt);
2586 }
2587 
2588 /** Notify the FTS system about an operation on an FTS-indexed table. */
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2589 static void fts_trx_table_add_op(
2590     fts_trx_table_t *ftt,     /*!< in: FTS trx table */
2591     doc_id_t doc_id,          /*!< in: doc id */
2592     fts_row_state state,      /*!< in: state of the row */
2593     ib_vector_t *fts_indexes) /*!< in: FTS indexes affected */
2594 {
2595   ib_rbt_t *rows;
2596   ib_rbt_bound_t parent;
2597 
2598   rows = ftt->rows;
2599   rbt_search(rows, &parent, &doc_id);
2600 
2601   /* Row id found, update state, and if new state is FTS_NOTHING,
2602   we delete the row from our tree. */
2603   if (parent.result == 0) {
2604     fts_trx_row_t *row = rbt_value(fts_trx_row_t, parent.last);
2605 
2606     row->state = fts_trx_row_get_new_state(row->state, state);
2607 
2608     if (row->state == FTS_NOTHING) {
2609       if (row->fts_indexes) {
2610         ib_vector_free(row->fts_indexes);
2611       }
2612 
2613       ut_free(rbt_remove_node(rows, parent.last));
2614       row = nullptr;
2615     } else if (row->fts_indexes != nullptr) {
2616       ib_vector_free(row->fts_indexes);
2617       row->fts_indexes = fts_indexes;
2618     }
2619 
2620   } else { /* Row-id not found, create a new one. */
2621     fts_trx_row_t row;
2622 
2623     row.doc_id = doc_id;
2624     row.state = state;
2625     row.fts_indexes = fts_indexes;
2626 
2627     rbt_add_node(rows, &parent, &row);
2628   }
2629 }
2630 
2631 /** Notify the FTS system about an operation on an FTS-indexed table. */
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2632 void fts_trx_add_op(trx_t *trx,               /*!< in: InnoDB transaction */
2633                     dict_table_t *table,      /*!< in: table */
2634                     doc_id_t doc_id,          /*!< in: new doc id */
2635                     fts_row_state state,      /*!< in: state of the row */
2636                     ib_vector_t *fts_indexes) /*!< in: FTS indexes affected
2637                                               (NULL=all) */
2638 {
2639   fts_trx_table_t *tran_ftt;
2640   fts_trx_table_t *stmt_ftt;
2641 
2642   if (!trx->fts_trx) {
2643     trx->fts_trx = fts_trx_create(trx);
2644   }
2645 
2646   tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2647   stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2648 
2649   fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2650   fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2651 }
2652 
2653 /** Fetch callback that converts a textual document id to a binary value and
2654  stores it in the given place.
2655  @return always returns NULL */
fts_fetch_store_doc_id(void * row,void * user_arg)2656 static ibool fts_fetch_store_doc_id(void *row,      /*!< in: sel_node_t* */
2657                                     void *user_arg) /*!< in: doc_id_t* to store
2658                                                     doc_id in */
2659 {
2660   int n_parsed;
2661   sel_node_t *node = static_cast<sel_node_t *>(row);
2662   doc_id_t *doc_id = static_cast<doc_id_t *>(user_arg);
2663   dfield_t *dfield = que_node_get_val(node->select_list);
2664   dtype_t *type = dfield_get_type(dfield);
2665   ulint len = dfield_get_len(dfield);
2666 
2667   char buf[32];
2668 
2669   ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2670   ut_a(len > 0 && len < sizeof(buf));
2671 
2672   memcpy(buf, dfield_get_data(dfield), len);
2673   buf[len] = '\0';
2674 
2675   n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2676   ut_a(n_parsed == 1);
2677 
2678   return (FALSE);
2679 }
2680 
#ifdef FTS_CACHE_SIZE_DEBUG
/** Get the max cache size in bytes. If there is an error reading the
value we simply print an error message here and return the default
value to the caller. A value outside the range
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB] is
clamped to the nearest limit and a warning is logged.
@param[in]	trx		transaction
@param[in]	fts_table	table instance
@return max cache size in bytes */
static ulint fts_get_max_cache_size(trx_t *trx, fts_table_t *fts_table) {
  /* Start from the default value. */
  ulint cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
  fts_string_t value;

  /* We set the length of value to the max bytes it can hold. This
  information is used by the callback that reads the value. */
  value.f_n_char = 0;
  value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
  /* The allocator result needs an explicit cast to the byte buffer type. */
  value.f_str = static_cast<byte *>(ut_malloc_nokey(value.f_len + 1));

  const dberr_t error =
      fts_config_get_value(trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

  if (error == DB_SUCCESS) {
    value.f_str[value.f_len] = 0;
    cache_size_in_mb =
        strtoul(reinterpret_cast<char *>(value.f_str), nullptr, 10);

    if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {
      ib::warn(ER_IB_MSG_468)
          << "FTS max cache size (" << cache_size_in_mb
          << ") out of range."
             " Minimum value is "
          << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB << "MB and the maximum value is "
          << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
          << "MB, setting cache size to upper limit";

      cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

    } else if (cache_size_in_mb < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {
      /* Same wording as the upper-limit warning, including the space
      before the maximum value. */
      ib::warn(ER_IB_MSG_469)
          << "FTS max cache size (" << cache_size_in_mb
          << ") out of range."
             " Minimum value is "
          << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB << "MB and the maximum value is "
          << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
          << "MB, setting cache size to lower limit";

      cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
    }
  } else {
    ib::error(ER_IB_MSG_470) << "(" << ut_strerr(error)
                             << ") reading max"
                                " cache config value from config table";
  }

  ut_free(value.f_str);

  /* Convert megabytes to bytes. */
  return (cache_size_in_mb * 1024 * 1024);
}
#endif
2743 
2744 /** Update the next and last Doc ID in the CONFIG table to be the input
2745  "doc_id" value (+ 1). We would do so after each FTS index build or
2746  table truncate */
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2747 void fts_update_next_doc_id(
2748     trx_t *trx,                /*!< in/out: transaction */
2749     const dict_table_t *table, /*!< in: table */
2750     const char *table_name,    /*!< in: table name, or NULL */
2751     doc_id_t doc_id)           /*!< in: DOC ID to set */
2752 {
2753   table->fts->cache->synced_doc_id = doc_id;
2754   table->fts->cache->next_doc_id = doc_id + 1;
2755 
2756   table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2757 
2758   fts_update_sync_doc_id(table, table_name, table->fts->cache->synced_doc_id,
2759                          trx);
2760 }
2761 
2762 /** Get the next available document id.
2763  @return DB_SUCCESS if OK */
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2764 dberr_t fts_get_next_doc_id(const dict_table_t *table, /*!< in: table */
2765                             doc_id_t *doc_id) /*!< out: new document id */
2766 {
2767   fts_cache_t *cache = table->fts->cache;
2768 
2769   /* If the Doc ID system has not yet been initialized, we
2770   will consult the CONFIG table and user table to re-establish
2771   the initial value of the Doc ID */
2772   if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2773     fts_init_doc_id(table);
2774   }
2775 
2776   if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2777     *doc_id = FTS_NULL_DOC_ID;
2778     return (DB_SUCCESS);
2779   }
2780 
2781   mutex_enter(&cache->doc_id_lock);
2782   *doc_id = ++cache->next_doc_id;
2783   mutex_exit(&cache->doc_id_lock);
2784 
2785   return (DB_SUCCESS);
2786 }
2787 
2788 /** This function fetch the Doc ID from CONFIG table, and compare with
2789  the Doc ID supplied. And store the larger one to the CONFIG table.
2790  @return DB_SUCCESS if OK */
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2791 static dberr_t fts_cmp_set_sync_doc_id(
2792     const dict_table_t *table, /*!< in: table */
2793     doc_id_t doc_id_cmp,       /*!< in: Doc ID to compare */
2794     ibool read_only,           /*!< in: TRUE if read the
2795                                synced_doc_id only */
2796     doc_id_t *doc_id)          /*!< out: larger document id
2797                                after comparing "doc_id_cmp"
2798                                to the one stored in CONFIG
2799                                table */
2800 {
2801   trx_t *trx;
2802   pars_info_t *info;
2803   dberr_t error;
2804   fts_table_t fts_table;
2805   que_t *graph = nullptr;
2806   fts_cache_t *cache = table->fts->cache;
2807   char table_name[MAX_FULL_NAME_LEN];
2808 retry:
2809   ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2810 
2811   fts_table.suffix = FTS_SUFFIX_CONFIG;
2812   fts_table.table_id = table->id;
2813   fts_table.type = FTS_COMMON_TABLE;
2814   fts_table.table = table;
2815 
2816   fts_table.parent = table->name.m_name;
2817 
2818   trx = trx_allocate_for_background();
2819 
2820   trx->op_info = "update the next FTS document id";
2821 
2822   info = pars_info_create();
2823 
2824   pars_info_bind_function(info, "my_func", fts_fetch_store_doc_id, doc_id);
2825 
2826   fts_get_table_name(&fts_table, table_name);
2827   pars_info_bind_id(info, true, "config_table", table_name);
2828 
2829   graph = fts_parse_sql(&fts_table, info,
2830                         "DECLARE FUNCTION my_func;\n"
2831                         "DECLARE CURSOR c IS SELECT value FROM $config_table"
2832                         " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2833                         "BEGIN\n"
2834                         ""
2835                         "OPEN c;\n"
2836                         "WHILE 1 = 1 LOOP\n"
2837                         "  FETCH c INTO my_func();\n"
2838                         "  IF c % NOTFOUND THEN\n"
2839                         "    EXIT;\n"
2840                         "  END IF;\n"
2841                         "END LOOP;\n"
2842                         "CLOSE c;");
2843 
2844   *doc_id = 0;
2845 
2846   error = fts_eval_sql(trx, graph);
2847 
2848   que_graph_free(graph);
2849 
2850   // FIXME: We need to retry deadlock errors
2851   if (error != DB_SUCCESS) {
2852     goto func_exit;
2853   }
2854 
2855   if (read_only) {
2856     goto func_exit;
2857   }
2858 
2859   if (doc_id_cmp == 0 && *doc_id) {
2860     cache->synced_doc_id = *doc_id - 1;
2861   } else {
2862     cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2863   }
2864 
2865   mutex_enter(&cache->doc_id_lock);
2866   /* For each sync operation, we will add next_doc_id by 1,
2867   so to mark a sync operation */
2868   if (cache->next_doc_id < cache->synced_doc_id + 1) {
2869     cache->next_doc_id = cache->synced_doc_id + 1;
2870   }
2871   mutex_exit(&cache->doc_id_lock);
2872 
2873   if (doc_id_cmp > *doc_id) {
2874     error = fts_update_sync_doc_id(table, table->name.m_name,
2875                                    cache->synced_doc_id, trx);
2876   }
2877 
2878   *doc_id = cache->next_doc_id;
2879 
2880 func_exit:
2881 
2882   if (error == DB_SUCCESS) {
2883     fts_sql_commit(trx);
2884   } else {
2885     *doc_id = 0;
2886 
2887     ib::error(ER_IB_MSG_471) << "(" << ut_strerr(error)
2888                              << ") while getting"
2889                                 " next doc id.";
2890     fts_sql_rollback(trx);
2891 
2892     if (error == DB_DEADLOCK) {
2893       os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2894       goto retry;
2895     }
2896   }
2897 
2898   trx_free_for_background(trx);
2899 
2900   return (error);
2901 }
2902 
2903 /** Update the last document id. This function could create a new
2904  transaction to update the last document id.
2905  @return DB_SUCCESS if OK */
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2906 static dberr_t fts_update_sync_doc_id(
2907     const dict_table_t *table, /*!< in: table */
2908     const char *table_name,    /*!< in: table name, or NULL */
2909     doc_id_t doc_id,           /*!< in: last document id */
2910     trx_t *trx)                /*!< in: update trx, or NULL */
2911 {
2912   byte id[FTS_MAX_ID_LEN];
2913   pars_info_t *info;
2914   fts_table_t fts_table;
2915   ulint id_len;
2916   que_t *graph = nullptr;
2917   dberr_t error;
2918   ibool local_trx = FALSE;
2919   fts_cache_t *cache = table->fts->cache;
2920   char fts_name[MAX_FULL_NAME_LEN];
2921 
2922   fts_table.suffix = FTS_SUFFIX_CONFIG;
2923   fts_table.table_id = table->id;
2924   fts_table.type = FTS_COMMON_TABLE;
2925   fts_table.table = table;
2926   if (table_name) {
2927     fts_table.parent = table_name;
2928   } else {
2929     fts_table.parent = table->name.m_name;
2930   }
2931 
2932   if (!trx) {
2933     trx = trx_allocate_for_background();
2934 
2935     trx->op_info = "setting last FTS document id";
2936     local_trx = TRUE;
2937   }
2938 
2939   info = pars_info_create();
2940 
2941   id_len = snprintf((char *)id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2942 
2943   pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2944 
2945   fts_get_table_name(&fts_table, fts_name);
2946   pars_info_bind_id(info, true, "table_name", fts_name);
2947 
2948   graph = fts_parse_sql(&fts_table, info,
2949                         "BEGIN"
2950                         " UPDATE $table_name SET value = :doc_id"
2951                         " WHERE key = 'synced_doc_id';");
2952 
2953   error = fts_eval_sql(trx, graph);
2954 
2955   que_graph_free(graph);
2956 
2957   if (local_trx) {
2958     if (error == DB_SUCCESS) {
2959       fts_sql_commit(trx);
2960       cache->synced_doc_id = doc_id;
2961     } else {
2962       ib::error(ER_IB_MSG_472) << "(" << ut_strerr(error)
2963                                << ") while"
2964                                   " updating last doc id.";
2965 
2966       fts_sql_rollback(trx);
2967     }
2968     trx_free_for_background(trx);
2969   }
2970 
2971   return (error);
2972 }
2973 
2974 /** Create a new fts_doc_ids_t.
2975  @return new fts_doc_ids_t */
fts_doc_ids_create(void)2976 fts_doc_ids_t *fts_doc_ids_create(void) {
2977   fts_doc_ids_t *fts_doc_ids;
2978   mem_heap_t *heap = mem_heap_create(512);
2979 
2980   fts_doc_ids =
2981       static_cast<fts_doc_ids_t *>(mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2982 
2983   fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2984 
2985   fts_doc_ids->doc_ids = static_cast<ib_vector_t *>(
2986       ib_vector_create(fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2987 
2988   return (fts_doc_ids);
2989 }
2990 
2991 /** Free a fts_doc_ids_t. */
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)2992 void fts_doc_ids_free(fts_doc_ids_t *fts_doc_ids) {
2993   mem_heap_t *heap = static_cast<mem_heap_t *>(fts_doc_ids->self_heap->arg);
2994 
2995   memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
2996 
2997   mem_heap_free(heap);
2998 }
2999 
3000 /** Do commit-phase steps necessary for the insertion of a new row.
3001 @param[in]	ftt	FTS transaction table
3002 @param[in]	row	row to be inserted in index
3003 */
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)3004 static void fts_add(fts_trx_table_t *ftt, fts_trx_row_t *row) {
3005   dict_table_t *table = ftt->table;
3006   doc_id_t doc_id = row->doc_id;
3007 
3008   ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
3009 
3010   fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
3011 
3012   mutex_enter(&table->fts->cache->deleted_lock);
3013   ++table->fts->cache->added;
3014   mutex_exit(&table->fts->cache->deleted_lock);
3015 
3016   if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) &&
3017       doc_id >= table->fts->cache->next_doc_id) {
3018     table->fts->cache->next_doc_id = doc_id + 1;
3019   }
3020 }
3021 
3022 /** Do commit-phase steps necessary for the deletion of a row.
3023  @return DB_SUCCESS or error code */
3024 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)3025     fts_delete(fts_trx_table_t *ftt, /*!< in: FTS trx table */
3026                fts_trx_row_t *row)   /*!< in: row */
3027 {
3028   que_t *graph;
3029   fts_table_t fts_table;
3030   dberr_t error = DB_SUCCESS;
3031   doc_id_t write_doc_id;
3032   dict_table_t *table = ftt->table;
3033   doc_id_t doc_id = row->doc_id;
3034   trx_t *trx = ftt->fts_trx->trx;
3035   pars_info_t *info = pars_info_create();
3036   fts_cache_t *cache = table->fts->cache;
3037 
3038   /* we do not index Documents whose Doc ID value is 0 */
3039   if (doc_id == FTS_NULL_DOC_ID) {
3040     ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
3041     return (error);
3042   }
3043 
3044   ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3045 
3046   FTS_INIT_FTS_TABLE(&fts_table, FTS_SUFFIX_DELETED, FTS_COMMON_TABLE, table);
3047 
3048   /* Convert to "storage" byte order. */
3049   fts_write_doc_id((byte *)&write_doc_id, doc_id);
3050   fts_bind_doc_id(info, "doc_id", &write_doc_id);
3051 
3052   /* It is possible we update a record that has not yet been sync-ed
3053   into cache from last crash (delete Doc will not initialize the
3054   sync). Avoid any added counter accounting until the FTS cache
3055   is re-established and sync-ed */
3056   if (table->fts->fts_status & ADDED_TABLE_SYNCED &&
3057       doc_id > cache->synced_doc_id) {
3058     mutex_enter(&table->fts->cache->deleted_lock);
3059 
3060     /* The Doc ID could belong to those left in
3061     ADDED table from last crash. So need to check
3062     if it is less than first_doc_id when we initialize
3063     the Doc ID system after reboot */
3064     if (doc_id >= table->fts->cache->first_doc_id &&
3065         table->fts->cache->added > 0) {
3066       --table->fts->cache->added;
3067     }
3068 
3069     mutex_exit(&table->fts->cache->deleted_lock);
3070 
3071     /* Only if the row was really deleted. */
3072     ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3073   }
3074 
3075   /* Note the deleted document for OPTIMIZE to purge. */
3076   if (error == DB_SUCCESS) {
3077     char table_name[MAX_FULL_NAME_LEN];
3078 
3079     trx->op_info = "adding doc id to FTS DELETED";
3080 
3081     info->graph_owns_us = TRUE;
3082 
3083     fts_table.suffix = FTS_SUFFIX_DELETED;
3084 
3085     fts_get_table_name(&fts_table, table_name);
3086     pars_info_bind_id(info, true, "deleted", table_name);
3087 
3088     graph = fts_parse_sql(&fts_table, info,
3089                           "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
3090 
3091     error = fts_eval_sql(trx, graph);
3092 
3093     fts_que_graph_free(graph);
3094   } else {
3095     pars_info_free(info);
3096   }
3097 
3098   /* Increment the total deleted count, this is used to calculate the
3099   number of documents indexed. */
3100   if (error == DB_SUCCESS) {
3101     mutex_enter(&table->fts->cache->deleted_lock);
3102 
3103     ++table->fts->cache->deleted;
3104 
3105     mutex_exit(&table->fts->cache->deleted_lock);
3106   }
3107 
3108   return (error);
3109 }
3110 
3111 /** Do commit-phase steps necessary for the modification of a row.
3112  @return DB_SUCCESS or error code */
3113 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3114     fts_modify(fts_trx_table_t *ftt, /*!< in: FTS trx table */
3115                fts_trx_row_t *row)   /*!< in: row */
3116 {
3117   dberr_t error;
3118 
3119   ut_a(row->state == FTS_MODIFY);
3120 
3121   error = fts_delete(ftt, row);
3122 
3123   if (error == DB_SUCCESS) {
3124     fts_add(ftt, row);
3125   }
3126 
3127   return (error);
3128 }
3129 
3130 /** Create a new document id.
3131  @return DB_SUCCESS if all went well else error */
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3132 dberr_t fts_create_doc_id(dict_table_t *table, /*!< in: row is of this table. */
3133                           dtuple_t *row,    /* in/out: add doc id value to this
3134                                             row. This is the current row that is
3135                                             being inserted. */
3136                           mem_heap_t *heap) /*!< in: heap */
3137 {
3138   doc_id_t doc_id;
3139   dberr_t error = DB_SUCCESS;
3140 
3141   ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3142 
3143   if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3144     if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3145       error = fts_get_next_doc_id(table, &doc_id);
3146     }
3147     return (error);
3148   }
3149 
3150   error = fts_get_next_doc_id(table, &doc_id);
3151 
3152   if (error == DB_SUCCESS) {
3153     dfield_t *dfield;
3154     doc_id_t *write_doc_id;
3155 
3156     ut_a(doc_id > 0);
3157 
3158     dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3159     write_doc_id =
3160         static_cast<doc_id_t *>(mem_heap_alloc(heap, sizeof(*write_doc_id)));
3161 
3162     ut_a(doc_id != FTS_NULL_DOC_ID);
3163     ut_a(sizeof(doc_id) == dfield->type.len);
3164     fts_write_doc_id((byte *)write_doc_id, doc_id);
3165 
3166     dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3167   }
3168 
3169   return (error);
3170 }
3171 
3172 /** The given transaction is about to be committed; do whatever is necessary
3173  from the FTS system's POV.
3174  @return DB_SUCCESS or error code */
3175 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_commit_table(fts_trx_table_t * ftt)3176     fts_commit_table(fts_trx_table_t *ftt) /*!< in: FTS table to commit*/
3177 {
3178   const ib_rbt_node_t *node;
3179   ib_rbt_t *rows;
3180   dberr_t error = DB_SUCCESS;
3181   fts_cache_t *cache = ftt->table->fts->cache;
3182   trx_t *trx = trx_allocate_for_background();
3183 
3184   rows = ftt->rows;
3185 
3186   ftt->fts_trx->trx = trx;
3187 
3188   if (cache->get_docs == nullptr) {
3189     rw_lock_x_lock(&cache->init_lock);
3190     if (cache->get_docs == nullptr) {
3191       cache->get_docs = fts_get_docs_create(cache);
3192     }
3193     rw_lock_x_unlock(&cache->init_lock);
3194   }
3195 
3196   for (node = rbt_first(rows); node != nullptr && error == DB_SUCCESS;
3197        node = rbt_next(rows, node)) {
3198     fts_trx_row_t *row = rbt_value(fts_trx_row_t, node);
3199 
3200     switch (row->state) {
3201       case FTS_INSERT:
3202         fts_add(ftt, row);
3203         break;
3204 
3205       case FTS_MODIFY:
3206         error = fts_modify(ftt, row);
3207         break;
3208 
3209       case FTS_DELETE:
3210         error = fts_delete(ftt, row);
3211         break;
3212 
3213       default:
3214         ut_error;
3215     }
3216   }
3217 
3218   fts_sql_commit(trx);
3219 
3220   trx_free_for_background(trx);
3221 
3222   return (error);
3223 }
3224 
3225 /** The given transaction is about to be committed; do whatever is necessary
3226  from the FTS system's POV.
3227  @return DB_SUCCESS or error code */
fts_commit(trx_t * trx)3228 dberr_t fts_commit(trx_t *trx) /*!< in: transaction */
3229 {
3230   const ib_rbt_node_t *node;
3231   dberr_t error;
3232   ib_rbt_t *tables;
3233   fts_savepoint_t *savepoint;
3234 
3235   savepoint =
3236       static_cast<fts_savepoint_t *>(ib_vector_last(trx->fts_trx->savepoints));
3237   tables = savepoint->tables;
3238 
3239   for (node = rbt_first(tables), error = DB_SUCCESS;
3240        node != nullptr && error == DB_SUCCESS; node = rbt_next(tables, node)) {
3241     fts_trx_table_t **ftt;
3242 
3243     ftt = rbt_value(fts_trx_table_t *, node);
3244 
3245     error = fts_commit_table(*ftt);
3246   }
3247 
3248   return (error);
3249 }
3250 
3251 /** Initialize a document. */
fts_doc_init(fts_doc_t * doc)3252 void fts_doc_init(fts_doc_t *doc) /*!< in: doc to initialize */
3253 {
3254   mem_heap_t *heap = mem_heap_create(32);
3255 
3256   memset(doc, 0, sizeof(*doc));
3257 
3258   doc->self_heap = ib_heap_allocator_create(heap);
3259 }
3260 
3261 /** Free document. */
fts_doc_free(fts_doc_t * doc)3262 void fts_doc_free(fts_doc_t *doc) /*!< in: document */
3263 {
3264   mem_heap_t *heap = static_cast<mem_heap_t *>(doc->self_heap->arg);
3265 
3266   if (doc->tokens) {
3267     rbt_free(doc->tokens);
3268   }
3269 
3270   ut_d(memset(doc, 0, sizeof(*doc)));
3271 
3272   mem_heap_free(heap);
3273 }
3274 
3275 /** Callback function for fetch that stores the text of an FTS document,
3276  converting each column to UTF-16.
3277  @return always false */
fts_query_expansion_fetch_doc(void * row,void * user_arg)3278 ibool fts_query_expansion_fetch_doc(void *row,      /*!< in: sel_node_t* */
3279                                     void *user_arg) /*!< in: fts_doc_t* */
3280 {
3281   que_node_t *exp;
3282   sel_node_t *node = static_cast<sel_node_t *>(row);
3283   fts_doc_t *result_doc = static_cast<fts_doc_t *>(user_arg);
3284   dfield_t *dfield;
3285   ulint len;
3286   ulint doc_len;
3287   fts_doc_t doc;
3288   CHARSET_INFO *doc_charset = nullptr;
3289   ulint field_no = 0;
3290 
3291   len = 0;
3292 
3293   fts_doc_init(&doc);
3294   doc.found = TRUE;
3295 
3296   exp = node->select_list;
3297   doc_len = 0;
3298 
3299   doc_charset = result_doc->charset;
3300 
3301   /* Copy each indexed column content into doc->text.f_str */
3302   while (exp) {
3303     dfield = que_node_get_val(exp);
3304     len = dfield_get_len(dfield);
3305 
3306     /* NULL column */
3307     if (len == UNIV_SQL_NULL) {
3308       exp = que_node_get_next(exp);
3309       continue;
3310     }
3311 
3312     if (!doc_charset) {
3313       doc_charset = fts_get_charset(dfield->type.prtype);
3314     }
3315 
3316     doc.charset = doc_charset;
3317     doc.is_ngram = result_doc->is_ngram;
3318 
3319     if (dfield_is_ext(dfield)) {
3320       /* We ignore columns that are stored externally, this
3321       could result in too many words to search */
3322       exp = que_node_get_next(exp);
3323       continue;
3324     } else {
3325       doc.text.f_n_char = 0;
3326 
3327       doc.text.f_str = static_cast<byte *>(dfield_get_data(dfield));
3328 
3329       doc.text.f_len = len;
3330     }
3331 
3332     if (field_no == 0) {
3333       fts_tokenize_document(&doc, result_doc, result_doc->parser);
3334     } else {
3335       fts_tokenize_document_next(&doc, doc_len, result_doc, result_doc->parser);
3336     }
3337 
3338     exp = que_node_get_next(exp);
3339 
3340     doc_len += (exp) ? len + 1 : len;
3341 
3342     field_no++;
3343   }
3344 
3345   ut_ad(doc_charset);
3346 
3347   if (!result_doc->charset) {
3348     result_doc->charset = doc_charset;
3349   }
3350 
3351   fts_doc_free(&doc);
3352 
3353   return (FALSE);
3354 }
3355 
3356 /** fetch and tokenize the document. */
fts_fetch_doc_from_rec(trx_t * trx,fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3357 static void fts_fetch_doc_from_rec(
3358     trx_t *trx,                /*!< in: current transaction */
3359     fts_get_doc_t *get_doc,    /*!< in: FTS index's get_doc struct */
3360     dict_index_t *clust_index, /*!< in: cluster index */
3361     btr_pcur_t *pcur,          /*!< in: cursor whose position
3362                                has been stored */
3363     ulint *offsets,            /*!< in: offsets */
3364     fts_doc_t *doc)            /*!< out: fts doc to hold parsed
3365                                documents */
3366 {
3367   dict_index_t *index;
3368   dict_table_t *table;
3369   const rec_t *clust_rec;
3370   ulint num_field;
3371   const dict_field_t *ifield;
3372   const dict_col_t *col;
3373   uint16_t clust_pos;
3374   ulint i;
3375   ulint doc_len = 0;
3376   ulint processed_doc = 0;
3377   st_mysql_ftparser *parser;
3378 
3379   if (!get_doc) {
3380     return;
3381   }
3382 
3383   index = get_doc->index_cache->index;
3384   table = get_doc->index_cache->index->table;
3385   parser = get_doc->index_cache->index->parser;
3386 
3387   clust_rec = btr_pcur_get_rec(pcur);
3388 
3389   num_field = dict_index_get_n_fields(index);
3390 
3391   for (i = 0; i < num_field; i++) {
3392     ifield = index->get_field(i);
3393     col = ifield->col;
3394     clust_pos = static_cast<uint16_t>(dict_col_get_clust_pos(col, clust_index));
3395 
3396     if (!get_doc->index_cache->charset) {
3397       get_doc->index_cache->charset = fts_get_charset(ifield->col->prtype);
3398     }
3399 
3400     if (rec_offs_nth_extern(offsets, clust_pos)) {
3401       doc->text.f_str = lob::btr_rec_copy_externally_stored_field(
3402           nullptr, clust_index, clust_rec, offsets, dict_table_page_size(table),
3403           clust_pos, &doc->text.f_len, nullptr, false,
3404           static_cast<mem_heap_t *>(doc->self_heap->arg));
3405     } else {
3406       doc->text.f_str = const_cast<byte *>(rec_get_nth_field_instant(
3407           clust_rec, offsets, clust_pos, clust_index, &doc->text.f_len));
3408     }
3409 
3410     doc->found = TRUE;
3411     doc->charset = get_doc->index_cache->charset;
3412     doc->is_ngram = index->is_ngram;
3413 
3414     /* Null Field */
3415     if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3416       continue;
3417     }
3418 
3419     if (processed_doc == 0) {
3420       fts_tokenize_document(doc, nullptr, parser);
3421     } else {
3422       fts_tokenize_document_next(doc, doc_len, nullptr, parser);
3423     }
3424 
3425     processed_doc++;
3426     doc_len += doc->text.f_len + 1;
3427   }
3428 }
3429 
3430 /** Fetch the data from tuple and tokenize the document.
3431 @param[in]	get_doc	FTS index's get_doc struct
3432 @param[in]	tuple	tuple should be arranged in table schema order
3433 @param[out]	doc	fts doc to hold parsed documents. */
fts_fetch_doc_from_tuple(fts_get_doc_t * get_doc,const dtuple_t * tuple,fts_doc_t * doc)3434 static void fts_fetch_doc_from_tuple(fts_get_doc_t *get_doc,
3435                                      const dtuple_t *tuple, fts_doc_t *doc) {
3436   dict_index_t *index;
3437   st_mysql_ftparser *parser;
3438   ulint doc_len = 0;
3439   ulint processed_doc = 0;
3440   ulint num_field;
3441 
3442   if (get_doc == nullptr) {
3443     return;
3444   }
3445 
3446   index = get_doc->index_cache->index;
3447   parser = get_doc->index_cache->index->parser;
3448   num_field = dict_index_get_n_fields(index);
3449 
3450   for (ulint i = 0; i < num_field; i++) {
3451     const dict_field_t *ifield;
3452     const dict_col_t *col;
3453     ulint pos;
3454     dfield_t *field;
3455 
3456     ifield = index->get_field(i);
3457     col = ifield->col;
3458     pos = dict_col_get_no(col);
3459     field = dtuple_get_nth_field(tuple, pos);
3460 
3461     if (!get_doc->index_cache->charset) {
3462       get_doc->index_cache->charset = fts_get_charset(ifield->col->prtype);
3463     }
3464 
3465     ut_ad(!dfield_is_ext(field));
3466 
3467     doc->text.f_str = (byte *)dfield_get_data(field);
3468     doc->text.f_len = dfield_get_len(field);
3469     doc->found = TRUE;
3470     doc->charset = get_doc->index_cache->charset;
3471     doc->is_ngram = index->is_ngram;
3472 
3473     /* field data is NULL. */
3474     if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3475       continue;
3476     }
3477 
3478     if (processed_doc == 0) {
3479       fts_tokenize_document(doc, nullptr, parser);
3480     } else {
3481       fts_tokenize_document_next(doc, doc_len, nullptr, parser);
3482     }
3483 
3484     processed_doc++;
3485     doc_len += doc->text.f_len + 1;
3486   }
3487 }
3488 
/** Fetch the document from a tuple, tokenize the text data and add the
resulting tokens to the FTS cache of the table, once for every entry in the
cache's get_docs vector. The fields of this tuple do not contain any
information about externally stored fields; the tuple contains data
directly converted from MySQL.
@param[in]	ftt	FTS transaction table
@param[in]	doc_id	doc id
@param[in]	tuple	tuple from where data can be retrieved
                        and tuple should be arranged in table
                        schema order. */
void fts_add_doc_from_tuple(fts_trx_table_t *ftt, doc_id_t doc_id,
                            const dtuple_t *tuple) {
  mtr_t mtr;
  fts_cache_t *cache = ftt->table->fts->cache;

  ut_ad(cache->get_docs);

  /* Initialize the index first if the table is not yet marked as
  ADDED_TABLE_SYNCED. */
  if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
    fts_init_index(ftt->table, FALSE);
  }

  /* NOTE(review): no call in this function takes the mini-transaction as
  an argument other than mtr_start()/mtr_commit() - confirm whether it is
  still needed. */
  mtr_start(&mtr);

  ulint num_idx = ib_vector_size(cache->get_docs);

  for (ulint i = 0; i < num_idx; ++i) {
    fts_doc_t doc;
    dict_table_t *table;
    fts_get_doc_t *get_doc;

    get_doc = static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, i));
    table = get_doc->index_cache->index->table;

    fts_doc_init(&doc);
    fts_fetch_doc_from_tuple(get_doc, tuple, &doc);

    if (doc.found) {
      /* The mini-transaction is committed before the cache lock is taken
      and restarted once the cache work (and a possible sync) is done. */
      mtr_commit(&mtr);
      rw_lock_x_lock(&table->fts->cache->lock);

      /* Load the stopword list if that has not been done yet. */
      if (table->fts->cache->stopword_info.status & STOPWORD_NOT_INIT) {
        fts_load_stopword(table, nullptr, nullptr, nullptr, TRUE, TRUE);
      }

      fts_cache_add_doc(table->fts->cache, get_doc->index_cache, doc_id,
                        doc.tokens);

      rw_lock_x_unlock(&table->fts->cache->lock);

      /* Sync when the cache has grown beyond a fifth of
      fts_max_cache_size or when a sync has been requested. */
      if (cache->total_size > fts_max_cache_size / 5 || fts_need_sync) {
        fts_sync(cache->sync, true, false, false);
      }

      mtr_start(&mtr);
    }

    fts_doc_free(&doc);
  }

  mtr_commit(&mtr);
}
3550 
/** Fetch the document just inserted right before we commit
the transaction, tokenize the inserted text data and add the tokens to the
FTS cache, once for every entry in cache->get_docs.
The row is looked up by Doc ID in table->fts_doc_id_index; when that index
is not the clustered index, the clustered record is located through a row
reference built from the secondary record. After a document has been added,
a sync of the table is requested through fts_optimize_request_sync_table()
if the cache holds more than fts_max_cache_size / 10 bytes (or fts_need_sync
is set) and no sync is in progress.
@param[in]	ftt		FTS transaction table
@param[in]	doc_id		doc id
@param[in]	fts_indexes	affected FTS indexes (unused)
@return always TRUE; a row that is not found or is delete-marked is skipped */
static ulint fts_add_doc_by_id(fts_trx_table_t *ftt, doc_id_t doc_id,
                               ib_vector_t *fts_indexes
                                   MY_ATTRIBUTE((unused))) {
  mtr_t mtr;
  mem_heap_t *heap;
  btr_pcur_t pcur;
  dict_table_t *table;
  dtuple_t *tuple;
  dfield_t *dfield;
  fts_get_doc_t *get_doc;
  doc_id_t temp_doc_id;
  dict_index_t *clust_index;
  dict_index_t *fts_id_index;
  ibool is_id_cluster;
  fts_cache_t *cache = ftt->table->fts->cache;

  ut_ad(cache->get_docs);

  /* If Doc ID has been supplied by the user, then the table
  might not yet be sync-ed */

  if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
    fts_init_index(ftt->table, FALSE);
  }

  /* Get the first FTS index's get_doc */
  get_doc = static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, 0));
  ut_ad(get_doc);

  table = get_doc->index_cache->index->table;

  /* This heap backs the search tuples and, further down, the record
  offsets; it is released at func_exit. */
  heap = mem_heap_create(512);

  clust_index = table->first_index();
  fts_id_index = table->fts_doc_id_index;

  /* Check whether the index on FTS_DOC_ID is cluster index */
  is_id_cluster = (clust_index == fts_id_index);

  mtr_start(&mtr);
  btr_pcur_init(&pcur);

  /* Search based on Doc ID. Here, we'll need to consider the case
  when there is no primary index on Doc ID */
  tuple = dtuple_create(heap, 1);
  dfield = dtuple_get_nth_field(tuple, 0);
  dfield->type.mtype = DATA_INT;
  dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

  /* The search field points at temp_doc_id, which holds doc_id in the
  8-byte form produced by mach_write_to_8(). */
  mach_write_to_8((byte *)&temp_doc_id, doc_id);
  dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

  btr_pcur_open_with_no_init(fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
                             &pcur, 0, &mtr);

  /* If we have a match, add the data to doc structure. The search tuple
  has a single field, so a low match of 1 means that field matched. */
  if (btr_pcur_get_low_match(&pcur) == 1) {
    const rec_t *rec;
    btr_pcur_t *doc_pcur;
    const rec_t *clust_rec;
    btr_pcur_t clust_pcur;
    ulint *offsets = nullptr;
    ulint num_idx = ib_vector_size(cache->get_docs);

    rec = btr_pcur_get_rec(&pcur);

    /* Doc could be deleted */
    if (page_rec_is_infimum(rec) ||
        rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
      goto func_exit;
    }

    if (is_id_cluster) {
      /* The Doc ID index is the clustered index: the record just found
      is already the full row. */
      clust_rec = rec;
      doc_pcur = &pcur;
    } else {
      /* Build a reference to the clustered index record from the
      secondary index record and position a second cursor on it. */
      dtuple_t *clust_ref;
      ulint n_fields;

      btr_pcur_init(&clust_pcur);
      n_fields = dict_index_get_n_unique(clust_index);

      clust_ref = dtuple_create(heap, n_fields);
      dict_index_copy_types(clust_ref, clust_index, n_fields);

      row_build_row_ref_in_tuple(clust_ref, rec, fts_id_index, nullptr,
                                 nullptr);

      btr_pcur_open_with_no_init(clust_index, clust_ref, PAGE_CUR_LE,
                                 BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

      doc_pcur = &clust_pcur;
      clust_rec = btr_pcur_get_rec(&clust_pcur);
    }

    offsets = rec_get_offsets(clust_rec, clust_index, nullptr, ULINT_UNDEFINED,
                              &heap);

    for (ulint i = 0; i < num_idx; ++i) {
      fts_doc_t doc;
      /* These two intentionally shadow the function-level variables of
      the same name; inside the loop they refer to the i-th entry. */
      dict_table_t *table;
      fts_get_doc_t *get_doc;

      get_doc = static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, i));

      table = get_doc->index_cache->index->table;

      fts_doc_init(&doc);

      fts_fetch_doc_from_rec(ftt->fts_trx->trx, get_doc, clust_index, doc_pcur,
                             offsets, &doc);

      if (doc.found) {
        ibool success MY_ATTRIBUTE((unused));

        /* Remember where the cursor stands and commit the
        mini-transaction before taking the cache lock; the position is
        restored further down if another entry still has to be read. */
        btr_pcur_store_position(doc_pcur, &mtr);
        mtr_commit(&mtr);

        DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
        rw_lock_x_lock(&table->fts->cache->lock);

        /* Load the stopword list on first use, while the cache lock is
        held. */
        if (table->fts->cache->stopword_info.status & STOPWORD_NOT_INIT) {
          fts_load_stopword(table, nullptr, nullptr, nullptr, TRUE, TRUE);
        }

        fts_cache_add_doc(table->fts->cache, get_doc->index_cache, doc_id,
                          doc.tokens);

        /* Decide under the cache lock whether a sync should be requested;
        the request itself is issued after the lock has been released. */
        bool need_sync = false;
        if ((cache->total_size > fts_max_cache_size / 10 || fts_need_sync) &&
            !cache->sync->in_progress) {
          need_sync = true;
        }

        rw_lock_x_unlock(&table->fts->cache->lock);

        /* The DBUG_EXECUTE_IF / DEBUG_SYNC_C statements below are test
        instrumentation hooks keyed by the quoted names. */
        DBUG_EXECUTE_IF("fts_instrument_sync_cache_wait",
                        srv_fatal_semaphore_wait_threshold = 25;
                        fts_max_cache_size = 100;
                        fts_sync(cache->sync, true, true, false););

        DBUG_EXECUTE_IF("fts_instrument_sync",
                        fts_optimize_request_sync_table(table);
                        os_event_wait(cache->sync->event););

        DBUG_EXECUTE_IF("fts_instrument_sync_debug",
                        fts_sync(cache->sync, true, true, false););

        DEBUG_SYNC_C("fts_instrument_sync_request");
        DBUG_EXECUTE_IF("fts_instrument_sync_request",
                        fts_optimize_request_sync_table(table););

        if (need_sync) {
          fts_optimize_request_sync_table(table);
        }

        /* A mini-transaction must be active again both for the next
        iteration and for the mtr_commit() at func_exit. */
        mtr_start(&mtr);

        /* The cursor only needs to be repositioned when a further entry
        is going to read the record. */
        if (i < num_idx - 1) {
          success = btr_pcur_restore_position(BTR_SEARCH_LEAF, doc_pcur, &mtr);

          ut_ad(success);
        }
      }

      fts_doc_free(&doc);
    }

    /* In the secondary-index case doc_pcur is clust_pcur, which is not
    closed at func_exit. */
    if (!is_id_cluster) {
      btr_pcur_close(doc_pcur);
    }
  }
func_exit:
  mtr_commit(&mtr);

  btr_pcur_close(&pcur);

  mem_heap_free(heap);
  return (TRUE);
}
3738 
3739 /** Callback function to read a single ulint column.
3740  return always returns TRUE */
fts_read_ulint(void * row,void * user_arg)3741 static ibool fts_read_ulint(void *row,      /*!< in: sel_node_t* */
3742                             void *user_arg) /*!< in: pointer to ulint */
3743 {
3744   sel_node_t *sel_node = static_cast<sel_node_t *>(row);
3745   ulint *value = static_cast<ulint *>(user_arg);
3746   que_node_t *exp = sel_node->select_list;
3747   dfield_t *dfield = que_node_get_val(exp);
3748   void *data = dfield_get_data(dfield);
3749 
3750   *value =
3751       static_cast<ulint>(mach_read_from_4(static_cast<const byte *>(data)));
3752 
3753   return (TRUE);
3754 }
3755 
3756 /** Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3757  @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
fts_get_max_doc_id(dict_table_t * table)3758 doc_id_t fts_get_max_doc_id(dict_table_t *table) /*!< in: user table */
3759 {
3760   dict_index_t *index;
3761   dict_field_t *dfield MY_ATTRIBUTE((unused)) = nullptr;
3762   doc_id_t doc_id = 0;
3763   mtr_t mtr;
3764   btr_pcur_t pcur;
3765 
3766   index = table->fts_doc_id_index;
3767 
3768   if (!index) {
3769     return (0);
3770   }
3771 
3772   dfield = index->get_field(0);
3773 
3774 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3775 	ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3776 #endif
3777 
3778   mtr_start(&mtr);
3779 
3780   /* fetch the largest indexes value */
3781   btr_pcur_open_at_index_side(false, index, BTR_SEARCH_LEAF, &pcur, true, 0,
3782                               &mtr);
3783 
3784   if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3785     const rec_t *rec = nullptr;
3786     ulint offsets_[REC_OFFS_NORMAL_SIZE];
3787     ulint *offsets = offsets_;
3788     mem_heap_t *heap = nullptr;
3789     ulint len;
3790     const void *data;
3791 
3792     rec_offs_init(offsets_);
3793 
3794     do {
3795       rec = btr_pcur_get_rec(&pcur);
3796 
3797       if (page_rec_is_user_rec(rec)) {
3798         break;
3799       }
3800     } while (btr_pcur_move_to_prev(&pcur, &mtr));
3801 
3802     if (!rec) {
3803       goto func_exit;
3804     }
3805 
3806     offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
3807 
3808     data = rec_get_nth_field(rec, offsets, 0, &len);
3809 
3810     doc_id =
3811         static_cast<doc_id_t>(fts_read_doc_id(static_cast<const byte *>(data)));
3812   }
3813 
3814 func_exit:
3815   btr_pcur_close(&pcur);
3816   mtr_commit(&mtr);
3817   return (doc_id);
3818 }
3819 
3820 /** Fetch document with the given document id.
3821  @return DB_SUCCESS if OK else error */
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3822 dberr_t fts_doc_fetch_by_doc_id(
3823     fts_get_doc_t *get_doc,     /*!< in: state */
3824     doc_id_t doc_id,            /*!< in: id of document to
3825                                 fetch */
3826     dict_index_t *index_to_use, /*!< in: caller supplied FTS index,
3827                                 or NULL */
3828     ulint option,               /*!< in: search option, if it is
3829                                 greater than doc_id or equal */
3830     fts_sql_callback callback,  /*!< in: callback to read */
3831     void *arg)                  /*!< in: callback arg */
3832 {
3833   pars_info_t *info;
3834   dberr_t error;
3835   const char *select_str;
3836   doc_id_t write_doc_id;
3837   dict_index_t *index;
3838   trx_t *trx = trx_allocate_for_background();
3839   que_t *graph;
3840 
3841   trx->op_info = "fetching indexed FTS document";
3842 
3843   /* The FTS index can be supplied by caller directly with
3844   "index_to_use", otherwise, get it from "get_doc" */
3845   index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3846 
3847   if (get_doc && get_doc->get_document_graph) {
3848     info = get_doc->get_document_graph->info;
3849   } else {
3850     info = pars_info_create();
3851   }
3852 
3853   /* Convert to "storage" byte order. */
3854   fts_write_doc_id((byte *)&write_doc_id, doc_id);
3855   fts_bind_doc_id(info, "doc_id", &write_doc_id);
3856   pars_info_bind_function(info, "my_func", callback, arg);
3857 
3858   select_str = fts_get_select_columns_str(index, info, info->heap);
3859   pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3860 
3861   if (!get_doc || !get_doc->get_document_graph) {
3862     if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3863       graph = fts_parse_sql(nullptr, info,
3864                             mem_heap_printf(info->heap,
3865                                             "DECLARE FUNCTION my_func;\n"
3866                                             "DECLARE CURSOR c IS"
3867                                             " SELECT %s FROM $table_name"
3868                                             " WHERE %s = :doc_id;\n"
3869                                             "BEGIN\n"
3870                                             ""
3871                                             "OPEN c;\n"
3872                                             "WHILE 1 = 1 LOOP\n"
3873                                             "  FETCH c INTO my_func();\n"
3874                                             "  IF c %% NOTFOUND THEN\n"
3875                                             "    EXIT;\n"
3876                                             "  END IF;\n"
3877                                             "END LOOP;\n"
3878                                             "CLOSE c;",
3879                                             select_str, FTS_DOC_ID_COL_NAME));
3880     } else {
3881       ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3882 
3883       /* This is used for crash recovery of table with
3884       hidden DOC ID or FTS indexes. We will scan the table
3885       to re-processing user table rows whose DOC ID or
3886       FTS indexed documents have not been sync-ed to disc
3887       during recent crash.
3888       In the case that all fulltext indexes are dropped
3889       for a table, we will keep the "hidden" FTS_DOC_ID
3890       column, and this scan is to retreive the largest
3891       DOC ID being used in the table to determine the
3892       appropriate next DOC ID.
3893       In the case of there exists fulltext index(es), this
3894       operation will re-tokenize any docs that have not
3895       been sync-ed to the disk, and re-prime the FTS
3896       cached */
3897       graph = fts_parse_sql(nullptr, info,
3898                             mem_heap_printf(info->heap,
3899                                             "DECLARE FUNCTION my_func;\n"
3900                                             "DECLARE CURSOR c IS"
3901                                             " SELECT %s, %s FROM $table_name"
3902                                             " WHERE %s > :doc_id;\n"
3903                                             "BEGIN\n"
3904                                             ""
3905                                             "OPEN c;\n"
3906                                             "WHILE 1 = 1 LOOP\n"
3907                                             "  FETCH c INTO my_func();\n"
3908                                             "  IF c %% NOTFOUND THEN\n"
3909                                             "    EXIT;\n"
3910                                             "  END IF;\n"
3911                                             "END LOOP;\n"
3912                                             "CLOSE c;",
3913                                             FTS_DOC_ID_COL_NAME, select_str,
3914                                             FTS_DOC_ID_COL_NAME));
3915     }
3916     if (get_doc) {
3917       get_doc->get_document_graph = graph;
3918     }
3919   } else {
3920     graph = get_doc->get_document_graph;
3921   }
3922 
3923   error = fts_eval_sql(trx, graph);
3924 
3925   if (error == DB_SUCCESS) {
3926     fts_sql_commit(trx);
3927   } else {
3928     fts_sql_rollback(trx);
3929   }
3930 
3931   trx_free_for_background(trx);
3932 
3933   if (!get_doc) {
3934     fts_que_graph_free(graph);
3935   }
3936 
3937   return (error);
3938 }
3939 
3940 /** Write out a single word's data as new entry/entries in the INDEX table.
3941  @return DB_SUCCESS if all OK. */
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3942 dberr_t fts_write_node(trx_t *trx,             /*!< in: transaction */
3943                        que_t **graph,          /*!< in: query graph */
3944                        fts_table_t *fts_table, /*!< in: aux table */
3945                        fts_string_t *word,     /*!< in: word in UTF-8 */
3946                        fts_node_t *node)       /*!< in: node columns */
3947 {
3948   pars_info_t *info;
3949   dberr_t error;
3950   ib_uint32_t doc_count;
3951   doc_id_t last_doc_id;
3952   doc_id_t first_doc_id;
3953   char table_name[MAX_FULL_NAME_LEN];
3954 
3955   ut_a(node->ilist != nullptr);
3956 
3957   if (*graph) {
3958     info = (*graph)->info;
3959   } else {
3960     info = pars_info_create();
3961 
3962     fts_get_table_name(fts_table, table_name);
3963     pars_info_bind_id(info, true, "index_table_name", table_name);
3964   }
3965 
3966   pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3967 
3968   /* Convert to "storage" byte order. */
3969   fts_write_doc_id((byte *)&first_doc_id, node->first_doc_id);
3970   fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3971 
3972   /* Convert to "storage" byte order. */
3973   fts_write_doc_id((byte *)&last_doc_id, node->last_doc_id);
3974   fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3975 
3976   ut_a(node->last_doc_id >= node->first_doc_id);
3977 
3978   /* Convert to "storage" byte order. */
3979   mach_write_to_4((byte *)&doc_count, node->doc_count);
3980   pars_info_bind_int4_literal(info, "doc_count",
3981                               (const ib_uint32_t *)&doc_count);
3982 
3983   /* Set copy_name to FALSE since it's a static. */
3984   pars_info_bind_literal(info, "ilist", node->ilist, node->ilist_size,
3985                          DATA_BLOB, DATA_BINARY_TYPE);
3986 
3987   if (!*graph) {
3988     *graph = fts_parse_sql(fts_table, info,
3989                            "BEGIN\n"
3990                            "INSERT INTO $index_table_name VALUES"
3991                            " (:token, :first_doc_id,"
3992                            "  :last_doc_id, :doc_count, :ilist);");
3993   }
3994 
3995   const auto start_time = ut_time_monotonic();
3996   error = fts_eval_sql(trx, *graph);
3997   elapsed_time += ut_time_monotonic() - start_time;
3998   ++n_nodes;
3999 
4000   return (error);
4001 }
4002 
4003 /** Add rows to the DELETED_CACHE table.
4004  @return DB_SUCCESS if all went well else error code*/
4005 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)4006     fts_sync_add_deleted_cache(fts_sync_t *sync,     /*!< in: sync state */
4007                                ib_vector_t *doc_ids) /*!< in: doc ids to add */
4008 {
4009   ulint i;
4010   pars_info_t *info;
4011   que_t *graph;
4012   fts_table_t fts_table;
4013   char table_name[MAX_FULL_NAME_LEN];
4014   doc_id_t dummy = 0;
4015   dberr_t error = DB_SUCCESS;
4016   ulint n_elems = ib_vector_size(doc_ids);
4017 
4018   ut_a(ib_vector_size(doc_ids) > 0);
4019 
4020   ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
4021 
4022   info = pars_info_create();
4023 
4024   fts_bind_doc_id(info, "doc_id", &dummy);
4025 
4026   FTS_INIT_FTS_TABLE(&fts_table, FTS_SUFFIX_DELETED_CACHE, FTS_COMMON_TABLE,
4027                      sync->table);
4028 
4029   fts_get_table_name(&fts_table, table_name);
4030   pars_info_bind_id(info, true, "table_name", table_name);
4031 
4032   graph = fts_parse_sql(&fts_table, info,
4033                         "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
4034 
4035   for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
4036     fts_update_t *update;
4037     doc_id_t write_doc_id;
4038 
4039     update = static_cast<fts_update_t *>(ib_vector_get(doc_ids, i));
4040 
4041     /* Convert to "storage" byte order. */
4042     fts_write_doc_id((byte *)&write_doc_id, update->doc_id);
4043     fts_bind_doc_id(info, "doc_id", &write_doc_id);
4044 
4045     error = fts_eval_sql(sync->trx, graph);
4046   }
4047 
4048   fts_que_graph_free(graph);
4049 
4050   return (error);
4051 }
4052 
/** Write the words and ilist to disk.
Every node of every word in the index cache that is not yet marked as
synced is marked and, as long as no error has occurred, written with
fts_write_node(). After the first error the remaining nodes are still
marked but no longer written.
The caller is expected to hold the cache lock in X mode: when unlock_cache
is (or becomes) true the lock is released around each write and taken again
afterwards.
@param[in,out]	trx		transaction
@param[in]	index_cache	index cache
@param[in]	unlock_cache	whether unlock cache when write node
@param[in]      sync_start_time Holds the timestamp of start of sync
                                for deducing the length of sync time
@return DB_SUCCESS if all went well else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t
    fts_sync_write_words(trx_t *trx, fts_index_cache_t *index_cache,
                         bool unlock_cache, ib_time_t sync_start_time) {
  fts_table_t fts_table;
  /* Local statistics for the diagnostic print at the end; this n_nodes
  shadows the file-level counter that fts_write_node() increments. */
  ulint n_nodes = 0;
  ulint n_words = 0;
  const ib_rbt_node_t *rbt_node;
  dberr_t error = DB_SUCCESS;
  ibool print_error = FALSE;
  dict_table_t *table = index_cache->index->table;
  /* Budget for holding the cache lock without releasing it: 98% of the
  fatal semaphore wait threshold taken modulo SRV_SEMAPHORE_WAIT_EXTENSION.
  NOTE(review): the modulo presumably strips extensions that are already
  applied to the threshold - confirm. */
  const float cutoff = 0.98f;
  ulint lock_threshold = static_cast<ulint>(
      (srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION) *
      cutoff);
  /* True while this call has an extension of the threshold outstanding. */
  bool timeout_extended = false;

  FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE,
                       index_cache->index);

  n_words = rbt_size(index_cache->words);

  /* We iterate over the entire tree, even if there is an error,
  since we want to free the memory used during caching. */
  for (rbt_node = rbt_first(index_cache->words); rbt_node;
       rbt_node = rbt_next(index_cache->words, rbt_node)) {
    ulint i;
    ulint selected;
    fts_tokenizer_word_t *word;

    word = rbt_value(fts_tokenizer_word_t, rbt_node);

    /* Pick the auxiliary index table for this word; the same selector
    indexes the cached insert graphs. */
    selected = fts_select_index(index_cache->charset, word->text.f_str,
                                word->text.f_len);

    fts_table.suffix = fts_get_suffix(selected);

    /* We iterate over all the nodes even if there was an error */
    for (i = 0; i < ib_vector_size(word->nodes); ++i) {
      fts_node_t *fts_node =
          static_cast<fts_node_t *>(ib_vector_get(word->nodes, i));

      /* The node is flagged before the write is attempted, so it stays
      flagged even if the write fails or is skipped because of an
      earlier error. */
      if (fts_node->synced) {
        continue;
      } else {
        fts_node->synced = true;
      }

      /*FIXME: we need to handle the error properly. */
      if (error == DB_SUCCESS) {
        DBUG_EXECUTE_IF("fts_instrument_sync_write",
                        os_thread_sleep(10000000););
        if (!unlock_cache) {
          /* Time elapsed since the sync started, compared against the
          budget computed above. */
          ulint cache_lock_time = ut_time_monotonic() - sync_start_time;
          if (cache_lock_time > lock_threshold) {
            if (!timeout_extended) {
              /* First overrun: extend the global threshold once and
              raise our own budget by the same amount. */
              os_atomic_increment_ulint(&srv_fatal_semaphore_wait_threshold,
                                        SRV_SEMAPHORE_WAIT_EXTENSION);
              timeout_extended = true;
              lock_threshold += SRV_SEMAPHORE_WAIT_EXTENSION;
            } else {
              /* Second overrun: stop holding the lock across writes and
              take the extension back. */
              unlock_cache = true;
              os_atomic_decrement_ulint(&srv_fatal_semaphore_wait_threshold,
                                        SRV_SEMAPHORE_WAIT_EXTENSION);
              timeout_extended = false;
            }
          }
        }

        if (unlock_cache) {
          rw_lock_x_unlock(&table->fts->cache->lock);
        }

        error = fts_write_node(trx, &index_cache->ins_graph[selected],
                               &fts_table, &word->text, fts_node);

        DBUG_EXECUTE_IF("fts_instrument_sync_write",
                        os_thread_sleep(10000000););

        DEBUG_SYNC_C("fts_write_node");
        DBUG_EXECUTE_IF("fts_write_node_crash", DBUG_SUICIDE(););

        DBUG_EXECUTE_IF("fts_instrument_sync_sleep", os_thread_sleep(1000000););

        if (unlock_cache) {
          rw_lock_x_lock(&table->fts->cache->lock);
        }
      }
    }

    /* Counts every node of the word, including nodes skipped above; the
    total is only used for the diagnostic average below. */
    n_nodes += ib_vector_size(word->nodes);

    /* Report a failure once, not once per word. */
    if (error != DB_SUCCESS && !print_error) {
      ib::error(ER_IB_MSG_473) << "(" << ut_strerr(error)
                               << ") writing"
                                  " word node to FTS auxiliary index table.";
      print_error = TRUE;
    }
  }

  /* NOTE(review): if the loop ends while timeout_extended is still true,
  the extension added to srv_fatal_semaphore_wait_threshold is not taken
  back anywhere in this function - confirm whether that is intended. */

  if (fts_enable_diag_print) {
    printf("Avg number of nodes: %lf\n",
           (double)n_nodes / (double)(n_words > 1 ? n_words : 1));
  }

  return (error);
}
4166 
4167 /** Begin Sync, create transaction, acquire locks, etc. */
fts_sync_begin(fts_sync_t * sync)4168 static void fts_sync_begin(fts_sync_t *sync) /*!< in: sync state */
4169 {
4170   fts_cache_t *cache = sync->table->fts->cache;
4171 
4172   n_nodes = 0;
4173   elapsed_time = 0;
4174 
4175   sync->start_time = ut_time_monotonic();
4176 
4177   sync->trx = trx_allocate_for_background();
4178 
4179   if (fts_enable_diag_print) {
4180     ib::info(ER_IB_MSG_474)
4181         << "FTS SYNC for table " << sync->table->name
4182         << ", deleted count: " << ib_vector_size(cache->deleted_doc_ids)
4183         << " size: " << cache->total_size << " bytes";
4184   }
4185 }
4186 
4187 /** Run SYNC on the table, i.e., write out data from the index specific
4188  cache to the FTS aux INDEX table and FTS aux doc id stats table.
4189  @return DB_SUCCESS if all OK */
4190 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4191     fts_sync_index(fts_sync_t *sync,               /*!< in: sync state */
4192                    fts_index_cache_t *index_cache) /*!< in: index cache */
4193 {
4194   trx_t *trx = sync->trx;
4195 
4196   trx->op_info = "doing SYNC index";
4197 
4198   if (fts_enable_diag_print) {
4199     ib::info(ER_IB_MSG_475) << "SYNC words: " << rbt_size(index_cache->words);
4200   }
4201 
4202   ut_ad(rbt_validate(index_cache->words));
4203 
4204   return (fts_sync_write_words(trx, index_cache, sync->unlock_cache,
4205                                sync->start_time));
4206 }
4207 
4208 /** Check if index cache has been synced completely
4209 @param[in,out]	index_cache	index cache
4210 @return true if index is synced, otherwise false. */
fts_sync_index_check(fts_index_cache_t * index_cache)4211 static bool fts_sync_index_check(fts_index_cache_t *index_cache) {
4212   const ib_rbt_node_t *rbt_node;
4213 
4214   for (rbt_node = rbt_first(index_cache->words); rbt_node != nullptr;
4215        rbt_node = rbt_next(index_cache->words, rbt_node)) {
4216     fts_tokenizer_word_t *word;
4217     word = rbt_value(fts_tokenizer_word_t, rbt_node);
4218 
4219     fts_node_t *fts_node;
4220     fts_node = static_cast<fts_node_t *>(ib_vector_last(word->nodes));
4221 
4222     if (!fts_node->synced) {
4223       return (false);
4224     }
4225   }
4226 
4227   return (true);
4228 }
4229 
4230 /** Reset synced flag in index cache when rollback
4231 @param[in,out]	index_cache	index cache */
fts_sync_index_reset(fts_index_cache_t * index_cache)4232 static void fts_sync_index_reset(fts_index_cache_t *index_cache) {
4233   const ib_rbt_node_t *rbt_node;
4234 
4235   for (rbt_node = rbt_first(index_cache->words); rbt_node != nullptr;
4236        rbt_node = rbt_next(index_cache->words, rbt_node)) {
4237     fts_tokenizer_word_t *word;
4238     word = rbt_value(fts_tokenizer_word_t, rbt_node);
4239 
4240     fts_node_t *fts_node;
4241     fts_node = static_cast<fts_node_t *>(ib_vector_last(word->nodes));
4242 
4243     fts_node->synced = false;
4244   }
4245 }
4246 
4247 /** Commit the SYNC, change state of processed doc ids etc.
4248 @param[in,out]	sync	sync state
4249 @return DB_SUCCESS if all OK */
4250 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_sync_commit(fts_sync_t * sync)4251     fts_sync_commit(fts_sync_t *sync) {
4252   dberr_t error;
4253   trx_t *trx = sync->trx;
4254   fts_cache_t *cache = sync->table->fts->cache;
4255   doc_id_t last_doc_id;
4256 
4257   trx->op_info = "doing SYNC commit";
4258 
4259   /* After each Sync, update the CONFIG table about the max doc id
4260   we just sync-ed to index table */
4261   error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4262                                   &last_doc_id);
4263 
4264   /* Get the list of deleted documents that are either in the
4265   cache or were headed there but were deleted before the add
4266   thread got to them. */
4267 
4268   if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4269     error = fts_sync_add_deleted_cache(sync, cache->deleted_doc_ids);
4270   }
4271 
4272   /* We need to do this within the deleted lock since fts_delete() can
4273   attempt to add a deleted doc id to the cache deleted id array. */
4274   fts_cache_clear(cache);
4275   DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4276   fts_cache_init(cache);
4277   rw_lock_x_unlock(&cache->lock);
4278 
4279   if (error == DB_SUCCESS) {
4280     fts_sql_commit(trx);
4281 
4282   } else if (error != DB_SUCCESS) {
4283     fts_sql_rollback(trx);
4284 
4285     ib::error(ER_IB_MSG_476) << "(" << ut_strerr(error) << ") during SYNC.";
4286   }
4287 
4288   if (fts_enable_diag_print && elapsed_time) {
4289     ib::info(ER_IB_MSG_477)
4290         << "SYNC for table " << sync->table->name
4291         << ": SYNC time: " << (ut_time_monotonic() - sync->start_time)
4292         << " secs: elapsed " << (double)n_nodes / elapsed_time << " ins/sec";
4293   }
4294 
4295   /* Avoid assertion in trx_free(). */
4296   trx->dict_operation_lock_mode = 0;
4297   trx_free_for_background(trx);
4298 
4299   return (error);
4300 }
4301 
4302 /** Rollback a sync operation */
fts_sync_rollback(fts_sync_t * sync)4303 static void fts_sync_rollback(fts_sync_t *sync) /*!< in: sync state */
4304 {
4305   trx_t *trx = sync->trx;
4306   fts_cache_t *cache = sync->table->fts->cache;
4307 
4308   for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4309     ulint j;
4310     fts_index_cache_t *index_cache;
4311 
4312     index_cache =
4313         static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
4314 
4315     /* Reset synced flag so nodes will not be skipped
4316     in the next sync, see fts_sync_write_words(). */
4317     fts_sync_index_reset(index_cache);
4318 
4319     for (j = 0; fts_index_selector[j].value; ++j) {
4320       if (index_cache->ins_graph[j] != nullptr) {
4321         que_graph_free(index_cache->ins_graph[j]);
4322 
4323         index_cache->ins_graph[j] = nullptr;
4324       }
4325 
4326       if (index_cache->sel_graph[j] != nullptr) {
4327         que_graph_free(index_cache->sel_graph[j]);
4328 
4329         index_cache->sel_graph[j] = nullptr;
4330       }
4331     }
4332   }
4333 
4334   rw_lock_x_unlock(&cache->lock);
4335 
4336   fts_sql_rollback(trx);
4337 
4338   /* Avoid assertion in trx_free(). */
4339   trx->dict_operation_lock_mode = 0;
4340   trx_free_for_background(trx);
4341 }
4342 
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
Only one sync runs at a time per sync state: if one is already in progress
this call either waits for it (wait == true) or returns DB_SUCCESS at once
without doing anything. The cache lock is taken here in X mode and released
by fts_sync_commit() or fts_sync_rollback().
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@param[in]      has_dict        whether has dict operation lock
@return DB_SUCCESS if all OK */
static dberr_t fts_sync(fts_sync_t *sync, bool unlock_cache, bool wait,
                        bool has_dict) {
  ulint i;
  dberr_t error = DB_SUCCESS;
  fts_cache_t *cache = sync->table->fts->cache;

  rw_lock_x_lock(&cache->lock);

  /* Check if cache is being synced.
  Note: we release cache lock in fts_sync_write_words() to
  avoid long wait for the lock by other threads. */
  while (sync->in_progress) {
    rw_lock_x_unlock(&cache->lock);

    if (wait) {
      os_event_wait(sync->event);
    } else {
      return (DB_SUCCESS);
    }

    /* Re-check in_progress under the lock: another thread may have
    started a new sync in the meantime. */
    rw_lock_x_lock(&cache->lock);
  }
  sync->unlock_cache = unlock_cache;
  sync->in_progress = true;

  DEBUG_SYNC_C("fts_sync_begin");
  fts_sync_begin(sync);

  /* When sync in background, we hold dict operation lock
  to prevent DDL like DROP INDEX, etc. */
  if (has_dict) {
    sync->trx->dict_operation_lock_mode = RW_S_LATCH;
  }

begin_sync:
  if (cache->total_size > fts_max_cache_size) {
    /* Avoid the case: sync never finish when
    insert/update keeps comming. */
    /* NOTE(review): this label is re-entered through "goto begin_sync"
    below; if unlock_cache was already cleared on an earlier pass and the
    cache is still over the limit, the assertion would fire in debug
    builds - confirm whether that path is reachable. */
    ut_ad(sync->unlock_cache);
    sync->unlock_cache = false;
  }

  DEBUG_SYNC_C("fts_instrument_sync1");
  for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
    fts_index_cache_t *index_cache;

    index_cache =
        static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));

    error = fts_sync_index(sync, index_cache);

    /* Stop at the first failure unless the sync has been flagged as
    interrupted; an interrupted sync is rolled back at end_sync anyway. */
    if (error != DB_SUCCESS && !sync->interrupted) {
      goto end_sync;
    }
  }

  DBUG_EXECUTE_IF("fts_instrument_sync_interrupted", sync->interrupted = true;
                  error = DB_INTERRUPTED; goto end_sync;);

  /* Make sure all the caches are synced. */
  for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
    fts_index_cache_t *index_cache;

    index_cache =
        static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));

    if (index_cache->index->to_be_dropped ||
        fts_sync_index_check(index_cache)) {
      continue;
    }

    /* This index cache still has an unsynced node (and is not about to
    be dropped): run another pass. */
    goto begin_sync;
  }

end_sync:
  /* Both branches release the cache lock and free sync->trx. */
  if (error == DB_SUCCESS && !sync->interrupted) {
    error = fts_sync_commit(sync);
  } else {
    fts_sync_rollback(sync);
  }

  /* Clear the state flags under the cache lock and signal sync->event,
  which is what the wait loop at the top of this function waits on. */
  rw_lock_x_lock(&cache->lock);
  sync->interrupted = false;
  sync->in_progress = false;
  os_event_set(sync->event);
  rw_lock_x_unlock(&cache->lock);

  /* Reset the added/deleted counters of the cache under deleted_lock.
  NOTE(review): an earlier version of this comment spoke of copying the
  two counters to decide whether an optimize is required; no such check
  is made here. */
  mutex_enter(&cache->deleted_lock);

  cache->added = 0;
  cache->deleted = 0;

  mutex_exit(&cache->deleted_lock);

  return (error);
}
4450 
4451 /** Run SYNC on the table, i.e., write out data from the cache to the
4452 FTS auxiliary INDEX table and clear the cache at the end.
4453 @param[in,out]	table		fts table
4454 @param[in]	unlock_cache	whether unlock cache when write node
4455 @param[in]	wait		whether wait for existing sync to finish
4456 @param[in]	has_dict	whether has dict operation lock
4457 @return DB_SUCCESS on success, error code on failure. */
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4458 dberr_t fts_sync_table(dict_table_t *table, bool unlock_cache, bool wait,
4459                        bool has_dict) {
4460   dberr_t err = DB_SUCCESS;
4461 
4462   ut_ad(table->fts);
4463 
4464   if (!dict_table_is_discarded(table) && table->fts->cache &&
4465       !table->is_corrupted()) {
4466     err = fts_sync(table->fts->cache->sync, unlock_cache, wait, has_dict);
4467   }
4468 
4469   return (err);
4470 }
4471 
4472 /** Check fts token
4473 1. for ngram token, check whether the token contains any words in stopwords
4474 2. for non-ngram token, check if it's stopword or less than fts_min_token_size
4475 or greater than fts_max_token_size.
4476 @param[in]	token		token string
4477 @param[in]	stopwords	stopwords rb tree
4478 @param[in]	is_ngram	is ngram parser
4479 @param[in]	cs		token charset
4480 @retval	true	if it is not stopword and length in range
4481 @retval	false	if it is stopword or lenght not in range */
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,bool is_ngram,const CHARSET_INFO * cs)4482 bool fts_check_token(const fts_string_t *token, const ib_rbt_t *stopwords,
4483                      bool is_ngram, const CHARSET_INFO *cs) {
4484   ut_ad(cs != nullptr || stopwords == nullptr);
4485 
4486   if (!is_ngram) {
4487     ib_rbt_bound_t parent;
4488 
4489     if (token->f_n_char < fts_min_token_size ||
4490         token->f_n_char > fts_max_token_size ||
4491         (stopwords != nullptr && rbt_search(stopwords, &parent, token) == 0)) {
4492       return (false);
4493     } else {
4494       return (true);
4495     }
4496   }
4497 
4498   /* Check token for ngram. */
4499   DBUG_EXECUTE_IF("fts_instrument_ignore_ngram_check", return (true););
4500 
4501   /* We ignore fts_min_token_size when ngram */
4502   ut_ad(token->f_n_char > 0 && token->f_n_char <= fts_max_token_size);
4503 
4504   if (stopwords == nullptr) {
4505     return (true);
4506   }
4507 
4508   /*Ngram checks whether the token contains any words in stopwords.
4509   We can't simply use CONTAIN to search in stopwords, because it's
4510   built on COMPARE. So we need to tokenize the token into words
4511   from unigram to f_n_char, and check them separately. */
4512   for (ulint ngram_token_size = 1; ngram_token_size <= token->f_n_char;
4513        ngram_token_size++) {
4514     const char *start;
4515     const char *next;
4516     const char *end;
4517     ulint char_len;
4518     ulint n_chars;
4519 
4520     start = reinterpret_cast<char *>(token->f_str);
4521     next = start;
4522     end = start + token->f_len;
4523     n_chars = 0;
4524 
4525     while (next < end) {
4526       char_len = my_mbcharlen_ptr(cs, next, end);
4527 
4528       if (next + char_len > end || char_len == 0) {
4529         break;
4530       } else {
4531         /* Skip SPACE */
4532         if (char_len == 1 && *next == ' ') {
4533           start = next + 1;
4534           next = start;
4535           n_chars = 0;
4536 
4537           continue;
4538         }
4539 
4540         next += char_len;
4541         n_chars++;
4542       }
4543 
4544       if (n_chars == ngram_token_size) {
4545         fts_string_t ngram_token;
4546         ngram_token.f_str = reinterpret_cast<byte *>(const_cast<char *>(start));
4547         ngram_token.f_len = next - start;
4548         ngram_token.f_n_char = ngram_token_size;
4549 
4550         ib_rbt_bound_t parent;
4551         if (rbt_search(stopwords, &parent, &ngram_token) == 0) {
4552           return (false);
4553         }
4554 
4555         /* Move a char forward */
4556         start += my_mbcharlen_ptr(cs, start, end);
4557         n_chars = ngram_token_size - 1;
4558       }
4559     }
4560   }
4561 
4562   return (true);
4563 }
4564 
4565 /** Add the token and its start position to the token's list of positions.
4566 @param[in,out]	result_doc	result doc rb tree
4567 @param[in]	str		token string
4568 @param[in]	position	token position */
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)4569 static void fts_add_token(fts_doc_t *result_doc, fts_string_t str,
4570                           ulint position) {
4571   /* Ignore string whose character number is less than
4572   "fts_min_token_size" or more than "fts_max_token_size" */
4573 
4574   if (fts_check_token(&str, nullptr, result_doc->is_ngram,
4575                       result_doc->charset)) {
4576     mem_heap_t *heap;
4577     fts_string_t t_str;
4578     fts_token_t *token;
4579     ib_rbt_bound_t parent;
4580     ulint newlen;
4581 
4582     heap = static_cast<mem_heap_t *>(result_doc->self_heap->arg);
4583 
4584     t_str.f_n_char = str.f_n_char;
4585 
4586     t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
4587 
4588     t_str.f_str = static_cast<byte *>(mem_heap_alloc(heap, t_str.f_len));
4589 
4590     /* For binary collations, a case sensitive search is
4591     performed. Hence don't convert to lower case. */
4592     if (my_binary_compare(result_doc->charset)) {
4593       memcpy(t_str.f_str, str.f_str, str.f_len);
4594       t_str.f_str[str.f_len] = 0;
4595       newlen = str.f_len;
4596     } else {
4597       newlen =
4598           innobase_fts_casedn_str(result_doc->charset, (char *)str.f_str,
4599                                   str.f_len, (char *)t_str.f_str, t_str.f_len);
4600     }
4601 
4602     t_str.f_len = newlen;
4603     t_str.f_str[newlen] = 0;
4604 
4605     /* Add the word to the document statistics. If the word
4606     hasn't been seen before we create a new entry for it. */
4607     if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4608       fts_token_t new_token;
4609 
4610       new_token.text.f_len = newlen;
4611       new_token.text.f_str = t_str.f_str;
4612       new_token.text.f_n_char = t_str.f_n_char;
4613 
4614       new_token.positions =
4615           ib_vector_create(result_doc->self_heap, sizeof(ulint), 32);
4616 
4617       parent.last = rbt_add_node(result_doc->tokens, &parent, &new_token);
4618 
4619       ut_ad(rbt_validate(result_doc->tokens));
4620     }
4621 
4622     token = rbt_value(fts_token_t, parent.last);
4623     ib_vector_push(token->positions, &position);
4624   }
4625 }
4626 
4627 /** Process next token from document starting at the given position, i.e., add
4628 the token's start position to the token's list of positions.
4629 @param[in,out]	doc		document to tokenize
4630 @param[out]	result		if provided, save result here
4631 @param[in]	start_pos	start position in text
4632 @param[in]	add_pos		add this position to all tokens from this
4633                                 tokenization
4634 @return number of characters handled in this call */
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)4635 static ulint fts_process_token(fts_doc_t *doc, fts_doc_t *result,
4636                                ulint start_pos, ulint add_pos) {
4637   ulint ret;
4638   fts_string_t str;
4639   ulint position;
4640   fts_doc_t *result_doc;
4641   byte buf[FTS_MAX_WORD_LEN + 1];
4642 
4643   str.f_str = buf;
4644 
4645   /* Determine where to save the result. */
4646   result_doc = (result != nullptr) ? result : doc;
4647 
4648   /* The length of a string in characters is set here only. */
4649 
4650   ret = innobase_mysql_fts_get_token(doc->charset, doc->text.f_str + start_pos,
4651                                      doc->text.f_str + doc->text.f_len, &str);
4652 
4653   position = start_pos + ret - str.f_len + add_pos;
4654 
4655   fts_add_token(result_doc, str, position);
4656 
4657   return (ret);
4658 }
4659 
4660 /** Get token char size by charset
4661  @return token size */
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)4662 ulint fts_get_token_size(const CHARSET_INFO *cs, /*!< in: Character set */
4663                          const char *token,      /*!< in: token */
4664                          ulint len)              /*!< in: token length */
4665 {
4666   char *start;
4667   char *end;
4668   ulint size = 0;
4669 
4670   /* const_cast is for reinterpret_cast below, or it will fail. */
4671   start = const_cast<char *>(token);
4672   end = start + len;
4673   while (start < end) {
4674     int ctype;
4675     int mbl;
4676 
4677     mbl = cs->cset->ctype(cs, &ctype, reinterpret_cast<uchar *>(start),
4678                           reinterpret_cast<uchar *>(end));
4679 
4680     size++;
4681 
4682     start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
4683   }
4684 
4685   return (size);
4686 }
4687 
4688 /** FTS plugin parser 'myql_parser' callback function for document tokenize.
4689  Refer to 'MYSQL_FTPARSER_PARAM' for more detail.
4690  @return always returns 0 */
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,char * doc,int len)4691 int fts_tokenize_document_internal(
4692     MYSQL_FTPARSER_PARAM *param, /*!< in: parser parameter */
4693     char *doc,                   /*!< in/out: document */
4694     int len)                     /*!< in: document length */
4695 {
4696   fts_string_t str;
4697   byte buf[FTS_MAX_WORD_LEN + 1];
4698   MYSQL_FTPARSER_BOOLEAN_INFO bool_info = {FT_TOKEN_WORD, 0,      0, 0, 0, 0,
4699                                            ' ',           nullptr};
4700 
4701   ut_ad(len >= 0);
4702 
4703   str.f_str = buf;
4704 
4705   for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
4706     inc =
4707         innobase_mysql_fts_get_token(const_cast<CHARSET_INFO *>(param->cs),
4708                                      reinterpret_cast<byte *>(doc) + i,
4709                                      reinterpret_cast<byte *>(doc) + len, &str);
4710 
4711     if (str.f_len > 0) {
4712       bool_info.position = static_cast<int>(i + inc - str.f_len);
4713       ut_ad(bool_info.position >= 0);
4714 
4715       /* Stop when add word fails */
4716       if (param->mysql_add_word(param, reinterpret_cast<char *>(str.f_str),
4717                                 static_cast<int>(str.f_len), &bool_info)) {
4718         break;
4719       }
4720     }
4721   }
4722 
4723   return (0);
4724 }
4725 
4726 /** FTS plugin parser 'myql_add_word' callback function for document tokenize.
4727  Refer to 'MYSQL_FTPARSER_PARAM' for more detail.
4728  @return always returns 0 */
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)4729 static int fts_tokenize_add_word_for_parser(
4730     MYSQL_FTPARSER_PARAM *param,               /* in: parser paramter */
4731     char *word,                                /* in: token word */
4732     int word_len,                              /* in: word len */
4733     MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info) /* in: word boolean info */
4734 {
4735   fts_string_t str;
4736   fts_tokenize_param_t *fts_param;
4737   fts_doc_t *result_doc;
4738   ulint position;
4739 
4740   fts_param = static_cast<fts_tokenize_param_t *>(param->mysql_ftparam);
4741   result_doc = fts_param->result_doc;
4742   ut_ad(result_doc != nullptr);
4743 
4744   str.f_str = reinterpret_cast<byte *>(word);
4745   str.f_len = word_len;
4746   str.f_n_char =
4747       fts_get_token_size(const_cast<CHARSET_INFO *>(param->cs), word, word_len);
4748 
4749   ut_ad(boolean_info->position >= 0);
4750   position = boolean_info->position + fts_param->add_pos;
4751 
4752   fts_add_token(result_doc, str, position);
4753 
4754   return (0);
4755 }
4756 
4757 /** Parse a document using an external / user supplied parser */
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)4758 static void fts_tokenize_by_parser(
4759     fts_doc_t *doc,                  /* in/out: document to tokenize */
4760     st_mysql_ftparser *parser,       /* in: plugin fts parser */
4761     fts_tokenize_param_t *fts_param) /* in: fts tokenize param */
4762 {
4763   MYSQL_FTPARSER_PARAM param;
4764 
4765   ut_a(parser);
4766 
4767   /* Set paramters for param */
4768   param.mysql_parse = fts_tokenize_document_internal;
4769   param.mysql_add_word = fts_tokenize_add_word_for_parser;
4770   param.mysql_ftparam = fts_param;
4771   param.cs = doc->charset;
4772   param.doc = reinterpret_cast<char *>(doc->text.f_str);
4773   param.length = static_cast<int>(doc->text.f_len);
4774   param.mode = MYSQL_FTPARSER_SIMPLE_MODE;
4775 
4776   PARSER_INIT(parser, &param);
4777   parser->parse(&param);
4778   PARSER_DEINIT(parser, &param);
4779 }
4780 
4781 /** Tokenize a document.
4782 @param[in,out]	doc	document to tokenize
4783 @param[out]	result	tokenization result
4784 @param[in]	parser	pluggable parser */
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)4785 static void fts_tokenize_document(fts_doc_t *doc, fts_doc_t *result,
4786                                   st_mysql_ftparser *parser) {
4787   ut_a(!doc->tokens);
4788   ut_a(doc->charset);
4789 
4790   doc->tokens = rbt_create_arg_cmp(sizeof(fts_token_t), innobase_fts_text_cmp,
4791                                    doc->charset);
4792 
4793   if (parser != nullptr) {
4794     fts_tokenize_param_t fts_param;
4795 
4796     fts_param.result_doc = (result != nullptr) ? result : doc;
4797     fts_param.add_pos = 0;
4798 
4799     fts_tokenize_by_parser(doc, parser, &fts_param);
4800   } else {
4801     ulint inc;
4802 
4803     for (ulint i = 0; i < doc->text.f_len; i += inc) {
4804       inc = fts_process_token(doc, result, i, 0);
4805       ut_a(inc > 0);
4806     }
4807   }
4808 }
4809 
4810 /** Continue to tokenize a document.
4811 @param[in,out]	doc	document to tokenize
4812 @param[in]	add_pos	add this position to all tokens from this tokenization
4813 @param[out]	result	tokenization result
4814 @param[in]	parser	pluggable parser */
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)4815 static void fts_tokenize_document_next(fts_doc_t *doc, ulint add_pos,
4816                                        fts_doc_t *result,
4817                                        st_mysql_ftparser *parser) {
4818   ut_a(doc->tokens);
4819 
4820   if (parser) {
4821     fts_tokenize_param_t fts_param;
4822 
4823     fts_param.result_doc = (result != nullptr) ? result : doc;
4824     fts_param.add_pos = add_pos;
4825 
4826     fts_tokenize_by_parser(doc, parser, &fts_param);
4827   } else {
4828     ulint inc;
4829 
4830     for (ulint i = 0; i < doc->text.f_len; i += inc) {
4831       inc = fts_process_token(doc, result, i, add_pos);
4832       ut_a(inc > 0);
4833     }
4834   }
4835 }
4836 
4837 /** Create the vector of fts_get_doc_t instances.
4838 @param[in,out]	cache	fts cache
4839 @return	vector of fts_get_doc_t instances */
fts_get_docs_create(fts_cache_t * cache)4840 static ib_vector_t *fts_get_docs_create(fts_cache_t *cache) {
4841   ib_vector_t *get_docs;
4842 
4843   ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
4844 
4845   /* We need one instance of fts_get_doc_t per index. */
4846   get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
4847 
4848   /* Create the get_doc instance, we need one of these
4849   per FTS index. */
4850   for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4851     dict_index_t **index;
4852     fts_get_doc_t *get_doc;
4853 
4854     index = static_cast<dict_index_t **>(ib_vector_get(cache->indexes, i));
4855 
4856     get_doc = static_cast<fts_get_doc_t *>(ib_vector_push(get_docs, nullptr));
4857 
4858     memset(get_doc, 0x0, sizeof(*get_doc));
4859 
4860     get_doc->index_cache = fts_get_index_cache(cache, *index);
4861     get_doc->cache = cache;
4862 
4863     /* Must find the index cache. */
4864     ut_a(get_doc->index_cache != nullptr);
4865   }
4866 
4867   return (get_docs);
4868 }
4869 
4870 /********************************************************************
4871 Release any resources held by the fts_get_doc_t instances. */
fts_get_docs_clear(ib_vector_t * get_docs)4872 static void fts_get_docs_clear(
4873     ib_vector_t *get_docs) /*!< in: Doc retrieval vector */
4874 {
4875   ulint i;
4876 
4877   /* Release the get doc graphs if any. */
4878   for (i = 0; i < ib_vector_size(get_docs); ++i) {
4879     fts_get_doc_t *get_doc =
4880         static_cast<fts_get_doc_t *>(ib_vector_get(get_docs, i));
4881 
4882     if (get_doc->get_document_graph != nullptr) {
4883       ut_a(get_doc->index_cache);
4884 
4885       fts_que_graph_free(get_doc->get_document_graph);
4886       get_doc->get_document_graph = nullptr;
4887     }
4888   }
4889 }
4890 
4891 /** Get the initial Doc ID by consulting the CONFIG table
4892  @return initial Doc ID */
fts_init_doc_id(const dict_table_t * table)4893 doc_id_t fts_init_doc_id(const dict_table_t *table) /*!< in: table */
4894 {
4895   doc_id_t max_doc_id = 0;
4896 
4897   rw_lock_x_lock(&table->fts->cache->lock);
4898 
4899   /* Return if the table is already initialized for DOC ID */
4900   if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
4901     rw_lock_x_unlock(&table->fts->cache->lock);
4902     return (0);
4903   }
4904 
4905   DEBUG_SYNC_C("fts_initialize_doc_id");
4906 
4907   /* Then compare this value with the ID value stored in the CONFIG
4908   table. The larger one will be our new initial Doc ID */
4909   fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
4910 
4911   /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
4912   creating index (and add doc id column. No need to recovery
4913   documents */
4914   if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
4915     fts_init_index((dict_table_t *)table, TRUE);
4916   }
4917 
4918   table->fts->fts_status |= ADDED_TABLE_SYNCED;
4919 
4920   table->fts->cache->first_doc_id = max_doc_id;
4921 
4922   rw_lock_x_unlock(&table->fts->cache->lock);
4923 
4924   ut_ad(max_doc_id > 0);
4925 
4926   return (max_doc_id);
4927 }
4928 
#ifdef FTS_MULT_INDEX
/** Check if the index is in the affected set.
@param[in]	fts_indexes	affected FTS indexes
@param[in]	get_doc		info for reading document
@return true if index is updated */
static ibool fts_is_index_updated(const ib_vector_t *fts_indexes,
                                  const fts_get_doc_t *get_doc) {
  const dict_index_t *wanted = get_doc->index_cache->index;
  const ulint n_indexes = ib_vector_size(fts_indexes);

  /* Linear search for the index of get_doc among the affected ones. */
  for (ulint i = 0; i < n_indexes; ++i) {
    const dict_index_t *candidate =
        static_cast<const dict_index_t *>(ib_vector_getp_const(fts_indexes, i));

    ut_a(candidate != nullptr);

    if (candidate == wanted) {
      return (TRUE);
    }
  }

  return (FALSE);
}
#endif
4956 
4957 /** Fetch COUNT(*) from specified table.
4958  @return the number of rows in the table */
fts_get_rows_count(fts_table_t * fts_table)4959 ulint fts_get_rows_count(fts_table_t *fts_table) /*!< in: fts table to read */
4960 {
4961   trx_t *trx;
4962   pars_info_t *info;
4963   que_t *graph;
4964   dberr_t error;
4965   ulint count = 0;
4966   char table_name[MAX_FULL_NAME_LEN];
4967 
4968   trx = trx_allocate_for_background();
4969 
4970   trx->op_info = "fetching FT table rows count";
4971 
4972   info = pars_info_create();
4973 
4974   pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
4975 
4976   fts_get_table_name(fts_table, table_name);
4977   pars_info_bind_id(info, true, "table_name", table_name);
4978 
4979   graph = fts_parse_sql(fts_table, info,
4980                         "DECLARE FUNCTION my_func;\n"
4981                         "DECLARE CURSOR c IS"
4982                         " SELECT COUNT(*)"
4983                         " FROM $table_name;\n"
4984                         "BEGIN\n"
4985                         "\n"
4986                         "OPEN c;\n"
4987                         "WHILE 1 = 1 LOOP\n"
4988                         "  FETCH c INTO my_func();\n"
4989                         "  IF c % NOTFOUND THEN\n"
4990                         "    EXIT;\n"
4991                         "  END IF;\n"
4992                         "END LOOP;\n"
4993                         "CLOSE c;");
4994 
4995   for (;;) {
4996     error = fts_eval_sql(trx, graph);
4997 
4998     if (error == DB_SUCCESS) {
4999       fts_sql_commit(trx);
5000 
5001       break; /* Exit the loop. */
5002     } else {
5003       fts_sql_rollback(trx);
5004 
5005       if (error == DB_LOCK_WAIT_TIMEOUT) {
5006         ib::warn(ER_IB_MSG_478) << "lock wait timeout reading"
5007                                    " FTS table. Retrying!";
5008 
5009         trx->error_state = DB_SUCCESS;
5010       } else {
5011         ib::error(ER_IB_MSG_479)
5012             << "(" << ut_strerr(error) << ") while reading FTS table.";
5013 
5014         break; /* Exit the loop. */
5015       }
5016     }
5017   }
5018 
5019   fts_que_graph_free(graph);
5020 
5021   trx_free_for_background(trx);
5022 
5023   return (count);
5024 }
5025 
#ifdef FTS_CACHE_SIZE_DEBUG
/** Read the max cache size parameter from the config table and store it
in sync->max_cache_size. A background transaction is used for the read.
@param[in,out]	sync	sync state */
static void fts_update_max_cache_size(fts_sync_t *sync) {
  fts_table_t config_table;

  FTS_INIT_FTS_TABLE(&config_table, FTS_SUFFIX_CONFIG, FTS_COMMON_TABLE,
                     sync->table);

  trx_t *trx = trx_allocate_for_background();

  /* The size returned is in bytes. */
  sync->max_cache_size = fts_get_max_cache_size(trx, &config_table);

  fts_sql_commit(trx);

  trx_free_for_background(trx);
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5046 
5047 /** Free the modified rows of a table. */
5048 UNIV_INLINE
fts_trx_table_rows_free(ib_rbt_t * rows)5049 void fts_trx_table_rows_free(ib_rbt_t *rows) /*!< in: rbt of rows to free */
5050 {
5051   const ib_rbt_node_t *node;
5052 
5053   for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5054     fts_trx_row_t *row;
5055 
5056     row = rbt_value(fts_trx_row_t, node);
5057 
5058     if (row->fts_indexes != nullptr) {
5059       /* This vector shouldn't be using the
5060       heap allocator.  */
5061       ut_a(row->fts_indexes->allocator->arg == nullptr);
5062 
5063       ib_vector_free(row->fts_indexes);
5064       row->fts_indexes = nullptr;
5065     }
5066 
5067     ut_free(rbt_remove_node(rows, node));
5068   }
5069 
5070   ut_a(rbt_empty(rows));
5071   rbt_free(rows);
5072 }
5073 
5074 /** Free an FTS savepoint instance. */
5075 UNIV_INLINE
fts_savepoint_free(fts_savepoint_t * savepoint)5076 void fts_savepoint_free(
5077     fts_savepoint_t *savepoint) /*!< in: savepoint instance */
5078 {
5079   const ib_rbt_node_t *node;
5080   ib_rbt_t *tables = savepoint->tables;
5081 
5082   /* Nothing to free! */
5083   if (tables == nullptr) {
5084     return;
5085   }
5086 
5087   for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5088     fts_trx_table_t *ftt;
5089     fts_trx_table_t **fttp;
5090 
5091     fttp = rbt_value(fts_trx_table_t *, node);
5092     ftt = *fttp;
5093 
5094     /* This can be NULL if a savepoint was released. */
5095     if (ftt->rows != nullptr) {
5096       fts_trx_table_rows_free(ftt->rows);
5097       ftt->rows = nullptr;
5098     }
5099 
5100     /* This can be NULL if a savepoint was released. */
5101     if (ftt->added_doc_ids != nullptr) {
5102       fts_doc_ids_free(ftt->added_doc_ids);
5103       ftt->added_doc_ids = nullptr;
5104     }
5105 
5106     /* The default savepoint name must be NULL. */
5107     if (ftt->docs_added_graph) {
5108       fts_que_graph_free(ftt->docs_added_graph);
5109     }
5110 
5111     /* NOTE: We are responsible for free'ing the node */
5112     ut_free(rbt_remove_node(tables, node));
5113   }
5114 
5115   ut_a(rbt_empty(tables));
5116   rbt_free(tables);
5117   savepoint->tables = nullptr;
5118 }
5119 
5120 /** Free an FTS trx. */
fts_trx_free(fts_trx_t * fts_trx)5121 void fts_trx_free(fts_trx_t *fts_trx) /* in, own: FTS trx */
5122 {
5123   ulint i;
5124 
5125   for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5126     fts_savepoint_t *savepoint;
5127 
5128     savepoint =
5129         static_cast<fts_savepoint_t *>(ib_vector_get(fts_trx->savepoints, i));
5130 
5131     /* The default savepoint name must be NULL. */
5132     if (i == 0) {
5133       ut_a(savepoint->name == nullptr);
5134     }
5135 
5136     fts_savepoint_free(savepoint);
5137   }
5138 
5139   for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5140     fts_savepoint_t *savepoint;
5141 
5142     savepoint =
5143         static_cast<fts_savepoint_t *>(ib_vector_get(fts_trx->last_stmt, i));
5144 
5145     /* The default savepoint name must be NULL. */
5146     if (i == 0) {
5147       ut_a(savepoint->name == nullptr);
5148     }
5149 
5150     fts_savepoint_free(savepoint);
5151   }
5152 
5153   if (fts_trx->heap) {
5154     mem_heap_free(fts_trx->heap);
5155   }
5156 }
5157 
5158 /** Extract the doc id from the FTS hidden column.
5159  @return doc id that was extracted from rec */
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5160 doc_id_t fts_get_doc_id_from_row(dict_table_t *table, /*!< in: table */
5161                                  dtuple_t *row) /*!< in: row whose FTS doc id we
5162                                                 want to extract.*/
5163 {
5164   dfield_t *field;
5165   doc_id_t doc_id = 0;
5166 
5167   ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5168 
5169   field = dtuple_get_nth_field(row, table->fts->doc_col);
5170 
5171   ut_a(dfield_get_len(field) == sizeof(doc_id));
5172   ut_a(dfield_get_type(field)->mtype == DATA_INT);
5173 
5174   doc_id = fts_read_doc_id(static_cast<const byte *>(dfield_get_data(field)));
5175 
5176   return (doc_id);
5177 }
5178 
5179 /** Extract the doc id from the record that belongs to index.
5180 @param[in]	table	table
5181 @param[in]	rec	record contains FTS_DOC_ID
5182 @param[in]	index	index of rec
5183 @param[in]	heap	heap memory
5184 @return doc id that was extracted from rec */
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5185 doc_id_t fts_get_doc_id_from_rec(dict_table_t *table, const rec_t *rec,
5186                                  const dict_index_t *index, mem_heap_t *heap) {
5187   ulint len;
5188   const byte *data;
5189   ulint col_no;
5190   doc_id_t doc_id = 0;
5191   ulint offsets_[REC_OFFS_NORMAL_SIZE];
5192   ulint *offsets = offsets_;
5193   mem_heap_t *my_heap = heap;
5194 
5195   ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5196 
5197   rec_offs_init(offsets_);
5198 
5199   offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &my_heap);
5200 
5201   col_no = index->get_col_pos(table->fts->doc_col);
5202 
5203   ut_ad(col_no != ULINT_UNDEFINED);
5204 
5205   data = rec_get_nth_field(rec, offsets, col_no, &len);
5206 
5207   ut_a(len == 8);
5208   ut_ad(8 == sizeof(doc_id));
5209   doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5210 
5211   if (my_heap && !heap) {
5212     mem_heap_free(my_heap);
5213   }
5214 
5215   return (doc_id);
5216 }
5217 
5218 /** Search the index specific cache for a particular FTS index.
5219  @return the index specific cache else NULL */
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5220 fts_index_cache_t *fts_find_index_cache(
5221     const fts_cache_t *cache,  /*!< in: cache to search */
5222     const dict_index_t *index) /*!< in: index to search for */
5223 {
5224   /* We cast away the const because our internal function, takes
5225   non-const cache arg and returns a non-const pointer. */
5226   return (static_cast<fts_index_cache_t *>(
5227       fts_get_index_cache((fts_cache_t *)cache, index)));
5228 }
5229 
5230 /** Search cache for word.
5231  @return the word node vector if found else NULL */
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5232 const ib_vector_t *fts_cache_find_word(
5233     const fts_index_cache_t *index_cache, /*!< in: cache to search */
5234     const fts_string_t *text)             /*!< in: word to search for */
5235 {
5236   ib_rbt_bound_t parent;
5237   const ib_vector_t *nodes = nullptr;
5238 #ifdef UNIV_DEBUG
5239   dict_table_t *table = index_cache->index->table;
5240   fts_cache_t *cache = table->fts->cache;
5241 
5242   ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5243 #endif /* UNIV_DEBUG */
5244 
5245   /* Lookup the word in the rb tree */
5246   if (rbt_search(index_cache->words, &parent, text) == 0) {
5247     const fts_tokenizer_word_t *word;
5248 
5249     word = rbt_value(fts_tokenizer_word_t, parent.last);
5250 
5251     nodes = word->nodes;
5252   }
5253 
5254   return (nodes);
5255 }
5256 
5257 /** Append deleted doc ids to vector. */
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5258 void fts_cache_append_deleted_doc_ids(
5259     const fts_cache_t *cache, /*!< in: cache to use */
5260     ib_vector_t *vector)      /*!< in: append to this vector */
5261 {
5262   mutex_enter(const_cast<ib_mutex_t *>(&cache->deleted_lock));
5263 
5264   if (cache->deleted_doc_ids == nullptr) {
5265     mutex_exit((ib_mutex_t *)&cache->deleted_lock);
5266     return;
5267   }
5268 
5269   for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5270     fts_update_t *update;
5271 
5272     update =
5273         static_cast<fts_update_t *>(ib_vector_get(cache->deleted_doc_ids, i));
5274 
5275     ib_vector_push(vector, &update->doc_id);
5276   }
5277 
5278   mutex_exit((ib_mutex_t *)&cache->deleted_lock);
5279 }
5280 
5281 /** Wait for the background thread to start. We poll to detect change
5282  of state, which is acceptable, since the wait should happen only
5283  once during startup.
5284  @return true if the thread started else false (i.e timed out) */
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5285 ibool fts_wait_for_background_thread_to_start(
5286     dict_table_t *table, /*!< in: table to which the thread
5287                          is attached */
5288     ulint max_wait)      /*!< in: time in microseconds, if
5289                          set to 0 then it disables
5290                          timeout checking */
5291 {
5292   ulint count = 0;
5293   ibool done = FALSE;
5294 
5295   ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5296 
5297   for (;;) {
5298     fts_t *fts = table->fts;
5299 
5300     mutex_enter(&fts->bg_threads_mutex);
5301 
5302     if (fts->fts_status & BG_THREAD_READY) {
5303       done = TRUE;
5304     }
5305 
5306     mutex_exit(&fts->bg_threads_mutex);
5307 
5308     if (!done) {
5309       os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5310 
5311       if (max_wait > 0) {
5312         max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5313 
5314         /* We ignore the residual value. */
5315         if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5316           break;
5317         }
5318       }
5319 
5320       ++count;
5321     } else {
5322       break;
5323     }
5324 
5325     if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5326       ib::error(ER_IB_MSG_480) << "The background thread for the FTS"
5327                                   " table "
5328                                << table->name << " refuses to start";
5329 
5330       count = 0;
5331     }
5332   }
5333 
5334   return (done);
5335 }
5336 
5337 /** Add the FTS document id hidden column. */
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5338 void fts_add_doc_id_column(
5339     dict_table_t *table, /*!< in/out: Table with FTS index */
5340     mem_heap_t *heap)    /*!< in: temporary memory heap, or NULL */
5341 {
5342   dict_mem_table_add_col(
5343       table, heap, FTS_DOC_ID_COL_NAME, DATA_INT,
5344       dtype_form_prtype(
5345           DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE | DATA_FTS_DOC_ID,
5346           0),
5347       sizeof(doc_id_t), false);
5348   DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5349 }
5350 
5351 /** Add new fts doc id to the update vector.
5352 @param[in]	table		the table that contains the FTS index.
5353 @param[in,out]	ufield		the fts doc id field in the update vector.
5354                                 No new memory is allocated for this in this
5355                                 function.
5356 @param[in,out]	next_doc_id	the fts doc id that has been added to the
5357                                 update vector.  If 0, a new fts doc id is
5358                                 automatically generated.  The memory provided
5359                                 for this argument will be used by the update
5360                                 vector. Ensure that the life time of this
5361                                 memory matches that of the update vector.
5362 @return the fts doc id used in the update vector */
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5363 doc_id_t fts_update_doc_id(dict_table_t *table, upd_field_t *ufield,
5364                            doc_id_t *next_doc_id) {
5365   doc_id_t doc_id;
5366   dberr_t error = DB_SUCCESS;
5367 
5368   if (*next_doc_id) {
5369     doc_id = *next_doc_id;
5370   } else {
5371     /* Get the new document id that will be added. */
5372     error = fts_get_next_doc_id(table, &doc_id);
5373   }
5374 
5375   if (error == DB_SUCCESS) {
5376     dict_index_t *clust_index;
5377     dict_col_t *col = table->get_col(table->fts->doc_col);
5378 
5379     ufield->exp = nullptr;
5380 
5381     ufield->new_val.len = sizeof(doc_id);
5382 
5383     clust_index = table->first_index();
5384 
5385     ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5386     col->copy_type(dfield_get_type(&ufield->new_val));
5387 
5388     /* It is possible we update record that has
5389     not yet be sync-ed from last crash. */
5390 
5391     /* Convert to storage byte order. */
5392     ut_a(doc_id != FTS_NULL_DOC_ID);
5393     fts_write_doc_id((byte *)next_doc_id, doc_id);
5394 
5395     ufield->new_val.data = next_doc_id;
5396     ufield->new_val.ext = 0;
5397   }
5398 
5399   return (doc_id);
5400 }
5401 
5402 /** fts_t constructor.
5403 @param[in]	table	table with FTS indexes
5404 @param[in,out]	heap	memory heap where 'this' is stored */
fts_t(dict_table_t * table,mem_heap_t * heap)5405 fts_t::fts_t(dict_table_t *table, mem_heap_t *heap)
5406     : bg_threads(0),
5407       fts_status(0),
5408       add_wq(nullptr),
5409       cache(nullptr),
5410       doc_col(ULINT_UNDEFINED),
5411       fts_heap(heap) {
5412   ut_a(table->fts == nullptr);
5413 
5414   mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
5415 
5416   ib_alloc_t *heap_alloc = ib_heap_allocator_create(fts_heap);
5417 
5418   indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t *), 4);
5419 
5420   dict_table_get_all_fts_indexes(table, indexes);
5421 }
5422 
5423 /** fts_t destructor. */
~fts_t()5424 fts_t::~fts_t() {
5425   mutex_free(&bg_threads_mutex);
5426 
5427   ut_ad(add_wq == nullptr);
5428 
5429   if (cache != nullptr) {
5430     fts_cache_clear(cache);
5431     fts_cache_destroy(cache);
5432     cache = nullptr;
5433   }
5434 
5435   /* There is no need to call ib_vector_free() on this->indexes
5436   because it is stored in this->fts_heap. */
5437 }
5438 
5439 /** Create an instance of fts_t.
5440  @return instance of fts_t */
fts_create(dict_table_t * table)5441 fts_t *fts_create(dict_table_t *table) /*!< in/out: table with FTS indexes */
5442 {
5443   fts_t *fts;
5444   mem_heap_t *heap;
5445 
5446   heap = mem_heap_create(512);
5447 
5448   fts = static_cast<fts_t *>(mem_heap_alloc(heap, sizeof(*fts)));
5449 
5450   new (fts) fts_t(table, heap);
5451 
5452   return (fts);
5453 }
5454 
5455 /** Free the FTS resources. */
fts_free(dict_table_t * table)5456 void fts_free(dict_table_t *table) /*!< in/out: table with FTS indexes */
5457 {
5458   fts_t *fts = table->fts;
5459 
5460   fts->~fts_t();
5461 
5462   mem_heap_free(fts->fts_heap);
5463 
5464   table->fts = nullptr;
5465 }
5466 
#if 0  // TODO: Enable this in WL#6608
/* The two functions below are compiled out. */
/*********************************************************************//**
Signal FTS threads to initiate shutdown. Sets BG_THREAD_STOP in
fts->fts_status while holding fts->bg_threads_mutex. */
void
fts_start_shutdown(
	dict_table_t*	table,		/*!< in: table with FTS indexes
					(not used by the body) */
	fts_t*		fts)		/*!< in: fts instance that needs
					to be informed about shutdown */
{
	mutex_enter(&fts->bg_threads_mutex);

	fts->fts_status |= BG_THREAD_STOP;

	mutex_exit(&fts->bg_threads_mutex);

}

/*********************************************************************//**
Wait for FTS threads to shut down. BG_THREAD_STOP must already be set.
NOTE(review): the wait below is performed while bg_threads_mutex is held;
confirm that the exiting threads never need that mutex before enabling
this code. */
void
fts_shutdown(
	dict_table_t*	table,		/*!< in: table with FTS indexes */
	fts_t*		fts)		/*!< in: fts instance to shutdown */
{
	mutex_enter(&fts->bg_threads_mutex);

	ut_a(fts->fts_status & BG_THREAD_STOP);

	dict_table_wait_for_bg_threads_to_exit(table, 20000);

	mutex_exit(&fts->bg_threads_mutex);
}
#endif
5500 
5501 /** Take a FTS savepoint. */
5502 UNIV_INLINE
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)5503 void fts_savepoint_copy(const fts_savepoint_t *src, /*!< in: source savepoint */
5504                         fts_savepoint_t *dst) /*!< out: destination savepoint */
5505 {
5506   const ib_rbt_node_t *node;
5507   const ib_rbt_t *tables;
5508 
5509   tables = src->tables;
5510 
5511   for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5512     fts_trx_table_t *ftt_dst;
5513     const fts_trx_table_t **ftt_src;
5514 
5515     ftt_src = rbt_value(const fts_trx_table_t *, node);
5516 
5517     ftt_dst = fts_trx_table_clone(*ftt_src);
5518 
5519     rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5520   }
5521 }
5522 
5523 /** Take a FTS savepoint. */
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)5524 void fts_savepoint_take(trx_t *trx,         /*!< in: transaction */
5525                         fts_trx_t *fts_trx, /*!< in: fts transaction */
5526                         const char *name)   /*!< in: savepoint name */
5527 {
5528   mem_heap_t *heap;
5529   fts_savepoint_t *savepoint;
5530   fts_savepoint_t *last_savepoint;
5531 
5532   ut_a(name != nullptr);
5533 
5534   heap = fts_trx->heap;
5535 
5536   /* The implied savepoint must exist. */
5537   ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5538 
5539   last_savepoint =
5540       static_cast<fts_savepoint_t *>(ib_vector_last(fts_trx->savepoints));
5541   savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5542 
5543   if (last_savepoint->tables != nullptr) {
5544     fts_savepoint_copy(last_savepoint, savepoint);
5545   }
5546 }
5547 
5548 /** Lookup a savepoint instance by name.
5549  @return ULINT_UNDEFINED if not found */
5550 UNIV_INLINE
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)5551 ulint fts_savepoint_lookup(ib_vector_t *savepoints, /*!< in: savepoints */
5552                            const char *name)        /*!< in: savepoint name */
5553 {
5554   ulint i;
5555 
5556   ut_a(ib_vector_size(savepoints) > 0);
5557 
5558   for (i = 1; i < ib_vector_size(savepoints); ++i) {
5559     fts_savepoint_t *savepoint;
5560 
5561     savepoint = static_cast<fts_savepoint_t *>(ib_vector_get(savepoints, i));
5562 
5563     if (strcmp(name, savepoint->name) == 0) {
5564       return (i);
5565     }
5566   }
5567 
5568   return (ULINT_UNDEFINED);
5569 }
5570 
5571 /** Release the savepoint data identified by name. All savepoints created
5572  after the named savepoint are kept. */
fts_savepoint_release(trx_t * trx,const char * name)5573 void fts_savepoint_release(trx_t *trx,       /*!< in: transaction */
5574                            const char *name) /*!< in: savepoint name */
5575 {
5576   ut_a(name != nullptr);
5577 
5578   ib_vector_t *savepoints = trx->fts_trx->savepoints;
5579 
5580   ut_a(ib_vector_size(savepoints) > 0);
5581 
5582   ulint i = fts_savepoint_lookup(savepoints, name);
5583   if (i != ULINT_UNDEFINED) {
5584     ut_a(i >= 1);
5585 
5586     fts_savepoint_t *savepoint;
5587     savepoint = static_cast<fts_savepoint_t *>(ib_vector_get(savepoints, i));
5588 
5589     if (i == ib_vector_size(savepoints) - 1) {
5590       /* If the savepoint is the last, we save its
5591       tables to the  previous savepoint. */
5592       fts_savepoint_t *prev_savepoint;
5593       prev_savepoint =
5594           static_cast<fts_savepoint_t *>(ib_vector_get(savepoints, i - 1));
5595 
5596       ib_rbt_t *tables = savepoint->tables;
5597       savepoint->tables = prev_savepoint->tables;
5598       prev_savepoint->tables = tables;
5599     }
5600 
5601     fts_savepoint_free(savepoint);
5602     ib_vector_remove(savepoints, *(void **)savepoint);
5603 
5604     /* Make sure we don't delete the implied savepoint. */
5605     ut_a(ib_vector_size(savepoints) > 0);
5606   }
5607 }
5608 
5609 /** Refresh last statement savepoint. */
fts_savepoint_laststmt_refresh(trx_t * trx)5610 void fts_savepoint_laststmt_refresh(trx_t *trx) /*!< in: transaction */
5611 {
5612   fts_trx_t *fts_trx;
5613   fts_savepoint_t *savepoint;
5614 
5615   fts_trx = trx->fts_trx;
5616 
5617   savepoint = static_cast<fts_savepoint_t *>(ib_vector_pop(fts_trx->last_stmt));
5618   fts_savepoint_free(savepoint);
5619 
5620   ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5621   savepoint = fts_savepoint_create(fts_trx->last_stmt, nullptr, nullptr);
5622 }
5623 
5624 /********************************************************************
5625 Undo the Doc ID add/delete operations in last stmt */
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)5626 static void fts_undo_last_stmt(
5627     fts_trx_table_t *s_ftt, /*!< in: Transaction FTS table */
5628     fts_trx_table_t *l_ftt) /*!< in: last stmt FTS table */
5629 {
5630   ib_rbt_t *s_rows;
5631   ib_rbt_t *l_rows;
5632   const ib_rbt_node_t *node;
5633 
5634   l_rows = l_ftt->rows;
5635   s_rows = s_ftt->rows;
5636 
5637   for (node = rbt_first(l_rows); node; node = rbt_next(l_rows, node)) {
5638     fts_trx_row_t *l_row = rbt_value(fts_trx_row_t, node);
5639     ib_rbt_bound_t parent;
5640 
5641     rbt_search(s_rows, &parent, &(l_row->doc_id));
5642 
5643     if (parent.result == 0) {
5644       fts_trx_row_t *s_row = rbt_value(fts_trx_row_t, parent.last);
5645 
5646       switch (l_row->state) {
5647         case FTS_INSERT:
5648           ut_free(rbt_remove_node(s_rows, parent.last));
5649           break;
5650 
5651         case FTS_DELETE:
5652           if (s_row->state == FTS_NOTHING) {
5653             s_row->state = FTS_INSERT;
5654           } else if (s_row->state == FTS_DELETE) {
5655             ut_free(rbt_remove_node(s_rows, parent.last));
5656           }
5657           break;
5658 
5659         /* FIXME: Check if FTS_MODIFY need to be addressed */
5660         case FTS_MODIFY:
5661         case FTS_NOTHING:
5662           break;
5663         default:
5664           ut_error;
5665       }
5666     }
5667   }
5668 }
5669 
5670 /** Rollback to savepoint indentified by name. */
fts_savepoint_rollback_last_stmt(trx_t * trx)5671 void fts_savepoint_rollback_last_stmt(trx_t *trx) /*!< in: transaction */
5672 {
5673   ib_vector_t *savepoints;
5674   fts_savepoint_t *savepoint;
5675   fts_savepoint_t *last_stmt;
5676   fts_trx_t *fts_trx;
5677   ib_rbt_bound_t parent;
5678   const ib_rbt_node_t *node;
5679   ib_rbt_t *l_tables;
5680   ib_rbt_t *s_tables;
5681 
5682   fts_trx = trx->fts_trx;
5683   savepoints = fts_trx->savepoints;
5684 
5685   savepoint = static_cast<fts_savepoint_t *>(ib_vector_last(savepoints));
5686   last_stmt =
5687       static_cast<fts_savepoint_t *>(ib_vector_last(fts_trx->last_stmt));
5688 
5689   l_tables = last_stmt->tables;
5690   s_tables = savepoint->tables;
5691 
5692   for (node = rbt_first(l_tables); node; node = rbt_next(l_tables, node)) {
5693     fts_trx_table_t **l_ftt;
5694 
5695     l_ftt = rbt_value(fts_trx_table_t *, node);
5696 
5697     rbt_search_cmp(s_tables, &parent, &(*l_ftt)->table->id,
5698                    fts_trx_table_id_cmp, nullptr);
5699 
5700     if (parent.result == 0) {
5701       fts_trx_table_t **s_ftt;
5702 
5703       s_ftt = rbt_value(fts_trx_table_t *, parent.last);
5704 
5705       fts_undo_last_stmt(*s_ftt, *l_ftt);
5706     }
5707   }
5708 }
5709 
5710 /** Rollback to savepoint indentified by name. */
fts_savepoint_rollback(trx_t * trx,const char * name)5711 void fts_savepoint_rollback(trx_t *trx,       /*!< in: transaction */
5712                             const char *name) /*!< in: savepoint name */
5713 {
5714   ulint i;
5715   ib_vector_t *savepoints;
5716 
5717   ut_a(name != nullptr);
5718 
5719   savepoints = trx->fts_trx->savepoints;
5720 
5721   /* We pop all savepoints from the the top of the stack up to
5722   and including the instance that was found. */
5723   i = fts_savepoint_lookup(savepoints, name);
5724 
5725   if (i != ULINT_UNDEFINED) {
5726     fts_savepoint_t *savepoint;
5727 
5728     ut_a(i > 0);
5729 
5730     while (ib_vector_size(savepoints) > i) {
5731       fts_savepoint_t *savepoint;
5732 
5733       savepoint = static_cast<fts_savepoint_t *>(ib_vector_pop(savepoints));
5734 
5735       if (savepoint->name != nullptr) {
5736         /* Since name was allocated on the heap, the
5737         memory will be released when the transaction
5738         completes. */
5739         savepoint->name = nullptr;
5740 
5741         fts_savepoint_free(savepoint);
5742       }
5743     }
5744 
5745     /* Pop all a elements from the top of the stack that may
5746     have been released. We have to be careful that we don't
5747     delete the implied savepoint. */
5748 
5749     for (savepoint = static_cast<fts_savepoint_t *>(ib_vector_last(savepoints));
5750          ib_vector_size(savepoints) > 1 && savepoint->name == nullptr;
5751          savepoint =
5752              static_cast<fts_savepoint_t *>(ib_vector_last(savepoints))) {
5753       ib_vector_pop(savepoints);
5754     }
5755 
5756     /* Make sure we don't delete the implied savepoint. */
5757     ut_a(ib_vector_size(savepoints) > 0);
5758 
5759     /* Restore the savepoint. */
5760     fts_savepoint_take(trx, trx->fts_trx, name);
5761   }
5762 }
5763 
5764 /** Check if a table is an FTS auxiliary table name.
5765 @param[out]	table	FTS table info
5766 @param[in]	name	Table name
5767 @param[in]	len	Length of table name
5768 @return true if the name matches an auxiliary table name pattern */
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)5769 bool fts_is_aux_table_name(fts_aux_table_t *table, const char *name,
5770                            ulint len) {
5771   const char *ptr;
5772   char *end;
5773   char my_name[MAX_FULL_NAME_LEN + 1];
5774 
5775   ut_ad(len <= MAX_FULL_NAME_LEN);
5776   ut_memcpy(my_name, name, len);
5777   my_name[len] = 0;
5778   end = my_name + len;
5779 
5780   ptr = static_cast<const char *>(memchr(my_name, '/', len));
5781 
5782   if (ptr != nullptr) {
5783     /* We will start the match after the '/' */
5784     ++ptr;
5785     len = end - ptr;
5786   }
5787 
5788   /* All auxiliary tables are prefixed with "FTS_" and the name
5789   length will be at the very least greater than 20 bytes. */
5790   if (ptr != nullptr && len > 20 &&
5791       (strncmp(ptr, FTS_PREFIX, 4) == 0 ||
5792        strncmp(ptr, FTS_PREFIX_5_7, 4) == 0)) {
5793     ulint i;
5794 
5795     /* Skip the prefix. */
5796     ptr += 4;
5797     len -= 4;
5798 
5799     /* Try and read the table id. */
5800     if (!fts_read_object_id(&table->parent_id, ptr)) {
5801       return (false);
5802     }
5803 
5804     /* Skip the table id. */
5805     ptr = static_cast<const char *>(memchr(ptr, '_', len));
5806 
5807     if (ptr == nullptr) {
5808       return (false);
5809     }
5810 
5811     /* Skip the underscore. */
5812     ++ptr;
5813     ut_a(end > ptr);
5814     len = end - ptr;
5815 
5816     /* First search the common table suffix array. */
5817     for (i = 0; fts_common_tables[i] != nullptr; ++i) {
5818       if (strncmp(ptr, fts_common_tables[i], len) == 0 ||
5819           strncmp(ptr, fts_common_tables_5_7[i], len) == 0) {
5820         table->type = FTS_COMMON_TABLE;
5821         return (true);
5822       }
5823     }
5824 
5825     /* Could be obsolete common tables. */
5826     if (native_strncasecmp(ptr, "ADDED", len) == 0 ||
5827         native_strncasecmp(ptr, "STOPWORDS", len) == 0) {
5828       table->type = FTS_OBSOLETED_TABLE;
5829       return (true);
5830     }
5831 
5832     /* Try and read the index id. */
5833     if (!fts_read_object_id(&table->index_id, ptr)) {
5834       return (false);
5835     }
5836 
5837     /* Skip the table id. */
5838     ptr = static_cast<const char *>(memchr(ptr, '_', len));
5839 
5840     if (ptr == nullptr) {
5841       return (false);
5842     }
5843 
5844     /* Skip the underscore. */
5845     ++ptr;
5846     ut_a(end > ptr);
5847     len = end - ptr;
5848 
5849     /* Search the FT index specific array. */
5850     for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
5851       if (strncmp(ptr, fts_get_suffix(i), len) == 0 ||
5852           strncmp(ptr, fts_get_suffix_5_7(i), len) == 0) {
5853         table->type = FTS_INDEX_TABLE;
5854         return (true);
5855       }
5856     }
5857 
5858     /* Other FT index specific table(s). */
5859     if (native_strncasecmp(ptr, "DOC_ID", len) == 0) {
5860       table->type = FTS_OBSOLETED_TABLE;
5861       return (true);
5862     }
5863   }
5864 
5865   return (false);
5866 }
5867 
5868 /** Check whether user supplied stopword table is of the right format.
5869  Caller is responsible to hold dictionary locks.
5870  @return the stopword column charset if qualifies */
fts_valid_stopword_table(const char * stopword_table_name)5871 CHARSET_INFO *fts_valid_stopword_table(
5872     const char *stopword_table_name) /*!< in: Stopword table
5873                                      name */
5874 {
5875   dict_table_t *table;
5876   dict_col_t *col = nullptr;
5877 
5878   if (!stopword_table_name) {
5879     return (nullptr);
5880   }
5881 
5882   MDL_ticket *mdl = nullptr;
5883   THD *thd = current_thd;
5884   table = dd_table_open_on_name(thd, &mdl, stopword_table_name, false,
5885                                 DICT_ERR_IGNORE_NONE);
5886 
5887   if (!table) {
5888     ib::error(ER_IB_MSG_481)
5889         << "User stopword table " << stopword_table_name << " does not exist.";
5890 
5891     return (nullptr);
5892   } else {
5893     const char *col_name;
5894 
5895     dd_table_close(table, thd, &mdl, false);
5896 
5897     col_name = table->get_col_name(0);
5898 
5899     if (ut_strcmp(col_name, "value")) {
5900       ib::error(ER_IB_MSG_482) << "Invalid column name for stopword"
5901                                   " table "
5902                                << stopword_table_name
5903                                << ". Its"
5904                                   " first column must be named as 'value'.";
5905 
5906       return (nullptr);
5907     }
5908 
5909     col = table->get_col(0);
5910 
5911     if (col->mtype != DATA_VARCHAR && col->mtype != DATA_VARMYSQL) {
5912       ib::error(ER_IB_MSG_483) << "Invalid column type for stopword"
5913                                   " table "
5914                                << stopword_table_name
5915                                << ". Its"
5916                                   " first column must be of varchar type";
5917 
5918       return (nullptr);
5919     }
5920   }
5921 
5922   ut_ad(col);
5923 
5924   return (fts_get_charset(col->prtype));
5925 }
5926 
5927 /** This function loads the stopword into the FTS cache. It also
5928  records/fetches stopword configuration to/from FTS configure
5929  table, depending on whether we are creating or reloading the
5930  FTS.
5931  @return true if load operation is successful */
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)5932 ibool fts_load_stopword(
5933     const dict_table_t *table,          /*!< in: Table with FTS */
5934     trx_t *trx,                         /*!< in: Transactions */
5935     const char *global_stopword_table,  /*!< in: Global stopword table
5936                                         name */
5937     const char *session_stopword_table, /*!< in: Session stopword table
5938                                         name */
5939     ibool stopword_is_on,               /*!< in: Whether stopword
5940                                         option is turned on/off */
5941     ibool reload)                       /*!< in: Whether it is
5942                                         for reloading FTS table */
5943 {
5944   fts_table_t fts_table;
5945   fts_string_t str;
5946   dberr_t error = DB_SUCCESS;
5947   ulint use_stopword;
5948   fts_cache_t *cache;
5949   const char *stopword_to_use = nullptr;
5950   ibool new_trx = FALSE;
5951   byte str_buffer[MAX_FULL_NAME_LEN + 1];
5952 
5953   FTS_INIT_FTS_TABLE(&fts_table, FTS_SUFFIX_CONFIG, FTS_COMMON_TABLE, table);
5954 
5955   cache = table->fts->cache;
5956 
5957   if (!reload && !(cache->stopword_info.status & STOPWORD_NOT_INIT)) {
5958     return (TRUE);
5959   }
5960 
5961   if (!trx) {
5962     trx = trx_allocate_for_background();
5963     trx->op_info = "upload FTS stopword";
5964     new_trx = TRUE;
5965   }
5966 
5967   /* First check whether stopword filtering is turned off */
5968   if (reload) {
5969     error =
5970         fts_config_get_ulint(trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
5971   } else {
5972     use_stopword = (ulint)stopword_is_on;
5973 
5974     error =
5975         fts_config_set_ulint(trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
5976   }
5977 
5978   if (error != DB_SUCCESS) {
5979     goto cleanup;
5980   }
5981 
5982   /* If stopword is turned off, no need to continue to load the
5983   stopword into cache, but still need to do initialization */
5984   if (!use_stopword) {
5985     cache->stopword_info.status = STOPWORD_OFF;
5986     goto cleanup;
5987   }
5988 
5989   if (reload) {
5990     /* Fetch the stopword table name from FTS config
5991     table */
5992     str.f_n_char = 0;
5993     str.f_str = str_buffer;
5994     str.f_len = sizeof(str_buffer) - 1;
5995 
5996     error =
5997         fts_config_get_value(trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
5998 
5999     if (error != DB_SUCCESS) {
6000       goto cleanup;
6001     }
6002 
6003     if (strlen((char *)str.f_str) > 0) {
6004       stopword_to_use = (const char *)str.f_str;
6005     }
6006   } else {
6007     stopword_to_use = (session_stopword_table) ? session_stopword_table
6008                                                : global_stopword_table;
6009   }
6010 
6011   if (stopword_to_use && fts_load_user_stopword(table->fts, stopword_to_use,
6012                                                 &cache->stopword_info)) {
6013     /* Save the stopword table name to the configure
6014     table */
6015     if (!reload) {
6016       str.f_n_char = 0;
6017       str.f_str = (byte *)stopword_to_use;
6018       str.f_len = ut_strlen(stopword_to_use);
6019 
6020       error =
6021           fts_config_set_value(trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
6022     }
6023   } else {
6024     /* Load system default stopword list */
6025     fts_load_default_stopword(&cache->stopword_info);
6026   }
6027 
6028 cleanup:
6029   if (new_trx) {
6030     if (error == DB_SUCCESS) {
6031       fts_sql_commit(trx);
6032     } else {
6033       fts_sql_rollback(trx);
6034     }
6035 
6036     trx_free_for_background(trx);
6037   }
6038 
6039   if (!cache->stopword_info.cached_stopword) {
6040     cache->stopword_info.cached_stopword =
6041         rbt_create_arg_cmp(sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
6042                            &my_charset_latin1);
6043   }
6044 
6045   return (error == DB_SUCCESS);
6046 }
6047 
6048 /** Callback function when we initialize the FTS at the start up
6049  time. It recovers the maximum Doc IDs presented in the current table.
6050  @return: always returns true */
fts_init_get_doc_id(void * row,void * user_arg)6051 static ibool fts_init_get_doc_id(void *row,      /*!< in: sel_node_t* */
6052                                  void *user_arg) /*!< in: fts cache */
6053 {
6054   doc_id_t doc_id = FTS_NULL_DOC_ID;
6055   sel_node_t *node = static_cast<sel_node_t *>(row);
6056   que_node_t *exp = node->select_list;
6057   fts_cache_t *cache = static_cast<fts_cache_t *>(user_arg);
6058 
6059   ut_ad(ib_vector_is_empty(cache->get_docs));
6060 
6061   /* Copy each indexed column content into doc->text.f_str */
6062   if (exp) {
6063     dfield_t *dfield = que_node_get_val(exp);
6064     dtype_t *type = dfield_get_type(dfield);
6065     void *data = dfield_get_data(dfield);
6066 
6067     ut_a(dtype_get_mtype(type) == DATA_INT);
6068 
6069     doc_id = static_cast<doc_id_t>(
6070         mach_read_from_8(static_cast<const byte *>(data)));
6071 
6072     if (doc_id >= cache->next_doc_id) {
6073       cache->next_doc_id = doc_id + 1;
6074     }
6075   }
6076 
6077   return (TRUE);
6078 }
6079 
6080 /** Callback function when we initialize the FTS at the start up
6081  time. It recovers Doc IDs that have not sync-ed to the auxiliary
6082  table, and require to bring them back into FTS index.
6083  @return: always returns true */
fts_init_recover_doc(void * row,void * user_arg)6084 static ibool fts_init_recover_doc(void *row,      /*!< in: sel_node_t* */
6085                                   void *user_arg) /*!< in: fts cache */
6086 {
6087   fts_doc_t doc;
6088   ulint doc_len = 0;
6089   ulint field_no = 0;
6090   fts_get_doc_t *get_doc = static_cast<fts_get_doc_t *>(user_arg);
6091   doc_id_t doc_id = FTS_NULL_DOC_ID;
6092   sel_node_t *node = static_cast<sel_node_t *>(row);
6093   que_node_t *exp = node->select_list;
6094   fts_cache_t *cache = get_doc->cache;
6095   st_mysql_ftparser *parser = get_doc->index_cache->index->parser;
6096 
6097   fts_doc_init(&doc);
6098   doc.found = TRUE;
6099 
6100   ut_ad(cache);
6101 
6102   /* Copy each indexed column content into doc->text.f_str */
6103   while (exp) {
6104     dfield_t *dfield = que_node_get_val(exp);
6105     ulint len = dfield_get_len(dfield);
6106 
6107     if (field_no == 0) {
6108       dtype_t *type = dfield_get_type(dfield);
6109       void *data = dfield_get_data(dfield);
6110 
6111       ut_a(dtype_get_mtype(type) == DATA_INT);
6112 
6113       doc_id = static_cast<doc_id_t>(
6114           mach_read_from_8(static_cast<const byte *>(data)));
6115 
6116       field_no++;
6117       exp = que_node_get_next(exp);
6118       continue;
6119     }
6120 
6121     if (len == UNIV_SQL_NULL) {
6122       exp = que_node_get_next(exp);
6123       continue;
6124     }
6125 
6126     ut_ad(get_doc);
6127 
6128     if (!get_doc->index_cache->charset) {
6129       get_doc->index_cache->charset = fts_get_charset(dfield->type.prtype);
6130     }
6131 
6132     doc.charset = get_doc->index_cache->charset;
6133     doc.is_ngram = get_doc->index_cache->index->is_ngram;
6134 
6135     if (dfield_is_ext(dfield)) {
6136       dict_table_t *table = cache->sync->table;
6137 
6138       /** When a nullptr is passed for trx, it means we will
6139       fetch the latest LOB (and no MVCC will be done). */
6140       doc.text.f_str = lob::btr_copy_externally_stored_field(
6141           nullptr, get_doc->index_cache->index, &doc.text.f_len, nullptr,
6142           static_cast<byte *>(dfield_get_data(dfield)),
6143           dict_table_page_size(table), len, false,
6144           static_cast<mem_heap_t *>(doc.self_heap->arg));
6145     } else {
6146       doc.text.f_str = static_cast<byte *>(dfield_get_data(dfield));
6147 
6148       doc.text.f_len = len;
6149     }
6150 
6151     if (field_no == 1) {
6152       fts_tokenize_document(&doc, nullptr, parser);
6153     } else {
6154       fts_tokenize_document_next(&doc, doc_len, nullptr, parser);
6155     }
6156 
6157     exp = que_node_get_next(exp);
6158 
6159     doc_len += (exp) ? len + 1 : len;
6160 
6161     field_no++;
6162   }
6163 
6164   fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
6165 
6166   fts_doc_free(&doc);
6167 
6168   cache->added++;
6169 
6170   if (doc_id >= cache->next_doc_id) {
6171     cache->next_doc_id = doc_id + 1;
6172   }
6173 
6174   return (TRUE);
6175 }
6176 
6177 /** This function brings FTS index in sync when FTS index is first
6178  used. There are documents that have not yet sync-ed to auxiliary
6179  tables from last server abnormally shutdown, we will need to bring
6180  such document into FTS cache before any further operations
6181  @return true if all OK */
fts_init_index(dict_table_t * table,ibool has_cache_lock)6182 ibool fts_init_index(dict_table_t *table,  /*!< in: Table with FTS */
6183                      ibool has_cache_lock) /*!< in: Whether we already have
6184                                            cache lock */
6185 {
6186   dict_index_t *index;
6187   doc_id_t start_doc;
6188   fts_get_doc_t *get_doc = nullptr;
6189   fts_cache_t *cache = table->fts->cache;
6190   bool need_init = false;
6191 
6192   ut_ad(!mutex_own(&dict_sys->mutex));
6193 
6194   /* First check cache->get_docs is initialized */
6195   if (!has_cache_lock) {
6196     rw_lock_x_lock(&cache->lock);
6197   }
6198 
6199   rw_lock_x_lock(&cache->init_lock);
6200   if (cache->get_docs == nullptr) {
6201     cache->get_docs = fts_get_docs_create(cache);
6202   }
6203   rw_lock_x_unlock(&cache->init_lock);
6204 
6205   if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
6206     goto func_exit;
6207   }
6208 
6209   need_init = true;
6210 
6211   start_doc = cache->synced_doc_id;
6212 
6213   if (!start_doc) {
6214     fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
6215     cache->synced_doc_id = start_doc;
6216   }
6217 
6218   /* No FTS index, this is the case when previous FTS index
6219   dropped, and we re-initialize the Doc ID system for subsequent
6220   insertion */
6221   if (ib_vector_is_empty(cache->get_docs)) {
6222     index = table->fts_doc_id_index;
6223 
6224     ut_a(index);
6225 
6226     fts_doc_fetch_by_doc_id(nullptr, start_doc, index,
6227                             FTS_FETCH_DOC_BY_ID_LARGE, fts_init_get_doc_id,
6228                             cache);
6229   } else {
6230     if (table->fts->cache->stopword_info.status & STOPWORD_NOT_INIT) {
6231       fts_load_stopword(table, nullptr, nullptr, nullptr, TRUE, TRUE);
6232     }
6233 
6234     for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
6235       get_doc = static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, i));
6236 
6237       index = get_doc->index_cache->index;
6238 
6239       fts_doc_fetch_by_doc_id(nullptr, start_doc, index,
6240                               FTS_FETCH_DOC_BY_ID_LARGE, fts_init_recover_doc,
6241                               get_doc);
6242     }
6243   }
6244 
6245   table->fts->fts_status |= ADDED_TABLE_SYNCED;
6246 
6247   fts_get_docs_clear(cache->get_docs);
6248 
6249 func_exit:
6250   if (!has_cache_lock) {
6251     rw_lock_x_unlock(&cache->lock);
6252   }
6253 
6254   if (need_init) {
6255     mutex_enter(&dict_sys->mutex);
6256     /* Register the table with the optimize thread. */
6257     fts_optimize_add_table(table);
6258     mutex_exit(&dict_sys->mutex);
6259   }
6260 
6261   return (TRUE);
6262 }
6263 
6264 /** Rename old FTS common and aux tables with the new table_id
6265 @param[in]	old_name	old name of FTS AUX table
6266 @param[in]	new_name	new name of FTS AUX table
6267 @return new fts table if success, else nullptr on failure */
fts_upgrade_rename_aux_table_low(const char * old_name,const char * new_name)6268 static dict_table_t *fts_upgrade_rename_aux_table_low(const char *old_name,
6269                                                       const char *new_name) {
6270   mutex_enter(&dict_sys->mutex);
6271 
6272   dict_table_t *old_aux_table =
6273       dict_table_open_on_name(old_name, true, false, DICT_ERR_IGNORE_NONE);
6274 
6275   ut_ad(old_aux_table != nullptr);
6276   dict_table_close(old_aux_table, true, false);
6277   dberr_t err = dict_table_rename_in_cache(old_aux_table, new_name, false);
6278   if (err != DB_SUCCESS) {
6279     mutex_exit(&dict_sys->mutex);
6280     return (nullptr);
6281   }
6282 
6283   dict_table_t *new_aux_table =
6284       dict_table_open_on_name(new_name, true, false, DICT_ERR_IGNORE_NONE);
6285   ut_ad(new_aux_table != nullptr);
6286   mutex_exit(&dict_sys->mutex);
6287 
6288   return (new_aux_table);
6289 }
6290 
6291 /** Rename old FTS common and aux tables with the new table_id
6292 @param[in]	old_name	old name of FTS AUX table
6293 @param[in]	new_name	new name of FTS AUX table
6294 @param[in]	rollback	if true, do the rename back
6295                                 else mark original AUX tables
6296                                 evictable */
fts_upgrade_rename_aux_table(const char * old_name,const char * new_name,bool rollback)6297 static void fts_upgrade_rename_aux_table(const char *old_name,
6298                                          const char *new_name, bool rollback) {
6299   dict_table_t *new_table = nullptr;
6300 
6301   if (rollback) {
6302     new_table = fts_upgrade_rename_aux_table_low(old_name, new_name);
6303 
6304   } else {
6305     new_table =
6306         dict_table_open_on_name(old_name, false, false, DICT_ERR_IGNORE_NONE);
6307   }
6308 
6309   if (new_table == nullptr) {
6310     return;
6311   }
6312 
6313   mutex_enter(&dict_sys->mutex);
6314   dict_table_allow_eviction(new_table);
6315   dict_table_close(new_table, true, false);
6316   mutex_exit(&dict_sys->mutex);
6317 }
6318 
6319 /** During upgrade, tables are moved by DICT_MAX_DD_TABLES
6320 offset, remove this offset to get 5.7 fts aux table names
6321 @param[in]	table_id	8.0 table id */
fts_upgrade_get_5_7_table_id(table_id_t table_id)6322 inline table_id_t fts_upgrade_get_5_7_table_id(table_id_t table_id) {
6323   return (table_id - DICT_MAX_DD_TABLES);
6324 }
6325 
6326 /** Upgrade FTS AUX Tables. The FTS common and aux tables are
6327 renamed because they have table_id in their name. We move table_ids
6328 by DICT_MAX_DD_TABLES offset. Aux tables are registered into DD
6329 afer rename.
6330 @param[in]	table		InnoDB table object
6331 @return DB_SUCCESS or error code */
fts_upgrade_aux_tables(dict_table_t * table)6332 dberr_t fts_upgrade_aux_tables(dict_table_t *table) {
6333   fts_table_t fts_old_table;
6334 
6335   ut_ad(srv_is_upgrade_mode);
6336 
6337   FTS_INIT_FTS_TABLE(&fts_old_table, nullptr, FTS_COMMON_TABLE, table);
6338   fts_table_t fts_new_table = fts_old_table;
6339 
6340   fts_old_table.table_id = fts_upgrade_get_5_7_table_id(fts_old_table.table_id);
6341 
6342   /* Rename common auxiliary tables */
6343   for (ulint i = 0; fts_common_tables_5_7[i] != nullptr; ++i) {
6344     fts_old_table.suffix = fts_common_tables_5_7[i];
6345 
6346     bool is_config = fts_old_table.suffix == FTS_SUFFIX_CONFIG_5_7;
6347     char old_name[MAX_FULL_NAME_LEN];
6348     char new_name[MAX_FULL_NAME_LEN];
6349 
6350     fts_get_table_name_5_7(&fts_old_table, old_name);
6351 
6352     DBUG_EXECUTE_IF("dd_upgrade", ib::info(ER_IB_MSG_484)
6353                                       << "Old fts table name is " << old_name;);
6354 
6355     fts_new_table.suffix = fts_common_tables[i];
6356     fts_get_table_name(&fts_new_table, new_name);
6357 
6358     DBUG_EXECUTE_IF("dd_upgrade", ib::info(ER_IB_MSG_485)
6359                                       << "New fts table name is " << new_name;);
6360 
6361     dict_table_t *new_table =
6362         fts_upgrade_rename_aux_table_low(old_name, new_name);
6363 
6364     if (new_table == nullptr) {
6365       return (DB_ERROR);
6366     }
6367 
6368     mutex_enter(&dict_sys->mutex);
6369     dict_table_prevent_eviction(new_table);
6370     mutex_exit(&dict_sys->mutex);
6371 
6372     if (!dd_create_fts_common_table(table, new_table, is_config)) {
6373       dict_table_close(new_table, false, false);
6374       return (DB_FAIL);
6375     }
6376     dict_table_close(new_table, false, false);
6377   }
6378 
6379   fts_t *fts = table->fts;
6380 
6381   /* Rename index specific auxiliary tables */
6382   for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
6383        ++i) {
6384     dict_index_t *index;
6385 
6386     index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
6387 
6388     FTS_INIT_INDEX_TABLE(&fts_old_table, nullptr, FTS_INDEX_TABLE, index);
6389     fts_new_table = fts_old_table;
6390 
6391     fts_old_table.table_id =
6392         fts_upgrade_get_5_7_table_id(fts_old_table.table_id);
6393 
6394     for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
6395       fts_old_table.suffix = fts_get_suffix_5_7(j);
6396 
6397       char old_name[MAX_FULL_NAME_LEN];
6398       char new_name[MAX_FULL_NAME_LEN];
6399 
6400       fts_get_table_name_5_7(&fts_old_table, old_name);
6401 
6402       fts_new_table.suffix = fts_get_suffix(j);
6403       fts_get_table_name(&fts_new_table, new_name);
6404 
6405       dict_table_t *new_table =
6406           fts_upgrade_rename_aux_table_low(old_name, new_name);
6407 
6408       if (new_table == nullptr) {
6409         return (DB_ERROR);
6410       }
6411 
6412       mutex_enter(&dict_sys->mutex);
6413       dict_table_prevent_eviction(new_table);
6414       mutex_exit(&dict_sys->mutex);
6415 
6416       CHARSET_INFO *charset = fts_get_charset(index->get_field(0)->col->prtype);
6417 
6418       if (!dd_create_fts_index_table(table, new_table, charset)) {
6419         dict_table_close(new_table, false, false);
6420         return (DB_FAIL);
6421       }
6422       dict_table_close(new_table, false, false);
6423     }
6424   }
6425 
6426   return (DB_SUCCESS);
6427 }
6428 
6429 /** Rename FTS AUX tablespace name from 8.0 format to 5.7 format.
6430 This will be done on upgrade failure
6431 @param[in]	table		parent table
6432 @param[in]	rollback	rollback the rename from 8.0 to 5.7
6433                                 if true, rename to 5.7 format
6434                                 if false, mark the table as evictable
6435 @return DB_SUCCESS on success, DB_ERROR on error */
fts_upgrade_rename(const dict_table_t * table,bool rollback)6436 dberr_t fts_upgrade_rename(const dict_table_t *table, bool rollback) {
6437   fts_table_t fts_old_table;
6438 
6439   ut_ad(srv_is_upgrade_mode);
6440 
6441   FTS_INIT_FTS_TABLE(&fts_old_table, nullptr, FTS_COMMON_TABLE, table);
6442 
6443   fts_table_t fts_new_table = fts_old_table;
6444 
6445   fts_new_table.table_id = fts_upgrade_get_5_7_table_id(fts_new_table.table_id);
6446 
6447   /* Rename common auxiliary tables */
6448   for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
6449     fts_old_table.suffix = fts_common_tables[i];
6450 
6451     char old_name[MAX_FULL_NAME_LEN];
6452     char new_name[MAX_FULL_NAME_LEN];
6453 
6454     fts_get_table_name(&fts_old_table, old_name);
6455 
6456     fts_new_table.suffix = fts_common_tables_5_7[i];
6457     fts_get_table_name_5_7(&fts_new_table, new_name);
6458 
6459     fts_upgrade_rename_aux_table(old_name, new_name, rollback);
6460   }
6461 
6462   fts_t *fts = table->fts;
6463 
6464   /* Rename index specific auxiliary tables */
6465   for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
6466        ++i) {
6467     dict_index_t *index;
6468 
6469     index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
6470 
6471     FTS_INIT_INDEX_TABLE(&fts_old_table, nullptr, FTS_INDEX_TABLE, index);
6472     fts_new_table = fts_old_table;
6473 
6474     fts_new_table.table_id =
6475         fts_upgrade_get_5_7_table_id(fts_new_table.table_id);
6476 
6477     for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
6478       fts_old_table.suffix = fts_get_suffix(j);
6479 
6480       char old_name[MAX_FULL_NAME_LEN];
6481       char new_name[MAX_FULL_NAME_LEN];
6482 
6483       fts_get_table_name(&fts_old_table, old_name);
6484 
6485       fts_new_table.suffix = fts_get_suffix_5_7(j);
6486       fts_get_table_name_5_7(&fts_new_table, new_name);
6487 
6488       fts_upgrade_rename_aux_table(old_name, new_name, rollback);
6489     }
6490   }
6491   return (DB_SUCCESS);
6492 }
6493