1 /*****************************************************************************
2
3 Copyright (c) 2011, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25 *****************************************************************************/
26
27 /** @file fts/fts0fts.cc
28 Full Text Search interface
29 ***********************************************************************/
30
31 #include <current_thd.h>
32 #include <sys/types.h>
33 #include <new>
34
35 #include "btr0pcur.h"
36 #include "dict0priv.h"
37 #include "dict0stats.h"
38 #include "dict0types.h"
39 #include "fts0fts.h"
40 #include "fts0plugin.h"
41 #include "fts0priv.h"
42 #include "fts0types.h"
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "ha_prototypes.h"
46 #include "lob0lob.h"
47
48 #include "my_dbug.h"
49
50 #include "dict0dd.h"
51 #include "lob0lob.h"
52 #include "row0mysql.h"
53 #include "row0sel.h"
54 #include "row0upd.h"
55 #include "sync0sync.h"
56 #include "trx0roll.h"
57 #include "ut0new.h"
58
59 static const ulint FTS_MAX_ID_LEN = 32;
60
61 /** Column name from the FTS config table */
62 #define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
63
64 /** Check whether an aux table name belongs to an obsolete table
65 by looking for one of the obsolete table keywords in the name */
66 #define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \
67 (strstr((table_name), "DOC_ID") != NULL || \
68 strstr((table_name), "ADDED") != NULL || \
69 strstr((table_name), "STOPWORDS") != NULL)
70
71 /** This is maximum FTS cache for each table and would be
72 a configurable variable */
73 ulong fts_max_cache_size;
74
75 /** Whether the total memory used for FTS cache is exhausted, and we will
76 need a sync to free some memory */
77 bool fts_need_sync = false;
78
79 /** Variable specifying the total memory allocated for FTS cache */
80 ulong fts_max_total_cache_size;
81
82 /** This is FTS result cache limit for each query and would be
83 a configurable variable */
84 ulong fts_result_cache_limit;
85
86 /** Variable specifying the maximum FTS token size */
87 ulong fts_max_token_size;
88
89 /** Variable specifying the minimum FTS token size */
90 ulong fts_min_token_size;
91
92 // FIXME: testing
93 static ib_time_t elapsed_time = 0;
94 static ulint n_nodes = 0;
95
96 #ifdef FTS_CACHE_SIZE_DEBUG
97 /** The cache size permissible lower limit (1M) */
98 static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
99
100 /** The cache size permissible upper limit (1G) */
101 static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
102 #endif
103
104 /** Time to sleep after DEADLOCK error before retrying operation. */
105 static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
106
107 /** variable to record innodb_fts_internal_tbl_name for information
108 schema table INNODB_FTS_INSERTED etc. */
109 char *fts_internal_tbl_name = nullptr;
110
111 /** InnoDB default stopword list:
112 There are different versions of stopwords, the stop words listed
113 below comes from "Google Stopword" list. Reference:
114 http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
115 The final version of InnoDB default stopword list is still pending
116 for decision */
117 const char *fts_default_stopword[] = {
118 "a", "about", "an", "are", "as", "at", "be", "by",
119 "com", "de", "en", "for", "from", "how", "i", "in",
120 "is", "it", "la", "of", "on", "or", "that", "the",
121 "this", "to", "was", "what", "when", "where", "who", "will",
122 "with", "und", "the", "www", nullptr};
123
124 /** FTS auxiliary table prefix that are common to all FT indexes.*/
125 const char *FTS_PREFIX = "fts_";
126
127 /** Upper-case FTS auxiliary table prefix; counterpart of FTS_PREFIX for the *_5_7 names. */
128 const char *FTS_PREFIX_5_7 = "FTS_";
129
130 /** FTS auxiliary table suffixes that are common to all FT indexes. */
131 const char *fts_common_tables[] = {"being_deleted", "being_deleted_cache",
132 "config", "deleted",
133 "deleted_cache", nullptr};
134
135 const char *FTS_SUFFIX_BEING_DELETED = fts_common_tables[0];
136 const char *FTS_SUFFIX_BEING_DELETED_CACHE = fts_common_tables[1];
137 const char *FTS_SUFFIX_CONFIG = fts_common_tables[2];
138 const char *FTS_SUFFIX_DELETED = fts_common_tables[3];
139 const char *FTS_SUFFIX_DELETED_CACHE = fts_common_tables[4];
140
141 /** Upper-case (*_5_7) variants of the FTS auxiliary table suffixes that are common to all FT indexes. */
142 const char *fts_common_tables_5_7[] = {"BEING_DELETED", "BEING_DELETED_CACHE",
143 "CONFIG", "DELETED",
144 "DELETED_CACHE", nullptr};
145
146 const char *FTS_SUFFIX_CONFIG_5_7 = fts_common_tables_5_7[2];
147
148 /** FTS auxiliary INDEX split intervals. */
149 const fts_index_selector_t fts_index_selector[] = {
150 {9, "index_1"}, {65, "index_2"}, {70, "index_3"}, {75, "index_4"},
151 {80, "index_5"}, {85, "index_6"}, {0, nullptr}};
152
153 /** FTS auxiliary INDEX split intervals, with the upper-case (*_5_7) suffixes. */
154 const fts_index_selector_t fts_index_selector_5_7[] = {
155 {9, "INDEX_1"}, {65, "INDEX_2"}, {70, "INDEX_3"}, {75, "INDEX_4"},
156 {80, "INDEX_5"}, {85, "INDEX_6"}, {0, nullptr}};
157
158 /** Default config values for FTS indexes on a table. */
159 static const char *fts_config_table_insert_values_sql =
160 "BEGIN\n"
161 "\n"
162 "INSERT INTO $config_table VALUES('" FTS_MAX_CACHE_SIZE_IN_MB
163 "', '256');\n"
164 ""
165 "INSERT INTO $config_table VALUES('" FTS_OPTIMIZE_LIMIT_IN_SECS
166 "', '180');\n"
167 ""
168 "INSERT INTO $config_table VALUES ('" FTS_SYNCED_DOC_ID
169 "', '0');\n"
170 ""
171 "INSERT INTO $config_table VALUES ('" FTS_TOTAL_DELETED_COUNT
172 "', '0');\n"
173 "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
174 "INSERT INTO $config_table VALUES ('" FTS_TABLE_STATE "', '0');\n";
175
176 /** FTS tokenize parmameter for plugin parser */
177 struct fts_tokenize_param_t {
178 fts_doc_t *result_doc; /*!< Result doc for tokens */
179 ulint add_pos; /*!< Added position for tokens */
180 };
181
182 /** Run SYNC on the table, i.e., write out data from the cache to the
183 FTS auxiliary INDEX table and clear the cache at the end.
184 @param[in,out] sync sync state
185 @param[in] unlock_cache whether unlock cache lock when write node
186 @param[in] wait whether wait when a sync is in progress
187 @param[in] has_dict whether has dict operation lock
188 @return DB_SUCCESS if all OK */
189 static dberr_t fts_sync(fts_sync_t *sync, bool unlock_cache, bool wait,
190 bool has_dict);
191
192 /** Release all resources held by the words rb tree e.g., the node ilist. */
193 static void fts_words_free(ib_rbt_t *words); /*!< in: rb tree of words */
194 #ifdef FTS_CACHE_SIZE_DEBUG
195 /** Read the max cache size parameter from the config table. */
196 static void fts_update_max_cache_size(fts_sync_t *sync); /*!< in: sync state */
197 #endif
198
199 /** This function fetches the document just inserted right before
200 we commit the transaction, and tokenize the inserted text data
201 and insert into FTS auxiliary table and its cache.
202 @param[in] ftt FTS transaction table
203 @param[in] doc_id doc id
204 @param[in] fts_indexes affected FTS indexes
205 @return true if successful */
206 static ulint fts_add_doc_by_id(fts_trx_table_t *ftt, doc_id_t doc_id,
207 ib_vector_t *fts_indexes MY_ATTRIBUTE((unused)));
208
209 /** Update the last document id. This function could create a new
210 transaction to update the last document id.
211 @return DB_SUCCESS if OK */
212 static dberr_t fts_update_sync_doc_id(
213 const dict_table_t *table, /*!< in: table */
214 const char *table_name, /*!< in: table name, or NULL */
215 doc_id_t doc_id, /*!< in: last document id */
216 trx_t *trx); /*!< in: update trx, or NULL */
217
218 /** Tokenize a document.
219 @param[in,out] doc document to tokenize
220 @param[out] result tokenization result
221 @param[in] parser pluggable parser */
222 static void fts_tokenize_document(fts_doc_t *doc, fts_doc_t *result,
223 st_mysql_ftparser *parser);
224
225 /** Continue to tokenize a document.
226 @param[in,out] doc document to tokenize
227 @param[in] add_pos add this position to all tokens from this tokenization
228 @param[out] result tokenization result
229 @param[in] parser pluggable parser */
230 static void fts_tokenize_document_next(fts_doc_t *doc, ulint add_pos,
231 fts_doc_t *result,
232 st_mysql_ftparser *parser);
233
234 /** Create the vector of fts_get_doc_t instances.
235 @param[in,out] cache fts cache
236 @return vector of fts_get_doc_t instances */
237 static ib_vector_t *fts_get_docs_create(fts_cache_t *cache);
238
239 /** Free the FTS cache.
240 @param[in,out] cache to be freed */
fts_cache_destroy(fts_cache_t * cache)241 static void fts_cache_destroy(fts_cache_t *cache) {
242 rw_lock_free(&cache->lock);
243 rw_lock_free(&cache->init_lock);
244 mutex_free(&cache->optimize_lock);
245 mutex_free(&cache->deleted_lock);
246 mutex_free(&cache->doc_id_lock);
247 os_event_destroy(cache->sync->event);
248
249 if (cache->stopword_info.cached_stopword) {
250 rbt_free(cache->stopword_info.cached_stopword);
251 }
252
253 if (cache->sync_heap->arg) {
254 mem_heap_free(static_cast<mem_heap_t *>(cache->sync_heap->arg));
255 }
256
257 mem_heap_free(cache->cache_heap);
258 }
259
260 /** Get a character set based on precise type.
261 @param prtype precise type
262 @return the corresponding character set */
263 UNIV_INLINE
fts_get_charset(ulint prtype)264 CHARSET_INFO *fts_get_charset(ulint prtype) {
265 #ifdef UNIV_DEBUG
266 switch (prtype & DATA_MYSQL_TYPE_MASK) {
267 case MYSQL_TYPE_BIT:
268 case MYSQL_TYPE_STRING:
269 case MYSQL_TYPE_VAR_STRING:
270 case MYSQL_TYPE_TINY_BLOB:
271 case MYSQL_TYPE_MEDIUM_BLOB:
272 case MYSQL_TYPE_BLOB:
273 case MYSQL_TYPE_LONG_BLOB:
274 case MYSQL_TYPE_VARCHAR:
275 break;
276 default:
277 ut_error;
278 }
279 #endif /* UNIV_DEBUG */
280
281 uint cs_num = (uint)dtype_get_charset_coll(prtype);
282
283 if (CHARSET_INFO *cs = get_charset(cs_num, MYF(MY_WME))) {
284 return (cs);
285 }
286
287 ib::fatal(ER_IB_MSG_461) << "Unable to find charset-collation " << cs_num;
288 return (nullptr);
289 }
290
291 /** This function loads the default InnoDB stopword list */
fts_load_default_stopword(fts_stopword_t * stopword_info)292 static void fts_load_default_stopword(
293 fts_stopword_t *stopword_info) /*!< in: stopword info */
294 {
295 fts_string_t str;
296 mem_heap_t *heap;
297 ib_alloc_t *allocator;
298 ib_rbt_t *stop_words;
299
300 allocator = stopword_info->heap;
301 heap = static_cast<mem_heap_t *>(allocator->arg);
302
303 if (!stopword_info->cached_stopword) {
304 stopword_info->cached_stopword =
305 rbt_create_arg_cmp(sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
306 &my_charset_latin1);
307 }
308
309 stop_words = stopword_info->cached_stopword;
310
311 str.f_n_char = 0;
312
313 for (ulint i = 0; fts_default_stopword[i]; ++i) {
314 char *word;
315 fts_tokenizer_word_t new_word;
316
317 /* We are going to duplicate the value below. */
318 word = const_cast<char *>(fts_default_stopword[i]);
319
320 new_word.nodes = ib_vector_create(allocator, sizeof(fts_node_t), 4);
321
322 str.f_len = ut_strlen(word);
323 str.f_str = reinterpret_cast<byte *>(word);
324
325 fts_string_dup(&new_word.text, &str, heap);
326
327 rbt_insert(stop_words, &new_word, &new_word);
328 }
329
330 stopword_info->status = STOPWORD_FROM_DEFAULT;
331 }
332
333 /** Callback function to read a single stopword value.
334 @return Always return true */
fts_read_stopword(void * row,void * user_arg)335 static ibool fts_read_stopword(
336 void *row, /*!< in: sel_node_t* */
337 void *user_arg) /*!< in: pointer to ib_vector_t */
338 {
339 ib_alloc_t *allocator;
340 fts_stopword_t *stopword_info;
341 sel_node_t *sel_node;
342 que_node_t *exp;
343 ib_rbt_t *stop_words;
344 dfield_t *dfield;
345 fts_string_t str;
346 mem_heap_t *heap;
347 ib_rbt_bound_t parent;
348
349 sel_node = static_cast<sel_node_t *>(row);
350 stopword_info = static_cast<fts_stopword_t *>(user_arg);
351
352 stop_words = stopword_info->cached_stopword;
353 allocator = static_cast<ib_alloc_t *>(stopword_info->heap);
354 heap = static_cast<mem_heap_t *>(allocator->arg);
355
356 exp = sel_node->select_list;
357
358 /* We only need to read the first column */
359 dfield = que_node_get_val(exp);
360
361 str.f_n_char = 0;
362 str.f_str = static_cast<byte *>(dfield_get_data(dfield));
363 str.f_len = dfield_get_len(dfield);
364
365 /* Only create new node if it is a value not already existed */
366 if (str.f_len != UNIV_SQL_NULL &&
367 rbt_search(stop_words, &parent, &str) != 0) {
368 fts_tokenizer_word_t new_word;
369
370 new_word.nodes = ib_vector_create(allocator, sizeof(fts_node_t), 4);
371
372 new_word.text.f_str =
373 static_cast<byte *>(mem_heap_alloc(heap, str.f_len + 1));
374
375 memcpy(new_word.text.f_str, str.f_str, str.f_len);
376
377 new_word.text.f_n_char = 0;
378 new_word.text.f_len = str.f_len;
379 new_word.text.f_str[str.f_len] = 0;
380
381 rbt_insert(stop_words, &new_word, &new_word);
382 }
383
384 return (TRUE);
385 }
386
387 /** Load user defined stopword from designated user table
388 @return true if load operation is successful */
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)389 static ibool fts_load_user_stopword(
390 fts_t *fts, /*!< in: FTS struct */
391 const char *stopword_table_name, /*!< in: Stopword table
392 name */
393 fts_stopword_t *stopword_info) /*!< in: Stopword info */
394 {
395 pars_info_t *info;
396 que_t *graph;
397 dberr_t error = DB_SUCCESS;
398 ibool ret = TRUE;
399 trx_t *trx;
400
401 trx = trx_allocate_for_background();
402 trx->op_info = "Load user stopword table into FTS cache";
403
404 /* Validate the user table existence and in the right
405 format */
406 stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
407 if (!stopword_info->charset) {
408 ret = FALSE;
409 goto cleanup;
410 } else if (!stopword_info->cached_stopword) {
411 /* Create the stopword RB tree with the stopword column
412 charset. All comparison will use this charset */
413 stopword_info->cached_stopword =
414 rbt_create_arg_cmp(sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
415 stopword_info->charset);
416 }
417
418 info = pars_info_create();
419
420 pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
421
422 pars_info_bind_function(info, "my_func", fts_read_stopword, stopword_info);
423
424 graph = fts_parse_sql(nullptr, info,
425 "DECLARE FUNCTION my_func;\n"
426 "DECLARE CURSOR c IS"
427 " SELECT value"
428 " FROM $table_stopword;\n"
429 "BEGIN\n"
430 "\n"
431 "OPEN c;\n"
432 "WHILE 1 = 1 LOOP\n"
433 " FETCH c INTO my_func();\n"
434 " IF c % NOTFOUND THEN\n"
435 " EXIT;\n"
436 " END IF;\n"
437 "END LOOP;\n"
438 "CLOSE c;");
439
440 for (;;) {
441 error = fts_eval_sql(trx, graph);
442
443 if (error == DB_SUCCESS) {
444 fts_sql_commit(trx);
445 stopword_info->status = STOPWORD_USER_TABLE;
446 break;
447 } else {
448 fts_sql_rollback(trx);
449
450 if (error == DB_LOCK_WAIT_TIMEOUT) {
451 ib::warn(ER_IB_MSG_462) << "Lock wait timeout reading user"
452 " stopword table. Retrying!";
453
454 trx->error_state = DB_SUCCESS;
455 } else {
456 ib::error(ER_IB_MSG_463) << "Error '" << ut_strerr(error)
457 << "' while reading user stopword"
458 " table.";
459 ret = FALSE;
460 break;
461 }
462 }
463 }
464
465 que_graph_free(graph);
466
467 cleanup:
468 trx_free_for_background(trx);
469 return (ret);
470 }
471
472 /** Initialize the index cache. */
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)473 static void fts_index_cache_init(
474 ib_alloc_t *allocator, /*!< in: the allocator to use */
475 fts_index_cache_t *index_cache) /*!< in: index cache */
476 {
477 ulint i;
478
479 ut_a(index_cache->words == nullptr);
480
481 index_cache->words =
482 rbt_create_arg_cmp(sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
483 index_cache->charset);
484
485 ut_a(index_cache->doc_stats == nullptr);
486
487 index_cache->doc_stats =
488 ib_vector_create(allocator, sizeof(fts_doc_stats_t), 4);
489
490 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
491 ut_a(index_cache->ins_graph[i] == nullptr);
492 ut_a(index_cache->sel_graph[i] == nullptr);
493 }
494 }
495
496 /** Initialize FTS cache. */
fts_cache_init(fts_cache_t * cache)497 void fts_cache_init(fts_cache_t *cache) /*!< in: cache to initialize */
498 {
499 ulint i;
500
501 /* Just to make sure */
502 ut_a(cache->sync_heap->arg == nullptr);
503
504 cache->sync_heap->arg = mem_heap_create(1024);
505
506 cache->total_size = 0;
507
508 mutex_enter((ib_mutex_t *)&cache->deleted_lock);
509 cache->deleted_doc_ids =
510 ib_vector_create(cache->sync_heap, sizeof(fts_update_t), 4);
511 mutex_exit((ib_mutex_t *)&cache->deleted_lock);
512
513 /* Reset the cache data for all the FTS indexes. */
514 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
515 fts_index_cache_t *index_cache;
516
517 index_cache =
518 static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
519
520 fts_index_cache_init(cache->sync_heap, index_cache);
521 }
522 }
523
524 /** Create a FTS cache. */
fts_cache_create(dict_table_t * table)525 fts_cache_t *fts_cache_create(
526 dict_table_t *table) /*!< in: table owns the FTS cache */
527 {
528 mem_heap_t *heap;
529 fts_cache_t *cache;
530
531 heap = static_cast<mem_heap_t *>(mem_heap_create(512));
532
533 cache = static_cast<fts_cache_t *>(mem_heap_zalloc(heap, sizeof(*cache)));
534
535 cache->cache_heap = heap;
536
537 rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
538
539 rw_lock_create(fts_cache_init_rw_lock_key, &cache->init_lock,
540 SYNC_FTS_CACHE_INIT);
541
542 mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
543
544 mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
545
546 mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
547
548 /* This is the heap used to create the cache itself. */
549 cache->self_heap = ib_heap_allocator_create(heap);
550
551 /* This is a transient heap, used for storing sync data. */
552 cache->sync_heap = ib_heap_allocator_create(heap);
553 cache->sync_heap->arg = nullptr;
554
555 cache->sync =
556 static_cast<fts_sync_t *>(mem_heap_zalloc(heap, sizeof(fts_sync_t)));
557
558 cache->sync->table = table;
559 cache->sync->event = os_event_create();
560
561 /* Create the index cache vector that will hold the inverted indexes. */
562 cache->indexes =
563 ib_vector_create(cache->self_heap, sizeof(fts_index_cache_t), 2);
564
565 fts_cache_init(cache);
566
567 cache->stopword_info.cached_stopword = nullptr;
568 cache->stopword_info.charset = nullptr;
569
570 cache->stopword_info.heap = cache->self_heap;
571
572 cache->stopword_info.status = STOPWORD_NOT_INIT;
573
574 return (cache);
575 }
576
577 /** Add a newly create index into FTS cache */
fts_add_index(dict_index_t * index,dict_table_t * table)578 void fts_add_index(dict_index_t *index, /*!< FTS index to be added */
579 dict_table_t *table) /*!< table */
580 {
581 fts_t *fts = table->fts;
582 fts_cache_t *cache;
583 fts_index_cache_t *index_cache;
584
585 ut_ad(fts);
586 cache = table->fts->cache;
587
588 rw_lock_x_lock(&cache->init_lock);
589
590 ib_vector_push(fts->indexes, &index);
591
592 index_cache = fts_find_index_cache(cache, index);
593
594 if (!index_cache) {
595 /* Add new index cache structure */
596 index_cache = fts_cache_index_cache_create(table, index);
597 }
598
599 rw_lock_x_unlock(&cache->init_lock);
600 }
601
602 /** recalibrate get_doc structure after index_cache in cache->indexes changed */
fts_reset_get_doc(fts_cache_t * cache)603 static void fts_reset_get_doc(fts_cache_t *cache) /*!< in: FTS index cache */
604 {
605 fts_get_doc_t *get_doc;
606 ulint i;
607
608 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
609
610 ib_vector_reset(cache->get_docs);
611
612 for (i = 0; i < ib_vector_size(cache->indexes); i++) {
613 fts_index_cache_t *ind_cache;
614
615 ind_cache =
616 static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
617
618 get_doc =
619 static_cast<fts_get_doc_t *>(ib_vector_push(cache->get_docs, nullptr));
620
621 memset(get_doc, 0x0, sizeof(*get_doc));
622
623 get_doc->index_cache = ind_cache;
624 }
625
626 ut_ad(ib_vector_size(cache->get_docs) == ib_vector_size(cache->indexes));
627 }
628
629 /** Check an index is in the table->indexes list
630 @return true if it exists */
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)631 static ibool fts_in_dict_index(
632 dict_table_t *table, /*!< in: Table */
633 dict_index_t *index_check) /*!< in: index to be checked */
634 {
635 dict_index_t *index;
636
637 for (index = table->first_index(); index != nullptr; index = index->next()) {
638 if (index == index_check) {
639 return (TRUE);
640 }
641 }
642
643 return (FALSE);
644 }
645
646 /** Check an index is in the fts->cache->indexes list
647 @return true if it exists */
fts_in_index_cache(dict_table_t * table,dict_index_t * index)648 static ibool fts_in_index_cache(
649 dict_table_t *table, /*!< in: Table */
650 dict_index_t *index) /*!< in: index to be checked */
651 {
652 ulint i;
653
654 for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
655 fts_index_cache_t *index_cache;
656
657 index_cache = static_cast<fts_index_cache_t *>(
658 ib_vector_get(table->fts->cache->indexes, i));
659
660 if (index_cache->index == index) {
661 return (TRUE);
662 }
663 }
664
665 return (FALSE);
666 }
667
668 /** Check indexes in the fts->indexes is also present in index cache and
669 table->indexes list
670 @return true if all indexes match */
fts_check_cached_index(dict_table_t * table)671 ibool fts_check_cached_index(
672 dict_table_t *table) /*!< in: Table where indexes are dropped */
673 {
674 ulint i;
675
676 if (!table->fts || !table->fts->cache) {
677 return (TRUE);
678 }
679
680 ut_a(ib_vector_size(table->fts->indexes) ==
681 ib_vector_size(table->fts->cache->indexes));
682
683 for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
684 dict_index_t *index;
685
686 index = static_cast<dict_index_t *>(ib_vector_getp(table->fts->indexes, i));
687
688 if (!fts_in_index_cache(table, index)) {
689 return (FALSE);
690 }
691
692 if (!fts_in_dict_index(table, index)) {
693 return (FALSE);
694 }
695 }
696
697 return (TRUE);
698 }
699
700 /** Drop auxiliary tables related to an FTS index
701 @param[in] table Table where indexes are dropped
702 @param[in] index Index to be dropped
703 @param[in] trx Transaction for the drop
704 @param[in,out] aux_vec Aux table name vector
705 @return DB_SUCCESS or error number */
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx,aux_name_vec_t * aux_vec)706 dberr_t fts_drop_index(dict_table_t *table, dict_index_t *index, trx_t *trx,
707 aux_name_vec_t *aux_vec) {
708 ib_vector_t *indexes = table->fts->indexes;
709 dberr_t err = DB_SUCCESS;
710
711 ut_a(indexes);
712
713 if ((ib_vector_size(indexes) == 1 &&
714 (index ==
715 static_cast<dict_index_t *>(ib_vector_getp(table->fts->indexes, 0)))) ||
716 ib_vector_is_empty(indexes)) {
717 doc_id_t current_doc_id;
718 doc_id_t first_doc_id;
719
720 /* If we are dropping the only FTS index of the table,
721 remove it from optimize thread */
722 fts_optimize_remove_table(table);
723
724 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
725
726 /* If Doc ID column is not added internally by FTS index,
727 we can drop all FTS auxiliary tables. Otherwise, we will
728 need to keep some common table such as CONFIG table, so
729 as to keep track of incrementing Doc IDs */
730 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
731 err = fts_drop_tables(trx, table, aux_vec);
732
733 fts_free(table);
734
735 return (err);
736 } else {
737 if (!(index->type & DICT_CORRUPT)) {
738 err = fts_empty_common_tables(trx, table);
739 ut_ad(err == DB_SUCCESS);
740 }
741 }
742
743 current_doc_id = table->fts->cache->next_doc_id;
744 first_doc_id = table->fts->cache->first_doc_id;
745 fts_cache_clear(table->fts->cache);
746 fts_cache_destroy(table->fts->cache);
747 table->fts->cache = fts_cache_create(table);
748 table->fts->cache->next_doc_id = current_doc_id;
749 table->fts->cache->first_doc_id = first_doc_id;
750 } else {
751 fts_cache_t *cache = table->fts->cache;
752 fts_index_cache_t *index_cache;
753
754 rw_lock_x_lock(&cache->init_lock);
755
756 index_cache = fts_find_index_cache(cache, index);
757
758 if (index_cache != nullptr) {
759 if (index_cache->words) {
760 fts_words_free(index_cache->words);
761 rbt_free(index_cache->words);
762 }
763
764 ib_vector_remove(cache->indexes, *(void **)index_cache);
765 }
766
767 if (cache->get_docs) {
768 fts_reset_get_doc(cache);
769 }
770
771 rw_lock_x_unlock(&cache->init_lock);
772 }
773
774 err = fts_drop_index_tables(trx, index, aux_vec);
775
776 ib_vector_remove(indexes, (const void *)index);
777
778 return (err);
779 }
780
781 /** Create an FTS index cache. */
fts_index_get_charset(dict_index_t * index)782 CHARSET_INFO *fts_index_get_charset(dict_index_t *index) /*!< in: FTS index */
783 {
784 CHARSET_INFO *charset = nullptr;
785 dict_field_t *field;
786 ulint prtype;
787
788 field = index->get_field(0);
789 prtype = field->col->prtype;
790
791 charset = fts_get_charset(prtype);
792
793 #ifdef FTS_DEBUG
794 /* Set up charset info for this index. Please note all
795 field of the FTS index should have the same charset */
796 for (i = 1; i < index->n_fields; i++) {
797 CHARSET_INFO *fld_charset;
798
799 field = index->get_field(i);
800 prtype = field->col->prtype;
801
802 fld_charset = fts_get_charset(prtype);
803
804 /* All FTS columns should have the same charset */
805 if (charset) {
806 ut_a(charset == fld_charset);
807 } else {
808 charset = fld_charset;
809 }
810 }
811 #endif
812
813 return (charset);
814 }
815 /** Create an FTS index cache.
816 @return Index Cache */
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)817 fts_index_cache_t *fts_cache_index_cache_create(
818 dict_table_t *table, /*!< in: table with FTS index */
819 dict_index_t *index) /*!< in: FTS index */
820 {
821 ulint n_bytes;
822 fts_index_cache_t *index_cache;
823 fts_cache_t *cache = table->fts->cache;
824
825 ut_a(cache != nullptr);
826
827 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
828
829 /* Must not already exist in the cache vector. */
830 ut_a(fts_find_index_cache(cache, index) == nullptr);
831
832 index_cache =
833 static_cast<fts_index_cache_t *>(ib_vector_push(cache->indexes, nullptr));
834
835 memset(index_cache, 0x0, sizeof(*index_cache));
836
837 index_cache->index = index;
838
839 index_cache->charset = fts_index_get_charset(index);
840
841 n_bytes = sizeof(que_t *) * FTS_NUM_AUX_INDEX;
842
843 index_cache->ins_graph = static_cast<que_t **>(mem_heap_zalloc(
844 static_cast<mem_heap_t *>(cache->self_heap->arg), n_bytes));
845
846 index_cache->sel_graph = static_cast<que_t **>(mem_heap_zalloc(
847 static_cast<mem_heap_t *>(cache->self_heap->arg), n_bytes));
848
849 fts_index_cache_init(cache->sync_heap, index_cache);
850
851 if (cache->get_docs) {
852 fts_reset_get_doc(cache);
853 }
854
855 return (index_cache);
856 }
857
858 /** Remove a FTS index cache
859 @param[in] table table with FTS index
860 @param[in] index FTS index */
fts_cache_index_cache_remove(dict_table_t * table,dict_index_t * index)861 void fts_cache_index_cache_remove(dict_table_t *table, dict_index_t *index) {
862 ut_ad(table->fts != nullptr);
863 ut_ad(index->type & DICT_FTS);
864
865 fts_index_cache_t *index_cache;
866
867 rw_lock_x_lock(&table->fts->cache->init_lock);
868
869 index_cache = static_cast<fts_index_cache_t *>(
870 fts_find_index_cache(table->fts->cache, index));
871
872 if (index_cache->words != nullptr) {
873 rbt_free(index_cache->words);
874 index_cache->words = nullptr;
875 }
876
877 ib_vector_remove(table->fts->cache->indexes,
878 *reinterpret_cast<void **>(index_cache));
879
880 rw_lock_x_unlock(&table->fts->cache->init_lock);
881 }
882
883 /** Release all resources help by the words rb tree e.g., the node ilist. */
fts_words_free(ib_rbt_t * words)884 static void fts_words_free(ib_rbt_t *words) /*!< in: rb tree of words */
885 {
886 const ib_rbt_node_t *rbt_node;
887
888 /* Free the resources held by a word. */
889 for (rbt_node = rbt_first(words); rbt_node != nullptr;
890 rbt_node = rbt_first(words)) {
891 ulint i;
892 fts_tokenizer_word_t *word;
893
894 word = rbt_value(fts_tokenizer_word_t, rbt_node);
895
896 /* Free the ilists of this word. */
897 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
898 fts_node_t *fts_node =
899 static_cast<fts_node_t *>(ib_vector_get(word->nodes, i));
900
901 ut_free(fts_node->ilist);
902 fts_node->ilist = nullptr;
903 }
904
905 /* NOTE: We are responsible for free'ing the node */
906 ut_free(rbt_remove_node(words, rbt_node));
907 }
908 }
909
910 /** Clear cache.
911 @param[in,out] cache fts cache */
fts_cache_clear(fts_cache_t * cache)912 void fts_cache_clear(fts_cache_t *cache) {
913 ulint i;
914
915 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
916 ulint j;
917 fts_index_cache_t *index_cache;
918
919 index_cache =
920 static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
921
922 fts_words_free(index_cache->words);
923
924 rbt_free(index_cache->words);
925
926 index_cache->words = nullptr;
927
928 for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
929 if (index_cache->ins_graph[j] != nullptr) {
930 que_graph_free(index_cache->ins_graph[j]);
931
932 index_cache->ins_graph[j] = nullptr;
933 }
934
935 if (index_cache->sel_graph[j] != nullptr) {
936 que_graph_free(index_cache->sel_graph[j]);
937
938 index_cache->sel_graph[j] = nullptr;
939 }
940 }
941
942 index_cache->doc_stats = nullptr;
943 }
944
945 mem_heap_free(static_cast<mem_heap_t *>(cache->sync_heap->arg));
946 cache->sync_heap->arg = nullptr;
947
948 fts_need_sync = false;
949
950 cache->total_size = 0;
951
952 mutex_enter((ib_mutex_t *)&cache->deleted_lock);
953 cache->deleted_doc_ids = nullptr;
954 mutex_exit((ib_mutex_t *)&cache->deleted_lock);
955 }
956
957 /** Search the index specific cache for a particular FTS index.
958 @return the index cache else NULL */
959 UNIV_INLINE
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)960 fts_index_cache_t *fts_get_index_cache(
961 fts_cache_t *cache, /*!< in: cache to search */
962 const dict_index_t *index) /*!< in: index to search for */
963 {
964 ulint i;
965
966 ut_ad(rw_lock_own((rw_lock_t *)&cache->lock, RW_LOCK_X) ||
967 rw_lock_own((rw_lock_t *)&cache->init_lock, RW_LOCK_X));
968
969 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
970 fts_index_cache_t *index_cache;
971
972 index_cache =
973 static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
974
975 if (index_cache->index == index) {
976 return (index_cache);
977 }
978 }
979
980 return (nullptr);
981 }
982
#ifdef FTS_DEBUG
/** Look up the get_doc structure whose index cache refers to a given index.
Debug builds assert that the caller holds cache->init_lock in X mode.
@param[in] cache cache to search
@param[in] index index to search for
@return the matching fts_get_doc_t item, or nullptr when none matches */
static fts_get_doc_t *fts_get_index_get_doc(fts_cache_t *cache,
                                            const dict_index_t *index) {
  ut_ad(rw_lock_own((rw_lock_t *)&cache->init_lock, RW_LOCK_X));

  for (ulint pos = 0; pos < ib_vector_size(cache->get_docs); ++pos) {
    fts_get_doc_t *candidate =
        static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, pos));

    /* Match on the index the entry's index cache points at. */
    if (candidate->index_cache->index == index) {
      return (candidate);
    }
  }

  return (nullptr);
}
#endif
1007
1008 /** Find an existing word, or if not found, create one and return it.
1009 @return specified word token */
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1010 static fts_tokenizer_word_t *fts_tokenizer_word_get(
1011 fts_cache_t *cache, /*!< in: cache */
1012 fts_index_cache_t *index_cache, /*!< in: index cache */
1013 fts_string_t *text) /*!< in: node text */
1014 {
1015 fts_tokenizer_word_t *word;
1016 ib_rbt_bound_t parent;
1017
1018 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1019
1020 /* If it is a stopword, do not index it */
1021 if (!fts_check_token(text, cache->stopword_info.cached_stopword,
1022 index_cache->index->is_ngram, index_cache->charset)) {
1023 return (nullptr);
1024 }
1025
1026 /* Check if we found a match, if not then add word to tree. */
1027 if (rbt_search(index_cache->words, &parent, text) != 0) {
1028 mem_heap_t *heap;
1029 fts_tokenizer_word_t new_word;
1030
1031 heap = static_cast<mem_heap_t *>(cache->sync_heap->arg);
1032
1033 new_word.nodes = ib_vector_create(cache->sync_heap, sizeof(fts_node_t), 4);
1034
1035 fts_string_dup(&new_word.text, text, heap);
1036
1037 parent.last = rbt_add_node(index_cache->words, &parent, &new_word);
1038
1039 /* Take into account the RB tree memory use and the vector. */
1040 cache->total_size += sizeof(new_word) + sizeof(ib_rbt_node_t) +
1041 text->f_len + (sizeof(fts_node_t) * 4) +
1042 sizeof(*new_word.nodes);
1043
1044 ut_ad(rbt_validate(index_cache->words));
1045 }
1046
1047 word = rbt_value(fts_tokenizer_word_t, parent.last);
1048
1049 return (word);
1050 }
1051
1052 /** Add the given doc_id/word positions to the given node's ilist. */
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1053 void fts_cache_node_add_positions(
1054 fts_cache_t *cache, /*!< in: cache */
1055 fts_node_t *node, /*!< in: word node */
1056 doc_id_t doc_id, /*!< in: doc id */
1057 ib_vector_t *positions) /*!< in: fts_token_t::positions */
1058 {
1059 ulint i;
1060 byte *ptr;
1061 byte *ilist;
1062 ulint enc_len;
1063 ulint last_pos;
1064 byte *ptr_start;
1065 ulint doc_id_delta;
1066
1067 #ifdef UNIV_DEBUG
1068 if (cache) {
1069 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1070 }
1071 #endif /* UNIV_DEBUG */
1072
1073 ut_ad(doc_id >= node->last_doc_id);
1074
1075 /* Calculate the space required to store the ilist. */
1076 doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1077 enc_len = fts_get_encoded_len(doc_id_delta);
1078
1079 last_pos = 0;
1080 for (i = 0; i < ib_vector_size(positions); i++) {
1081 ulint pos = *(static_cast<ulint *>(ib_vector_get(positions, i)));
1082
1083 ut_ad(last_pos == 0 || pos > last_pos);
1084
1085 enc_len += fts_get_encoded_len(pos - last_pos);
1086 last_pos = pos;
1087 }
1088
1089 /* The 0x00 byte at the end of the token positions list. */
1090 enc_len++;
1091
1092 if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1093 /* No need to allocate more space, we can fit in the new
1094 data at the end of the old one. */
1095 ilist = nullptr;
1096 ptr = node->ilist + node->ilist_size;
1097 } else {
1098 ulint new_size = node->ilist_size + enc_len;
1099
1100 /* Over-reserve space by a fixed size for small lengths and
1101 by 20% for lengths >= 48 bytes. */
1102 if (new_size < 16) {
1103 new_size = 16;
1104 } else if (new_size < 32) {
1105 new_size = 32;
1106 } else if (new_size < 48) {
1107 new_size = 48;
1108 } else {
1109 new_size = (ulint)(1.2 * new_size);
1110 }
1111
1112 ilist = static_cast<byte *>(ut_malloc_nokey(new_size));
1113 ptr = ilist + node->ilist_size;
1114
1115 node->ilist_size_alloc = new_size;
1116 }
1117
1118 ptr_start = ptr;
1119
1120 /* Encode the new fragment. */
1121 ptr += fts_encode_int(doc_id_delta, ptr);
1122
1123 last_pos = 0;
1124 for (i = 0; i < ib_vector_size(positions); i++) {
1125 ulint pos = *(static_cast<ulint *>(ib_vector_get(positions, i)));
1126
1127 ptr += fts_encode_int(pos - last_pos, ptr);
1128 last_pos = pos;
1129 }
1130
1131 *ptr++ = 0;
1132
1133 ut_a(enc_len == (ulint)(ptr - ptr_start));
1134
1135 if (ilist) {
1136 /* Copy old ilist to the start of the new one and switch the
1137 new one into place in the node. */
1138 if (node->ilist_size > 0) {
1139 memcpy(ilist, node->ilist, node->ilist_size);
1140 ut_free(node->ilist);
1141 }
1142
1143 node->ilist = ilist;
1144 }
1145
1146 node->ilist_size += enc_len;
1147
1148 if (cache) {
1149 cache->total_size += enc_len;
1150 }
1151
1152 if (node->first_doc_id == FTS_NULL_DOC_ID) {
1153 node->first_doc_id = doc_id;
1154 }
1155
1156 node->last_doc_id = doc_id;
1157 ++node->doc_count;
1158 }
1159
1160 /** Add document to the cache. */
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1161 static void fts_cache_add_doc(
1162 fts_cache_t *cache, /*!< in: cache */
1163 fts_index_cache_t *index_cache, /*!< in: index cache */
1164 doc_id_t doc_id, /*!< in: doc id to add */
1165 ib_rbt_t *tokens) /*!< in: document tokens */
1166 {
1167 const ib_rbt_node_t *node;
1168 ulint n_words;
1169 fts_doc_stats_t *doc_stats;
1170
1171 if (!tokens) {
1172 return;
1173 }
1174
1175 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1176
1177 n_words = rbt_size(tokens);
1178
1179 for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1180 fts_tokenizer_word_t *word;
1181 fts_node_t *fts_node = nullptr;
1182 fts_token_t *token = rbt_value(fts_token_t, node);
1183
1184 /* Find and/or add token to the cache. */
1185 word = fts_tokenizer_word_get(cache, index_cache, &token->text);
1186
1187 if (!word) {
1188 ut_free(rbt_remove_node(tokens, node));
1189 continue;
1190 }
1191
1192 if (ib_vector_size(word->nodes) > 0) {
1193 fts_node = static_cast<fts_node_t *>(ib_vector_last(word->nodes));
1194 }
1195
1196 if (fts_node == nullptr || fts_node->synced ||
1197 fts_node->ilist_size > FTS_ILIST_MAX_SIZE ||
1198 doc_id < fts_node->last_doc_id) {
1199 fts_node =
1200 static_cast<fts_node_t *>(ib_vector_push(word->nodes, nullptr));
1201
1202 memset(fts_node, 0x0, sizeof(*fts_node));
1203
1204 cache->total_size += sizeof(*fts_node);
1205 }
1206
1207 fts_cache_node_add_positions(cache, fts_node, doc_id, token->positions);
1208
1209 ut_free(rbt_remove_node(tokens, node));
1210 }
1211
1212 ut_a(rbt_empty(tokens));
1213
1214 /* Add to doc ids processed so far. */
1215 doc_stats = static_cast<fts_doc_stats_t *>(
1216 ib_vector_push(index_cache->doc_stats, nullptr));
1217
1218 doc_stats->doc_id = doc_id;
1219 doc_stats->word_count = n_words;
1220
1221 /* Add the doc stats memory usage too. */
1222 cache->total_size += sizeof(*doc_stats);
1223
1224 if (doc_id > cache->sync->max_doc_id) {
1225 cache->sync->max_doc_id = doc_id;
1226 }
1227 }
1228
1229 /** Drop FTS AUX table DD table objects in vector
1230 @param[in] aux_vec aux table name vector
1231 @param[in] file_per_table whether file per table
1232 @return true on success, false on failure. */
fts_drop_dd_tables(const aux_name_vec_t * aux_vec,bool file_per_table)1233 bool fts_drop_dd_tables(const aux_name_vec_t *aux_vec, bool file_per_table) {
1234 bool ret = true;
1235
1236 if (aux_vec == nullptr || aux_vec->aux_name.size() == 0) {
1237 return (true);
1238 }
1239
1240 for (ulint i = 0; i < aux_vec->aux_name.size(); i++) {
1241 bool retval;
1242
1243 retval = dd_drop_fts_table(aux_vec->aux_name[i], file_per_table);
1244
1245 if (!retval) {
1246 ret = false;
1247 }
1248 }
1249
1250 return (ret);
1251 }
1252
1253 /** Free FTS AUX table names in vector
1254 @param[in] aux_vec aux table name vector */
fts_free_aux_names(aux_name_vec_t * aux_vec)1255 void fts_free_aux_names(aux_name_vec_t *aux_vec) {
1256 if (aux_vec == nullptr || aux_vec->aux_name.size() == 0) {
1257 return;
1258 }
1259
1260 while (aux_vec->aux_name.size() > 0) {
1261 char *name = aux_vec->aux_name.back();
1262 ut_free(name);
1263 aux_vec->aux_name.pop_back();
1264 }
1265
1266 ut_ad(aux_vec->aux_name.size() == 0);
1267 }
1268
1269 /** Drops a table. If the table can't be found we return a SUCCESS code.
1270 @param[in,out] trx transaction
1271 @param[in] table_name table to drop
1272 @param[in,out] aux_vec fts aux table name vector
1273 @return DB_SUCCESS or error code */
fts_drop_table(trx_t * trx,const char * table_name,aux_name_vec_t * aux_vec)1274 static dberr_t fts_drop_table(trx_t *trx, const char *table_name,
1275 aux_name_vec_t *aux_vec) {
1276 dict_table_t *table;
1277 dberr_t error = DB_SUCCESS;
1278 THD *thd = current_thd;
1279 MDL_ticket *mdl = nullptr;
1280
1281 /* Check that the table exists in our data dictionary.
1282 Similar to regular drop table case, we will open table with
1283 DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1284 table = dd_table_open_on_name(
1285 thd, &mdl, table_name, true,
1286 static_cast<dict_err_ignore_t>(DICT_ERR_IGNORE_INDEX_ROOT |
1287 DICT_ERR_IGNORE_CORRUPT));
1288
1289 if (table != nullptr) {
1290 char table_name2[MAX_FULL_NAME_LEN];
1291
1292 strcpy(table_name2, table_name);
1293
1294 bool file_per_table = dict_table_is_file_per_table(table);
1295
1296 dd_table_close(table, thd, &mdl, true);
1297
1298 /* Pass nonatomic=false (dont allow data dict unlock),
1299 because the transaction may hold locks on SYS_* tables from
1300 previous calls to fts_drop_table(). */
1301 error = row_drop_table_for_mysql(table_name, trx, false, nullptr);
1302
1303 if (error != DB_SUCCESS) {
1304 ib::error(ER_IB_MSG_464) << "Unable to drop FTS index aux table "
1305 << table_name << ": " << ut_strerr(error);
1306 return (error);
1307 }
1308
1309 if (aux_vec == nullptr) {
1310 mutex_exit(&dict_sys->mutex);
1311
1312 if (!dd_drop_fts_table(table_name2, file_per_table)) {
1313 error = DB_FAIL;
1314 }
1315
1316 mutex_enter(&dict_sys->mutex);
1317 } else {
1318 aux_vec->aux_name.push_back(mem_strdup(table_name2));
1319 }
1320
1321 } else {
1322 error = DB_FAIL;
1323 }
1324
1325 return (error);
1326 }
1327
1328 /** Rename a single auxiliary table due to database name change.
1329 @return DB_SUCCESS or error code */
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx,bool replay)1330 static MY_ATTRIBUTE((warn_unused_result)) dberr_t fts_rename_one_aux_table(
1331 const char *new_name, /*!< in: new parent tbl name */
1332 const char *fts_table_old_name, /*!< in: old aux tbl name */
1333 trx_t *trx, /*!< in: transaction */
1334 bool replay) /*!< Whether in replay stage */
1335 {
1336 char fts_table_new_name[MAX_TABLE_NAME_LEN];
1337 ulint new_db_name_len = dict_get_db_name_len(new_name);
1338 ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1339 ulint table_new_name_len =
1340 strlen(fts_table_old_name) + new_db_name_len - old_db_name_len;
1341
1342 /* Check if the new and old database names are the same, if so,
1343 nothing to do */
1344 ut_ad((new_db_name_len != old_db_name_len) ||
1345 strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1346
1347 /* Get the database name from "new_name", and table name
1348 from the fts_table_old_name */
1349 strncpy(fts_table_new_name, new_name, new_db_name_len);
1350 strncpy(fts_table_new_name + new_db_name_len, strchr(fts_table_old_name, '/'),
1351 table_new_name_len - new_db_name_len);
1352 fts_table_new_name[table_new_name_len] = 0;
1353
1354 dberr_t error;
1355 error = row_rename_table_for_mysql(fts_table_old_name, fts_table_new_name,
1356 nullptr, trx, replay);
1357
1358 if (error == DB_SUCCESS) {
1359 /* Update dd tablespace filename. */
1360 dict_table_t *table;
1361 table = dict_table_check_if_in_cache_low(fts_table_new_name);
1362 ut_ad(table != nullptr);
1363
1364 /* Release dict_sys->mutex to avoid mutex reentrant. */
1365 table->acquire();
1366 mutex_exit(&dict_sys->mutex);
1367
1368 if (!replay && !dd_rename_fts_table(table, fts_table_old_name)) {
1369 ut_ad(0);
1370 }
1371
1372 mutex_enter(&dict_sys->mutex);
1373 table->release();
1374 }
1375
1376 return (error);
1377 }
1378
1379 /** Rename auxiliary tables for all fts index for a table. This(rename)
1380 is due to database name change
1381 @return DB_SUCCESS or error code */
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx,bool replay)1382 dberr_t fts_rename_aux_tables(dict_table_t *table, /*!< in: user Table */
1383 const char *new_name, /*!< in: new table name */
1384 trx_t *trx, /*!< in: transaction */
1385 bool replay) /*!< in: Whether in replay
1386 stage */
1387 {
1388 ulint i;
1389 fts_table_t fts_table;
1390
1391 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1392
1393 /* Rename common auxiliary tables */
1394 for (i = 0; fts_common_tables[i] != nullptr; ++i) {
1395 char old_table_name[MAX_FULL_NAME_LEN];
1396 dberr_t err = DB_SUCCESS;
1397
1398 fts_table.suffix = fts_common_tables[i];
1399
1400 fts_get_table_name(&fts_table, old_table_name);
1401
1402 err = fts_rename_one_aux_table(new_name, old_table_name, trx, replay);
1403
1404 if (err != DB_SUCCESS) {
1405 return (err);
1406 }
1407 }
1408
1409 fts_t *fts = table->fts;
1410
1411 /* Rename index specific auxiliary tables */
1412 for (i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
1413 ++i) {
1414 dict_index_t *index;
1415
1416 index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
1417
1418 FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
1419
1420 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1421 dberr_t err;
1422 char old_table_name[MAX_FULL_NAME_LEN];
1423
1424 fts_table.suffix = fts_get_suffix(j);
1425
1426 fts_get_table_name(&fts_table, old_table_name);
1427
1428 err = fts_rename_one_aux_table(new_name, old_table_name, trx, replay);
1429
1430 DBUG_EXECUTE_IF("fts_rename_failure", err = DB_DEADLOCK;);
1431
1432 if (err != DB_SUCCESS) {
1433 return (err);
1434 }
1435 }
1436 }
1437
1438 return (DB_SUCCESS);
1439 }
1440
1441 /** Drops the common ancillary tables needed for supporting an FTS index
1442 on the given table. row_mysql_lock_data_dictionary must have been called
1443 before this.
1444 @param[in,out] trx transaction
1445 @param[in,out] fts_table table with fts index
1446 @param[in,out] aux_vec fts table name vector
1447 @return DB_SUCCESS or error code */
1448 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table,aux_name_vec_t * aux_vec)1449 fts_drop_common_tables(trx_t *trx, fts_table_t *fts_table,
1450 aux_name_vec_t *aux_vec) {
1451 ulint i;
1452 dberr_t error = DB_SUCCESS;
1453
1454 for (i = 0; fts_common_tables[i] != nullptr; ++i) {
1455 dberr_t err;
1456 char table_name[MAX_FULL_NAME_LEN];
1457
1458 fts_table->suffix = fts_common_tables[i];
1459
1460 fts_get_table_name(fts_table, table_name);
1461
1462 err = fts_drop_table(trx, table_name, aux_vec);
1463
1464 /* We only return the status of the last error. */
1465 if (err != DB_SUCCESS && err != DB_FAIL) {
1466 error = err;
1467 }
1468 }
1469
1470 return (error);
1471 }
1472
1473 /** Since we do a horizontal split on the index table, we need to drop
1474 all the split tables.
1475 @param[in] trx transaction
1476 @param[in] index fts index
1477 @param[out] aux_vec dropped table names vector
1478 @return DB_SUCCESS or error code */
fts_drop_index_tables(trx_t * trx,dict_index_t * index,aux_name_vec_t * aux_vec)1479 dberr_t fts_drop_index_tables(trx_t *trx, dict_index_t *index,
1480 aux_name_vec_t *aux_vec) {
1481 ulint i;
1482 fts_table_t fts_table;
1483 dberr_t error = DB_SUCCESS;
1484
1485 FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
1486
1487 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1488 dberr_t err;
1489 char table_name[MAX_FULL_NAME_LEN];
1490
1491 fts_table.suffix = fts_get_suffix(i);
1492
1493 fts_get_table_name(&fts_table, table_name);
1494
1495 err = fts_drop_table(trx, table_name, aux_vec);
1496
1497 /* We only return the status of the last error. */
1498 if (err != DB_SUCCESS && err != DB_FAIL) {
1499 error = err;
1500 }
1501 }
1502
1503 return (error);
1504 }
1505
1506 /** Write the default settings to the config table.
1507 @param[in] fts_table fts table
1508 @return DB_SUCCESS or error code. */
fts_init_config_table(fts_table_t * fts_table)1509 static dberr_t fts_init_config_table(fts_table_t *fts_table) {
1510 pars_info_t *info;
1511 que_t *graph;
1512 char table_name[MAX_FULL_NAME_LEN];
1513 dberr_t error = DB_SUCCESS;
1514 trx_t *trx;
1515
1516 ut_ad(!mutex_own(&dict_sys->mutex));
1517
1518 info = pars_info_create();
1519
1520 fts_table->suffix = FTS_SUFFIX_CONFIG;
1521 fts_get_table_name(fts_table, table_name);
1522 pars_info_bind_id(info, true, "config_table", table_name);
1523 trx = trx_allocate_for_background();
1524
1525 graph = fts_parse_sql(fts_table, info, fts_config_table_insert_values_sql);
1526
1527 error = fts_eval_sql(trx, graph);
1528
1529 que_graph_free(graph);
1530
1531 if (error == DB_SUCCESS) {
1532 fts_sql_commit(trx);
1533 } else {
1534 fts_sql_rollback(trx);
1535 }
1536
1537 trx_free_for_background(trx);
1538
1539 return (error);
1540 }
1541
1542 /** Empty a common talbes.
1543 @param[in,out] trx transaction
1544 @param[in] fts_table fts table
1545 @return DB_SUCCESS or error code. */
fts_empty_table(trx_t * trx,fts_table_t * fts_table)1546 static dberr_t fts_empty_table(trx_t *trx, fts_table_t *fts_table) {
1547 pars_info_t *info;
1548 que_t *graph;
1549 char table_name[MAX_FULL_NAME_LEN];
1550 dberr_t error = DB_SUCCESS;
1551
1552 info = pars_info_create();
1553
1554 fts_get_table_name(fts_table, table_name);
1555 pars_info_bind_id(info, true, "table_name", table_name);
1556
1557 ut_ad(mutex_own(&dict_sys->mutex));
1558
1559 mutex_exit(&dict_sys->mutex);
1560
1561 graph = fts_parse_sql(fts_table, info, "BEGIN DELETE FROM $table_name;");
1562
1563 error = fts_eval_sql(trx, graph);
1564
1565 que_graph_free(graph);
1566
1567 mutex_enter(&dict_sys->mutex);
1568
1569 return (error);
1570 }
1571
1572 /** Empty all common talbes.
1573 @param[in,out] trx transaction
1574 @param[in] table dict table
1575 @return DB_SUCCESS or error code. */
fts_empty_common_tables(trx_t * trx,dict_table_t * table)1576 dberr_t fts_empty_common_tables(trx_t *trx, dict_table_t *table) {
1577 ulint i;
1578 fts_table_t fts_table;
1579 dberr_t error = DB_SUCCESS;
1580
1581 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1582
1583 for (i = 0; fts_common_tables[i] != nullptr; ++i) {
1584 dberr_t err;
1585
1586 fts_table.suffix = fts_common_tables[i];
1587
1588 /* "config" table should not be emptied, as it has the
1589 last used DOC ID info */
1590 if (i == 2) {
1591 ut_ad(ut_strcmp(fts_table.suffix, "config") == 0);
1592 continue;
1593 }
1594
1595 err = fts_empty_table(trx, &fts_table);
1596
1597 if (err != DB_SUCCESS) {
1598 error = err;
1599 }
1600 }
1601
1602 return (error);
1603 }
1604
1605 /** Drops FTS ancillary tables needed for supporting an FTS index
1606 on the given table. row_mysql_lock_data_dictionary must have been called
1607 before this.
1608 @param[in,out] trx transaction
1609 @param[in] fts fts instance
1610 @param[in,out] aux_vec fts aux table name vector
1611 @return DB_SUCCESS or error code */
1612 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts,aux_name_vec_t * aux_vec)1613 fts_drop_all_index_tables(trx_t *trx, fts_t *fts, aux_name_vec_t *aux_vec) {
1614 dberr_t error = DB_SUCCESS;
1615
1616 for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
1617 ++i) {
1618 dberr_t err;
1619 dict_index_t *index;
1620
1621 index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
1622
1623 err = fts_drop_index_tables(trx, index, aux_vec);
1624
1625 if (err != DB_SUCCESS) {
1626 error = err;
1627 }
1628 }
1629
1630 return (error);
1631 }
1632
1633 /** Drops the ancillary tables needed for supporting an FTS index on a
1634 given table. row_mysql_lock_data_dictionary must have been called before
1635 this.
1636 @param[in,out] trx transaction
1637 @param[in] table table has the fts index
1638 @param[in,out] aux_vec fts aux table name vector
1639 @return DB_SUCCESS or error code */
fts_drop_tables(trx_t * trx,dict_table_t * table,aux_name_vec_t * aux_vec)1640 dberr_t fts_drop_tables(trx_t *trx, dict_table_t *table,
1641 aux_name_vec_t *aux_vec) {
1642 dberr_t error;
1643 fts_table_t fts_table;
1644
1645 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1646
1647 error = fts_drop_common_tables(trx, &fts_table, aux_vec);
1648
1649 if (error == DB_SUCCESS) {
1650 error = fts_drop_all_index_tables(trx, table->fts, aux_vec);
1651 }
1652
1653 return (error);
1654 }
1655
1656 /** Lock all FTS AUX COMMON tables (for dropping table)
1657 @param[in] thd thread locking the AUX table
1658 @param[in,out] fts_table table with fts index
1659 @return DB_SUCCESS or error code */
1660 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_lock_common_tables(THD * thd,fts_table_t * fts_table)1661 fts_lock_common_tables(THD *thd, fts_table_t *fts_table) {
1662 for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
1663 fts_table->suffix = fts_common_tables[i];
1664
1665 char table_name[MAX_FULL_NAME_LEN];
1666 fts_get_table_name(fts_table, table_name);
1667
1668 std::string db_n;
1669 std::string table_n;
1670 dict_name::get_table(table_name, db_n, table_n);
1671
1672 MDL_ticket *exclusiv_mdl = nullptr;
1673 if (dd::acquire_exclusive_table_mdl(thd, db_n.c_str(), table_n.c_str(),
1674 false, &exclusiv_mdl)) {
1675 return (DB_ERROR);
1676 }
1677 }
1678
1679 return (DB_SUCCESS);
1680 }
1681
1682 /** Lock all FTS INDEX AUX tables (for dropping table)
1683 @param[in] thd thread locking the AUX table
1684 @param[in] index fts index
1685 @return DB_SUCCESS or error code */
fts_lock_index_tables(THD * thd,dict_index_t * index)1686 dberr_t fts_lock_index_tables(THD *thd, dict_index_t *index) {
1687 ulint i;
1688 fts_table_t fts_table;
1689
1690 FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
1691
1692 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1693 fts_table.suffix = fts_get_suffix(i);
1694
1695 char table_name[MAX_FULL_NAME_LEN];
1696 fts_get_table_name(&fts_table, table_name);
1697
1698 std::string db_n;
1699 std::string table_n;
1700 dict_name::get_table(table_name, db_n, table_n);
1701
1702 MDL_ticket *exclusiv_mdl = nullptr;
1703 if (dd::acquire_exclusive_table_mdl(thd, db_n.c_str(), table_n.c_str(),
1704 false, &exclusiv_mdl)) {
1705 return (DB_ERROR);
1706 }
1707 }
1708 return (DB_SUCCESS);
1709 }
1710
1711 /** Lock all FTS index AUX tables (for dropping table)
1712 @param[in] thd thread locking the AUX table
1713 @param[in] fts fts instance
1714 @return DB_SUCCESS or error code */
1715 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_lock_all_index_tables(THD * thd,fts_t * fts)1716 fts_lock_all_index_tables(THD *thd, fts_t *fts) {
1717 dberr_t error = DB_SUCCESS;
1718
1719 for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
1720 ++i) {
1721 dberr_t err;
1722 dict_index_t *index;
1723
1724 index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
1725
1726 err = fts_lock_index_tables(thd, index);
1727
1728 if (err != DB_SUCCESS) {
1729 error = err;
1730 }
1731 }
1732
1733 return (error);
1734 }
1735
1736 /** Lock all FTS AUX tables (for dropping table)
1737 @param[in] thd thread locking the AUX table
1738 @param[in] table table has the fts index
1739 @return DB_SUCCESS or error code */
fts_lock_all_aux_tables(THD * thd,dict_table_t * table)1740 dberr_t fts_lock_all_aux_tables(THD *thd, dict_table_t *table) {
1741 dberr_t error;
1742 fts_table_t fts_table;
1743
1744 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1745
1746 error = fts_lock_common_tables(thd, &fts_table);
1747
1748 if (error == DB_SUCCESS) {
1749 error = fts_lock_all_index_tables(thd, table->fts);
1750 }
1751
1752 return (error);
1753 }
1754
1755 /** Extract only the required flags from table->flags2 for FTS Aux
1756 tables.
1757 @param[in] flags2 Table flags2
1758 @return extracted flags2 for FTS aux tables */
fts_get_table_flags2_for_aux_tables(uint32_t flags2)1759 static inline uint32_t fts_get_table_flags2_for_aux_tables(uint32_t flags2) {
1760 /* Extract the file_per_table flag, temporary file flag and encryption flag
1761 from the main FTS table flags2 */
1762 return ((flags2 & DICT_TF2_USE_FILE_PER_TABLE) |
1763 (flags2 & DICT_TF2_ENCRYPTION_FILE_PER_TABLE) |
1764 (flags2 & DICT_TF2_TEMPORARY) | DICT_TF2_AUX);
1765 }
1766
1767 /** Create dict_table_t object for FTS Aux tables.
1768 @param[in] aux_table_name FTS Aux table name
1769 @param[in] table table object of FTS Index
1770 @param[in] n_cols number of columns for FTS Aux table
1771 @return table object for FTS Aux table */
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1772 static dict_table_t *fts_create_in_mem_aux_table(const char *aux_table_name,
1773 const dict_table_t *table,
1774 ulint n_cols) {
1775 dict_table_t *new_table = dict_mem_table_create(
1776 aux_table_name, table->space, n_cols, 0, 0, table->flags,
1777 fts_get_table_flags2_for_aux_tables(table->flags2));
1778
1779 if (DICT_TF_HAS_SHARED_SPACE(table->flags)) {
1780 ut_ad(table->space == fil_space_get_id_by_name(table->tablespace()));
1781 new_table->tablespace = mem_heap_strdup(new_table->heap, table->tablespace);
1782 }
1783
1784 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1785 ut_ad(table->data_dir_path != nullptr);
1786 new_table->data_dir_path =
1787 mem_heap_strdup(new_table->heap, table->data_dir_path);
1788 }
1789
1790 return (new_table);
1791 }
1792
1793 /** Function to create on FTS common table.
1794 @param[in,out] trx InnoDB transaction
1795 @param[in] table Table that has FTS Index
1796 @param[in] fts_table_name FTS AUX table name
1797 @param[in] fts_suffix FTS AUX table suffix
1798 @param[in] heap heap
1799 @return table object if created, else NULL */
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1800 static dict_table_t *fts_create_one_common_table(trx_t *trx,
1801 const dict_table_t *table,
1802 const char *fts_table_name,
1803 const char *fts_suffix,
1804 mem_heap_t *heap) {
1805 dict_table_t *new_table = nullptr;
1806 dberr_t error;
1807 bool is_config = fts_suffix == FTS_SUFFIX_CONFIG;
1808
1809 if (!is_config) {
1810 new_table = fts_create_in_mem_aux_table(fts_table_name, table,
1811 FTS_DELETED_TABLE_NUM_COLS);
1812
1813 dict_mem_table_add_col(new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1814 FTS_DELETED_TABLE_COL_LEN, true);
1815 } else {
1816 /* Config table has different schema. */
1817 new_table = fts_create_in_mem_aux_table(fts_table_name, table,
1818 FTS_CONFIG_TABLE_NUM_COLS);
1819
1820 dict_mem_table_add_col(new_table, heap, "key", DATA_VARCHAR, 0,
1821 FTS_CONFIG_TABLE_KEY_COL_LEN, true);
1822
1823 dict_mem_table_add_col(new_table, heap, "value", DATA_VARCHAR,
1824 DATA_NOT_NULL, FTS_CONFIG_TABLE_VALUE_COL_LEN, true);
1825 }
1826
1827 error = row_create_table_for_mysql(new_table, nullptr, trx);
1828
1829 if (error == DB_SUCCESS) {
1830 dict_index_t *index = dict_mem_index_create(
1831 fts_table_name, "FTS_COMMON_TABLE_IND", new_table->space,
1832 DICT_UNIQUE | DICT_CLUSTERED, 1);
1833
1834 if (!is_config) {
1835 index->add_field("doc_id", 0, true);
1836 } else {
1837 index->add_field("key", 0, true);
1838 }
1839
1840 /* We save and restore trx->dict_operation because
1841 row_create_index_for_mysql() changes the operation to
1842 TRX_DICT_OP_TABLE. */
1843 trx_dict_op_t op = trx_get_dict_operation(trx);
1844
1845 error = row_create_index_for_mysql(index, trx, nullptr, nullptr);
1846
1847 trx->dict_operation = op;
1848 }
1849
1850 if (error != DB_SUCCESS) {
1851 trx->error_state = error;
1852 new_table = nullptr;
1853 ib::warn(ER_IB_MSG_465)
1854 << "Failed to create FTS common table " << fts_table_name;
1855 }
1856
1857 return (new_table);
1858 }
1859
1860 /** Check if common tables already exist
1861 @param[in] table table with fts index
1862 @return true on success, false on failure */
fts_check_common_tables_exist(const dict_table_t * table)1863 bool fts_check_common_tables_exist(const dict_table_t *table) {
1864 fts_table_t fts_table;
1865 char fts_name[MAX_FULL_NAME_LEN];
1866
1867 /* TODO: set a new flag for the situation table has hidden
1868 FTS_DOC_ID but no FTS indexes. */
1869 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1870 fts_table.suffix = FTS_SUFFIX_CONFIG;
1871 fts_get_table_name(&fts_table, fts_name);
1872
1873 dict_table_t *config_table;
1874 THD *thd = current_thd;
1875 MDL_ticket *mdl = reinterpret_cast<MDL_ticket *>(-1);
1876
1877 /* Check that the table exists in our data dictionary */
1878 config_table = dd_table_open_on_name(
1879 thd, &mdl, fts_name, false,
1880 static_cast<dict_err_ignore_t>(DICT_ERR_IGNORE_INDEX_ROOT |
1881 DICT_ERR_IGNORE_CORRUPT));
1882
1883 bool exist = false;
1884 if (config_table != nullptr) {
1885 dd_table_close(config_table, thd, &mdl, false);
1886 exist = true;
1887 }
1888
1889 return (exist);
1890 }
1891
1892 /** Creates the common auxiliary tables needed for supporting an FTS index
1893 on the given table. row_mysql_lock_data_dictionary must have been called
1894 before this.
1895 The following tables are created.
1896 CREATE TABLE $FTS_PREFIX_DELETED
1897 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1898 CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1899 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1900 CREATE TABLE $FTS_PREFIX_BEING_DELETED
1901 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1902 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1903 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1904 CREATE TABLE $FTS_PREFIX_CONFIG
1905 (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1906 @param[in,out] trx transaction
1907 @param[in] table table with FTS index
1908 @param[in] name table name normalized
1909 @param[in] skip_doc_id_index Skip index on doc id
1910 @return DB_SUCCESS if succeed */
fts_create_common_tables(trx_t * trx,const dict_table_t * table,const char * name,bool skip_doc_id_index)1911 dberr_t fts_create_common_tables(trx_t *trx, const dict_table_t *table,
1912 const char *name, bool skip_doc_id_index) {
1913 dberr_t error;
1914 fts_table_t fts_table;
1915 char full_name[sizeof(fts_common_tables) / sizeof(char *)][MAX_FULL_NAME_LEN];
1916 dict_index_t *index = nullptr;
1917 trx_dict_op_t op;
1918
1919 ut_ad(!mutex_own(&dict_sys->mutex));
1920 ut_ad(!fts_check_common_tables_exist(table));
1921
1922 mem_heap_t *heap = mem_heap_create(1024);
1923
1924 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
1925
1926 /* Create the FTS tables that are common to an FTS index. */
1927 for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
1928 fts_table.suffix = fts_common_tables[i];
1929 fts_get_table_name(&fts_table, full_name[i]);
1930 dict_table_t *common_table = fts_create_one_common_table(
1931 trx, table, full_name[i], fts_table.suffix, heap);
1932
1933 if (common_table == nullptr) {
1934 error = DB_ERROR;
1935 goto func_exit;
1936 }
1937
1938 DBUG_EXECUTE_IF(
1939 "ib_fts_aux_table_error",
1940 /* Return error after creating FTS_AUX_CONFIG table. */
1941 if (i == 4) {
1942 error = DB_ERROR;
1943 goto func_exit;
1944 });
1945 }
1946
1947 /* Write the default settings to the config table. */
1948 error = fts_init_config_table(&fts_table);
1949
1950 if (error != DB_SUCCESS || skip_doc_id_index) {
1951 goto func_exit;
1952 }
1953
1954 index = dict_mem_index_create(name, FTS_DOC_ID_INDEX_NAME, table->space,
1955 DICT_UNIQUE, 1);
1956 index->add_field(FTS_DOC_ID_COL_NAME, 0, true);
1957
1958 op = trx_get_dict_operation(trx);
1959
1960 error = row_create_index_for_mysql(index, trx, nullptr, nullptr);
1961
1962 trx->dict_operation = op;
1963
1964 func_exit:
1965 mem_heap_free(heap);
1966
1967 return (error);
1968 }
1969
1970 /** Creates one FTS auxiliary index table for an FTS index.
1971 @param[in,out] trx transaction
1972 @param[in] index the index instance
1973 @param[in] fts_table fts_table structure
1974 @param[in] heap memory heap
1975 @return DB_SUCCESS or error code */
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)1976 static dict_table_t *fts_create_one_index_table(trx_t *trx,
1977 const dict_index_t *index,
1978 fts_table_t *fts_table,
1979 mem_heap_t *heap) {
1980 dict_field_t *field;
1981 dict_table_t *new_table = nullptr;
1982 char table_name[MAX_FULL_NAME_LEN];
1983 dberr_t error;
1984 CHARSET_INFO *charset;
1985
1986 ut_ad(index->type & DICT_FTS);
1987
1988 fts_get_table_name(fts_table, table_name);
1989
1990 new_table = fts_create_in_mem_aux_table(table_name, fts_table->table,
1991 FTS_AUX_INDEX_TABLE_NUM_COLS);
1992
1993 field = index->get_field(0);
1994 charset = fts_get_charset(field->col->prtype);
1995
1996 dict_mem_table_add_col(
1997 new_table, heap, "word",
1998 charset == &my_charset_latin1 ? DATA_VARCHAR : DATA_VARMYSQL,
1999 field->col->prtype, FTS_INDEX_WORD_LEN, true);
2000
2001 dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
2002 DATA_NOT_NULL | DATA_UNSIGNED,
2003 FTS_INDEX_FIRST_DOC_ID_LEN, true);
2004
2005 dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2006 DATA_NOT_NULL | DATA_UNSIGNED,
2007 FTS_INDEX_LAST_DOC_ID_LEN, true);
2008
2009 dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2010 DATA_NOT_NULL | DATA_UNSIGNED, FTS_INDEX_DOC_COUNT_LEN,
2011 true);
2012
2013 /* The precise type calculation is as follows:
2014 least signficiant byte: MySQL type code (not applicable for sys cols)
2015 second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2016 third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2017
2018 dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB,
2019 (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2020 FTS_INDEX_ILIST_LEN, true);
2021
2022 error = row_create_table_for_mysql(new_table, nullptr, trx);
2023
2024 if (error == DB_SUCCESS) {
2025 dict_index_t *index = dict_mem_index_create(
2026 table_name, "FTS_INDEX_TABLE_IND", new_table->space,
2027 DICT_UNIQUE | DICT_CLUSTERED, 2);
2028 index->add_field("word", 0, true);
2029 index->add_field("first_doc_id", 0, true);
2030
2031 trx_dict_op_t op = trx_get_dict_operation(trx);
2032
2033 error = row_create_index_for_mysql(index, trx, nullptr, nullptr);
2034
2035 trx->dict_operation = op;
2036 }
2037
2038 if (error != DB_SUCCESS) {
2039 trx->error_state = error;
2040 new_table = nullptr;
2041 ib::warn(ER_IB_MSG_466)
2042 << "Failed to create FTS index table " << table_name;
2043 }
2044
2045 return (new_table);
2046 }
2047
2048 /** Freeze all auiliary tables to be not evictable if exist, with dict_mutex
2049 held
2050 @param[in] table InnoDB table object */
fts_freeze_aux_tables(const dict_table_t * table)2051 void fts_freeze_aux_tables(const dict_table_t *table) {
2052 fts_table_t fts_table;
2053 char table_name[MAX_FULL_NAME_LEN];
2054
2055 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
2056
2057 for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
2058 fts_table.suffix = fts_common_tables[i];
2059 fts_get_table_name(&fts_table, table_name);
2060
2061 dict_table_t *common;
2062 common = dd_table_open_on_name_in_mem(table_name, true);
2063 if (common != nullptr && common->can_be_evicted) {
2064 dict_table_prevent_eviction(common);
2065 }
2066
2067 if (common != nullptr) {
2068 dd_table_close(common, nullptr, nullptr, true);
2069 }
2070 }
2071
2072 fts_t *fts = table->fts;
2073 if (fts == nullptr) {
2074 return;
2075 }
2076
2077 for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
2078 ++i) {
2079 dict_index_t *index;
2080 index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
2081
2082 FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
2083
2084 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
2085 fts_table.suffix = fts_get_suffix(j);
2086 fts_get_table_name(&fts_table, table_name);
2087
2088 dict_table_t *index_table;
2089 index_table = dd_table_open_on_name_in_mem(table_name, true);
2090 if (index_table != nullptr && index_table->can_be_evicted) {
2091 dict_table_prevent_eviction(index_table);
2092 }
2093
2094 if (index_table != nullptr) {
2095 dd_table_close(index_table, nullptr, nullptr, true);
2096 }
2097 }
2098 }
2099 }
2100
2101 /** Allow all the auxiliary tables of specified base table to be evictable
2102 if they exist, if not exist just ignore
2103 @param[in] table InnoDB table object
2104 @param[in] dict_locked True if we have dict_sys mutex */
fts_detach_aux_tables(const dict_table_t * table,bool dict_locked)2105 void fts_detach_aux_tables(const dict_table_t *table, bool dict_locked) {
2106 fts_table_t fts_table;
2107 char table_name[MAX_FULL_NAME_LEN];
2108
2109 if (!dict_locked) {
2110 mutex_enter(&dict_sys->mutex);
2111 }
2112
2113 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
2114
2115 for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
2116 fts_table.suffix = fts_common_tables[i];
2117 fts_get_table_name(&fts_table, table_name);
2118
2119 dict_table_t *common;
2120 common = dd_table_open_on_name_in_mem(table_name, true);
2121 if (common != nullptr && !common->can_be_evicted) {
2122 dict_table_allow_eviction(common);
2123 }
2124
2125 if (common != nullptr) {
2126 dd_table_close(common, nullptr, nullptr, true);
2127 }
2128 }
2129
2130 fts_t *fts = table->fts;
2131 if (fts == nullptr) {
2132 if (!dict_locked) {
2133 mutex_exit(&dict_sys->mutex);
2134 }
2135
2136 return;
2137 }
2138
2139 for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
2140 ++i) {
2141 dict_index_t *index;
2142 index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
2143
2144 FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index);
2145
2146 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
2147 fts_table.suffix = fts_get_suffix(j);
2148 fts_get_table_name(&fts_table, table_name);
2149
2150 dict_table_t *index_table;
2151 index_table = dd_table_open_on_name_in_mem(table_name, true);
2152 if (index_table != nullptr && !index_table->can_be_evicted) {
2153 dict_table_allow_eviction(index_table);
2154 }
2155
2156 if (index_table != nullptr) {
2157 dd_table_close(index_table, nullptr, nullptr, true);
2158 }
2159 }
2160 }
2161
2162 if (!dict_locked) {
2163 mutex_exit(&dict_sys->mutex);
2164 }
2165 }
2166
2167 /** Update DD system table for auxiliary common tables for an FTS index.
2168 @param[in] table dict table instance
2169 @return true on success, false on failure */
fts_create_common_dd_tables(const dict_table_t * table)2170 bool fts_create_common_dd_tables(const dict_table_t *table) {
2171 fts_table_t fts_table;
2172 bool ret = true;
2173
2174 FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, table);
2175
2176 /* Create the FTS tables that are common to an FTS index. */
2177 for (ulint i = 0; fts_common_tables[i] != nullptr && ret; ++i) {
2178 char table_name[MAX_FULL_NAME_LEN];
2179
2180 fts_table.suffix = fts_common_tables[i];
2181 fts_get_table_name(&fts_table, table_name);
2182
2183 dict_table_t *common_table;
2184 common_table = dd_table_open_on_name_in_mem(table_name, false);
2185 ut_ad(common_table != nullptr);
2186
2187 bool is_config = fts_table.suffix == FTS_SUFFIX_CONFIG;
2188 ret = dd_create_fts_common_table(table, common_table, is_config);
2189
2190 dd_table_close(common_table, nullptr, nullptr, false);
2191 }
2192
2193 return (ret);
2194 }
2195
2196 /** Update DD system table for auxiliary index tables for an FTS index.
2197 @param[in] index the index instance
2198 @return DB_SUCCESS or error code */
fts_create_one_index_dd_tables(const dict_index_t * index)2199 static dberr_t fts_create_one_index_dd_tables(const dict_index_t *index) {
2200 ulint i;
2201 fts_table_t fts_table;
2202 dberr_t error = DB_SUCCESS;
2203 char *parent_name = index->table->name.m_name;
2204
2205 fts_table.type = FTS_INDEX_TABLE;
2206 fts_table.index_id = index->id;
2207 fts_table.table_id = index->table->id;
2208 fts_table.parent = parent_name;
2209 fts_table.table = index->table;
2210
2211 for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2212 dict_table_t *new_table;
2213 char table_name[MAX_FULL_NAME_LEN];
2214 CHARSET_INFO *charset;
2215 dict_field_t *field;
2216
2217 ut_ad(index->type & DICT_FTS);
2218
2219 field = index->get_field(0);
2220 charset = fts_get_charset(field->col->prtype);
2221
2222 fts_table.suffix = fts_get_suffix(i);
2223 fts_get_table_name(&fts_table, table_name);
2224
2225 new_table = dd_table_open_on_name_in_mem(table_name, false);
2226 ut_ad(new_table != nullptr);
2227
2228 if (!dd_create_fts_index_table(fts_table.table, new_table, charset)) {
2229 ib::warn(ER_IB_MSG_467)
2230 << "Failed to create FTS index dd table " << table_name;
2231 error = DB_FAIL;
2232 }
2233
2234 dd_table_close(new_table, nullptr, nullptr, false);
2235 }
2236
2237 return (error);
2238 }
2239
2240 /** Check if a table has FTS index needs to have its auxiliary index
2241 tables' metadata updated in DD
2242 @param[in,out] table table to check
2243 @return DB_SUCCESS or error code */
fts_create_index_dd_tables(dict_table_t * table)2244 dberr_t fts_create_index_dd_tables(dict_table_t *table) {
2245 dberr_t error = DB_SUCCESS;
2246
2247 for (dict_index_t *index = table->first_index();
2248 index != nullptr && error == DB_SUCCESS; index = index->next()) {
2249 if ((index->type & DICT_FTS) && index->fill_dd) {
2250 error = fts_create_one_index_dd_tables(index);
2251 index->fill_dd = false;
2252 }
2253
2254 ut_ad(!index->fill_dd);
2255 }
2256
2257 return (error);
2258 }
2259
2260 /** Create auxiliary index tables for an FTS index.
2261 @param[in,out] trx transaction
2262 @param[in] index the index instance
2263 @param[in] table_name table name
2264 @param[in] table_id the table id
2265 @return DB_SUCCESS or error code */
fts_create_index_tables_low(trx_t * trx,dict_index_t * index,const char * table_name,table_id_t table_id)2266 dberr_t fts_create_index_tables_low(trx_t *trx, dict_index_t *index,
2267 const char *table_name,
2268 table_id_t table_id) {
2269 ulint i;
2270 fts_table_t fts_table;
2271 dberr_t error = DB_SUCCESS;
2272 mem_heap_t *heap = mem_heap_create(1024);
2273
2274 fts_table.type = FTS_INDEX_TABLE;
2275 fts_table.index_id = index->id;
2276 fts_table.table_id = table_id;
2277 fts_table.parent = table_name;
2278 fts_table.table = index->table;
2279
2280 for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2281 dict_table_t *new_table;
2282
2283 /* Create the FTS auxiliary tables that are specific
2284 to an FTS index. We need to preserve the table_id %s
2285 which fts_parse_sql() will fill in for us. */
2286 fts_table.suffix = fts_get_suffix(i);
2287
2288 new_table = fts_create_one_index_table(trx, index, &fts_table, heap);
2289
2290 if (new_table == nullptr) {
2291 error = DB_FAIL;
2292 break;
2293 }
2294
2295 DBUG_EXECUTE_IF(
2296 "ib_fts_index_table_error",
2297 /* Return error after creating FTS_INDEX_5
2298 aux table. */
2299 if (i == 4) {
2300 error = DB_FAIL;
2301 break;
2302 });
2303 }
2304
2305 if (error == DB_SUCCESS) {
2306 index->fill_dd = true;
2307 }
2308
2309 mem_heap_free(heap);
2310
2311 return (error);
2312 }
2313
2314 /** Creates the column specific ancillary tables needed for supporting an
2315 FTS index on the given table. row_mysql_lock_data_dictionary must have
2316 been called before this.
2317
2318 All FTS AUX Index tables have the following schema.
2319 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2320 word VARCHAR(FTS_MAX_WORD_LEN),
2321 first_doc_id INT NOT NULL,
2322 last_doc_id UNSIGNED NOT NULL,
2323 doc_count UNSIGNED INT NOT NULL,
2324 ilist VARBINARY NOT NULL,
2325 UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2326 @param[in,out] trx transaction
2327 @param[in] index index instance
2328 @return DB_SUCCESS or error code */
fts_create_index_tables(trx_t * trx,dict_index_t * index)2329 dberr_t fts_create_index_tables(trx_t *trx, dict_index_t *index) {
2330 dberr_t err;
2331 dict_table_t *table;
2332
2333 ut_ad(!mutex_own(&dict_sys->mutex));
2334
2335 table = dd_table_open_on_name_in_mem(index->table_name, false);
2336 ut_a(table != nullptr);
2337 ut_d(mutex_enter(&dict_sys->mutex));
2338 ut_ad(table->get_ref_count() > 1);
2339 ut_d(mutex_exit(&dict_sys->mutex));
2340
2341 err = fts_create_index_tables_low(trx, index, table->name.m_name, table->id);
2342
2343 dd_table_close(table, nullptr, nullptr, false);
2344
2345 return (err);
2346 }
#if 0
/** Return string representation of state.
This helper is compiled out; it is kept for debugging purposes only.
@param[in]	state	row state
@return string representation of state, "UNKNOWN" for unexpected values */
static const char *fts_get_state_str(fts_row_state state) {
  switch (state) {
    case FTS_INSERT:
      return ("INSERT");

    case FTS_MODIFY:
      return ("MODIFY");

    case FTS_DELETE:
      return ("DELETE");

    case FTS_NOTHING:
      return ("NOTHING");

    case FTS_INVALID:
      return ("INVALID");

    default:
      return ("UNKNOWN");
  }
}
#endif
2377
2378 /** Calculate the new state of a row given the existing state and a new event.
2379 @return new state of row */
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2380 static fts_row_state fts_trx_row_get_new_state(
2381 fts_row_state old_state, /*!< in: existing state of row */
2382 fts_row_state event) /*!< in: new event */
2383 {
2384 /* The rules for transforming states:
2385
2386 I = inserted
2387 M = modified
2388 D = deleted
2389 N = nothing
2390
2391 M+D -> D:
2392
2393 If the row existed before the transaction started and it is modified
2394 during the transaction, followed by a deletion of the row, only the
2395 deletion will be signaled.
2396
2397 M+ -> M:
2398
2399 If the row existed before the transaction started and it is modified
2400 more than once during the transaction, only the last modification
2401 will be signaled.
2402
2403 IM*D -> N:
2404
2405 If a new row is added during the transaction (and possibly modified
2406 after its initial insertion) but it is deleted before the end of the
2407 transaction, nothing will be signaled.
2408
2409 IM* -> I:
2410
2411 If a new row is added during the transaction and modified after its
2412 initial insertion, only the addition will be signaled.
2413
2414 M*DI -> M:
2415
2416 If the row existed before the transaction started and it is deleted,
2417 then re-inserted, only a modification will be signaled. Note that
2418 this case is only possible if the table is using the row's primary
2419 key for FTS row ids, since those can be re-inserted by the user,
2420 which is not true for InnoDB generated row ids.
2421
2422 It is easily seen that the above rules decompose such that we do not
2423 need to store the row's entire history of events. Instead, we can
2424 store just one state for the row and update that when new events
2425 arrive. Then we can implement the above rules as a two-dimensional
2426 look-up table, and get checking of invalid combinations "for free"
2427 in the process. */
2428
2429 /* The lookup table for transforming states. old_state is the
2430 Y-axis, event is the X-axis. */
2431 static const fts_row_state table[4][4] = {
2432 /* I M D N */
2433 /* I */ {FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID},
2434 /* M */ {FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID},
2435 /* D */ {FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID},
2436 /* N */ {FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID}};
2437
2438 fts_row_state result;
2439
2440 ut_a(old_state < FTS_INVALID);
2441 ut_a(event < FTS_INVALID);
2442
2443 result = table[(int)old_state][(int)event];
2444 ut_a(result != FTS_INVALID);
2445
2446 return (result);
2447 }
2448
2449 /** Create a savepoint instance.
2450 @return savepoint instance */
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2451 static fts_savepoint_t *fts_savepoint_create(
2452 ib_vector_t *savepoints, /*!< out: InnoDB transaction */
2453 const char *name, /*!< in: savepoint name */
2454 mem_heap_t *heap) /*!< in: heap */
2455 {
2456 fts_savepoint_t *savepoint;
2457
2458 savepoint =
2459 static_cast<fts_savepoint_t *>(ib_vector_push(savepoints, nullptr));
2460
2461 memset(savepoint, 0x0, sizeof(*savepoint));
2462
2463 if (name) {
2464 savepoint->name = mem_heap_strdup(heap, name);
2465 }
2466
2467 savepoint->tables = rbt_create(sizeof(fts_trx_table_t *), fts_trx_table_cmp);
2468
2469 return (savepoint);
2470 }
2471
2472 /** Create an FTS trx.
2473 @param[in,out] trx InnoDB Transaction
2474 @return FTS transaction. */
fts_trx_create(trx_t * trx)2475 fts_trx_t *fts_trx_create(trx_t *trx) {
2476 fts_trx_t *ftt;
2477 ib_alloc_t *heap_alloc;
2478 mem_heap_t *heap = mem_heap_create(1024);
2479 trx_named_savept_t *savep;
2480
2481 ut_a(trx->fts_trx == nullptr);
2482
2483 ftt = static_cast<fts_trx_t *>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2484 ftt->trx = trx;
2485 ftt->heap = heap;
2486
2487 heap_alloc = ib_heap_allocator_create(heap);
2488
2489 ftt->savepoints = static_cast<ib_vector_t *>(
2490 ib_vector_create(heap_alloc, sizeof(fts_savepoint_t), 4));
2491
2492 ftt->last_stmt = static_cast<ib_vector_t *>(
2493 ib_vector_create(heap_alloc, sizeof(fts_savepoint_t), 4));
2494
2495 /* Default instance has no name and no heap. */
2496 fts_savepoint_create(ftt->savepoints, nullptr, nullptr);
2497 fts_savepoint_create(ftt->last_stmt, nullptr, nullptr);
2498
2499 /* Copy savepoints that already set before. */
2500 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints); savep != nullptr;
2501 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2502 fts_savepoint_take(trx, ftt, savep->name);
2503 }
2504
2505 return (ftt);
2506 }
2507
2508 /** Create an FTS trx table.
2509 @return FTS trx table */
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2510 static fts_trx_table_t *fts_trx_table_create(
2511 fts_trx_t *fts_trx, /*!< in: FTS trx */
2512 dict_table_t *table) /*!< in: table */
2513 {
2514 fts_trx_table_t *ftt;
2515
2516 ftt = static_cast<fts_trx_table_t *>(
2517 mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2518
2519 memset(ftt, 0x0, sizeof(*ftt));
2520
2521 ftt->table = table;
2522 ftt->fts_trx = fts_trx;
2523
2524 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2525
2526 return (ftt);
2527 }
2528
2529 /** Clone an FTS trx table.
2530 @return FTS trx table */
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2531 static fts_trx_table_t *fts_trx_table_clone(
2532 const fts_trx_table_t *ftt_src) /*!< in: FTS trx */
2533 {
2534 fts_trx_table_t *ftt;
2535
2536 ftt = static_cast<fts_trx_table_t *>(
2537 mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2538
2539 memset(ftt, 0x0, sizeof(*ftt));
2540
2541 ftt->table = ftt_src->table;
2542 ftt->fts_trx = ftt_src->fts_trx;
2543
2544 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2545
2546 /* Copy the rb tree values to the new savepoint. */
2547 rbt_merge_uniq(ftt->rows, ftt_src->rows);
2548
2549 /* These are only added on commit. At this stage we only have
2550 the updated row state. */
2551 ut_a(ftt_src->added_doc_ids == nullptr);
2552
2553 return (ftt);
2554 }
2555
2556 /** Initialize the FTS trx instance.
2557 @return FTS trx instance */
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2558 static fts_trx_table_t *fts_trx_init(
2559 trx_t *trx, /*!< in: transaction */
2560 dict_table_t *table, /*!< in: FTS table instance */
2561 ib_vector_t *savepoints) /*!< in: Savepoints */
2562 {
2563 fts_trx_table_t *ftt;
2564 ib_rbt_bound_t parent;
2565 ib_rbt_t *tables;
2566 fts_savepoint_t *savepoint;
2567
2568 savepoint = static_cast<fts_savepoint_t *>(ib_vector_last(savepoints));
2569
2570 tables = savepoint->tables;
2571 rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, nullptr);
2572
2573 if (parent.result == 0) {
2574 fts_trx_table_t **fttp;
2575
2576 fttp = rbt_value(fts_trx_table_t *, parent.last);
2577 ftt = *fttp;
2578 } else {
2579 ftt = fts_trx_table_create(trx->fts_trx, table);
2580 rbt_add_node(tables, &parent, &ftt);
2581 }
2582
2583 ut_a(ftt->table == table);
2584
2585 return (ftt);
2586 }
2587
2588 /** Notify the FTS system about an operation on an FTS-indexed table. */
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2589 static void fts_trx_table_add_op(
2590 fts_trx_table_t *ftt, /*!< in: FTS trx table */
2591 doc_id_t doc_id, /*!< in: doc id */
2592 fts_row_state state, /*!< in: state of the row */
2593 ib_vector_t *fts_indexes) /*!< in: FTS indexes affected */
2594 {
2595 ib_rbt_t *rows;
2596 ib_rbt_bound_t parent;
2597
2598 rows = ftt->rows;
2599 rbt_search(rows, &parent, &doc_id);
2600
2601 /* Row id found, update state, and if new state is FTS_NOTHING,
2602 we delete the row from our tree. */
2603 if (parent.result == 0) {
2604 fts_trx_row_t *row = rbt_value(fts_trx_row_t, parent.last);
2605
2606 row->state = fts_trx_row_get_new_state(row->state, state);
2607
2608 if (row->state == FTS_NOTHING) {
2609 if (row->fts_indexes) {
2610 ib_vector_free(row->fts_indexes);
2611 }
2612
2613 ut_free(rbt_remove_node(rows, parent.last));
2614 row = nullptr;
2615 } else if (row->fts_indexes != nullptr) {
2616 ib_vector_free(row->fts_indexes);
2617 row->fts_indexes = fts_indexes;
2618 }
2619
2620 } else { /* Row-id not found, create a new one. */
2621 fts_trx_row_t row;
2622
2623 row.doc_id = doc_id;
2624 row.state = state;
2625 row.fts_indexes = fts_indexes;
2626
2627 rbt_add_node(rows, &parent, &row);
2628 }
2629 }
2630
2631 /** Notify the FTS system about an operation on an FTS-indexed table. */
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2632 void fts_trx_add_op(trx_t *trx, /*!< in: InnoDB transaction */
2633 dict_table_t *table, /*!< in: table */
2634 doc_id_t doc_id, /*!< in: new doc id */
2635 fts_row_state state, /*!< in: state of the row */
2636 ib_vector_t *fts_indexes) /*!< in: FTS indexes affected
2637 (NULL=all) */
2638 {
2639 fts_trx_table_t *tran_ftt;
2640 fts_trx_table_t *stmt_ftt;
2641
2642 if (!trx->fts_trx) {
2643 trx->fts_trx = fts_trx_create(trx);
2644 }
2645
2646 tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2647 stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2648
2649 fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2650 fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2651 }
2652
2653 /** Fetch callback that converts a textual document id to a binary value and
2654 stores it in the given place.
2655 @return always returns NULL */
fts_fetch_store_doc_id(void * row,void * user_arg)2656 static ibool fts_fetch_store_doc_id(void *row, /*!< in: sel_node_t* */
2657 void *user_arg) /*!< in: doc_id_t* to store
2658 doc_id in */
2659 {
2660 int n_parsed;
2661 sel_node_t *node = static_cast<sel_node_t *>(row);
2662 doc_id_t *doc_id = static_cast<doc_id_t *>(user_arg);
2663 dfield_t *dfield = que_node_get_val(node->select_list);
2664 dtype_t *type = dfield_get_type(dfield);
2665 ulint len = dfield_get_len(dfield);
2666
2667 char buf[32];
2668
2669 ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2670 ut_a(len > 0 && len < sizeof(buf));
2671
2672 memcpy(buf, dfield_get_data(dfield), len);
2673 buf[len] = '\0';
2674
2675 n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2676 ut_a(n_parsed == 1);
2677
2678 return (FALSE);
2679 }
2680
#ifdef FTS_CACHE_SIZE_DEBUG
/** Get the max cache size in bytes. If there is an error reading the
value we simply print an error message here and return the default
value to the caller. A configured value outside of
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB] is
clamped to the nearest limit and a warning is logged.
@param[in]	trx		transaction
@param[in]	fts_table	table instance
@return max cache size in bytes */
static ulint fts_get_max_cache_size(trx_t *trx, fts_table_t *fts_table) {
  dberr_t error;
  fts_string_t value;

  /* Set to the default value. */
  ulint cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

  /* We set the length of value to the max bytes it can hold. This
  information is used by the callback that reads the value. */
  value.f_n_char = 0;
  value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
  value.f_str = ut_malloc_nokey(value.f_len + 1);

  error =
      fts_config_get_value(trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

  if (error == DB_SUCCESS) {
    /* NOTE(review): assumes fts_config_get_value() leaves value.f_len no
    larger than the allocated FTS_MAX_CONFIG_VALUE_LEN - confirm. */
    value.f_str[value.f_len] = 0;
    cache_size_in_mb = strtoul((char *)value.f_str, nullptr, 10);

    if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {
      ib::warn(ER_IB_MSG_468)
          << "FTS max cache size (" << cache_size_in_mb
          << ") out of range."
             " Minimum value is "
          << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB << "MB and the maximum value is "
          << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
          << "MB, setting cache size to upper limit";

      cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

    } else if (cache_size_in_mb < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {
      /* Same wording as the upper limit warning, including the blank
      before the maximum value. */
      ib::warn(ER_IB_MSG_469)
          << "FTS max cache size (" << cache_size_in_mb
          << ") out of range."
             " Minimum value is "
          << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB << "MB and the maximum value is "
          << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
          << "MB, setting cache size to lower limit";

      cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
    }
  } else {
    ib::error(ER_IB_MSG_470) << "(" << ut_strerr(error)
                             << ") reading max"
                                " cache config value from config table";
  }

  ut_free(value.f_str);

  return (cache_size_in_mb * 1024 * 1024);
}
#endif
2743
2744 /** Update the next and last Doc ID in the CONFIG table to be the input
2745 "doc_id" value (+ 1). We would do so after each FTS index build or
2746 table truncate */
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2747 void fts_update_next_doc_id(
2748 trx_t *trx, /*!< in/out: transaction */
2749 const dict_table_t *table, /*!< in: table */
2750 const char *table_name, /*!< in: table name, or NULL */
2751 doc_id_t doc_id) /*!< in: DOC ID to set */
2752 {
2753 table->fts->cache->synced_doc_id = doc_id;
2754 table->fts->cache->next_doc_id = doc_id + 1;
2755
2756 table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2757
2758 fts_update_sync_doc_id(table, table_name, table->fts->cache->synced_doc_id,
2759 trx);
2760 }
2761
2762 /** Get the next available document id.
2763 @return DB_SUCCESS if OK */
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2764 dberr_t fts_get_next_doc_id(const dict_table_t *table, /*!< in: table */
2765 doc_id_t *doc_id) /*!< out: new document id */
2766 {
2767 fts_cache_t *cache = table->fts->cache;
2768
2769 /* If the Doc ID system has not yet been initialized, we
2770 will consult the CONFIG table and user table to re-establish
2771 the initial value of the Doc ID */
2772 if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2773 fts_init_doc_id(table);
2774 }
2775
2776 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2777 *doc_id = FTS_NULL_DOC_ID;
2778 return (DB_SUCCESS);
2779 }
2780
2781 mutex_enter(&cache->doc_id_lock);
2782 *doc_id = ++cache->next_doc_id;
2783 mutex_exit(&cache->doc_id_lock);
2784
2785 return (DB_SUCCESS);
2786 }
2787
2788 /** This function fetch the Doc ID from CONFIG table, and compare with
2789 the Doc ID supplied. And store the larger one to the CONFIG table.
2790 @return DB_SUCCESS if OK */
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2791 static dberr_t fts_cmp_set_sync_doc_id(
2792 const dict_table_t *table, /*!< in: table */
2793 doc_id_t doc_id_cmp, /*!< in: Doc ID to compare */
2794 ibool read_only, /*!< in: TRUE if read the
2795 synced_doc_id only */
2796 doc_id_t *doc_id) /*!< out: larger document id
2797 after comparing "doc_id_cmp"
2798 to the one stored in CONFIG
2799 table */
2800 {
2801 trx_t *trx;
2802 pars_info_t *info;
2803 dberr_t error;
2804 fts_table_t fts_table;
2805 que_t *graph = nullptr;
2806 fts_cache_t *cache = table->fts->cache;
2807 char table_name[MAX_FULL_NAME_LEN];
2808 retry:
2809 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2810
2811 fts_table.suffix = FTS_SUFFIX_CONFIG;
2812 fts_table.table_id = table->id;
2813 fts_table.type = FTS_COMMON_TABLE;
2814 fts_table.table = table;
2815
2816 fts_table.parent = table->name.m_name;
2817
2818 trx = trx_allocate_for_background();
2819
2820 trx->op_info = "update the next FTS document id";
2821
2822 info = pars_info_create();
2823
2824 pars_info_bind_function(info, "my_func", fts_fetch_store_doc_id, doc_id);
2825
2826 fts_get_table_name(&fts_table, table_name);
2827 pars_info_bind_id(info, true, "config_table", table_name);
2828
2829 graph = fts_parse_sql(&fts_table, info,
2830 "DECLARE FUNCTION my_func;\n"
2831 "DECLARE CURSOR c IS SELECT value FROM $config_table"
2832 " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2833 "BEGIN\n"
2834 ""
2835 "OPEN c;\n"
2836 "WHILE 1 = 1 LOOP\n"
2837 " FETCH c INTO my_func();\n"
2838 " IF c % NOTFOUND THEN\n"
2839 " EXIT;\n"
2840 " END IF;\n"
2841 "END LOOP;\n"
2842 "CLOSE c;");
2843
2844 *doc_id = 0;
2845
2846 error = fts_eval_sql(trx, graph);
2847
2848 que_graph_free(graph);
2849
2850 // FIXME: We need to retry deadlock errors
2851 if (error != DB_SUCCESS) {
2852 goto func_exit;
2853 }
2854
2855 if (read_only) {
2856 goto func_exit;
2857 }
2858
2859 if (doc_id_cmp == 0 && *doc_id) {
2860 cache->synced_doc_id = *doc_id - 1;
2861 } else {
2862 cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2863 }
2864
2865 mutex_enter(&cache->doc_id_lock);
2866 /* For each sync operation, we will add next_doc_id by 1,
2867 so to mark a sync operation */
2868 if (cache->next_doc_id < cache->synced_doc_id + 1) {
2869 cache->next_doc_id = cache->synced_doc_id + 1;
2870 }
2871 mutex_exit(&cache->doc_id_lock);
2872
2873 if (doc_id_cmp > *doc_id) {
2874 error = fts_update_sync_doc_id(table, table->name.m_name,
2875 cache->synced_doc_id, trx);
2876 }
2877
2878 *doc_id = cache->next_doc_id;
2879
2880 func_exit:
2881
2882 if (error == DB_SUCCESS) {
2883 fts_sql_commit(trx);
2884 } else {
2885 *doc_id = 0;
2886
2887 ib::error(ER_IB_MSG_471) << "(" << ut_strerr(error)
2888 << ") while getting"
2889 " next doc id.";
2890 fts_sql_rollback(trx);
2891
2892 if (error == DB_DEADLOCK) {
2893 os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2894 goto retry;
2895 }
2896 }
2897
2898 trx_free_for_background(trx);
2899
2900 return (error);
2901 }
2902
2903 /** Update the last document id. This function could create a new
2904 transaction to update the last document id.
2905 @return DB_SUCCESS if OK */
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2906 static dberr_t fts_update_sync_doc_id(
2907 const dict_table_t *table, /*!< in: table */
2908 const char *table_name, /*!< in: table name, or NULL */
2909 doc_id_t doc_id, /*!< in: last document id */
2910 trx_t *trx) /*!< in: update trx, or NULL */
2911 {
2912 byte id[FTS_MAX_ID_LEN];
2913 pars_info_t *info;
2914 fts_table_t fts_table;
2915 ulint id_len;
2916 que_t *graph = nullptr;
2917 dberr_t error;
2918 ibool local_trx = FALSE;
2919 fts_cache_t *cache = table->fts->cache;
2920 char fts_name[MAX_FULL_NAME_LEN];
2921
2922 fts_table.suffix = FTS_SUFFIX_CONFIG;
2923 fts_table.table_id = table->id;
2924 fts_table.type = FTS_COMMON_TABLE;
2925 fts_table.table = table;
2926 if (table_name) {
2927 fts_table.parent = table_name;
2928 } else {
2929 fts_table.parent = table->name.m_name;
2930 }
2931
2932 if (!trx) {
2933 trx = trx_allocate_for_background();
2934
2935 trx->op_info = "setting last FTS document id";
2936 local_trx = TRUE;
2937 }
2938
2939 info = pars_info_create();
2940
2941 id_len = snprintf((char *)id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2942
2943 pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2944
2945 fts_get_table_name(&fts_table, fts_name);
2946 pars_info_bind_id(info, true, "table_name", fts_name);
2947
2948 graph = fts_parse_sql(&fts_table, info,
2949 "BEGIN"
2950 " UPDATE $table_name SET value = :doc_id"
2951 " WHERE key = 'synced_doc_id';");
2952
2953 error = fts_eval_sql(trx, graph);
2954
2955 que_graph_free(graph);
2956
2957 if (local_trx) {
2958 if (error == DB_SUCCESS) {
2959 fts_sql_commit(trx);
2960 cache->synced_doc_id = doc_id;
2961 } else {
2962 ib::error(ER_IB_MSG_472) << "(" << ut_strerr(error)
2963 << ") while"
2964 " updating last doc id.";
2965
2966 fts_sql_rollback(trx);
2967 }
2968 trx_free_for_background(trx);
2969 }
2970
2971 return (error);
2972 }
2973
2974 /** Create a new fts_doc_ids_t.
2975 @return new fts_doc_ids_t */
fts_doc_ids_create(void)2976 fts_doc_ids_t *fts_doc_ids_create(void) {
2977 fts_doc_ids_t *fts_doc_ids;
2978 mem_heap_t *heap = mem_heap_create(512);
2979
2980 fts_doc_ids =
2981 static_cast<fts_doc_ids_t *>(mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2982
2983 fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2984
2985 fts_doc_ids->doc_ids = static_cast<ib_vector_t *>(
2986 ib_vector_create(fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2987
2988 return (fts_doc_ids);
2989 }
2990
2991 /** Free a fts_doc_ids_t. */
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)2992 void fts_doc_ids_free(fts_doc_ids_t *fts_doc_ids) {
2993 mem_heap_t *heap = static_cast<mem_heap_t *>(fts_doc_ids->self_heap->arg);
2994
2995 memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
2996
2997 mem_heap_free(heap);
2998 }
2999
3000 /** Do commit-phase steps necessary for the insertion of a new row.
3001 @param[in] ftt FTS transaction table
3002 @param[in] row row to be inserted in index
3003 */
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)3004 static void fts_add(fts_trx_table_t *ftt, fts_trx_row_t *row) {
3005 dict_table_t *table = ftt->table;
3006 doc_id_t doc_id = row->doc_id;
3007
3008 ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
3009
3010 fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
3011
3012 mutex_enter(&table->fts->cache->deleted_lock);
3013 ++table->fts->cache->added;
3014 mutex_exit(&table->fts->cache->deleted_lock);
3015
3016 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) &&
3017 doc_id >= table->fts->cache->next_doc_id) {
3018 table->fts->cache->next_doc_id = doc_id + 1;
3019 }
3020 }
3021
3022 /** Do commit-phase steps necessary for the deletion of a row.
3023 @return DB_SUCCESS or error code */
3024 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)3025 fts_delete(fts_trx_table_t *ftt, /*!< in: FTS trx table */
3026 fts_trx_row_t *row) /*!< in: row */
3027 {
3028 que_t *graph;
3029 fts_table_t fts_table;
3030 dberr_t error = DB_SUCCESS;
3031 doc_id_t write_doc_id;
3032 dict_table_t *table = ftt->table;
3033 doc_id_t doc_id = row->doc_id;
3034 trx_t *trx = ftt->fts_trx->trx;
3035 pars_info_t *info = pars_info_create();
3036 fts_cache_t *cache = table->fts->cache;
3037
3038 /* we do not index Documents whose Doc ID value is 0 */
3039 if (doc_id == FTS_NULL_DOC_ID) {
3040 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
3041 return (error);
3042 }
3043
3044 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3045
3046 FTS_INIT_FTS_TABLE(&fts_table, FTS_SUFFIX_DELETED, FTS_COMMON_TABLE, table);
3047
3048 /* Convert to "storage" byte order. */
3049 fts_write_doc_id((byte *)&write_doc_id, doc_id);
3050 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3051
3052 /* It is possible we update a record that has not yet been sync-ed
3053 into cache from last crash (delete Doc will not initialize the
3054 sync). Avoid any added counter accounting until the FTS cache
3055 is re-established and sync-ed */
3056 if (table->fts->fts_status & ADDED_TABLE_SYNCED &&
3057 doc_id > cache->synced_doc_id) {
3058 mutex_enter(&table->fts->cache->deleted_lock);
3059
3060 /* The Doc ID could belong to those left in
3061 ADDED table from last crash. So need to check
3062 if it is less than first_doc_id when we initialize
3063 the Doc ID system after reboot */
3064 if (doc_id >= table->fts->cache->first_doc_id &&
3065 table->fts->cache->added > 0) {
3066 --table->fts->cache->added;
3067 }
3068
3069 mutex_exit(&table->fts->cache->deleted_lock);
3070
3071 /* Only if the row was really deleted. */
3072 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3073 }
3074
3075 /* Note the deleted document for OPTIMIZE to purge. */
3076 if (error == DB_SUCCESS) {
3077 char table_name[MAX_FULL_NAME_LEN];
3078
3079 trx->op_info = "adding doc id to FTS DELETED";
3080
3081 info->graph_owns_us = TRUE;
3082
3083 fts_table.suffix = FTS_SUFFIX_DELETED;
3084
3085 fts_get_table_name(&fts_table, table_name);
3086 pars_info_bind_id(info, true, "deleted", table_name);
3087
3088 graph = fts_parse_sql(&fts_table, info,
3089 "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
3090
3091 error = fts_eval_sql(trx, graph);
3092
3093 fts_que_graph_free(graph);
3094 } else {
3095 pars_info_free(info);
3096 }
3097
3098 /* Increment the total deleted count, this is used to calculate the
3099 number of documents indexed. */
3100 if (error == DB_SUCCESS) {
3101 mutex_enter(&table->fts->cache->deleted_lock);
3102
3103 ++table->fts->cache->deleted;
3104
3105 mutex_exit(&table->fts->cache->deleted_lock);
3106 }
3107
3108 return (error);
3109 }
3110
3111 /** Do commit-phase steps necessary for the modification of a row.
3112 @return DB_SUCCESS or error code */
3113 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3114 fts_modify(fts_trx_table_t *ftt, /*!< in: FTS trx table */
3115 fts_trx_row_t *row) /*!< in: row */
3116 {
3117 dberr_t error;
3118
3119 ut_a(row->state == FTS_MODIFY);
3120
3121 error = fts_delete(ftt, row);
3122
3123 if (error == DB_SUCCESS) {
3124 fts_add(ftt, row);
3125 }
3126
3127 return (error);
3128 }
3129
3130 /** Create a new document id.
3131 @return DB_SUCCESS if all went well else error */
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3132 dberr_t fts_create_doc_id(dict_table_t *table, /*!< in: row is of this table. */
3133 dtuple_t *row, /* in/out: add doc id value to this
3134 row. This is the current row that is
3135 being inserted. */
3136 mem_heap_t *heap) /*!< in: heap */
3137 {
3138 doc_id_t doc_id;
3139 dberr_t error = DB_SUCCESS;
3140
3141 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3142
3143 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3144 if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3145 error = fts_get_next_doc_id(table, &doc_id);
3146 }
3147 return (error);
3148 }
3149
3150 error = fts_get_next_doc_id(table, &doc_id);
3151
3152 if (error == DB_SUCCESS) {
3153 dfield_t *dfield;
3154 doc_id_t *write_doc_id;
3155
3156 ut_a(doc_id > 0);
3157
3158 dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3159 write_doc_id =
3160 static_cast<doc_id_t *>(mem_heap_alloc(heap, sizeof(*write_doc_id)));
3161
3162 ut_a(doc_id != FTS_NULL_DOC_ID);
3163 ut_a(sizeof(doc_id) == dfield->type.len);
3164 fts_write_doc_id((byte *)write_doc_id, doc_id);
3165
3166 dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3167 }
3168
3169 return (error);
3170 }
3171
3172 /** The given transaction is about to be committed; do whatever is necessary
3173 from the FTS system's POV.
3174 @return DB_SUCCESS or error code */
3175 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_commit_table(fts_trx_table_t * ftt)3176 fts_commit_table(fts_trx_table_t *ftt) /*!< in: FTS table to commit*/
3177 {
3178 const ib_rbt_node_t *node;
3179 ib_rbt_t *rows;
3180 dberr_t error = DB_SUCCESS;
3181 fts_cache_t *cache = ftt->table->fts->cache;
3182 trx_t *trx = trx_allocate_for_background();
3183
3184 rows = ftt->rows;
3185
3186 ftt->fts_trx->trx = trx;
3187
3188 if (cache->get_docs == nullptr) {
3189 rw_lock_x_lock(&cache->init_lock);
3190 if (cache->get_docs == nullptr) {
3191 cache->get_docs = fts_get_docs_create(cache);
3192 }
3193 rw_lock_x_unlock(&cache->init_lock);
3194 }
3195
3196 for (node = rbt_first(rows); node != nullptr && error == DB_SUCCESS;
3197 node = rbt_next(rows, node)) {
3198 fts_trx_row_t *row = rbt_value(fts_trx_row_t, node);
3199
3200 switch (row->state) {
3201 case FTS_INSERT:
3202 fts_add(ftt, row);
3203 break;
3204
3205 case FTS_MODIFY:
3206 error = fts_modify(ftt, row);
3207 break;
3208
3209 case FTS_DELETE:
3210 error = fts_delete(ftt, row);
3211 break;
3212
3213 default:
3214 ut_error;
3215 }
3216 }
3217
3218 fts_sql_commit(trx);
3219
3220 trx_free_for_background(trx);
3221
3222 return (error);
3223 }
3224
3225 /** The given transaction is about to be committed; do whatever is necessary
3226 from the FTS system's POV.
3227 @return DB_SUCCESS or error code */
fts_commit(trx_t * trx)3228 dberr_t fts_commit(trx_t *trx) /*!< in: transaction */
3229 {
3230 const ib_rbt_node_t *node;
3231 dberr_t error;
3232 ib_rbt_t *tables;
3233 fts_savepoint_t *savepoint;
3234
3235 savepoint =
3236 static_cast<fts_savepoint_t *>(ib_vector_last(trx->fts_trx->savepoints));
3237 tables = savepoint->tables;
3238
3239 for (node = rbt_first(tables), error = DB_SUCCESS;
3240 node != nullptr && error == DB_SUCCESS; node = rbt_next(tables, node)) {
3241 fts_trx_table_t **ftt;
3242
3243 ftt = rbt_value(fts_trx_table_t *, node);
3244
3245 error = fts_commit_table(*ftt);
3246 }
3247
3248 return (error);
3249 }
3250
3251 /** Initialize a document. */
fts_doc_init(fts_doc_t * doc)3252 void fts_doc_init(fts_doc_t *doc) /*!< in: doc to initialize */
3253 {
3254 mem_heap_t *heap = mem_heap_create(32);
3255
3256 memset(doc, 0, sizeof(*doc));
3257
3258 doc->self_heap = ib_heap_allocator_create(heap);
3259 }
3260
3261 /** Free document. */
fts_doc_free(fts_doc_t * doc)3262 void fts_doc_free(fts_doc_t *doc) /*!< in: document */
3263 {
3264 mem_heap_t *heap = static_cast<mem_heap_t *>(doc->self_heap->arg);
3265
3266 if (doc->tokens) {
3267 rbt_free(doc->tokens);
3268 }
3269
3270 ut_d(memset(doc, 0, sizeof(*doc)));
3271
3272 mem_heap_free(heap);
3273 }
3274
3275 /** Callback function for fetch that stores the text of an FTS document,
3276 converting each column to UTF-16.
3277 @return always false */
fts_query_expansion_fetch_doc(void * row,void * user_arg)3278 ibool fts_query_expansion_fetch_doc(void *row, /*!< in: sel_node_t* */
3279 void *user_arg) /*!< in: fts_doc_t* */
3280 {
3281 que_node_t *exp;
3282 sel_node_t *node = static_cast<sel_node_t *>(row);
3283 fts_doc_t *result_doc = static_cast<fts_doc_t *>(user_arg);
3284 dfield_t *dfield;
3285 ulint len;
3286 ulint doc_len;
3287 fts_doc_t doc;
3288 CHARSET_INFO *doc_charset = nullptr;
3289 ulint field_no = 0;
3290
3291 len = 0;
3292
3293 fts_doc_init(&doc);
3294 doc.found = TRUE;
3295
3296 exp = node->select_list;
3297 doc_len = 0;
3298
3299 doc_charset = result_doc->charset;
3300
3301 /* Copy each indexed column content into doc->text.f_str */
3302 while (exp) {
3303 dfield = que_node_get_val(exp);
3304 len = dfield_get_len(dfield);
3305
3306 /* NULL column */
3307 if (len == UNIV_SQL_NULL) {
3308 exp = que_node_get_next(exp);
3309 continue;
3310 }
3311
3312 if (!doc_charset) {
3313 doc_charset = fts_get_charset(dfield->type.prtype);
3314 }
3315
3316 doc.charset = doc_charset;
3317 doc.is_ngram = result_doc->is_ngram;
3318
3319 if (dfield_is_ext(dfield)) {
3320 /* We ignore columns that are stored externally, this
3321 could result in too many words to search */
3322 exp = que_node_get_next(exp);
3323 continue;
3324 } else {
3325 doc.text.f_n_char = 0;
3326
3327 doc.text.f_str = static_cast<byte *>(dfield_get_data(dfield));
3328
3329 doc.text.f_len = len;
3330 }
3331
3332 if (field_no == 0) {
3333 fts_tokenize_document(&doc, result_doc, result_doc->parser);
3334 } else {
3335 fts_tokenize_document_next(&doc, doc_len, result_doc, result_doc->parser);
3336 }
3337
3338 exp = que_node_get_next(exp);
3339
3340 doc_len += (exp) ? len + 1 : len;
3341
3342 field_no++;
3343 }
3344
3345 ut_ad(doc_charset);
3346
3347 if (!result_doc->charset) {
3348 result_doc->charset = doc_charset;
3349 }
3350
3351 fts_doc_free(&doc);
3352
3353 return (FALSE);
3354 }
3355
3356 /** fetch and tokenize the document. */
fts_fetch_doc_from_rec(trx_t * trx,fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3357 static void fts_fetch_doc_from_rec(
3358 trx_t *trx, /*!< in: current transaction */
3359 fts_get_doc_t *get_doc, /*!< in: FTS index's get_doc struct */
3360 dict_index_t *clust_index, /*!< in: cluster index */
3361 btr_pcur_t *pcur, /*!< in: cursor whose position
3362 has been stored */
3363 ulint *offsets, /*!< in: offsets */
3364 fts_doc_t *doc) /*!< out: fts doc to hold parsed
3365 documents */
3366 {
3367 dict_index_t *index;
3368 dict_table_t *table;
3369 const rec_t *clust_rec;
3370 ulint num_field;
3371 const dict_field_t *ifield;
3372 const dict_col_t *col;
3373 uint16_t clust_pos;
3374 ulint i;
3375 ulint doc_len = 0;
3376 ulint processed_doc = 0;
3377 st_mysql_ftparser *parser;
3378
3379 if (!get_doc) {
3380 return;
3381 }
3382
3383 index = get_doc->index_cache->index;
3384 table = get_doc->index_cache->index->table;
3385 parser = get_doc->index_cache->index->parser;
3386
3387 clust_rec = btr_pcur_get_rec(pcur);
3388
3389 num_field = dict_index_get_n_fields(index);
3390
3391 for (i = 0; i < num_field; i++) {
3392 ifield = index->get_field(i);
3393 col = ifield->col;
3394 clust_pos = static_cast<uint16_t>(dict_col_get_clust_pos(col, clust_index));
3395
3396 if (!get_doc->index_cache->charset) {
3397 get_doc->index_cache->charset = fts_get_charset(ifield->col->prtype);
3398 }
3399
3400 if (rec_offs_nth_extern(offsets, clust_pos)) {
3401 doc->text.f_str = lob::btr_rec_copy_externally_stored_field(
3402 nullptr, clust_index, clust_rec, offsets, dict_table_page_size(table),
3403 clust_pos, &doc->text.f_len, nullptr, false,
3404 static_cast<mem_heap_t *>(doc->self_heap->arg));
3405 } else {
3406 doc->text.f_str = const_cast<byte *>(rec_get_nth_field_instant(
3407 clust_rec, offsets, clust_pos, clust_index, &doc->text.f_len));
3408 }
3409
3410 doc->found = TRUE;
3411 doc->charset = get_doc->index_cache->charset;
3412 doc->is_ngram = index->is_ngram;
3413
3414 /* Null Field */
3415 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3416 continue;
3417 }
3418
3419 if (processed_doc == 0) {
3420 fts_tokenize_document(doc, nullptr, parser);
3421 } else {
3422 fts_tokenize_document_next(doc, doc_len, nullptr, parser);
3423 }
3424
3425 processed_doc++;
3426 doc_len += doc->text.f_len + 1;
3427 }
3428 }
3429
3430 /** Fetch the data from tuple and tokenize the document.
3431 @param[in] get_doc FTS index's get_doc struct
3432 @param[in] tuple tuple should be arranged in table schema order
3433 @param[out] doc fts doc to hold parsed documents. */
fts_fetch_doc_from_tuple(fts_get_doc_t * get_doc,const dtuple_t * tuple,fts_doc_t * doc)3434 static void fts_fetch_doc_from_tuple(fts_get_doc_t *get_doc,
3435 const dtuple_t *tuple, fts_doc_t *doc) {
3436 dict_index_t *index;
3437 st_mysql_ftparser *parser;
3438 ulint doc_len = 0;
3439 ulint processed_doc = 0;
3440 ulint num_field;
3441
3442 if (get_doc == nullptr) {
3443 return;
3444 }
3445
3446 index = get_doc->index_cache->index;
3447 parser = get_doc->index_cache->index->parser;
3448 num_field = dict_index_get_n_fields(index);
3449
3450 for (ulint i = 0; i < num_field; i++) {
3451 const dict_field_t *ifield;
3452 const dict_col_t *col;
3453 ulint pos;
3454 dfield_t *field;
3455
3456 ifield = index->get_field(i);
3457 col = ifield->col;
3458 pos = dict_col_get_no(col);
3459 field = dtuple_get_nth_field(tuple, pos);
3460
3461 if (!get_doc->index_cache->charset) {
3462 get_doc->index_cache->charset = fts_get_charset(ifield->col->prtype);
3463 }
3464
3465 ut_ad(!dfield_is_ext(field));
3466
3467 doc->text.f_str = (byte *)dfield_get_data(field);
3468 doc->text.f_len = dfield_get_len(field);
3469 doc->found = TRUE;
3470 doc->charset = get_doc->index_cache->charset;
3471 doc->is_ngram = index->is_ngram;
3472
3473 /* field data is NULL. */
3474 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3475 continue;
3476 }
3477
3478 if (processed_doc == 0) {
3479 fts_tokenize_document(doc, nullptr, parser);
3480 } else {
3481 fts_tokenize_document_next(doc, doc_len, nullptr, parser);
3482 }
3483
3484 processed_doc++;
3485 doc_len += doc->text.f_len + 1;
3486 }
3487 }
3488
3489 /** Fetch the document from tuple, tokenize the text data and
3490 insert the text data into fts auxiliary table and
3491 its cache. Moreover this tuple fields doesn't contain any information
3492 about externally stored field. This tuple contains data directly
3493 converted from mysql.
3494 @param[in] ftt FTS transaction table
3495 @param[in] doc_id doc id
3496 @param[in] tuple tuple from where data can be retrieved
3497 and tuple should be arranged in table
3498 schema order. */
fts_add_doc_from_tuple(fts_trx_table_t * ftt,doc_id_t doc_id,const dtuple_t * tuple)3499 void fts_add_doc_from_tuple(fts_trx_table_t *ftt, doc_id_t doc_id,
3500 const dtuple_t *tuple) {
3501 mtr_t mtr;
3502 fts_cache_t *cache = ftt->table->fts->cache;
3503
3504 ut_ad(cache->get_docs);
3505
3506 if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
3507 fts_init_index(ftt->table, FALSE);
3508 }
3509
3510 mtr_start(&mtr);
3511
3512 ulint num_idx = ib_vector_size(cache->get_docs);
3513
3514 for (ulint i = 0; i < num_idx; ++i) {
3515 fts_doc_t doc;
3516 dict_table_t *table;
3517 fts_get_doc_t *get_doc;
3518
3519 get_doc = static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, i));
3520 table = get_doc->index_cache->index->table;
3521
3522 fts_doc_init(&doc);
3523 fts_fetch_doc_from_tuple(get_doc, tuple, &doc);
3524
3525 if (doc.found) {
3526 mtr_commit(&mtr);
3527 rw_lock_x_lock(&table->fts->cache->lock);
3528
3529 if (table->fts->cache->stopword_info.status & STOPWORD_NOT_INIT) {
3530 fts_load_stopword(table, nullptr, nullptr, nullptr, TRUE, TRUE);
3531 }
3532
3533 fts_cache_add_doc(table->fts->cache, get_doc->index_cache, doc_id,
3534 doc.tokens);
3535
3536 rw_lock_x_unlock(&table->fts->cache->lock);
3537
3538 if (cache->total_size > fts_max_cache_size / 5 || fts_need_sync) {
3539 fts_sync(cache->sync, true, false, false);
3540 }
3541
3542 mtr_start(&mtr);
3543 }
3544
3545 fts_doc_free(&doc);
3546 }
3547
3548 mtr_commit(&mtr);
3549 }
3550
3551 /** Fetch the document just inserted right before we commit
3552 the transaction, and tokenize the inserted text data
3553 and insert into FTS auxiliary table and its cache.
3554 @param[in] ftt FTS transaction table
3555 @param[in] doc_id doc id
3556 @param[in] fts_indexes affected FTS indexes
3557 @return true if successful */
fts_add_doc_by_id(fts_trx_table_t * ftt,doc_id_t doc_id,ib_vector_t * fts_indexes MY_ATTRIBUTE ((unused)))3558 static ulint fts_add_doc_by_id(fts_trx_table_t *ftt, doc_id_t doc_id,
3559 ib_vector_t *fts_indexes
3560 MY_ATTRIBUTE((unused))) {
3561 mtr_t mtr;
3562 mem_heap_t *heap;
3563 btr_pcur_t pcur;
3564 dict_table_t *table;
3565 dtuple_t *tuple;
3566 dfield_t *dfield;
3567 fts_get_doc_t *get_doc;
3568 doc_id_t temp_doc_id;
3569 dict_index_t *clust_index;
3570 dict_index_t *fts_id_index;
3571 ibool is_id_cluster;
3572 fts_cache_t *cache = ftt->table->fts->cache;
3573
3574 ut_ad(cache->get_docs);
3575
3576 /* If Doc ID has been supplied by the user, then the table
3577 might not yet be sync-ed */
3578
3579 if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
3580 fts_init_index(ftt->table, FALSE);
3581 }
3582
3583 /* Get the first FTS index's get_doc */
3584 get_doc = static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, 0));
3585 ut_ad(get_doc);
3586
3587 table = get_doc->index_cache->index->table;
3588
3589 heap = mem_heap_create(512);
3590
3591 clust_index = table->first_index();
3592 fts_id_index = table->fts_doc_id_index;
3593
3594 /* Check whether the index on FTS_DOC_ID is cluster index */
3595 is_id_cluster = (clust_index == fts_id_index);
3596
3597 mtr_start(&mtr);
3598 btr_pcur_init(&pcur);
3599
3600 /* Search based on Doc ID. Here, we'll need to consider the case
3601 when there is no primary index on Doc ID */
3602 tuple = dtuple_create(heap, 1);
3603 dfield = dtuple_get_nth_field(tuple, 0);
3604 dfield->type.mtype = DATA_INT;
3605 dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;
3606
3607 mach_write_to_8((byte *)&temp_doc_id, doc_id);
3608 dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));
3609
3610 btr_pcur_open_with_no_init(fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
3611 &pcur, 0, &mtr);
3612
3613 /* If we have a match, add the data to doc structure */
3614 if (btr_pcur_get_low_match(&pcur) == 1) {
3615 const rec_t *rec;
3616 btr_pcur_t *doc_pcur;
3617 const rec_t *clust_rec;
3618 btr_pcur_t clust_pcur;
3619 ulint *offsets = nullptr;
3620 ulint num_idx = ib_vector_size(cache->get_docs);
3621
3622 rec = btr_pcur_get_rec(&pcur);
3623
3624 /* Doc could be deleted */
3625 if (page_rec_is_infimum(rec) ||
3626 rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
3627 goto func_exit;
3628 }
3629
3630 if (is_id_cluster) {
3631 clust_rec = rec;
3632 doc_pcur = &pcur;
3633 } else {
3634 dtuple_t *clust_ref;
3635 ulint n_fields;
3636
3637 btr_pcur_init(&clust_pcur);
3638 n_fields = dict_index_get_n_unique(clust_index);
3639
3640 clust_ref = dtuple_create(heap, n_fields);
3641 dict_index_copy_types(clust_ref, clust_index, n_fields);
3642
3643 row_build_row_ref_in_tuple(clust_ref, rec, fts_id_index, nullptr,
3644 nullptr);
3645
3646 btr_pcur_open_with_no_init(clust_index, clust_ref, PAGE_CUR_LE,
3647 BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);
3648
3649 doc_pcur = &clust_pcur;
3650 clust_rec = btr_pcur_get_rec(&clust_pcur);
3651 }
3652
3653 offsets = rec_get_offsets(clust_rec, clust_index, nullptr, ULINT_UNDEFINED,
3654 &heap);
3655
3656 for (ulint i = 0; i < num_idx; ++i) {
3657 fts_doc_t doc;
3658 dict_table_t *table;
3659 fts_get_doc_t *get_doc;
3660
3661 get_doc = static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, i));
3662
3663 table = get_doc->index_cache->index->table;
3664
3665 fts_doc_init(&doc);
3666
3667 fts_fetch_doc_from_rec(ftt->fts_trx->trx, get_doc, clust_index, doc_pcur,
3668 offsets, &doc);
3669
3670 if (doc.found) {
3671 ibool success MY_ATTRIBUTE((unused));
3672
3673 btr_pcur_store_position(doc_pcur, &mtr);
3674 mtr_commit(&mtr);
3675
3676 DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
3677 rw_lock_x_lock(&table->fts->cache->lock);
3678
3679 if (table->fts->cache->stopword_info.status & STOPWORD_NOT_INIT) {
3680 fts_load_stopword(table, nullptr, nullptr, nullptr, TRUE, TRUE);
3681 }
3682
3683 fts_cache_add_doc(table->fts->cache, get_doc->index_cache, doc_id,
3684 doc.tokens);
3685
3686 bool need_sync = false;
3687 if ((cache->total_size > fts_max_cache_size / 10 || fts_need_sync) &&
3688 !cache->sync->in_progress) {
3689 need_sync = true;
3690 }
3691
3692 rw_lock_x_unlock(&table->fts->cache->lock);
3693
3694 DBUG_EXECUTE_IF("fts_instrument_sync_cache_wait",
3695 srv_fatal_semaphore_wait_threshold = 25;
3696 fts_max_cache_size = 100;
3697 fts_sync(cache->sync, true, true, false););
3698
3699 DBUG_EXECUTE_IF("fts_instrument_sync",
3700 fts_optimize_request_sync_table(table);
3701 os_event_wait(cache->sync->event););
3702
3703 DBUG_EXECUTE_IF("fts_instrument_sync_debug",
3704 fts_sync(cache->sync, true, true, false););
3705
3706 DEBUG_SYNC_C("fts_instrument_sync_request");
3707 DBUG_EXECUTE_IF("fts_instrument_sync_request",
3708 fts_optimize_request_sync_table(table););
3709
3710 if (need_sync) {
3711 fts_optimize_request_sync_table(table);
3712 }
3713
3714 mtr_start(&mtr);
3715
3716 if (i < num_idx - 1) {
3717 success = btr_pcur_restore_position(BTR_SEARCH_LEAF, doc_pcur, &mtr);
3718
3719 ut_ad(success);
3720 }
3721 }
3722
3723 fts_doc_free(&doc);
3724 }
3725
3726 if (!is_id_cluster) {
3727 btr_pcur_close(doc_pcur);
3728 }
3729 }
3730 func_exit:
3731 mtr_commit(&mtr);
3732
3733 btr_pcur_close(&pcur);
3734
3735 mem_heap_free(heap);
3736 return (TRUE);
3737 }
3738
3739 /** Callback function to read a single ulint column.
3740 return always returns TRUE */
fts_read_ulint(void * row,void * user_arg)3741 static ibool fts_read_ulint(void *row, /*!< in: sel_node_t* */
3742 void *user_arg) /*!< in: pointer to ulint */
3743 {
3744 sel_node_t *sel_node = static_cast<sel_node_t *>(row);
3745 ulint *value = static_cast<ulint *>(user_arg);
3746 que_node_t *exp = sel_node->select_list;
3747 dfield_t *dfield = que_node_get_val(exp);
3748 void *data = dfield_get_data(dfield);
3749
3750 *value =
3751 static_cast<ulint>(mach_read_from_4(static_cast<const byte *>(data)));
3752
3753 return (TRUE);
3754 }
3755
3756 /** Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3757 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
fts_get_max_doc_id(dict_table_t * table)3758 doc_id_t fts_get_max_doc_id(dict_table_t *table) /*!< in: user table */
3759 {
3760 dict_index_t *index;
3761 dict_field_t *dfield MY_ATTRIBUTE((unused)) = nullptr;
3762 doc_id_t doc_id = 0;
3763 mtr_t mtr;
3764 btr_pcur_t pcur;
3765
3766 index = table->fts_doc_id_index;
3767
3768 if (!index) {
3769 return (0);
3770 }
3771
3772 dfield = index->get_field(0);
3773
3774 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3775 ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3776 #endif
3777
3778 mtr_start(&mtr);
3779
3780 /* fetch the largest indexes value */
3781 btr_pcur_open_at_index_side(false, index, BTR_SEARCH_LEAF, &pcur, true, 0,
3782 &mtr);
3783
3784 if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3785 const rec_t *rec = nullptr;
3786 ulint offsets_[REC_OFFS_NORMAL_SIZE];
3787 ulint *offsets = offsets_;
3788 mem_heap_t *heap = nullptr;
3789 ulint len;
3790 const void *data;
3791
3792 rec_offs_init(offsets_);
3793
3794 do {
3795 rec = btr_pcur_get_rec(&pcur);
3796
3797 if (page_rec_is_user_rec(rec)) {
3798 break;
3799 }
3800 } while (btr_pcur_move_to_prev(&pcur, &mtr));
3801
3802 if (!rec) {
3803 goto func_exit;
3804 }
3805
3806 offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
3807
3808 data = rec_get_nth_field(rec, offsets, 0, &len);
3809
3810 doc_id =
3811 static_cast<doc_id_t>(fts_read_doc_id(static_cast<const byte *>(data)));
3812 }
3813
3814 func_exit:
3815 btr_pcur_close(&pcur);
3816 mtr_commit(&mtr);
3817 return (doc_id);
3818 }
3819
3820 /** Fetch document with the given document id.
3821 @return DB_SUCCESS if OK else error */
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3822 dberr_t fts_doc_fetch_by_doc_id(
3823 fts_get_doc_t *get_doc, /*!< in: state */
3824 doc_id_t doc_id, /*!< in: id of document to
3825 fetch */
3826 dict_index_t *index_to_use, /*!< in: caller supplied FTS index,
3827 or NULL */
3828 ulint option, /*!< in: search option, if it is
3829 greater than doc_id or equal */
3830 fts_sql_callback callback, /*!< in: callback to read */
3831 void *arg) /*!< in: callback arg */
3832 {
3833 pars_info_t *info;
3834 dberr_t error;
3835 const char *select_str;
3836 doc_id_t write_doc_id;
3837 dict_index_t *index;
3838 trx_t *trx = trx_allocate_for_background();
3839 que_t *graph;
3840
3841 trx->op_info = "fetching indexed FTS document";
3842
3843 /* The FTS index can be supplied by caller directly with
3844 "index_to_use", otherwise, get it from "get_doc" */
3845 index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3846
3847 if (get_doc && get_doc->get_document_graph) {
3848 info = get_doc->get_document_graph->info;
3849 } else {
3850 info = pars_info_create();
3851 }
3852
3853 /* Convert to "storage" byte order. */
3854 fts_write_doc_id((byte *)&write_doc_id, doc_id);
3855 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3856 pars_info_bind_function(info, "my_func", callback, arg);
3857
3858 select_str = fts_get_select_columns_str(index, info, info->heap);
3859 pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3860
3861 if (!get_doc || !get_doc->get_document_graph) {
3862 if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3863 graph = fts_parse_sql(nullptr, info,
3864 mem_heap_printf(info->heap,
3865 "DECLARE FUNCTION my_func;\n"
3866 "DECLARE CURSOR c IS"
3867 " SELECT %s FROM $table_name"
3868 " WHERE %s = :doc_id;\n"
3869 "BEGIN\n"
3870 ""
3871 "OPEN c;\n"
3872 "WHILE 1 = 1 LOOP\n"
3873 " FETCH c INTO my_func();\n"
3874 " IF c %% NOTFOUND THEN\n"
3875 " EXIT;\n"
3876 " END IF;\n"
3877 "END LOOP;\n"
3878 "CLOSE c;",
3879 select_str, FTS_DOC_ID_COL_NAME));
3880 } else {
3881 ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3882
3883 /* This is used for crash recovery of table with
3884 hidden DOC ID or FTS indexes. We will scan the table
3885 to re-processing user table rows whose DOC ID or
3886 FTS indexed documents have not been sync-ed to disc
3887 during recent crash.
3888 In the case that all fulltext indexes are dropped
3889 for a table, we will keep the "hidden" FTS_DOC_ID
3890 column, and this scan is to retreive the largest
3891 DOC ID being used in the table to determine the
3892 appropriate next DOC ID.
3893 In the case of there exists fulltext index(es), this
3894 operation will re-tokenize any docs that have not
3895 been sync-ed to the disk, and re-prime the FTS
3896 cached */
3897 graph = fts_parse_sql(nullptr, info,
3898 mem_heap_printf(info->heap,
3899 "DECLARE FUNCTION my_func;\n"
3900 "DECLARE CURSOR c IS"
3901 " SELECT %s, %s FROM $table_name"
3902 " WHERE %s > :doc_id;\n"
3903 "BEGIN\n"
3904 ""
3905 "OPEN c;\n"
3906 "WHILE 1 = 1 LOOP\n"
3907 " FETCH c INTO my_func();\n"
3908 " IF c %% NOTFOUND THEN\n"
3909 " EXIT;\n"
3910 " END IF;\n"
3911 "END LOOP;\n"
3912 "CLOSE c;",
3913 FTS_DOC_ID_COL_NAME, select_str,
3914 FTS_DOC_ID_COL_NAME));
3915 }
3916 if (get_doc) {
3917 get_doc->get_document_graph = graph;
3918 }
3919 } else {
3920 graph = get_doc->get_document_graph;
3921 }
3922
3923 error = fts_eval_sql(trx, graph);
3924
3925 if (error == DB_SUCCESS) {
3926 fts_sql_commit(trx);
3927 } else {
3928 fts_sql_rollback(trx);
3929 }
3930
3931 trx_free_for_background(trx);
3932
3933 if (!get_doc) {
3934 fts_que_graph_free(graph);
3935 }
3936
3937 return (error);
3938 }
3939
3940 /** Write out a single word's data as new entry/entries in the INDEX table.
3941 @return DB_SUCCESS if all OK. */
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3942 dberr_t fts_write_node(trx_t *trx, /*!< in: transaction */
3943 que_t **graph, /*!< in: query graph */
3944 fts_table_t *fts_table, /*!< in: aux table */
3945 fts_string_t *word, /*!< in: word in UTF-8 */
3946 fts_node_t *node) /*!< in: node columns */
3947 {
3948 pars_info_t *info;
3949 dberr_t error;
3950 ib_uint32_t doc_count;
3951 doc_id_t last_doc_id;
3952 doc_id_t first_doc_id;
3953 char table_name[MAX_FULL_NAME_LEN];
3954
3955 ut_a(node->ilist != nullptr);
3956
3957 if (*graph) {
3958 info = (*graph)->info;
3959 } else {
3960 info = pars_info_create();
3961
3962 fts_get_table_name(fts_table, table_name);
3963 pars_info_bind_id(info, true, "index_table_name", table_name);
3964 }
3965
3966 pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3967
3968 /* Convert to "storage" byte order. */
3969 fts_write_doc_id((byte *)&first_doc_id, node->first_doc_id);
3970 fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3971
3972 /* Convert to "storage" byte order. */
3973 fts_write_doc_id((byte *)&last_doc_id, node->last_doc_id);
3974 fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3975
3976 ut_a(node->last_doc_id >= node->first_doc_id);
3977
3978 /* Convert to "storage" byte order. */
3979 mach_write_to_4((byte *)&doc_count, node->doc_count);
3980 pars_info_bind_int4_literal(info, "doc_count",
3981 (const ib_uint32_t *)&doc_count);
3982
3983 /* Set copy_name to FALSE since it's a static. */
3984 pars_info_bind_literal(info, "ilist", node->ilist, node->ilist_size,
3985 DATA_BLOB, DATA_BINARY_TYPE);
3986
3987 if (!*graph) {
3988 *graph = fts_parse_sql(fts_table, info,
3989 "BEGIN\n"
3990 "INSERT INTO $index_table_name VALUES"
3991 " (:token, :first_doc_id,"
3992 " :last_doc_id, :doc_count, :ilist);");
3993 }
3994
3995 const auto start_time = ut_time_monotonic();
3996 error = fts_eval_sql(trx, *graph);
3997 elapsed_time += ut_time_monotonic() - start_time;
3998 ++n_nodes;
3999
4000 return (error);
4001 }
4002
4003 /** Add rows to the DELETED_CACHE table.
4004 @return DB_SUCCESS if all went well else error code*/
4005 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)4006 fts_sync_add_deleted_cache(fts_sync_t *sync, /*!< in: sync state */
4007 ib_vector_t *doc_ids) /*!< in: doc ids to add */
4008 {
4009 ulint i;
4010 pars_info_t *info;
4011 que_t *graph;
4012 fts_table_t fts_table;
4013 char table_name[MAX_FULL_NAME_LEN];
4014 doc_id_t dummy = 0;
4015 dberr_t error = DB_SUCCESS;
4016 ulint n_elems = ib_vector_size(doc_ids);
4017
4018 ut_a(ib_vector_size(doc_ids) > 0);
4019
4020 ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
4021
4022 info = pars_info_create();
4023
4024 fts_bind_doc_id(info, "doc_id", &dummy);
4025
4026 FTS_INIT_FTS_TABLE(&fts_table, FTS_SUFFIX_DELETED_CACHE, FTS_COMMON_TABLE,
4027 sync->table);
4028
4029 fts_get_table_name(&fts_table, table_name);
4030 pars_info_bind_id(info, true, "table_name", table_name);
4031
4032 graph = fts_parse_sql(&fts_table, info,
4033 "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
4034
4035 for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
4036 fts_update_t *update;
4037 doc_id_t write_doc_id;
4038
4039 update = static_cast<fts_update_t *>(ib_vector_get(doc_ids, i));
4040
4041 /* Convert to "storage" byte order. */
4042 fts_write_doc_id((byte *)&write_doc_id, update->doc_id);
4043 fts_bind_doc_id(info, "doc_id", &write_doc_id);
4044
4045 error = fts_eval_sql(sync->trx, graph);
4046 }
4047
4048 fts_que_graph_free(graph);
4049
4050 return (error);
4051 }
4052
/** Write the words and ilist to disk.
Walks every word of the index cache and every node of each word. A node that
is not yet flagged as synced is flagged and, as long as no error has been
seen so far, written with fts_write_node().
@param[in,out]	trx		transaction
@param[in]	index_cache	index cache
@param[in]	unlock_cache	whether unlock cache when write node
@param[in]	sync_start_time	Holds the timestamp of start of sync
                                for deducing the length of sync time
@return DB_SUCCESS if all went well else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t
    fts_sync_write_words(trx_t *trx, fts_index_cache_t *index_cache,
                         bool unlock_cache, ib_time_t sync_start_time) {
  fts_table_t fts_table;
  /* Local counter used only for the diagnostic average below; it is
  distinct from the n_nodes that fts_write_node() increments. */
  ulint n_nodes = 0;
  ulint n_words = 0;
  const ib_rbt_node_t *rbt_node;
  dberr_t error = DB_SUCCESS;
  ibool print_error = FALSE;
  dict_table_t *table = index_cache->index->table;
  /* Time budget for writing while the cache lock is kept: a fraction
  (cutoff) of srv_fatal_semaphore_wait_threshold taken modulo
  SRV_SEMAPHORE_WAIT_EXTENSION. */
  const float cutoff = 0.98f;
  ulint lock_threshold = static_cast<ulint>(
      (srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION) *
      cutoff);
  /* True while this call has srv_fatal_semaphore_wait_threshold raised by
  SRV_SEMAPHORE_WAIT_EXTENSION. */
  bool timeout_extended = false;

  FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE,
                       index_cache->index);

  n_words = rbt_size(index_cache->words);

  /* We iterate over the entire tree, even if there is an error,
  since we want to free the memory used during caching. */
  for (rbt_node = rbt_first(index_cache->words); rbt_node;
       rbt_node = rbt_next(index_cache->words, rbt_node)) {
    ulint i;
    ulint selected;
    fts_tokenizer_word_t *word;

    word = rbt_value(fts_tokenizer_word_t, rbt_node);

    /* Pick the auxiliary index table for this word; the same selector
    also indexes the cached INSERT graph in ins_graph[]. */
    selected = fts_select_index(index_cache->charset, word->text.f_str,
                                word->text.f_len);

    fts_table.suffix = fts_get_suffix(selected);

    /* We iterate over all the nodes even if there was an error */
    for (i = 0; i < ib_vector_size(word->nodes); ++i) {
      fts_node_t *fts_node =
          static_cast<fts_node_t *>(ib_vector_get(word->nodes, i));

      /* Skip nodes that were already handled. Note that the flag is set
      before the write and also when an earlier error prevents the write
      below from happening. */
      if (fts_node->synced) {
        continue;
      } else {
        fts_node->synced = true;
      }

      /*FIXME: we need to handle the error properly. */
      if (error == DB_SUCCESS) {
        DBUG_EXECUTE_IF("fts_instrument_sync_write",
                        os_thread_sleep(10000000););
        if (!unlock_cache) {
          /* The cache lock is being kept across the writes. Once the time
          since sync_start_time exceeds the budget: the first time, raise
          the global threshold by SRV_SEMAPHORE_WAIT_EXTENSION and extend
          the local budget by the same amount; the second time, undo the
          extension and switch to releasing the cache lock around each
          write. */
          ulint cache_lock_time = ut_time_monotonic() - sync_start_time;
          if (cache_lock_time > lock_threshold) {
            if (!timeout_extended) {
              os_atomic_increment_ulint(&srv_fatal_semaphore_wait_threshold,
                                        SRV_SEMAPHORE_WAIT_EXTENSION);
              timeout_extended = true;
              lock_threshold += SRV_SEMAPHORE_WAIT_EXTENSION;
            } else {
              unlock_cache = true;
              os_atomic_decrement_ulint(&srv_fatal_semaphore_wait_threshold,
                                        SRV_SEMAPHORE_WAIT_EXTENSION);
              timeout_extended = false;
            }
          }
        }

        /* Release the cache X-lock for the duration of the write; it is
        re-acquired right after. */
        if (unlock_cache) {
          rw_lock_x_unlock(&table->fts->cache->lock);
        }

        error = fts_write_node(trx, &index_cache->ins_graph[selected],
                               &fts_table, &word->text, fts_node);

        DBUG_EXECUTE_IF("fts_instrument_sync_write",
                        os_thread_sleep(10000000););

        DEBUG_SYNC_C("fts_write_node");
        DBUG_EXECUTE_IF("fts_write_node_crash", DBUG_SUICIDE(););

        DBUG_EXECUTE_IF("fts_instrument_sync_sleep", os_thread_sleep(1000000););

        if (unlock_cache) {
          rw_lock_x_lock(&table->fts->cache->lock);
        }
      }
    }

    n_nodes += ib_vector_size(word->nodes);

    /* Report the first failure only once. */
    if (error != DB_SUCCESS && !print_error) {
      ib::error(ER_IB_MSG_473) << "(" << ut_strerr(error)
                               << ") writing"
                                  " word node to FTS auxiliary index table.";
      print_error = TRUE;
    }
  }

  /* NOTE(review): if the loop ends while timeout_extended is still true,
  nothing in this function undoes the increment of
  srv_fatal_semaphore_wait_threshold -- confirm that this is intended. */

  if (fts_enable_diag_print) {
    printf("Avg number of nodes: %lf\n",
           (double)n_nodes / (double)(n_words > 1 ? n_words : 1));
  }

  return (error);
}
4166
4167 /** Begin Sync, create transaction, acquire locks, etc. */
fts_sync_begin(fts_sync_t * sync)4168 static void fts_sync_begin(fts_sync_t *sync) /*!< in: sync state */
4169 {
4170 fts_cache_t *cache = sync->table->fts->cache;
4171
4172 n_nodes = 0;
4173 elapsed_time = 0;
4174
4175 sync->start_time = ut_time_monotonic();
4176
4177 sync->trx = trx_allocate_for_background();
4178
4179 if (fts_enable_diag_print) {
4180 ib::info(ER_IB_MSG_474)
4181 << "FTS SYNC for table " << sync->table->name
4182 << ", deleted count: " << ib_vector_size(cache->deleted_doc_ids)
4183 << " size: " << cache->total_size << " bytes";
4184 }
4185 }
4186
4187 /** Run SYNC on the table, i.e., write out data from the index specific
4188 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4189 @return DB_SUCCESS if all OK */
4190 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4191 fts_sync_index(fts_sync_t *sync, /*!< in: sync state */
4192 fts_index_cache_t *index_cache) /*!< in: index cache */
4193 {
4194 trx_t *trx = sync->trx;
4195
4196 trx->op_info = "doing SYNC index";
4197
4198 if (fts_enable_diag_print) {
4199 ib::info(ER_IB_MSG_475) << "SYNC words: " << rbt_size(index_cache->words);
4200 }
4201
4202 ut_ad(rbt_validate(index_cache->words));
4203
4204 return (fts_sync_write_words(trx, index_cache, sync->unlock_cache,
4205 sync->start_time));
4206 }
4207
4208 /** Check if index cache has been synced completely
4209 @param[in,out] index_cache index cache
4210 @return true if index is synced, otherwise false. */
fts_sync_index_check(fts_index_cache_t * index_cache)4211 static bool fts_sync_index_check(fts_index_cache_t *index_cache) {
4212 const ib_rbt_node_t *rbt_node;
4213
4214 for (rbt_node = rbt_first(index_cache->words); rbt_node != nullptr;
4215 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4216 fts_tokenizer_word_t *word;
4217 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4218
4219 fts_node_t *fts_node;
4220 fts_node = static_cast<fts_node_t *>(ib_vector_last(word->nodes));
4221
4222 if (!fts_node->synced) {
4223 return (false);
4224 }
4225 }
4226
4227 return (true);
4228 }
4229
4230 /** Reset synced flag in index cache when rollback
4231 @param[in,out] index_cache index cache */
fts_sync_index_reset(fts_index_cache_t * index_cache)4232 static void fts_sync_index_reset(fts_index_cache_t *index_cache) {
4233 const ib_rbt_node_t *rbt_node;
4234
4235 for (rbt_node = rbt_first(index_cache->words); rbt_node != nullptr;
4236 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4237 fts_tokenizer_word_t *word;
4238 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4239
4240 fts_node_t *fts_node;
4241 fts_node = static_cast<fts_node_t *>(ib_vector_last(word->nodes));
4242
4243 fts_node->synced = false;
4244 }
4245 }
4246
4247 /** Commit the SYNC, change state of processed doc ids etc.
4248 @param[in,out] sync sync state
4249 @return DB_SUCCESS if all OK */
4250 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
fts_sync_commit(fts_sync_t * sync)4251 fts_sync_commit(fts_sync_t *sync) {
4252 dberr_t error;
4253 trx_t *trx = sync->trx;
4254 fts_cache_t *cache = sync->table->fts->cache;
4255 doc_id_t last_doc_id;
4256
4257 trx->op_info = "doing SYNC commit";
4258
4259 /* After each Sync, update the CONFIG table about the max doc id
4260 we just sync-ed to index table */
4261 error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4262 &last_doc_id);
4263
4264 /* Get the list of deleted documents that are either in the
4265 cache or were headed there but were deleted before the add
4266 thread got to them. */
4267
4268 if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4269 error = fts_sync_add_deleted_cache(sync, cache->deleted_doc_ids);
4270 }
4271
4272 /* We need to do this within the deleted lock since fts_delete() can
4273 attempt to add a deleted doc id to the cache deleted id array. */
4274 fts_cache_clear(cache);
4275 DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4276 fts_cache_init(cache);
4277 rw_lock_x_unlock(&cache->lock);
4278
4279 if (error == DB_SUCCESS) {
4280 fts_sql_commit(trx);
4281
4282 } else if (error != DB_SUCCESS) {
4283 fts_sql_rollback(trx);
4284
4285 ib::error(ER_IB_MSG_476) << "(" << ut_strerr(error) << ") during SYNC.";
4286 }
4287
4288 if (fts_enable_diag_print && elapsed_time) {
4289 ib::info(ER_IB_MSG_477)
4290 << "SYNC for table " << sync->table->name
4291 << ": SYNC time: " << (ut_time_monotonic() - sync->start_time)
4292 << " secs: elapsed " << (double)n_nodes / elapsed_time << " ins/sec";
4293 }
4294
4295 /* Avoid assertion in trx_free(). */
4296 trx->dict_operation_lock_mode = 0;
4297 trx_free_for_background(trx);
4298
4299 return (error);
4300 }
4301
4302 /** Rollback a sync operation */
fts_sync_rollback(fts_sync_t * sync)4303 static void fts_sync_rollback(fts_sync_t *sync) /*!< in: sync state */
4304 {
4305 trx_t *trx = sync->trx;
4306 fts_cache_t *cache = sync->table->fts->cache;
4307
4308 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4309 ulint j;
4310 fts_index_cache_t *index_cache;
4311
4312 index_cache =
4313 static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));
4314
4315 /* Reset synced flag so nodes will not be skipped
4316 in the next sync, see fts_sync_write_words(). */
4317 fts_sync_index_reset(index_cache);
4318
4319 for (j = 0; fts_index_selector[j].value; ++j) {
4320 if (index_cache->ins_graph[j] != nullptr) {
4321 que_graph_free(index_cache->ins_graph[j]);
4322
4323 index_cache->ins_graph[j] = nullptr;
4324 }
4325
4326 if (index_cache->sel_graph[j] != nullptr) {
4327 que_graph_free(index_cache->sel_graph[j]);
4328
4329 index_cache->sel_graph[j] = nullptr;
4330 }
4331 }
4332 }
4333
4334 rw_lock_x_unlock(&cache->lock);
4335
4336 fts_sql_rollback(trx);
4337
4338 /* Avoid assertion in trx_free(). */
4339 trx->dict_operation_lock_mode = 0;
4340 trx_free_for_background(trx);
4341 }
4342
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
Only one sync runs at a time per sync state: a caller that finds
sync->in_progress set either waits on sync->event or, when wait is false,
returns DB_SUCCESS without doing anything.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@param[in]	has_dict	whether has dict operation lock
@return DB_SUCCESS if all OK */
static dberr_t fts_sync(fts_sync_t *sync, bool unlock_cache, bool wait,
                        bool has_dict) {
  ulint i;
  dberr_t error = DB_SUCCESS;
  fts_cache_t *cache = sync->table->fts->cache;

  rw_lock_x_lock(&cache->lock);

  /* Check if cache is being synced.
  Note: we release cache lock in fts_sync_write_words() to
  avoid long wait for the lock by other threads. */
  while (sync->in_progress) {
    rw_lock_x_unlock(&cache->lock);

    if (wait) {
      os_event_wait(sync->event);
    } else {
      return (DB_SUCCESS);
    }

    /* Re-take the lock before re-checking in_progress. */
    rw_lock_x_lock(&cache->lock);
  }
  sync->unlock_cache = unlock_cache;
  sync->in_progress = true;

  DEBUG_SYNC_C("fts_sync_begin");
  fts_sync_begin(sync);

  /* When sync in background, we hold dict operation lock
  to prevent DDL like DROP INDEX, etc. */
  if (has_dict) {
    sync->trx->dict_operation_lock_mode = RW_S_LATCH;
  }

begin_sync:
  if (cache->total_size > fts_max_cache_size) {
    /* Avoid the case: sync never finish when
    insert/update keeps coming. */
    ut_ad(sync->unlock_cache);
    sync->unlock_cache = false;
  }

  DEBUG_SYNC_C("fts_instrument_sync1");
  /* Write out every index cache. An error ends the pass early unless the
  sync has been flagged as interrupted. */
  for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
    fts_index_cache_t *index_cache;

    index_cache =
        static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));

    error = fts_sync_index(sync, index_cache);

    if (error != DB_SUCCESS && !sync->interrupted) {
      goto end_sync;
    }
  }

  DBUG_EXECUTE_IF("fts_instrument_sync_interrupted", sync->interrupted = true;
                  error = DB_INTERRUPTED; goto end_sync;);

  /* Make sure all the caches are synced. An index cache that is neither
  being dropped nor completely synced sends us back for another pass. */
  for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
    fts_index_cache_t *index_cache;

    index_cache =
        static_cast<fts_index_cache_t *>(ib_vector_get(cache->indexes, i));

    if (index_cache->index->to_be_dropped ||
        fts_sync_index_check(index_cache)) {
      continue;
    }

    goto begin_sync;
  }

end_sync:
  /* Both fts_sync_commit() and fts_sync_rollback() release cache->lock
  and free sync->trx. */
  if (error == DB_SUCCESS && !sync->interrupted) {
    error = fts_sync_commit(sync);
  } else {
    fts_sync_rollback(sync);
  }

  /* Clear the state flags under the cache lock and wake up any thread
  waiting on sync->event in the loop at the top of this function. */
  rw_lock_x_lock(&cache->lock);
  sync->interrupted = false;
  sync->in_progress = false;
  os_event_set(sync->event);
  rw_lock_x_unlock(&cache->lock);

  /* Reset the added/deleted counters of the cache. They are protected by
  deleted_lock, which is held only for the two assignments. */
  mutex_enter(&cache->deleted_lock);

  cache->added = 0;
  cache->deleted = 0;

  mutex_exit(&cache->deleted_lock);

  return (error);
}
4450
4451 /** Run SYNC on the table, i.e., write out data from the cache to the
4452 FTS auxiliary INDEX table and clear the cache at the end.
4453 @param[in,out] table fts table
4454 @param[in] unlock_cache whether unlock cache when write node
4455 @param[in] wait whether wait for existing sync to finish
4456 @param[in] has_dict whether has dict operation lock
4457 @return DB_SUCCESS on success, error code on failure. */
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4458 dberr_t fts_sync_table(dict_table_t *table, bool unlock_cache, bool wait,
4459 bool has_dict) {
4460 dberr_t err = DB_SUCCESS;
4461
4462 ut_ad(table->fts);
4463
4464 if (!dict_table_is_discarded(table) && table->fts->cache &&
4465 !table->is_corrupted()) {
4466 err = fts_sync(table->fts->cache->sync, unlock_cache, wait, has_dict);
4467 }
4468
4469 return (err);
4470 }
4471
4472 /** Check fts token
4473 1. for ngram token, check whether the token contains any words in stopwords
4474 2. for non-ngram token, check if it's stopword or less than fts_min_token_size
4475 or greater than fts_max_token_size.
4476 @param[in] token token string
4477 @param[in] stopwords stopwords rb tree
4478 @param[in] is_ngram is ngram parser
4479 @param[in] cs token charset
4480 @retval true if it is not stopword and length in range
4481 @retval false if it is stopword or lenght not in range */
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,bool is_ngram,const CHARSET_INFO * cs)4482 bool fts_check_token(const fts_string_t *token, const ib_rbt_t *stopwords,
4483 bool is_ngram, const CHARSET_INFO *cs) {
4484 ut_ad(cs != nullptr || stopwords == nullptr);
4485
4486 if (!is_ngram) {
4487 ib_rbt_bound_t parent;
4488
4489 if (token->f_n_char < fts_min_token_size ||
4490 token->f_n_char > fts_max_token_size ||
4491 (stopwords != nullptr && rbt_search(stopwords, &parent, token) == 0)) {
4492 return (false);
4493 } else {
4494 return (true);
4495 }
4496 }
4497
4498 /* Check token for ngram. */
4499 DBUG_EXECUTE_IF("fts_instrument_ignore_ngram_check", return (true););
4500
4501 /* We ignore fts_min_token_size when ngram */
4502 ut_ad(token->f_n_char > 0 && token->f_n_char <= fts_max_token_size);
4503
4504 if (stopwords == nullptr) {
4505 return (true);
4506 }
4507
4508 /*Ngram checks whether the token contains any words in stopwords.
4509 We can't simply use CONTAIN to search in stopwords, because it's
4510 built on COMPARE. So we need to tokenize the token into words
4511 from unigram to f_n_char, and check them separately. */
4512 for (ulint ngram_token_size = 1; ngram_token_size <= token->f_n_char;
4513 ngram_token_size++) {
4514 const char *start;
4515 const char *next;
4516 const char *end;
4517 ulint char_len;
4518 ulint n_chars;
4519
4520 start = reinterpret_cast<char *>(token->f_str);
4521 next = start;
4522 end = start + token->f_len;
4523 n_chars = 0;
4524
4525 while (next < end) {
4526 char_len = my_mbcharlen_ptr(cs, next, end);
4527
4528 if (next + char_len > end || char_len == 0) {
4529 break;
4530 } else {
4531 /* Skip SPACE */
4532 if (char_len == 1 && *next == ' ') {
4533 start = next + 1;
4534 next = start;
4535 n_chars = 0;
4536
4537 continue;
4538 }
4539
4540 next += char_len;
4541 n_chars++;
4542 }
4543
4544 if (n_chars == ngram_token_size) {
4545 fts_string_t ngram_token;
4546 ngram_token.f_str = reinterpret_cast<byte *>(const_cast<char *>(start));
4547 ngram_token.f_len = next - start;
4548 ngram_token.f_n_char = ngram_token_size;
4549
4550 ib_rbt_bound_t parent;
4551 if (rbt_search(stopwords, &parent, &ngram_token) == 0) {
4552 return (false);
4553 }
4554
4555 /* Move a char forward */
4556 start += my_mbcharlen_ptr(cs, start, end);
4557 n_chars = ngram_token_size - 1;
4558 }
4559 }
4560 }
4561
4562 return (true);
4563 }
4564
4565 /** Add the token and its start position to the token's list of positions.
4566 @param[in,out] result_doc result doc rb tree
4567 @param[in] str token string
4568 @param[in] position token position */
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)4569 static void fts_add_token(fts_doc_t *result_doc, fts_string_t str,
4570 ulint position) {
4571 /* Ignore string whose character number is less than
4572 "fts_min_token_size" or more than "fts_max_token_size" */
4573
4574 if (fts_check_token(&str, nullptr, result_doc->is_ngram,
4575 result_doc->charset)) {
4576 mem_heap_t *heap;
4577 fts_string_t t_str;
4578 fts_token_t *token;
4579 ib_rbt_bound_t parent;
4580 ulint newlen;
4581
4582 heap = static_cast<mem_heap_t *>(result_doc->self_heap->arg);
4583
4584 t_str.f_n_char = str.f_n_char;
4585
4586 t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
4587
4588 t_str.f_str = static_cast<byte *>(mem_heap_alloc(heap, t_str.f_len));
4589
4590 /* For binary collations, a case sensitive search is
4591 performed. Hence don't convert to lower case. */
4592 if (my_binary_compare(result_doc->charset)) {
4593 memcpy(t_str.f_str, str.f_str, str.f_len);
4594 t_str.f_str[str.f_len] = 0;
4595 newlen = str.f_len;
4596 } else {
4597 newlen =
4598 innobase_fts_casedn_str(result_doc->charset, (char *)str.f_str,
4599 str.f_len, (char *)t_str.f_str, t_str.f_len);
4600 }
4601
4602 t_str.f_len = newlen;
4603 t_str.f_str[newlen] = 0;
4604
4605 /* Add the word to the document statistics. If the word
4606 hasn't been seen before we create a new entry for it. */
4607 if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4608 fts_token_t new_token;
4609
4610 new_token.text.f_len = newlen;
4611 new_token.text.f_str = t_str.f_str;
4612 new_token.text.f_n_char = t_str.f_n_char;
4613
4614 new_token.positions =
4615 ib_vector_create(result_doc->self_heap, sizeof(ulint), 32);
4616
4617 parent.last = rbt_add_node(result_doc->tokens, &parent, &new_token);
4618
4619 ut_ad(rbt_validate(result_doc->tokens));
4620 }
4621
4622 token = rbt_value(fts_token_t, parent.last);
4623 ib_vector_push(token->positions, &position);
4624 }
4625 }
4626
4627 /** Process next token from document starting at the given position, i.e., add
4628 the token's start position to the token's list of positions.
4629 @param[in,out] doc document to tokenize
4630 @param[out] result if provided, save result here
4631 @param[in] start_pos start position in text
4632 @param[in] add_pos add this position to all tokens from this
4633 tokenization
4634 @return number of characters handled in this call */
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)4635 static ulint fts_process_token(fts_doc_t *doc, fts_doc_t *result,
4636 ulint start_pos, ulint add_pos) {
4637 ulint ret;
4638 fts_string_t str;
4639 ulint position;
4640 fts_doc_t *result_doc;
4641 byte buf[FTS_MAX_WORD_LEN + 1];
4642
4643 str.f_str = buf;
4644
4645 /* Determine where to save the result. */
4646 result_doc = (result != nullptr) ? result : doc;
4647
4648 /* The length of a string in characters is set here only. */
4649
4650 ret = innobase_mysql_fts_get_token(doc->charset, doc->text.f_str + start_pos,
4651 doc->text.f_str + doc->text.f_len, &str);
4652
4653 position = start_pos + ret - str.f_len + add_pos;
4654
4655 fts_add_token(result_doc, str, position);
4656
4657 return (ret);
4658 }
4659
4660 /** Get token char size by charset
4661 @return token size */
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)4662 ulint fts_get_token_size(const CHARSET_INFO *cs, /*!< in: Character set */
4663 const char *token, /*!< in: token */
4664 ulint len) /*!< in: token length */
4665 {
4666 char *start;
4667 char *end;
4668 ulint size = 0;
4669
4670 /* const_cast is for reinterpret_cast below, or it will fail. */
4671 start = const_cast<char *>(token);
4672 end = start + len;
4673 while (start < end) {
4674 int ctype;
4675 int mbl;
4676
4677 mbl = cs->cset->ctype(cs, &ctype, reinterpret_cast<uchar *>(start),
4678 reinterpret_cast<uchar *>(end));
4679
4680 size++;
4681
4682 start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
4683 }
4684
4685 return (size);
4686 }
4687
4688 /** FTS plugin parser 'myql_parser' callback function for document tokenize.
4689 Refer to 'MYSQL_FTPARSER_PARAM' for more detail.
4690 @return always returns 0 */
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,char * doc,int len)4691 int fts_tokenize_document_internal(
4692 MYSQL_FTPARSER_PARAM *param, /*!< in: parser parameter */
4693 char *doc, /*!< in/out: document */
4694 int len) /*!< in: document length */
4695 {
4696 fts_string_t str;
4697 byte buf[FTS_MAX_WORD_LEN + 1];
4698 MYSQL_FTPARSER_BOOLEAN_INFO bool_info = {FT_TOKEN_WORD, 0, 0, 0, 0, 0,
4699 ' ', nullptr};
4700
4701 ut_ad(len >= 0);
4702
4703 str.f_str = buf;
4704
4705 for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
4706 inc =
4707 innobase_mysql_fts_get_token(const_cast<CHARSET_INFO *>(param->cs),
4708 reinterpret_cast<byte *>(doc) + i,
4709 reinterpret_cast<byte *>(doc) + len, &str);
4710
4711 if (str.f_len > 0) {
4712 bool_info.position = static_cast<int>(i + inc - str.f_len);
4713 ut_ad(bool_info.position >= 0);
4714
4715 /* Stop when add word fails */
4716 if (param->mysql_add_word(param, reinterpret_cast<char *>(str.f_str),
4717 static_cast<int>(str.f_len), &bool_info)) {
4718 break;
4719 }
4720 }
4721 }
4722
4723 return (0);
4724 }
4725
4726 /** FTS plugin parser 'myql_add_word' callback function for document tokenize.
4727 Refer to 'MYSQL_FTPARSER_PARAM' for more detail.
4728 @return always returns 0 */
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)4729 static int fts_tokenize_add_word_for_parser(
4730 MYSQL_FTPARSER_PARAM *param, /* in: parser paramter */
4731 char *word, /* in: token word */
4732 int word_len, /* in: word len */
4733 MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info) /* in: word boolean info */
4734 {
4735 fts_string_t str;
4736 fts_tokenize_param_t *fts_param;
4737 fts_doc_t *result_doc;
4738 ulint position;
4739
4740 fts_param = static_cast<fts_tokenize_param_t *>(param->mysql_ftparam);
4741 result_doc = fts_param->result_doc;
4742 ut_ad(result_doc != nullptr);
4743
4744 str.f_str = reinterpret_cast<byte *>(word);
4745 str.f_len = word_len;
4746 str.f_n_char =
4747 fts_get_token_size(const_cast<CHARSET_INFO *>(param->cs), word, word_len);
4748
4749 ut_ad(boolean_info->position >= 0);
4750 position = boolean_info->position + fts_param->add_pos;
4751
4752 fts_add_token(result_doc, str, position);
4753
4754 return (0);
4755 }
4756
4757 /** Parse a document using an external / user supplied parser */
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)4758 static void fts_tokenize_by_parser(
4759 fts_doc_t *doc, /* in/out: document to tokenize */
4760 st_mysql_ftparser *parser, /* in: plugin fts parser */
4761 fts_tokenize_param_t *fts_param) /* in: fts tokenize param */
4762 {
4763 MYSQL_FTPARSER_PARAM param;
4764
4765 ut_a(parser);
4766
4767 /* Set paramters for param */
4768 param.mysql_parse = fts_tokenize_document_internal;
4769 param.mysql_add_word = fts_tokenize_add_word_for_parser;
4770 param.mysql_ftparam = fts_param;
4771 param.cs = doc->charset;
4772 param.doc = reinterpret_cast<char *>(doc->text.f_str);
4773 param.length = static_cast<int>(doc->text.f_len);
4774 param.mode = MYSQL_FTPARSER_SIMPLE_MODE;
4775
4776 PARSER_INIT(parser, ¶m);
4777 parser->parse(¶m);
4778 PARSER_DEINIT(parser, ¶m);
4779 }
4780
4781 /** Tokenize a document.
4782 @param[in,out] doc document to tokenize
4783 @param[out] result tokenization result
4784 @param[in] parser pluggable parser */
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)4785 static void fts_tokenize_document(fts_doc_t *doc, fts_doc_t *result,
4786 st_mysql_ftparser *parser) {
4787 ut_a(!doc->tokens);
4788 ut_a(doc->charset);
4789
4790 doc->tokens = rbt_create_arg_cmp(sizeof(fts_token_t), innobase_fts_text_cmp,
4791 doc->charset);
4792
4793 if (parser != nullptr) {
4794 fts_tokenize_param_t fts_param;
4795
4796 fts_param.result_doc = (result != nullptr) ? result : doc;
4797 fts_param.add_pos = 0;
4798
4799 fts_tokenize_by_parser(doc, parser, &fts_param);
4800 } else {
4801 ulint inc;
4802
4803 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4804 inc = fts_process_token(doc, result, i, 0);
4805 ut_a(inc > 0);
4806 }
4807 }
4808 }
4809
4810 /** Continue to tokenize a document.
4811 @param[in,out] doc document to tokenize
4812 @param[in] add_pos add this position to all tokens from this tokenization
4813 @param[out] result tokenization result
4814 @param[in] parser pluggable parser */
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)4815 static void fts_tokenize_document_next(fts_doc_t *doc, ulint add_pos,
4816 fts_doc_t *result,
4817 st_mysql_ftparser *parser) {
4818 ut_a(doc->tokens);
4819
4820 if (parser) {
4821 fts_tokenize_param_t fts_param;
4822
4823 fts_param.result_doc = (result != nullptr) ? result : doc;
4824 fts_param.add_pos = add_pos;
4825
4826 fts_tokenize_by_parser(doc, parser, &fts_param);
4827 } else {
4828 ulint inc;
4829
4830 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4831 inc = fts_process_token(doc, result, i, add_pos);
4832 ut_a(inc > 0);
4833 }
4834 }
4835 }
4836
4837 /** Create the vector of fts_get_doc_t instances.
4838 @param[in,out] cache fts cache
4839 @return vector of fts_get_doc_t instances */
fts_get_docs_create(fts_cache_t * cache)4840 static ib_vector_t *fts_get_docs_create(fts_cache_t *cache) {
4841 ib_vector_t *get_docs;
4842
4843 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
4844
4845 /* We need one instance of fts_get_doc_t per index. */
4846 get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
4847
4848 /* Create the get_doc instance, we need one of these
4849 per FTS index. */
4850 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4851 dict_index_t **index;
4852 fts_get_doc_t *get_doc;
4853
4854 index = static_cast<dict_index_t **>(ib_vector_get(cache->indexes, i));
4855
4856 get_doc = static_cast<fts_get_doc_t *>(ib_vector_push(get_docs, nullptr));
4857
4858 memset(get_doc, 0x0, sizeof(*get_doc));
4859
4860 get_doc->index_cache = fts_get_index_cache(cache, *index);
4861 get_doc->cache = cache;
4862
4863 /* Must find the index cache. */
4864 ut_a(get_doc->index_cache != nullptr);
4865 }
4866
4867 return (get_docs);
4868 }
4869
4870 /********************************************************************
4871 Release any resources held by the fts_get_doc_t instances. */
fts_get_docs_clear(ib_vector_t * get_docs)4872 static void fts_get_docs_clear(
4873 ib_vector_t *get_docs) /*!< in: Doc retrieval vector */
4874 {
4875 ulint i;
4876
4877 /* Release the get doc graphs if any. */
4878 for (i = 0; i < ib_vector_size(get_docs); ++i) {
4879 fts_get_doc_t *get_doc =
4880 static_cast<fts_get_doc_t *>(ib_vector_get(get_docs, i));
4881
4882 if (get_doc->get_document_graph != nullptr) {
4883 ut_a(get_doc->index_cache);
4884
4885 fts_que_graph_free(get_doc->get_document_graph);
4886 get_doc->get_document_graph = nullptr;
4887 }
4888 }
4889 }
4890
4891 /** Get the initial Doc ID by consulting the CONFIG table
4892 @return initial Doc ID */
fts_init_doc_id(const dict_table_t * table)4893 doc_id_t fts_init_doc_id(const dict_table_t *table) /*!< in: table */
4894 {
4895 doc_id_t max_doc_id = 0;
4896
4897 rw_lock_x_lock(&table->fts->cache->lock);
4898
4899 /* Return if the table is already initialized for DOC ID */
4900 if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
4901 rw_lock_x_unlock(&table->fts->cache->lock);
4902 return (0);
4903 }
4904
4905 DEBUG_SYNC_C("fts_initialize_doc_id");
4906
4907 /* Then compare this value with the ID value stored in the CONFIG
4908 table. The larger one will be our new initial Doc ID */
4909 fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
4910
4911 /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
4912 creating index (and add doc id column. No need to recovery
4913 documents */
4914 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
4915 fts_init_index((dict_table_t *)table, TRUE);
4916 }
4917
4918 table->fts->fts_status |= ADDED_TABLE_SYNCED;
4919
4920 table->fts->cache->first_doc_id = max_doc_id;
4921
4922 rw_lock_x_unlock(&table->fts->cache->lock);
4923
4924 ut_ad(max_doc_id > 0);
4925
4926 return (max_doc_id);
4927 }
4928
#ifdef FTS_MULT_INDEX
/** Check whether the index served by get_doc is in the affected set.
@param[in]	fts_indexes	affected FTS indexes
@param[in]	get_doc		info for reading the document
@return TRUE if the index is found in fts_indexes, else FALSE */
static ibool fts_is_index_updated(const ib_vector_t *fts_indexes,
                                  const fts_get_doc_t *get_doc) {
  const dict_index_t *wanted = get_doc->index_cache->index;
  ulint pos = 0;

  while (pos < ib_vector_size(fts_indexes)) {
    const dict_index_t *candidate = static_cast<const dict_index_t *>(
        ib_vector_getp_const(fts_indexes, pos));

    ut_a(candidate != NULL);

    if (candidate == wanted) {
      return (TRUE);
    }

    ++pos;
  }

  return (FALSE);
}
#endif
4956
4957 /** Fetch COUNT(*) from specified table.
4958 @return the number of rows in the table */
fts_get_rows_count(fts_table_t * fts_table)4959 ulint fts_get_rows_count(fts_table_t *fts_table) /*!< in: fts table to read */
4960 {
4961 trx_t *trx;
4962 pars_info_t *info;
4963 que_t *graph;
4964 dberr_t error;
4965 ulint count = 0;
4966 char table_name[MAX_FULL_NAME_LEN];
4967
4968 trx = trx_allocate_for_background();
4969
4970 trx->op_info = "fetching FT table rows count";
4971
4972 info = pars_info_create();
4973
4974 pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
4975
4976 fts_get_table_name(fts_table, table_name);
4977 pars_info_bind_id(info, true, "table_name", table_name);
4978
4979 graph = fts_parse_sql(fts_table, info,
4980 "DECLARE FUNCTION my_func;\n"
4981 "DECLARE CURSOR c IS"
4982 " SELECT COUNT(*)"
4983 " FROM $table_name;\n"
4984 "BEGIN\n"
4985 "\n"
4986 "OPEN c;\n"
4987 "WHILE 1 = 1 LOOP\n"
4988 " FETCH c INTO my_func();\n"
4989 " IF c % NOTFOUND THEN\n"
4990 " EXIT;\n"
4991 " END IF;\n"
4992 "END LOOP;\n"
4993 "CLOSE c;");
4994
4995 for (;;) {
4996 error = fts_eval_sql(trx, graph);
4997
4998 if (error == DB_SUCCESS) {
4999 fts_sql_commit(trx);
5000
5001 break; /* Exit the loop. */
5002 } else {
5003 fts_sql_rollback(trx);
5004
5005 if (error == DB_LOCK_WAIT_TIMEOUT) {
5006 ib::warn(ER_IB_MSG_478) << "lock wait timeout reading"
5007 " FTS table. Retrying!";
5008
5009 trx->error_state = DB_SUCCESS;
5010 } else {
5011 ib::error(ER_IB_MSG_479)
5012 << "(" << ut_strerr(error) << ") while reading FTS table.";
5013
5014 break; /* Exit the loop. */
5015 }
5016 }
5017 }
5018
5019 fts_que_graph_free(graph);
5020
5021 trx_free_for_background(trx);
5022
5023 return (count);
5024 }
5025
#ifdef FTS_CACHE_SIZE_DEBUG
/** Read the max cache size parameter from the config table and store it
in the sync state.
@param[in,out]	sync	sync state; max_cache_size is overwritten */
static void fts_update_max_cache_size(fts_sync_t *sync) {
  fts_table_t config_table;

  FTS_INIT_FTS_TABLE(&config_table, FTS_SUFFIX_CONFIG, FTS_COMMON_TABLE,
                     sync->table);

  trx_t *bg_trx = trx_allocate_for_background();

  /* The size returned is in bytes. */
  sync->max_cache_size = fts_get_max_cache_size(bg_trx, &config_table);

  fts_sql_commit(bg_trx);
  trx_free_for_background(bg_trx);
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5046
5047 /** Free the modified rows of a table. */
5048 UNIV_INLINE
fts_trx_table_rows_free(ib_rbt_t * rows)5049 void fts_trx_table_rows_free(ib_rbt_t *rows) /*!< in: rbt of rows to free */
5050 {
5051 const ib_rbt_node_t *node;
5052
5053 for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5054 fts_trx_row_t *row;
5055
5056 row = rbt_value(fts_trx_row_t, node);
5057
5058 if (row->fts_indexes != nullptr) {
5059 /* This vector shouldn't be using the
5060 heap allocator. */
5061 ut_a(row->fts_indexes->allocator->arg == nullptr);
5062
5063 ib_vector_free(row->fts_indexes);
5064 row->fts_indexes = nullptr;
5065 }
5066
5067 ut_free(rbt_remove_node(rows, node));
5068 }
5069
5070 ut_a(rbt_empty(rows));
5071 rbt_free(rows);
5072 }
5073
5074 /** Free an FTS savepoint instance. */
5075 UNIV_INLINE
fts_savepoint_free(fts_savepoint_t * savepoint)5076 void fts_savepoint_free(
5077 fts_savepoint_t *savepoint) /*!< in: savepoint instance */
5078 {
5079 const ib_rbt_node_t *node;
5080 ib_rbt_t *tables = savepoint->tables;
5081
5082 /* Nothing to free! */
5083 if (tables == nullptr) {
5084 return;
5085 }
5086
5087 for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5088 fts_trx_table_t *ftt;
5089 fts_trx_table_t **fttp;
5090
5091 fttp = rbt_value(fts_trx_table_t *, node);
5092 ftt = *fttp;
5093
5094 /* This can be NULL if a savepoint was released. */
5095 if (ftt->rows != nullptr) {
5096 fts_trx_table_rows_free(ftt->rows);
5097 ftt->rows = nullptr;
5098 }
5099
5100 /* This can be NULL if a savepoint was released. */
5101 if (ftt->added_doc_ids != nullptr) {
5102 fts_doc_ids_free(ftt->added_doc_ids);
5103 ftt->added_doc_ids = nullptr;
5104 }
5105
5106 /* The default savepoint name must be NULL. */
5107 if (ftt->docs_added_graph) {
5108 fts_que_graph_free(ftt->docs_added_graph);
5109 }
5110
5111 /* NOTE: We are responsible for free'ing the node */
5112 ut_free(rbt_remove_node(tables, node));
5113 }
5114
5115 ut_a(rbt_empty(tables));
5116 rbt_free(tables);
5117 savepoint->tables = nullptr;
5118 }
5119
5120 /** Free an FTS trx. */
fts_trx_free(fts_trx_t * fts_trx)5121 void fts_trx_free(fts_trx_t *fts_trx) /* in, own: FTS trx */
5122 {
5123 ulint i;
5124
5125 for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5126 fts_savepoint_t *savepoint;
5127
5128 savepoint =
5129 static_cast<fts_savepoint_t *>(ib_vector_get(fts_trx->savepoints, i));
5130
5131 /* The default savepoint name must be NULL. */
5132 if (i == 0) {
5133 ut_a(savepoint->name == nullptr);
5134 }
5135
5136 fts_savepoint_free(savepoint);
5137 }
5138
5139 for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5140 fts_savepoint_t *savepoint;
5141
5142 savepoint =
5143 static_cast<fts_savepoint_t *>(ib_vector_get(fts_trx->last_stmt, i));
5144
5145 /* The default savepoint name must be NULL. */
5146 if (i == 0) {
5147 ut_a(savepoint->name == nullptr);
5148 }
5149
5150 fts_savepoint_free(savepoint);
5151 }
5152
5153 if (fts_trx->heap) {
5154 mem_heap_free(fts_trx->heap);
5155 }
5156 }
5157
5158 /** Extract the doc id from the FTS hidden column.
5159 @return doc id that was extracted from rec */
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5160 doc_id_t fts_get_doc_id_from_row(dict_table_t *table, /*!< in: table */
5161 dtuple_t *row) /*!< in: row whose FTS doc id we
5162 want to extract.*/
5163 {
5164 dfield_t *field;
5165 doc_id_t doc_id = 0;
5166
5167 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5168
5169 field = dtuple_get_nth_field(row, table->fts->doc_col);
5170
5171 ut_a(dfield_get_len(field) == sizeof(doc_id));
5172 ut_a(dfield_get_type(field)->mtype == DATA_INT);
5173
5174 doc_id = fts_read_doc_id(static_cast<const byte *>(dfield_get_data(field)));
5175
5176 return (doc_id);
5177 }
5178
5179 /** Extract the doc id from the record that belongs to index.
5180 @param[in] table table
5181 @param[in] rec record contains FTS_DOC_ID
5182 @param[in] index index of rec
5183 @param[in] heap heap memory
5184 @return doc id that was extracted from rec */
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5185 doc_id_t fts_get_doc_id_from_rec(dict_table_t *table, const rec_t *rec,
5186 const dict_index_t *index, mem_heap_t *heap) {
5187 ulint len;
5188 const byte *data;
5189 ulint col_no;
5190 doc_id_t doc_id = 0;
5191 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5192 ulint *offsets = offsets_;
5193 mem_heap_t *my_heap = heap;
5194
5195 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5196
5197 rec_offs_init(offsets_);
5198
5199 offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &my_heap);
5200
5201 col_no = index->get_col_pos(table->fts->doc_col);
5202
5203 ut_ad(col_no != ULINT_UNDEFINED);
5204
5205 data = rec_get_nth_field(rec, offsets, col_no, &len);
5206
5207 ut_a(len == 8);
5208 ut_ad(8 == sizeof(doc_id));
5209 doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5210
5211 if (my_heap && !heap) {
5212 mem_heap_free(my_heap);
5213 }
5214
5215 return (doc_id);
5216 }
5217
5218 /** Search the index specific cache for a particular FTS index.
5219 @return the index specific cache else NULL */
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5220 fts_index_cache_t *fts_find_index_cache(
5221 const fts_cache_t *cache, /*!< in: cache to search */
5222 const dict_index_t *index) /*!< in: index to search for */
5223 {
5224 /* We cast away the const because our internal function, takes
5225 non-const cache arg and returns a non-const pointer. */
5226 return (static_cast<fts_index_cache_t *>(
5227 fts_get_index_cache((fts_cache_t *)cache, index)));
5228 }
5229
5230 /** Search cache for word.
5231 @return the word node vector if found else NULL */
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5232 const ib_vector_t *fts_cache_find_word(
5233 const fts_index_cache_t *index_cache, /*!< in: cache to search */
5234 const fts_string_t *text) /*!< in: word to search for */
5235 {
5236 ib_rbt_bound_t parent;
5237 const ib_vector_t *nodes = nullptr;
5238 #ifdef UNIV_DEBUG
5239 dict_table_t *table = index_cache->index->table;
5240 fts_cache_t *cache = table->fts->cache;
5241
5242 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5243 #endif /* UNIV_DEBUG */
5244
5245 /* Lookup the word in the rb tree */
5246 if (rbt_search(index_cache->words, &parent, text) == 0) {
5247 const fts_tokenizer_word_t *word;
5248
5249 word = rbt_value(fts_tokenizer_word_t, parent.last);
5250
5251 nodes = word->nodes;
5252 }
5253
5254 return (nodes);
5255 }
5256
5257 /** Append deleted doc ids to vector. */
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5258 void fts_cache_append_deleted_doc_ids(
5259 const fts_cache_t *cache, /*!< in: cache to use */
5260 ib_vector_t *vector) /*!< in: append to this vector */
5261 {
5262 mutex_enter(const_cast<ib_mutex_t *>(&cache->deleted_lock));
5263
5264 if (cache->deleted_doc_ids == nullptr) {
5265 mutex_exit((ib_mutex_t *)&cache->deleted_lock);
5266 return;
5267 }
5268
5269 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5270 fts_update_t *update;
5271
5272 update =
5273 static_cast<fts_update_t *>(ib_vector_get(cache->deleted_doc_ids, i));
5274
5275 ib_vector_push(vector, &update->doc_id);
5276 }
5277
5278 mutex_exit((ib_mutex_t *)&cache->deleted_lock);
5279 }
5280
5281 /** Wait for the background thread to start. We poll to detect change
5282 of state, which is acceptable, since the wait should happen only
5283 once during startup.
5284 @return true if the thread started else false (i.e timed out) */
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5285 ibool fts_wait_for_background_thread_to_start(
5286 dict_table_t *table, /*!< in: table to which the thread
5287 is attached */
5288 ulint max_wait) /*!< in: time in microseconds, if
5289 set to 0 then it disables
5290 timeout checking */
5291 {
5292 ulint count = 0;
5293 ibool done = FALSE;
5294
5295 ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5296
5297 for (;;) {
5298 fts_t *fts = table->fts;
5299
5300 mutex_enter(&fts->bg_threads_mutex);
5301
5302 if (fts->fts_status & BG_THREAD_READY) {
5303 done = TRUE;
5304 }
5305
5306 mutex_exit(&fts->bg_threads_mutex);
5307
5308 if (!done) {
5309 os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5310
5311 if (max_wait > 0) {
5312 max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5313
5314 /* We ignore the residual value. */
5315 if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5316 break;
5317 }
5318 }
5319
5320 ++count;
5321 } else {
5322 break;
5323 }
5324
5325 if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5326 ib::error(ER_IB_MSG_480) << "The background thread for the FTS"
5327 " table "
5328 << table->name << " refuses to start";
5329
5330 count = 0;
5331 }
5332 }
5333
5334 return (done);
5335 }
5336
5337 /** Add the FTS document id hidden column. */
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5338 void fts_add_doc_id_column(
5339 dict_table_t *table, /*!< in/out: Table with FTS index */
5340 mem_heap_t *heap) /*!< in: temporary memory heap, or NULL */
5341 {
5342 dict_mem_table_add_col(
5343 table, heap, FTS_DOC_ID_COL_NAME, DATA_INT,
5344 dtype_form_prtype(
5345 DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE | DATA_FTS_DOC_ID,
5346 0),
5347 sizeof(doc_id_t), false);
5348 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5349 }
5350
5351 /** Add new fts doc id to the update vector.
5352 @param[in] table the table that contains the FTS index.
5353 @param[in,out] ufield the fts doc id field in the update vector.
5354 No new memory is allocated for this in this
5355 function.
5356 @param[in,out] next_doc_id the fts doc id that has been added to the
5357 update vector. If 0, a new fts doc id is
5358 automatically generated. The memory provided
5359 for this argument will be used by the update
5360 vector. Ensure that the life time of this
5361 memory matches that of the update vector.
5362 @return the fts doc id used in the update vector */
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5363 doc_id_t fts_update_doc_id(dict_table_t *table, upd_field_t *ufield,
5364 doc_id_t *next_doc_id) {
5365 doc_id_t doc_id;
5366 dberr_t error = DB_SUCCESS;
5367
5368 if (*next_doc_id) {
5369 doc_id = *next_doc_id;
5370 } else {
5371 /* Get the new document id that will be added. */
5372 error = fts_get_next_doc_id(table, &doc_id);
5373 }
5374
5375 if (error == DB_SUCCESS) {
5376 dict_index_t *clust_index;
5377 dict_col_t *col = table->get_col(table->fts->doc_col);
5378
5379 ufield->exp = nullptr;
5380
5381 ufield->new_val.len = sizeof(doc_id);
5382
5383 clust_index = table->first_index();
5384
5385 ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5386 col->copy_type(dfield_get_type(&ufield->new_val));
5387
5388 /* It is possible we update record that has
5389 not yet be sync-ed from last crash. */
5390
5391 /* Convert to storage byte order. */
5392 ut_a(doc_id != FTS_NULL_DOC_ID);
5393 fts_write_doc_id((byte *)next_doc_id, doc_id);
5394
5395 ufield->new_val.data = next_doc_id;
5396 ufield->new_val.ext = 0;
5397 }
5398
5399 return (doc_id);
5400 }
5401
5402 /** fts_t constructor.
5403 @param[in] table table with FTS indexes
5404 @param[in,out] heap memory heap where 'this' is stored */
fts_t(dict_table_t * table,mem_heap_t * heap)5405 fts_t::fts_t(dict_table_t *table, mem_heap_t *heap)
5406 : bg_threads(0),
5407 fts_status(0),
5408 add_wq(nullptr),
5409 cache(nullptr),
5410 doc_col(ULINT_UNDEFINED),
5411 fts_heap(heap) {
5412 ut_a(table->fts == nullptr);
5413
5414 mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
5415
5416 ib_alloc_t *heap_alloc = ib_heap_allocator_create(fts_heap);
5417
5418 indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t *), 4);
5419
5420 dict_table_get_all_fts_indexes(table, indexes);
5421 }
5422
5423 /** fts_t destructor. */
~fts_t()5424 fts_t::~fts_t() {
5425 mutex_free(&bg_threads_mutex);
5426
5427 ut_ad(add_wq == nullptr);
5428
5429 if (cache != nullptr) {
5430 fts_cache_clear(cache);
5431 fts_cache_destroy(cache);
5432 cache = nullptr;
5433 }
5434
5435 /* There is no need to call ib_vector_free() on this->indexes
5436 because it is stored in this->fts_heap. */
5437 }
5438
5439 /** Create an instance of fts_t.
5440 @return instance of fts_t */
fts_create(dict_table_t * table)5441 fts_t *fts_create(dict_table_t *table) /*!< in/out: table with FTS indexes */
5442 {
5443 fts_t *fts;
5444 mem_heap_t *heap;
5445
5446 heap = mem_heap_create(512);
5447
5448 fts = static_cast<fts_t *>(mem_heap_alloc(heap, sizeof(*fts)));
5449
5450 new (fts) fts_t(table, heap);
5451
5452 return (fts);
5453 }
5454
5455 /** Free the FTS resources. */
fts_free(dict_table_t * table)5456 void fts_free(dict_table_t *table) /*!< in/out: table with FTS indexes */
5457 {
5458 fts_t *fts = table->fts;
5459
5460 fts->~fts_t();
5461
5462 mem_heap_free(fts->fts_heap);
5463
5464 table->fts = nullptr;
5465 }
5466
/* NOTE: everything up to the matching #endif is compiled out. It is kept
as a sketch of the FTS background thread shutdown handshake. */
#if 0  // TODO: Enable this in WL#6608
/*********************************************************************//**
Signal FTS threads to initiate shutdown.
Sets BG_THREAD_STOP in fts->fts_status under bg_threads_mutex. */
void
fts_start_shutdown(
	dict_table_t*	table,		/*!< in: table with FTS indexes */
	fts_t*		fts)		/*!< in: fts instance that needs
					to be informed about shutdown */
{
	mutex_enter(&fts->bg_threads_mutex);

	fts->fts_status |= BG_THREAD_STOP;

	mutex_exit(&fts->bg_threads_mutex);

}

/*********************************************************************//**
Wait for FTS threads to shutdown.
BG_THREAD_STOP must already be set in fts->fts_status. */
void
fts_shutdown(
	dict_table_t*	table,		/*!< in: table with FTS indexes */
	fts_t*		fts)		/*!< in: fts instance to shutdown */
{
	mutex_enter(&fts->bg_threads_mutex);

	ut_a(fts->fts_status & BG_THREAD_STOP);

	/* NOTE(review): the wait happens while bg_threads_mutex is held;
	confirm this is intended before enabling the code. */
	dict_table_wait_for_bg_threads_to_exit(table, 20000);

	mutex_exit(&fts->bg_threads_mutex);
}
#endif
5500
5501 /** Take a FTS savepoint. */
5502 UNIV_INLINE
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)5503 void fts_savepoint_copy(const fts_savepoint_t *src, /*!< in: source savepoint */
5504 fts_savepoint_t *dst) /*!< out: destination savepoint */
5505 {
5506 const ib_rbt_node_t *node;
5507 const ib_rbt_t *tables;
5508
5509 tables = src->tables;
5510
5511 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5512 fts_trx_table_t *ftt_dst;
5513 const fts_trx_table_t **ftt_src;
5514
5515 ftt_src = rbt_value(const fts_trx_table_t *, node);
5516
5517 ftt_dst = fts_trx_table_clone(*ftt_src);
5518
5519 rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5520 }
5521 }
5522
5523 /** Take a FTS savepoint. */
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)5524 void fts_savepoint_take(trx_t *trx, /*!< in: transaction */
5525 fts_trx_t *fts_trx, /*!< in: fts transaction */
5526 const char *name) /*!< in: savepoint name */
5527 {
5528 mem_heap_t *heap;
5529 fts_savepoint_t *savepoint;
5530 fts_savepoint_t *last_savepoint;
5531
5532 ut_a(name != nullptr);
5533
5534 heap = fts_trx->heap;
5535
5536 /* The implied savepoint must exist. */
5537 ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5538
5539 last_savepoint =
5540 static_cast<fts_savepoint_t *>(ib_vector_last(fts_trx->savepoints));
5541 savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5542
5543 if (last_savepoint->tables != nullptr) {
5544 fts_savepoint_copy(last_savepoint, savepoint);
5545 }
5546 }
5547
5548 /** Lookup a savepoint instance by name.
5549 @return ULINT_UNDEFINED if not found */
5550 UNIV_INLINE
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)5551 ulint fts_savepoint_lookup(ib_vector_t *savepoints, /*!< in: savepoints */
5552 const char *name) /*!< in: savepoint name */
5553 {
5554 ulint i;
5555
5556 ut_a(ib_vector_size(savepoints) > 0);
5557
5558 for (i = 1; i < ib_vector_size(savepoints); ++i) {
5559 fts_savepoint_t *savepoint;
5560
5561 savepoint = static_cast<fts_savepoint_t *>(ib_vector_get(savepoints, i));
5562
5563 if (strcmp(name, savepoint->name) == 0) {
5564 return (i);
5565 }
5566 }
5567
5568 return (ULINT_UNDEFINED);
5569 }
5570
5571 /** Release the savepoint data identified by name. All savepoints created
5572 after the named savepoint are kept. */
fts_savepoint_release(trx_t * trx,const char * name)5573 void fts_savepoint_release(trx_t *trx, /*!< in: transaction */
5574 const char *name) /*!< in: savepoint name */
5575 {
5576 ut_a(name != nullptr);
5577
5578 ib_vector_t *savepoints = trx->fts_trx->savepoints;
5579
5580 ut_a(ib_vector_size(savepoints) > 0);
5581
5582 ulint i = fts_savepoint_lookup(savepoints, name);
5583 if (i != ULINT_UNDEFINED) {
5584 ut_a(i >= 1);
5585
5586 fts_savepoint_t *savepoint;
5587 savepoint = static_cast<fts_savepoint_t *>(ib_vector_get(savepoints, i));
5588
5589 if (i == ib_vector_size(savepoints) - 1) {
5590 /* If the savepoint is the last, we save its
5591 tables to the previous savepoint. */
5592 fts_savepoint_t *prev_savepoint;
5593 prev_savepoint =
5594 static_cast<fts_savepoint_t *>(ib_vector_get(savepoints, i - 1));
5595
5596 ib_rbt_t *tables = savepoint->tables;
5597 savepoint->tables = prev_savepoint->tables;
5598 prev_savepoint->tables = tables;
5599 }
5600
5601 fts_savepoint_free(savepoint);
5602 ib_vector_remove(savepoints, *(void **)savepoint);
5603
5604 /* Make sure we don't delete the implied savepoint. */
5605 ut_a(ib_vector_size(savepoints) > 0);
5606 }
5607 }
5608
5609 /** Refresh last statement savepoint. */
fts_savepoint_laststmt_refresh(trx_t * trx)5610 void fts_savepoint_laststmt_refresh(trx_t *trx) /*!< in: transaction */
5611 {
5612 fts_trx_t *fts_trx;
5613 fts_savepoint_t *savepoint;
5614
5615 fts_trx = trx->fts_trx;
5616
5617 savepoint = static_cast<fts_savepoint_t *>(ib_vector_pop(fts_trx->last_stmt));
5618 fts_savepoint_free(savepoint);
5619
5620 ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5621 savepoint = fts_savepoint_create(fts_trx->last_stmt, nullptr, nullptr);
5622 }
5623
5624 /********************************************************************
5625 Undo the Doc ID add/delete operations in last stmt */
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)5626 static void fts_undo_last_stmt(
5627 fts_trx_table_t *s_ftt, /*!< in: Transaction FTS table */
5628 fts_trx_table_t *l_ftt) /*!< in: last stmt FTS table */
5629 {
5630 ib_rbt_t *s_rows;
5631 ib_rbt_t *l_rows;
5632 const ib_rbt_node_t *node;
5633
5634 l_rows = l_ftt->rows;
5635 s_rows = s_ftt->rows;
5636
5637 for (node = rbt_first(l_rows); node; node = rbt_next(l_rows, node)) {
5638 fts_trx_row_t *l_row = rbt_value(fts_trx_row_t, node);
5639 ib_rbt_bound_t parent;
5640
5641 rbt_search(s_rows, &parent, &(l_row->doc_id));
5642
5643 if (parent.result == 0) {
5644 fts_trx_row_t *s_row = rbt_value(fts_trx_row_t, parent.last);
5645
5646 switch (l_row->state) {
5647 case FTS_INSERT:
5648 ut_free(rbt_remove_node(s_rows, parent.last));
5649 break;
5650
5651 case FTS_DELETE:
5652 if (s_row->state == FTS_NOTHING) {
5653 s_row->state = FTS_INSERT;
5654 } else if (s_row->state == FTS_DELETE) {
5655 ut_free(rbt_remove_node(s_rows, parent.last));
5656 }
5657 break;
5658
5659 /* FIXME: Check if FTS_MODIFY need to be addressed */
5660 case FTS_MODIFY:
5661 case FTS_NOTHING:
5662 break;
5663 default:
5664 ut_error;
5665 }
5666 }
5667 }
5668 }
5669
5670 /** Rollback to savepoint indentified by name. */
fts_savepoint_rollback_last_stmt(trx_t * trx)5671 void fts_savepoint_rollback_last_stmt(trx_t *trx) /*!< in: transaction */
5672 {
5673 ib_vector_t *savepoints;
5674 fts_savepoint_t *savepoint;
5675 fts_savepoint_t *last_stmt;
5676 fts_trx_t *fts_trx;
5677 ib_rbt_bound_t parent;
5678 const ib_rbt_node_t *node;
5679 ib_rbt_t *l_tables;
5680 ib_rbt_t *s_tables;
5681
5682 fts_trx = trx->fts_trx;
5683 savepoints = fts_trx->savepoints;
5684
5685 savepoint = static_cast<fts_savepoint_t *>(ib_vector_last(savepoints));
5686 last_stmt =
5687 static_cast<fts_savepoint_t *>(ib_vector_last(fts_trx->last_stmt));
5688
5689 l_tables = last_stmt->tables;
5690 s_tables = savepoint->tables;
5691
5692 for (node = rbt_first(l_tables); node; node = rbt_next(l_tables, node)) {
5693 fts_trx_table_t **l_ftt;
5694
5695 l_ftt = rbt_value(fts_trx_table_t *, node);
5696
5697 rbt_search_cmp(s_tables, &parent, &(*l_ftt)->table->id,
5698 fts_trx_table_id_cmp, nullptr);
5699
5700 if (parent.result == 0) {
5701 fts_trx_table_t **s_ftt;
5702
5703 s_ftt = rbt_value(fts_trx_table_t *, parent.last);
5704
5705 fts_undo_last_stmt(*s_ftt, *l_ftt);
5706 }
5707 }
5708 }
5709
5710 /** Rollback to savepoint indentified by name. */
fts_savepoint_rollback(trx_t * trx,const char * name)5711 void fts_savepoint_rollback(trx_t *trx, /*!< in: transaction */
5712 const char *name) /*!< in: savepoint name */
5713 {
5714 ulint i;
5715 ib_vector_t *savepoints;
5716
5717 ut_a(name != nullptr);
5718
5719 savepoints = trx->fts_trx->savepoints;
5720
5721 /* We pop all savepoints from the the top of the stack up to
5722 and including the instance that was found. */
5723 i = fts_savepoint_lookup(savepoints, name);
5724
5725 if (i != ULINT_UNDEFINED) {
5726 fts_savepoint_t *savepoint;
5727
5728 ut_a(i > 0);
5729
5730 while (ib_vector_size(savepoints) > i) {
5731 fts_savepoint_t *savepoint;
5732
5733 savepoint = static_cast<fts_savepoint_t *>(ib_vector_pop(savepoints));
5734
5735 if (savepoint->name != nullptr) {
5736 /* Since name was allocated on the heap, the
5737 memory will be released when the transaction
5738 completes. */
5739 savepoint->name = nullptr;
5740
5741 fts_savepoint_free(savepoint);
5742 }
5743 }
5744
5745 /* Pop all a elements from the top of the stack that may
5746 have been released. We have to be careful that we don't
5747 delete the implied savepoint. */
5748
5749 for (savepoint = static_cast<fts_savepoint_t *>(ib_vector_last(savepoints));
5750 ib_vector_size(savepoints) > 1 && savepoint->name == nullptr;
5751 savepoint =
5752 static_cast<fts_savepoint_t *>(ib_vector_last(savepoints))) {
5753 ib_vector_pop(savepoints);
5754 }
5755
5756 /* Make sure we don't delete the implied savepoint. */
5757 ut_a(ib_vector_size(savepoints) > 0);
5758
5759 /* Restore the savepoint. */
5760 fts_savepoint_take(trx, trx->fts_trx, name);
5761 }
5762 }
5763
5764 /** Check if a table is an FTS auxiliary table name.
5765 @param[out] table FTS table info
5766 @param[in] name Table name
5767 @param[in] len Length of table name
5768 @return true if the name matches an auxiliary table name pattern */
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)5769 bool fts_is_aux_table_name(fts_aux_table_t *table, const char *name,
5770 ulint len) {
5771 const char *ptr;
5772 char *end;
5773 char my_name[MAX_FULL_NAME_LEN + 1];
5774
5775 ut_ad(len <= MAX_FULL_NAME_LEN);
5776 ut_memcpy(my_name, name, len);
5777 my_name[len] = 0;
5778 end = my_name + len;
5779
5780 ptr = static_cast<const char *>(memchr(my_name, '/', len));
5781
5782 if (ptr != nullptr) {
5783 /* We will start the match after the '/' */
5784 ++ptr;
5785 len = end - ptr;
5786 }
5787
5788 /* All auxiliary tables are prefixed with "FTS_" and the name
5789 length will be at the very least greater than 20 bytes. */
5790 if (ptr != nullptr && len > 20 &&
5791 (strncmp(ptr, FTS_PREFIX, 4) == 0 ||
5792 strncmp(ptr, FTS_PREFIX_5_7, 4) == 0)) {
5793 ulint i;
5794
5795 /* Skip the prefix. */
5796 ptr += 4;
5797 len -= 4;
5798
5799 /* Try and read the table id. */
5800 if (!fts_read_object_id(&table->parent_id, ptr)) {
5801 return (false);
5802 }
5803
5804 /* Skip the table id. */
5805 ptr = static_cast<const char *>(memchr(ptr, '_', len));
5806
5807 if (ptr == nullptr) {
5808 return (false);
5809 }
5810
5811 /* Skip the underscore. */
5812 ++ptr;
5813 ut_a(end > ptr);
5814 len = end - ptr;
5815
5816 /* First search the common table suffix array. */
5817 for (i = 0; fts_common_tables[i] != nullptr; ++i) {
5818 if (strncmp(ptr, fts_common_tables[i], len) == 0 ||
5819 strncmp(ptr, fts_common_tables_5_7[i], len) == 0) {
5820 table->type = FTS_COMMON_TABLE;
5821 return (true);
5822 }
5823 }
5824
5825 /* Could be obsolete common tables. */
5826 if (native_strncasecmp(ptr, "ADDED", len) == 0 ||
5827 native_strncasecmp(ptr, "STOPWORDS", len) == 0) {
5828 table->type = FTS_OBSOLETED_TABLE;
5829 return (true);
5830 }
5831
5832 /* Try and read the index id. */
5833 if (!fts_read_object_id(&table->index_id, ptr)) {
5834 return (false);
5835 }
5836
5837 /* Skip the table id. */
5838 ptr = static_cast<const char *>(memchr(ptr, '_', len));
5839
5840 if (ptr == nullptr) {
5841 return (false);
5842 }
5843
5844 /* Skip the underscore. */
5845 ++ptr;
5846 ut_a(end > ptr);
5847 len = end - ptr;
5848
5849 /* Search the FT index specific array. */
5850 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
5851 if (strncmp(ptr, fts_get_suffix(i), len) == 0 ||
5852 strncmp(ptr, fts_get_suffix_5_7(i), len) == 0) {
5853 table->type = FTS_INDEX_TABLE;
5854 return (true);
5855 }
5856 }
5857
5858 /* Other FT index specific table(s). */
5859 if (native_strncasecmp(ptr, "DOC_ID", len) == 0) {
5860 table->type = FTS_OBSOLETED_TABLE;
5861 return (true);
5862 }
5863 }
5864
5865 return (false);
5866 }
5867
5868 /** Check whether user supplied stopword table is of the right format.
5869 Caller is responsible to hold dictionary locks.
5870 @return the stopword column charset if qualifies */
fts_valid_stopword_table(const char * stopword_table_name)5871 CHARSET_INFO *fts_valid_stopword_table(
5872 const char *stopword_table_name) /*!< in: Stopword table
5873 name */
5874 {
5875 dict_table_t *table;
5876 dict_col_t *col = nullptr;
5877
5878 if (!stopword_table_name) {
5879 return (nullptr);
5880 }
5881
5882 MDL_ticket *mdl = nullptr;
5883 THD *thd = current_thd;
5884 table = dd_table_open_on_name(thd, &mdl, stopword_table_name, false,
5885 DICT_ERR_IGNORE_NONE);
5886
5887 if (!table) {
5888 ib::error(ER_IB_MSG_481)
5889 << "User stopword table " << stopword_table_name << " does not exist.";
5890
5891 return (nullptr);
5892 } else {
5893 const char *col_name;
5894
5895 dd_table_close(table, thd, &mdl, false);
5896
5897 col_name = table->get_col_name(0);
5898
5899 if (ut_strcmp(col_name, "value")) {
5900 ib::error(ER_IB_MSG_482) << "Invalid column name for stopword"
5901 " table "
5902 << stopword_table_name
5903 << ". Its"
5904 " first column must be named as 'value'.";
5905
5906 return (nullptr);
5907 }
5908
5909 col = table->get_col(0);
5910
5911 if (col->mtype != DATA_VARCHAR && col->mtype != DATA_VARMYSQL) {
5912 ib::error(ER_IB_MSG_483) << "Invalid column type for stopword"
5913 " table "
5914 << stopword_table_name
5915 << ". Its"
5916 " first column must be of varchar type";
5917
5918 return (nullptr);
5919 }
5920 }
5921
5922 ut_ad(col);
5923
5924 return (fts_get_charset(col->prtype));
5925 }
5926
5927 /** This function loads the stopword into the FTS cache. It also
5928 records/fetches stopword configuration to/from FTS configure
5929 table, depending on whether we are creating or reloading the
5930 FTS.
5931 @return true if load operation is successful */
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)5932 ibool fts_load_stopword(
5933 const dict_table_t *table, /*!< in: Table with FTS */
5934 trx_t *trx, /*!< in: Transactions */
5935 const char *global_stopword_table, /*!< in: Global stopword table
5936 name */
5937 const char *session_stopword_table, /*!< in: Session stopword table
5938 name */
5939 ibool stopword_is_on, /*!< in: Whether stopword
5940 option is turned on/off */
5941 ibool reload) /*!< in: Whether it is
5942 for reloading FTS table */
5943 {
5944 fts_table_t fts_table;
5945 fts_string_t str;
5946 dberr_t error = DB_SUCCESS;
5947 ulint use_stopword;
5948 fts_cache_t *cache;
5949 const char *stopword_to_use = nullptr;
5950 ibool new_trx = FALSE;
5951 byte str_buffer[MAX_FULL_NAME_LEN + 1];
5952
5953 FTS_INIT_FTS_TABLE(&fts_table, FTS_SUFFIX_CONFIG, FTS_COMMON_TABLE, table);
5954
5955 cache = table->fts->cache;
5956
5957 if (!reload && !(cache->stopword_info.status & STOPWORD_NOT_INIT)) {
5958 return (TRUE);
5959 }
5960
5961 if (!trx) {
5962 trx = trx_allocate_for_background();
5963 trx->op_info = "upload FTS stopword";
5964 new_trx = TRUE;
5965 }
5966
5967 /* First check whether stopword filtering is turned off */
5968 if (reload) {
5969 error =
5970 fts_config_get_ulint(trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
5971 } else {
5972 use_stopword = (ulint)stopword_is_on;
5973
5974 error =
5975 fts_config_set_ulint(trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
5976 }
5977
5978 if (error != DB_SUCCESS) {
5979 goto cleanup;
5980 }
5981
5982 /* If stopword is turned off, no need to continue to load the
5983 stopword into cache, but still need to do initialization */
5984 if (!use_stopword) {
5985 cache->stopword_info.status = STOPWORD_OFF;
5986 goto cleanup;
5987 }
5988
5989 if (reload) {
5990 /* Fetch the stopword table name from FTS config
5991 table */
5992 str.f_n_char = 0;
5993 str.f_str = str_buffer;
5994 str.f_len = sizeof(str_buffer) - 1;
5995
5996 error =
5997 fts_config_get_value(trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
5998
5999 if (error != DB_SUCCESS) {
6000 goto cleanup;
6001 }
6002
6003 if (strlen((char *)str.f_str) > 0) {
6004 stopword_to_use = (const char *)str.f_str;
6005 }
6006 } else {
6007 stopword_to_use = (session_stopword_table) ? session_stopword_table
6008 : global_stopword_table;
6009 }
6010
6011 if (stopword_to_use && fts_load_user_stopword(table->fts, stopword_to_use,
6012 &cache->stopword_info)) {
6013 /* Save the stopword table name to the configure
6014 table */
6015 if (!reload) {
6016 str.f_n_char = 0;
6017 str.f_str = (byte *)stopword_to_use;
6018 str.f_len = ut_strlen(stopword_to_use);
6019
6020 error =
6021 fts_config_set_value(trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
6022 }
6023 } else {
6024 /* Load system default stopword list */
6025 fts_load_default_stopword(&cache->stopword_info);
6026 }
6027
6028 cleanup:
6029 if (new_trx) {
6030 if (error == DB_SUCCESS) {
6031 fts_sql_commit(trx);
6032 } else {
6033 fts_sql_rollback(trx);
6034 }
6035
6036 trx_free_for_background(trx);
6037 }
6038
6039 if (!cache->stopword_info.cached_stopword) {
6040 cache->stopword_info.cached_stopword =
6041 rbt_create_arg_cmp(sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
6042 &my_charset_latin1);
6043 }
6044
6045 return (error == DB_SUCCESS);
6046 }
6047
6048 /** Callback function when we initialize the FTS at the start up
6049 time. It recovers the maximum Doc IDs presented in the current table.
6050 @return: always returns true */
fts_init_get_doc_id(void * row,void * user_arg)6051 static ibool fts_init_get_doc_id(void *row, /*!< in: sel_node_t* */
6052 void *user_arg) /*!< in: fts cache */
6053 {
6054 doc_id_t doc_id = FTS_NULL_DOC_ID;
6055 sel_node_t *node = static_cast<sel_node_t *>(row);
6056 que_node_t *exp = node->select_list;
6057 fts_cache_t *cache = static_cast<fts_cache_t *>(user_arg);
6058
6059 ut_ad(ib_vector_is_empty(cache->get_docs));
6060
6061 /* Copy each indexed column content into doc->text.f_str */
6062 if (exp) {
6063 dfield_t *dfield = que_node_get_val(exp);
6064 dtype_t *type = dfield_get_type(dfield);
6065 void *data = dfield_get_data(dfield);
6066
6067 ut_a(dtype_get_mtype(type) == DATA_INT);
6068
6069 doc_id = static_cast<doc_id_t>(
6070 mach_read_from_8(static_cast<const byte *>(data)));
6071
6072 if (doc_id >= cache->next_doc_id) {
6073 cache->next_doc_id = doc_id + 1;
6074 }
6075 }
6076
6077 return (TRUE);
6078 }
6079
6080 /** Callback function when we initialize the FTS at the start up
6081 time. It recovers Doc IDs that have not sync-ed to the auxiliary
6082 table, and require to bring them back into FTS index.
6083 @return: always returns true */
fts_init_recover_doc(void * row,void * user_arg)6084 static ibool fts_init_recover_doc(void *row, /*!< in: sel_node_t* */
6085 void *user_arg) /*!< in: fts cache */
6086 {
6087 fts_doc_t doc;
6088 ulint doc_len = 0;
6089 ulint field_no = 0;
6090 fts_get_doc_t *get_doc = static_cast<fts_get_doc_t *>(user_arg);
6091 doc_id_t doc_id = FTS_NULL_DOC_ID;
6092 sel_node_t *node = static_cast<sel_node_t *>(row);
6093 que_node_t *exp = node->select_list;
6094 fts_cache_t *cache = get_doc->cache;
6095 st_mysql_ftparser *parser = get_doc->index_cache->index->parser;
6096
6097 fts_doc_init(&doc);
6098 doc.found = TRUE;
6099
6100 ut_ad(cache);
6101
6102 /* Copy each indexed column content into doc->text.f_str */
6103 while (exp) {
6104 dfield_t *dfield = que_node_get_val(exp);
6105 ulint len = dfield_get_len(dfield);
6106
6107 if (field_no == 0) {
6108 dtype_t *type = dfield_get_type(dfield);
6109 void *data = dfield_get_data(dfield);
6110
6111 ut_a(dtype_get_mtype(type) == DATA_INT);
6112
6113 doc_id = static_cast<doc_id_t>(
6114 mach_read_from_8(static_cast<const byte *>(data)));
6115
6116 field_no++;
6117 exp = que_node_get_next(exp);
6118 continue;
6119 }
6120
6121 if (len == UNIV_SQL_NULL) {
6122 exp = que_node_get_next(exp);
6123 continue;
6124 }
6125
6126 ut_ad(get_doc);
6127
6128 if (!get_doc->index_cache->charset) {
6129 get_doc->index_cache->charset = fts_get_charset(dfield->type.prtype);
6130 }
6131
6132 doc.charset = get_doc->index_cache->charset;
6133 doc.is_ngram = get_doc->index_cache->index->is_ngram;
6134
6135 if (dfield_is_ext(dfield)) {
6136 dict_table_t *table = cache->sync->table;
6137
6138 /** When a nullptr is passed for trx, it means we will
6139 fetch the latest LOB (and no MVCC will be done). */
6140 doc.text.f_str = lob::btr_copy_externally_stored_field(
6141 nullptr, get_doc->index_cache->index, &doc.text.f_len, nullptr,
6142 static_cast<byte *>(dfield_get_data(dfield)),
6143 dict_table_page_size(table), len, false,
6144 static_cast<mem_heap_t *>(doc.self_heap->arg));
6145 } else {
6146 doc.text.f_str = static_cast<byte *>(dfield_get_data(dfield));
6147
6148 doc.text.f_len = len;
6149 }
6150
6151 if (field_no == 1) {
6152 fts_tokenize_document(&doc, nullptr, parser);
6153 } else {
6154 fts_tokenize_document_next(&doc, doc_len, nullptr, parser);
6155 }
6156
6157 exp = que_node_get_next(exp);
6158
6159 doc_len += (exp) ? len + 1 : len;
6160
6161 field_no++;
6162 }
6163
6164 fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
6165
6166 fts_doc_free(&doc);
6167
6168 cache->added++;
6169
6170 if (doc_id >= cache->next_doc_id) {
6171 cache->next_doc_id = doc_id + 1;
6172 }
6173
6174 return (TRUE);
6175 }
6176
6177 /** This function brings FTS index in sync when FTS index is first
6178 used. There are documents that have not yet sync-ed to auxiliary
6179 tables from last server abnormally shutdown, we will need to bring
6180 such document into FTS cache before any further operations
6181 @return true if all OK */
fts_init_index(dict_table_t * table,ibool has_cache_lock)6182 ibool fts_init_index(dict_table_t *table, /*!< in: Table with FTS */
6183 ibool has_cache_lock) /*!< in: Whether we already have
6184 cache lock */
6185 {
6186 dict_index_t *index;
6187 doc_id_t start_doc;
6188 fts_get_doc_t *get_doc = nullptr;
6189 fts_cache_t *cache = table->fts->cache;
6190 bool need_init = false;
6191
6192 ut_ad(!mutex_own(&dict_sys->mutex));
6193
6194 /* First check cache->get_docs is initialized */
6195 if (!has_cache_lock) {
6196 rw_lock_x_lock(&cache->lock);
6197 }
6198
6199 rw_lock_x_lock(&cache->init_lock);
6200 if (cache->get_docs == nullptr) {
6201 cache->get_docs = fts_get_docs_create(cache);
6202 }
6203 rw_lock_x_unlock(&cache->init_lock);
6204
6205 if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
6206 goto func_exit;
6207 }
6208
6209 need_init = true;
6210
6211 start_doc = cache->synced_doc_id;
6212
6213 if (!start_doc) {
6214 fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
6215 cache->synced_doc_id = start_doc;
6216 }
6217
6218 /* No FTS index, this is the case when previous FTS index
6219 dropped, and we re-initialize the Doc ID system for subsequent
6220 insertion */
6221 if (ib_vector_is_empty(cache->get_docs)) {
6222 index = table->fts_doc_id_index;
6223
6224 ut_a(index);
6225
6226 fts_doc_fetch_by_doc_id(nullptr, start_doc, index,
6227 FTS_FETCH_DOC_BY_ID_LARGE, fts_init_get_doc_id,
6228 cache);
6229 } else {
6230 if (table->fts->cache->stopword_info.status & STOPWORD_NOT_INIT) {
6231 fts_load_stopword(table, nullptr, nullptr, nullptr, TRUE, TRUE);
6232 }
6233
6234 for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
6235 get_doc = static_cast<fts_get_doc_t *>(ib_vector_get(cache->get_docs, i));
6236
6237 index = get_doc->index_cache->index;
6238
6239 fts_doc_fetch_by_doc_id(nullptr, start_doc, index,
6240 FTS_FETCH_DOC_BY_ID_LARGE, fts_init_recover_doc,
6241 get_doc);
6242 }
6243 }
6244
6245 table->fts->fts_status |= ADDED_TABLE_SYNCED;
6246
6247 fts_get_docs_clear(cache->get_docs);
6248
6249 func_exit:
6250 if (!has_cache_lock) {
6251 rw_lock_x_unlock(&cache->lock);
6252 }
6253
6254 if (need_init) {
6255 mutex_enter(&dict_sys->mutex);
6256 /* Register the table with the optimize thread. */
6257 fts_optimize_add_table(table);
6258 mutex_exit(&dict_sys->mutex);
6259 }
6260
6261 return (TRUE);
6262 }
6263
6264 /** Rename old FTS common and aux tables with the new table_id
6265 @param[in] old_name old name of FTS AUX table
6266 @param[in] new_name new name of FTS AUX table
6267 @return new fts table if success, else nullptr on failure */
fts_upgrade_rename_aux_table_low(const char * old_name,const char * new_name)6268 static dict_table_t *fts_upgrade_rename_aux_table_low(const char *old_name,
6269 const char *new_name) {
6270 mutex_enter(&dict_sys->mutex);
6271
6272 dict_table_t *old_aux_table =
6273 dict_table_open_on_name(old_name, true, false, DICT_ERR_IGNORE_NONE);
6274
6275 ut_ad(old_aux_table != nullptr);
6276 dict_table_close(old_aux_table, true, false);
6277 dberr_t err = dict_table_rename_in_cache(old_aux_table, new_name, false);
6278 if (err != DB_SUCCESS) {
6279 mutex_exit(&dict_sys->mutex);
6280 return (nullptr);
6281 }
6282
6283 dict_table_t *new_aux_table =
6284 dict_table_open_on_name(new_name, true, false, DICT_ERR_IGNORE_NONE);
6285 ut_ad(new_aux_table != nullptr);
6286 mutex_exit(&dict_sys->mutex);
6287
6288 return (new_aux_table);
6289 }
6290
6291 /** Rename old FTS common and aux tables with the new table_id
6292 @param[in] old_name old name of FTS AUX table
6293 @param[in] new_name new name of FTS AUX table
6294 @param[in] rollback if true, do the rename back
6295 else mark original AUX tables
6296 evictable */
fts_upgrade_rename_aux_table(const char * old_name,const char * new_name,bool rollback)6297 static void fts_upgrade_rename_aux_table(const char *old_name,
6298 const char *new_name, bool rollback) {
6299 dict_table_t *new_table = nullptr;
6300
6301 if (rollback) {
6302 new_table = fts_upgrade_rename_aux_table_low(old_name, new_name);
6303
6304 } else {
6305 new_table =
6306 dict_table_open_on_name(old_name, false, false, DICT_ERR_IGNORE_NONE);
6307 }
6308
6309 if (new_table == nullptr) {
6310 return;
6311 }
6312
6313 mutex_enter(&dict_sys->mutex);
6314 dict_table_allow_eviction(new_table);
6315 dict_table_close(new_table, true, false);
6316 mutex_exit(&dict_sys->mutex);
6317 }
6318
6319 /** During upgrade, tables are moved by DICT_MAX_DD_TABLES
6320 offset, remove this offset to get 5.7 fts aux table names
6321 @param[in] table_id 8.0 table id */
fts_upgrade_get_5_7_table_id(table_id_t table_id)6322 inline table_id_t fts_upgrade_get_5_7_table_id(table_id_t table_id) {
6323 return (table_id - DICT_MAX_DD_TABLES);
6324 }
6325
6326 /** Upgrade FTS AUX Tables. The FTS common and aux tables are
6327 renamed because they have table_id in their name. We move table_ids
6328 by DICT_MAX_DD_TABLES offset. Aux tables are registered into DD
6329 afer rename.
6330 @param[in] table InnoDB table object
6331 @return DB_SUCCESS or error code */
fts_upgrade_aux_tables(dict_table_t * table)6332 dberr_t fts_upgrade_aux_tables(dict_table_t *table) {
6333 fts_table_t fts_old_table;
6334
6335 ut_ad(srv_is_upgrade_mode);
6336
6337 FTS_INIT_FTS_TABLE(&fts_old_table, nullptr, FTS_COMMON_TABLE, table);
6338 fts_table_t fts_new_table = fts_old_table;
6339
6340 fts_old_table.table_id = fts_upgrade_get_5_7_table_id(fts_old_table.table_id);
6341
6342 /* Rename common auxiliary tables */
6343 for (ulint i = 0; fts_common_tables_5_7[i] != nullptr; ++i) {
6344 fts_old_table.suffix = fts_common_tables_5_7[i];
6345
6346 bool is_config = fts_old_table.suffix == FTS_SUFFIX_CONFIG_5_7;
6347 char old_name[MAX_FULL_NAME_LEN];
6348 char new_name[MAX_FULL_NAME_LEN];
6349
6350 fts_get_table_name_5_7(&fts_old_table, old_name);
6351
6352 DBUG_EXECUTE_IF("dd_upgrade", ib::info(ER_IB_MSG_484)
6353 << "Old fts table name is " << old_name;);
6354
6355 fts_new_table.suffix = fts_common_tables[i];
6356 fts_get_table_name(&fts_new_table, new_name);
6357
6358 DBUG_EXECUTE_IF("dd_upgrade", ib::info(ER_IB_MSG_485)
6359 << "New fts table name is " << new_name;);
6360
6361 dict_table_t *new_table =
6362 fts_upgrade_rename_aux_table_low(old_name, new_name);
6363
6364 if (new_table == nullptr) {
6365 return (DB_ERROR);
6366 }
6367
6368 mutex_enter(&dict_sys->mutex);
6369 dict_table_prevent_eviction(new_table);
6370 mutex_exit(&dict_sys->mutex);
6371
6372 if (!dd_create_fts_common_table(table, new_table, is_config)) {
6373 dict_table_close(new_table, false, false);
6374 return (DB_FAIL);
6375 }
6376 dict_table_close(new_table, false, false);
6377 }
6378
6379 fts_t *fts = table->fts;
6380
6381 /* Rename index specific auxiliary tables */
6382 for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
6383 ++i) {
6384 dict_index_t *index;
6385
6386 index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
6387
6388 FTS_INIT_INDEX_TABLE(&fts_old_table, nullptr, FTS_INDEX_TABLE, index);
6389 fts_new_table = fts_old_table;
6390
6391 fts_old_table.table_id =
6392 fts_upgrade_get_5_7_table_id(fts_old_table.table_id);
6393
6394 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
6395 fts_old_table.suffix = fts_get_suffix_5_7(j);
6396
6397 char old_name[MAX_FULL_NAME_LEN];
6398 char new_name[MAX_FULL_NAME_LEN];
6399
6400 fts_get_table_name_5_7(&fts_old_table, old_name);
6401
6402 fts_new_table.suffix = fts_get_suffix(j);
6403 fts_get_table_name(&fts_new_table, new_name);
6404
6405 dict_table_t *new_table =
6406 fts_upgrade_rename_aux_table_low(old_name, new_name);
6407
6408 if (new_table == nullptr) {
6409 return (DB_ERROR);
6410 }
6411
6412 mutex_enter(&dict_sys->mutex);
6413 dict_table_prevent_eviction(new_table);
6414 mutex_exit(&dict_sys->mutex);
6415
6416 CHARSET_INFO *charset = fts_get_charset(index->get_field(0)->col->prtype);
6417
6418 if (!dd_create_fts_index_table(table, new_table, charset)) {
6419 dict_table_close(new_table, false, false);
6420 return (DB_FAIL);
6421 }
6422 dict_table_close(new_table, false, false);
6423 }
6424 }
6425
6426 return (DB_SUCCESS);
6427 }
6428
6429 /** Rename FTS AUX tablespace name from 8.0 format to 5.7 format.
6430 This will be done on upgrade failure
6431 @param[in] table parent table
6432 @param[in] rollback rollback the rename from 8.0 to 5.7
6433 if true, rename to 5.7 format
6434 if false, mark the table as evictable
6435 @return DB_SUCCESS on success, DB_ERROR on error */
fts_upgrade_rename(const dict_table_t * table,bool rollback)6436 dberr_t fts_upgrade_rename(const dict_table_t *table, bool rollback) {
6437 fts_table_t fts_old_table;
6438
6439 ut_ad(srv_is_upgrade_mode);
6440
6441 FTS_INIT_FTS_TABLE(&fts_old_table, nullptr, FTS_COMMON_TABLE, table);
6442
6443 fts_table_t fts_new_table = fts_old_table;
6444
6445 fts_new_table.table_id = fts_upgrade_get_5_7_table_id(fts_new_table.table_id);
6446
6447 /* Rename common auxiliary tables */
6448 for (ulint i = 0; fts_common_tables[i] != nullptr; ++i) {
6449 fts_old_table.suffix = fts_common_tables[i];
6450
6451 char old_name[MAX_FULL_NAME_LEN];
6452 char new_name[MAX_FULL_NAME_LEN];
6453
6454 fts_get_table_name(&fts_old_table, old_name);
6455
6456 fts_new_table.suffix = fts_common_tables_5_7[i];
6457 fts_get_table_name_5_7(&fts_new_table, new_name);
6458
6459 fts_upgrade_rename_aux_table(old_name, new_name, rollback);
6460 }
6461
6462 fts_t *fts = table->fts;
6463
6464 /* Rename index specific auxiliary tables */
6465 for (ulint i = 0; fts->indexes != nullptr && i < ib_vector_size(fts->indexes);
6466 ++i) {
6467 dict_index_t *index;
6468
6469 index = static_cast<dict_index_t *>(ib_vector_getp(fts->indexes, i));
6470
6471 FTS_INIT_INDEX_TABLE(&fts_old_table, nullptr, FTS_INDEX_TABLE, index);
6472 fts_new_table = fts_old_table;
6473
6474 fts_new_table.table_id =
6475 fts_upgrade_get_5_7_table_id(fts_new_table.table_id);
6476
6477 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
6478 fts_old_table.suffix = fts_get_suffix(j);
6479
6480 char old_name[MAX_FULL_NAME_LEN];
6481 char new_name[MAX_FULL_NAME_LEN];
6482
6483 fts_get_table_name(&fts_old_table, old_name);
6484
6485 fts_new_table.suffix = fts_get_suffix_5_7(j);
6486 fts_get_table_name_5_7(&fts_new_table, new_name);
6487
6488 fts_upgrade_rename_aux_table(old_name, new_name, rollback);
6489 }
6490 }
6491 return (DB_SUCCESS);
6492 }
6493