1 /*****************************************************************************
2
3 Copyright (c) 2011, 2021, Oracle and/or its affiliates.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file fts/fts0fts.cc
29 Full Text Search interface
30 ***********************************************************************/
31
32 #include "ha_prototypes.h"
33
34 #include "trx0roll.h"
35 #include "row0mysql.h"
36 #include "row0upd.h"
37 #include "dict0types.h"
38 #include "dict0stats_bg.h"
39 #include "row0sel.h"
40 #include "fts0fts.h"
41 #include "fts0priv.h"
42 #include "fts0types.h"
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "fts0plugin.h"
46 #include "dict0priv.h"
47 #include "dict0stats.h"
48 #include "btr0pcur.h"
49 #include "sync0sync.h"
50 #include "ut0new.h"
51
52 #include "fil0crypt.h"
53
/** NOTE(review): presumably the maximum length of a formatted FTS id; its
use is not visible in this part of the file - confirm before relying on it. */
static const ulint FTS_MAX_ID_LEN = 32;

/** Column name from the FTS config table */
#define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"

/** Verify whether an aux table name is an obsolete table
by looking up the keywords of the obsolete table names.
Evaluates to true when the name contains "DOC_ID", "ADDED" or "STOPWORDS".
The argument is expanded up to three times, so it must not have side
effects. */
#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)		\
	(strstr((table_name), "DOC_ID") != NULL		\
	 || strstr((table_name), "ADDED") != NULL	\
	 || strstr((table_name), "STOPWORDS") != NULL)

/** This is the maximum FTS cache size for each table; it is meant to be
a configurable variable */
ulong	fts_max_cache_size;

/** Whether the total memory used for FTS cache is exhausted, and we will
need a sync to free some memory */
bool	fts_need_sync = false;

/** Variable specifying the total memory allocated for FTS cache */
ulong	fts_max_total_cache_size;

/** This is the FTS result cache limit for each query; it is meant to be
a configurable variable */
ulong	fts_result_cache_limit;

/** Variable specifying the maximum FTS token size */
ulong	fts_max_token_size;

/** Variable specifying the minimum FTS token size */
ulong	fts_min_token_size;


// FIXME: testing
/* NOTE(review): the two globals below are flagged by the FIXME above as
test instrumentation; they are not used in the visible part of the file. */
ib_time_monotonic_t elapsed_time = 0;
ulint n_nodes = 0;

#ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit, in MB (i.e. 1 MB) */
static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;

/** The cache size permissible upper limit, in MB (i.e. 1 GB) */
static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
#endif

/** Time to sleep after DEADLOCK error before retrying operation.
NOTE(review): the unit is not stated here (presumably microseconds) -
confirm at the place where it is used. */
static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;

/** Variable to record innodb_fts_internal_tbl_name for information
schema table INNODB_FTS_INSERTED etc. */
char* fts_internal_tbl_name		= NULL;
106
/** InnoDB default stopword list:
There are different versions of stopwords; the stop words listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision.
The array is terminated by a NULL entry. Note that "the" is listed twice
(after "that" and again after "und"). */
const char *fts_default_stopword[] =
{
	"a",
	"about",
	"an",
	"are",
	"as",
	"at",
	"be",
	"by",
	"com",
	"de",
	"en",
	"for",
	"from",
	"how",
	"i",
	"in",
	"is",
	"it",
	"la",
	"of",
	"on",
	"or",
	"that",
	"the",
	"this",
	"to",
	"was",
	"what",
	"when",
	"where",
	"who",
	"will",
	"with",
	"und",
	"the",	/* duplicate of the earlier "the" entry */
	"www",
	NULL
};
153
/** For storing table info when checking for orphaned tables.
One instance describes a single FTS auxiliary table. */
struct fts_aux_table_t {
	table_id_t	id;		/*!< Table id */
	table_id_t	parent_id;	/*!< Parent table id */
	table_id_t	index_id;	/*!< Table FT index id */
	char*		name;		/*!< Name of the table */
};
161
#ifdef FTS_DOC_STATS_DEBUG
/** Template for creating the FTS auxiliary index specific tables. This is
mainly designed for the statistics work in the future.
Only compiled when FTS_DOC_STATS_DEBUG is defined. The statement creates
$doc_id_table (doc_id, word_count) with a unique clustered index on
doc_id. */
static const char* fts_create_index_tables_sql = {
	"BEGIN\n"
	""
	"CREATE TABLE $doc_id_table (\n"
	" doc_id BIGINT UNSIGNED,\n"
	" word_count INTEGER UNSIGNED NOT NULL\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON $doc_id_table(doc_id);\n"
};
#endif
175
/** FTS auxiliary table suffixes that are common to all FT indexes.
The list is terminated by a NULL entry. */
const char* fts_common_tables[] = {
	"BEING_DELETED",
	"BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED",
	"DELETED_CACHE",
	NULL
};

/** FTS auxiliary INDEX split intervals.
Each entry pairs a numeric value with the suffix of one auxiliary INDEX
table; the { 0, NULL } entry terminates the list.
NOTE(review): the numeric value presumably is the boundary used to pick the
auxiliary table for a word - confirm against fts_index_selector_t. */
const  fts_index_selector_t fts_index_selector[] = {
	{ 9, "INDEX_1" },
	{ 65, "INDEX_2" },
	{ 70, "INDEX_3" },
	{ 75, "INDEX_4" },
	{ 80, "INDEX_5" },
	{ 85, "INDEX_6" },
	{  0 , NULL	 }
};
196
/** Default config values for FTS indexes on a table.
The statement inserts one row per config key into $config_table:
cache size in MB ('256'), optimize limit in seconds ('180'), synced doc id
('0'), total deleted count ('0') and table state ('0'). */
static const char* fts_config_table_insert_values_sql =
	"BEGIN\n"
	"\n"
	"INSERT INTO $config_table VALUES('"
		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
	""
	"INSERT INTO $config_table VALUES('"
		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_SYNCED_DOC_ID "', '0');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_TOTAL_DELETED_COUNT "', '0');\n"
	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
	"INSERT INTO $config_table VALUES ('"
		FTS_TABLE_STATE "', '0');\n";
215
/** FTS tokenize parameter for the plugin parser */
struct fts_tokenize_param_t {
	fts_doc_t*	result_doc;	/*!< Result doc for tokens */
	ulint		add_pos;	/*!< Added position for tokens */
};
221
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether to unlock the cache lock while
				writing a node
@param[in]	wait		whether to wait when a sync is in progress
@param[in]	has_dict_lock	whether the caller has the dict operation
				lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict_lock);

/****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist. */
static
void
fts_words_free(
/*===========*/
	ib_rbt_t*	words)		/*!< in: rb tree of words */
	MY_ATTRIBUTE((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table.
Only declared when FTS_CACHE_SIZE_DEBUG is defined. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync);		/*!< in: sync state */
#endif

/*********************************************************************//**
This function fetches the document just inserted right before
we commit the transaction, tokenizes the inserted text data
and inserts it into the FTS auxiliary table and its cache.
@return TRUE if successful */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)));
					/*!< in: affected fts indexes */
#ifdef FTS_DOC_STATS_DEBUG
/****************************************************************//**
Check whether a particular word (term) exists in the FTS index.
Only declared when FTS_DOC_STATS_DEBUG is defined.
@return DB_SUCCESS if all went fine */
static
dberr_t
fts_is_word_in_index(
/*=================*/
	trx_t*		trx,		/*!< in: FTS query state */
	que_t**		graph,		/*!< out: Query graph */
	fts_table_t*	fts_table,	/*!< in: table instance */
	const fts_string_t*
			word,		/*!< in: the word to check */
	ibool*		found)		/*!< out: TRUE if exists */
	MY_ATTRIBUTE((nonnull, warn_unused_result));
#endif /* FTS_DOC_STATS_DEBUG */

/******************************************************************//**
Update the last document id. This function could create a new
transaction to update the last document id.
Only the table argument is declared nonnull; table_name and trx may be
NULL.
@return DB_SUCCESS if OK */
static
dberr_t
fts_update_sync_doc_id(
/*===================*/
	const dict_table_t*	table,		/*!< in: table */
	const char*		table_name,	/*!< in: table name, or NULL */
	doc_id_t		doc_id,		/*!< in: last document id */
	trx_t*			trx)		/*!< in: update trx, or NULL */
	MY_ATTRIBUTE((nonnull(1)));
297
298 /** Get a character set based on precise type.
299 @param prtype precise type
300 @return the corresponding character set */
301 UNIV_INLINE
302 CHARSET_INFO*
fts_get_charset(ulint prtype)303 fts_get_charset(ulint prtype)
304 {
305 #ifdef UNIV_DEBUG
306 switch (prtype & DATA_MYSQL_TYPE_MASK) {
307 case MYSQL_TYPE_BIT:
308 case MYSQL_TYPE_STRING:
309 case MYSQL_TYPE_VAR_STRING:
310 case MYSQL_TYPE_TINY_BLOB:
311 case MYSQL_TYPE_MEDIUM_BLOB:
312 case MYSQL_TYPE_BLOB:
313 case MYSQL_TYPE_LONG_BLOB:
314 case MYSQL_TYPE_VARCHAR:
315 break;
316 default:
317 ut_error;
318 }
319 #endif /* UNIV_DEBUG */
320
321 uint cs_num = (uint) dtype_get_charset_coll(prtype);
322
323 if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
324 return(cs);
325 }
326
327 ib::fatal() << "Unable to find charset-collation " << cs_num;
328 return(NULL);
329 }
330
331 /****************************************************************//**
332 This function loads the default InnoDB stopword list */
333 static
334 void
fts_load_default_stopword(fts_stopword_t * stopword_info)335 fts_load_default_stopword(
336 /*======================*/
337 fts_stopword_t* stopword_info) /*!< in: stopword info */
338 {
339 fts_string_t str;
340 mem_heap_t* heap;
341 ib_alloc_t* allocator;
342 ib_rbt_t* stop_words;
343
344 allocator = stopword_info->heap;
345 heap = static_cast<mem_heap_t*>(allocator->arg);
346
347 if (!stopword_info->cached_stopword) {
348 stopword_info->cached_stopword = rbt_create_arg_cmp(
349 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
350 &my_charset_latin1);
351 }
352
353 stop_words = stopword_info->cached_stopword;
354
355 str.f_n_char = 0;
356
357 for (ulint i = 0; fts_default_stopword[i]; ++i) {
358 char* word;
359 fts_tokenizer_word_t new_word;
360
361 /* We are going to duplicate the value below. */
362 word = const_cast<char*>(fts_default_stopword[i]);
363
364 new_word.nodes = ib_vector_create(
365 allocator, sizeof(fts_node_t), 4);
366
367 str.f_len = ut_strlen(word);
368 str.f_str = reinterpret_cast<byte*>(word);
369
370 fts_string_dup(&new_word.text, &str, heap);
371
372 rbt_insert(stop_words, &new_word, &new_word);
373 }
374
375 stopword_info->status = STOPWORD_FROM_DEFAULT;
376 }
377
378 /****************************************************************//**
379 Callback function to read a single stopword value.
380 @return Always return TRUE */
381 static
382 ibool
fts_read_stopword(void * row,void * user_arg)383 fts_read_stopword(
384 /*==============*/
385 void* row, /*!< in: sel_node_t* */
386 void* user_arg) /*!< in: pointer to ib_vector_t */
387 {
388 ib_alloc_t* allocator;
389 fts_stopword_t* stopword_info;
390 sel_node_t* sel_node;
391 que_node_t* exp;
392 ib_rbt_t* stop_words;
393 dfield_t* dfield;
394 fts_string_t str;
395 mem_heap_t* heap;
396 ib_rbt_bound_t parent;
397
398 sel_node = static_cast<sel_node_t*>(row);
399 stopword_info = static_cast<fts_stopword_t*>(user_arg);
400
401 stop_words = stopword_info->cached_stopword;
402 allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
403 heap = static_cast<mem_heap_t*>(allocator->arg);
404
405 exp = sel_node->select_list;
406
407 /* We only need to read the first column */
408 dfield = que_node_get_val(exp);
409
410 str.f_n_char = 0;
411 str.f_str = static_cast<byte*>(dfield_get_data(dfield));
412 str.f_len = dfield_get_len(dfield);
413
414 /* Only create new node if it is a value not already existed */
415 if (str.f_len != UNIV_SQL_NULL
416 && rbt_search(stop_words, &parent, &str) != 0) {
417
418 fts_tokenizer_word_t new_word;
419
420 new_word.nodes = ib_vector_create(
421 allocator, sizeof(fts_node_t), 4);
422
423 new_word.text.f_str = static_cast<byte*>(
424 mem_heap_alloc(heap, str.f_len + 1));
425
426 memcpy(new_word.text.f_str, str.f_str, str.f_len);
427
428 new_word.text.f_n_char = 0;
429 new_word.text.f_len = str.f_len;
430 new_word.text.f_str[str.f_len] = 0;
431
432 rbt_insert(stop_words, &new_word, &new_word);
433 }
434
435 return(TRUE);
436 }
437
438 /******************************************************************//**
439 Load user defined stopword from designated user table
440 @return TRUE if load operation is successful */
441 static
442 ibool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)443 fts_load_user_stopword(
444 /*===================*/
445 fts_t* fts, /*!< in: FTS struct */
446 const char* stopword_table_name, /*!< in: Stopword table
447 name */
448 fts_stopword_t* stopword_info) /*!< in: Stopword info */
449 {
450 pars_info_t* info;
451 que_t* graph;
452 dberr_t error = DB_SUCCESS;
453 ibool ret = TRUE;
454 trx_t* trx;
455 ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED;
456
457 trx = trx_allocate_for_background();
458 trx->op_info = "Load user stopword table into FTS cache";
459
460 if (!has_lock) {
461 mutex_enter(&dict_sys->mutex);
462 }
463
464 /* Validate the user table existence and in the right
465 format */
466 stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
467 if (!stopword_info->charset) {
468 ret = FALSE;
469 goto cleanup;
470 } else if (!stopword_info->cached_stopword) {
471 /* Create the stopword RB tree with the stopword column
472 charset. All comparison will use this charset */
473 stopword_info->cached_stopword = rbt_create_arg_cmp(
474 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
475 stopword_info->charset);
476
477 }
478
479 info = pars_info_create();
480
481 pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
482
483 pars_info_bind_function(info, "my_func", fts_read_stopword,
484 stopword_info);
485
486 graph = fts_parse_sql_no_dict_lock(
487 NULL,
488 info,
489 "DECLARE FUNCTION my_func;\n"
490 "DECLARE CURSOR c IS"
491 " SELECT value"
492 " FROM $table_stopword;\n"
493 "BEGIN\n"
494 "\n"
495 "OPEN c;\n"
496 "WHILE 1 = 1 LOOP\n"
497 " FETCH c INTO my_func();\n"
498 " IF c % NOTFOUND THEN\n"
499 " EXIT;\n"
500 " END IF;\n"
501 "END LOOP;\n"
502 "CLOSE c;");
503
504 for (;;) {
505 error = fts_eval_sql(trx, graph);
506
507 if (error == DB_SUCCESS) {
508 fts_sql_commit(trx);
509 stopword_info->status = STOPWORD_USER_TABLE;
510 break;
511 } else {
512
513 fts_sql_rollback(trx);
514
515 if (error == DB_LOCK_WAIT_TIMEOUT) {
516 ib::warn() << "Lock wait timeout reading user"
517 " stopword table. Retrying!";
518
519 trx->error_state = DB_SUCCESS;
520 } else {
521 ib::error() << "Error '" << ut_strerr(error)
522 << "' while reading user stopword"
523 " table.";
524 ret = FALSE;
525 break;
526 }
527 }
528 }
529
530 que_graph_free(graph);
531
532 cleanup:
533 if (!has_lock) {
534 mutex_exit(&dict_sys->mutex);
535 }
536
537 trx_free_for_background(trx);
538 return(ret);
539 }
540
541 /******************************************************************//**
542 Initialize the index cache. */
543 static
544 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)545 fts_index_cache_init(
546 /*=================*/
547 ib_alloc_t* allocator, /*!< in: the allocator to use */
548 fts_index_cache_t* index_cache) /*!< in: index cache */
549 {
550 ulint i;
551
552 ut_a(index_cache->words == NULL);
553
554 index_cache->words = rbt_create_arg_cmp(
555 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
556 index_cache->charset);
557
558 ut_a(index_cache->doc_stats == NULL);
559
560 index_cache->doc_stats = ib_vector_create(
561 allocator, sizeof(fts_doc_stats_t), 4);
562
563 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
564 ut_a(index_cache->ins_graph[i] == NULL);
565 ut_a(index_cache->sel_graph[i] == NULL);
566 }
567 }
568
569 /*********************************************************************//**
570 Initialize FTS cache. */
571 void
fts_cache_init(fts_cache_t * cache)572 fts_cache_init(
573 /*===========*/
574 fts_cache_t* cache) /*!< in: cache to initialize */
575 {
576 ulint i;
577
578 /* Just to make sure */
579 ut_a(cache->sync_heap->arg == NULL);
580
581 cache->sync_heap->arg = mem_heap_create(1024);
582
583 cache->total_size = 0;
584 cache->total_size_before_sync = 0;
585
586 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
587 cache->deleted_doc_ids = ib_vector_create(
588 cache->sync_heap, sizeof(fts_update_t), 4);
589 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
590
591 /* Reset the cache data for all the FTS indexes. */
592 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
593 fts_index_cache_t* index_cache;
594
595 index_cache = static_cast<fts_index_cache_t*>(
596 ib_vector_get(cache->indexes, i));
597
598 fts_index_cache_init(cache->sync_heap, index_cache);
599 }
600 }
601
602 /****************************************************************//**
603 Create a FTS cache. */
604 fts_cache_t*
fts_cache_create(dict_table_t * table)605 fts_cache_create(
606 /*=============*/
607 dict_table_t* table) /*!< in: table owns the FTS cache */
608 {
609 mem_heap_t* heap;
610 fts_cache_t* cache;
611
612 heap = static_cast<mem_heap_t*>(mem_heap_create(512));
613
614 cache = static_cast<fts_cache_t*>(
615 mem_heap_zalloc(heap, sizeof(*cache)));
616
617 cache->cache_heap = heap;
618
619 rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
620
621 rw_lock_create(
622 fts_cache_init_rw_lock_key, &cache->init_lock,
623 SYNC_FTS_CACHE_INIT);
624
625 mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
626
627 mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
628
629 mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
630
631 /* This is the heap used to create the cache itself. */
632 cache->self_heap = ib_heap_allocator_create(heap);
633
634 /* This is a transient heap, used for storing sync data. */
635 cache->sync_heap = ib_heap_allocator_create(heap);
636 cache->sync_heap->arg = NULL;
637
638 cache->sync = static_cast<fts_sync_t*>(
639 mem_heap_zalloc(heap, sizeof(fts_sync_t)));
640
641 cache->sync->table = table;
642 cache->sync->event = os_event_create(0);
643
644 /* Create the index cache vector that will hold the inverted indexes. */
645 cache->indexes = ib_vector_create(
646 cache->self_heap, sizeof(fts_index_cache_t), 2);
647
648 fts_cache_init(cache);
649
650 cache->stopword_info.cached_stopword = NULL;
651 cache->stopword_info.charset = NULL;
652
653 cache->stopword_info.heap = cache->self_heap;
654
655 cache->stopword_info.status = STOPWORD_NOT_INIT;
656
657 return(cache);
658 }
659
660 /*******************************************************************//**
661 Add a newly create index into FTS cache */
662 void
fts_add_index(dict_index_t * index,dict_table_t * table)663 fts_add_index(
664 /*==========*/
665 dict_index_t* index, /*!< FTS index to be added */
666 dict_table_t* table) /*!< table */
667 {
668 fts_t* fts = table->fts;
669 fts_cache_t* cache;
670 fts_index_cache_t* index_cache;
671
672 ut_ad(fts);
673 cache = table->fts->cache;
674
675 rw_lock_x_lock(&cache->init_lock);
676
677 ib_vector_push(fts->indexes, &index);
678
679 index_cache = fts_find_index_cache(cache, index);
680
681 if (!index_cache) {
682 /* Add new index cache structure */
683 index_cache = fts_cache_index_cache_create(table, index);
684 }
685
686 rw_lock_x_unlock(&cache->init_lock);
687 }
688
689 /*******************************************************************//**
690 recalibrate get_doc structure after index_cache in cache->indexes changed */
691 static
692 void
fts_reset_get_doc(fts_cache_t * cache)693 fts_reset_get_doc(
694 /*==============*/
695 fts_cache_t* cache) /*!< in: FTS index cache */
696 {
697 fts_get_doc_t* get_doc;
698 ulint i;
699
700 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
701
702 ib_vector_reset(cache->get_docs);
703
704 for (i = 0; i < ib_vector_size(cache->indexes); i++) {
705 fts_index_cache_t* ind_cache;
706
707 ind_cache = static_cast<fts_index_cache_t*>(
708 ib_vector_get(cache->indexes, i));
709
710 get_doc = static_cast<fts_get_doc_t*>(
711 ib_vector_push(cache->get_docs, NULL));
712
713 memset(get_doc, 0x0, sizeof(*get_doc));
714
715 get_doc->index_cache = ind_cache;
716 }
717
718 ut_ad(ib_vector_size(cache->get_docs)
719 == ib_vector_size(cache->indexes));
720 }
721
722 /*******************************************************************//**
723 Check an index is in the table->indexes list
724 @return TRUE if it exists */
725 static
726 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)727 fts_in_dict_index(
728 /*==============*/
729 dict_table_t* table, /*!< in: Table */
730 dict_index_t* index_check) /*!< in: index to be checked */
731 {
732 dict_index_t* index;
733
734 for (index = dict_table_get_first_index(table);
735 index != NULL;
736 index = dict_table_get_next_index(index)) {
737
738 if (index == index_check) {
739 return(TRUE);
740 }
741 }
742
743 return(FALSE);
744 }
745
746 /*******************************************************************//**
747 Check an index is in the fts->cache->indexes list
748 @return TRUE if it exists */
749 static
750 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)751 fts_in_index_cache(
752 /*===============*/
753 dict_table_t* table, /*!< in: Table */
754 dict_index_t* index) /*!< in: index to be checked */
755 {
756 ulint i;
757
758 for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
759 fts_index_cache_t* index_cache;
760
761 index_cache = static_cast<fts_index_cache_t*>(
762 ib_vector_get(table->fts->cache->indexes, i));
763
764 if (index_cache->index == index) {
765 return(TRUE);
766 }
767 }
768
769 return(FALSE);
770 }
771
772 /*******************************************************************//**
773 Check indexes in the fts->indexes is also present in index cache and
774 table->indexes list
775 @return TRUE if all indexes match */
776 ibool
fts_check_cached_index(dict_table_t * table)777 fts_check_cached_index(
778 /*===================*/
779 dict_table_t* table) /*!< in: Table where indexes are dropped */
780 {
781 ulint i;
782
783 if (!table->fts || !table->fts->cache) {
784 return(TRUE);
785 }
786
787 ut_a(ib_vector_size(table->fts->indexes)
788 == ib_vector_size(table->fts->cache->indexes));
789
790 for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
791 dict_index_t* index;
792
793 index = static_cast<dict_index_t*>(
794 ib_vector_getp(table->fts->indexes, i));
795
796 if (!fts_in_index_cache(table, index)) {
797 return(FALSE);
798 }
799
800 if (!fts_in_dict_index(table, index)) {
801 return(FALSE);
802 }
803 }
804
805 return(TRUE);
806 }
807
808 /*******************************************************************//**
809 Drop auxiliary tables related to an FTS index
810 @return DB_SUCCESS or error number */
811 dberr_t
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx)812 fts_drop_index(
813 /*===========*/
814 dict_table_t* table, /*!< in: Table where indexes are dropped */
815 dict_index_t* index, /*!< in: Index to be dropped */
816 trx_t* trx) /*!< in: Transaction for the drop */
817 {
818 ib_vector_t* indexes = table->fts->indexes;
819 dberr_t err = DB_SUCCESS;
820
821 ut_a(indexes);
822
823 if ((ib_vector_size(indexes) == 1
824 && (index == static_cast<dict_index_t*>(
825 ib_vector_getp(table->fts->indexes, 0))))
826 || ib_vector_is_empty(indexes)) {
827 doc_id_t current_doc_id;
828 doc_id_t first_doc_id;
829
830 /* If we are dropping the only FTS index of the table,
831 remove it from optimize thread */
832 fts_optimize_remove_table(table);
833
834 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
835
836 /* If Doc ID column is not added internally by FTS index,
837 we can drop all FTS auxiliary tables. Otherwise, we will
838 need to keep some common table such as CONFIG table, so
839 as to keep track of incrementing Doc IDs */
840 if (!DICT_TF2_FLAG_IS_SET(
841 table, DICT_TF2_FTS_HAS_DOC_ID)) {
842
843 err = fts_drop_tables(trx, table);
844
845 err = fts_drop_index_tables(trx, index);
846
847 while (index->index_fts_syncing
848 && !trx_is_interrupted(trx)) {
849 DICT_BG_YIELD(trx);
850 }
851
852 fts_free(table);
853
854 return(err);
855 }
856
857 while (index->index_fts_syncing
858 && !trx_is_interrupted(trx)) {
859 DICT_BG_YIELD(trx);
860 }
861
862 current_doc_id = table->fts->cache->next_doc_id;
863 first_doc_id = table->fts->cache->first_doc_id;
864 fts_cache_clear(table->fts->cache);
865 fts_cache_destroy(table->fts->cache);
866 table->fts->cache = fts_cache_create(table);
867 table->fts->cache->next_doc_id = current_doc_id;
868 table->fts->cache->first_doc_id = first_doc_id;
869
870 } else {
871 fts_cache_t* cache = table->fts->cache;
872 fts_index_cache_t* index_cache;
873
874 rw_lock_x_lock(&cache->init_lock);
875
876 index_cache = fts_find_index_cache(cache, index);
877
878 if (index_cache != NULL) {
879 while (index->index_fts_syncing
880 && !trx_is_interrupted(trx)) {
881 DICT_BG_YIELD(trx);
882 }
883
884 if (index_cache->words) {
885 fts_words_free(index_cache->words);
886 rbt_free(index_cache->words);
887 }
888
889 ib_vector_remove(cache->indexes, *(void**) index_cache);
890 }
891
892 if (cache->get_docs) {
893 fts_reset_get_doc(cache);
894 }
895
896 rw_lock_x_unlock(&cache->init_lock);
897 }
898
899 err = fts_drop_index_tables(trx, index);
900
901 ib_vector_remove(indexes, (const void*) index);
902
903 return(err);
904 }
905
906 /****************************************************************//**
907 Free the query graph but check whether dict_sys->mutex is already
908 held */
909 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)910 fts_que_graph_free_check_lock(
911 /*==========================*/
912 fts_table_t* fts_table, /*!< in: FTS table */
913 const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
914 que_t* graph) /*!< in: query graph */
915 {
916 ibool has_dict = FALSE;
917
918 if (fts_table && fts_table->table) {
919 ut_ad(fts_table->table->fts);
920
921 has_dict = fts_table->table->fts->fts_status
922 & TABLE_DICT_LOCKED;
923 } else if (index_cache) {
924 ut_ad(index_cache->index->table->fts);
925
926 has_dict = index_cache->index->table->fts->fts_status
927 & TABLE_DICT_LOCKED;
928 }
929
930 if (!has_dict) {
931 mutex_enter(&dict_sys->mutex);
932 }
933
934 ut_ad(mutex_own(&dict_sys->mutex));
935
936 que_graph_free(graph);
937
938 if (!has_dict) {
939 mutex_exit(&dict_sys->mutex);
940 }
941 }
942
943 /****************************************************************//**
944 Create an FTS index cache. */
945 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)946 fts_index_get_charset(
947 /*==================*/
948 dict_index_t* index) /*!< in: FTS index */
949 {
950 CHARSET_INFO* charset = NULL;
951 dict_field_t* field;
952 ulint prtype;
953
954 field = dict_index_get_nth_field(index, 0);
955 prtype = field->col->prtype;
956
957 charset = fts_get_charset(prtype);
958
959 #ifdef FTS_DEBUG
960 /* Set up charset info for this index. Please note all
961 field of the FTS index should have the same charset */
962 for (i = 1; i < index->n_fields; i++) {
963 CHARSET_INFO* fld_charset;
964
965 field = dict_index_get_nth_field(index, i);
966 prtype = field->col->prtype;
967
968 fld_charset = fts_get_charset(prtype);
969
970 /* All FTS columns should have the same charset */
971 if (charset) {
972 ut_a(charset == fld_charset);
973 } else {
974 charset = fld_charset;
975 }
976 }
977 #endif
978
979 return(charset);
980
981 }
982 /****************************************************************//**
983 Create an FTS index cache.
984 @return Index Cache */
985 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)986 fts_cache_index_cache_create(
987 /*=========================*/
988 dict_table_t* table, /*!< in: table with FTS index */
989 dict_index_t* index) /*!< in: FTS index */
990 {
991 ulint n_bytes;
992 fts_index_cache_t* index_cache;
993 fts_cache_t* cache = table->fts->cache;
994
995 ut_a(cache != NULL);
996
997 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
998
999 /* Must not already exist in the cache vector. */
1000 ut_a(fts_find_index_cache(cache, index) == NULL);
1001
1002 index_cache = static_cast<fts_index_cache_t*>(
1003 ib_vector_push(cache->indexes, NULL));
1004
1005 memset(index_cache, 0x0, sizeof(*index_cache));
1006
1007 index_cache->index = index;
1008
1009 index_cache->charset = fts_index_get_charset(index);
1010
1011 n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1012
1013 index_cache->ins_graph = static_cast<que_t**>(
1014 mem_heap_zalloc(static_cast<mem_heap_t*>(
1015 cache->self_heap->arg), n_bytes));
1016
1017 index_cache->sel_graph = static_cast<que_t**>(
1018 mem_heap_zalloc(static_cast<mem_heap_t*>(
1019 cache->self_heap->arg), n_bytes));
1020
1021 fts_index_cache_init(cache->sync_heap, index_cache);
1022
1023 if (cache->get_docs) {
1024 fts_reset_get_doc(cache);
1025 }
1026
1027 return(index_cache);
1028 }
1029
1030 /****************************************************************//**
1031 Release all resources help by the words rb tree e.g., the node ilist. */
1032 static
1033 void
fts_words_free(ib_rbt_t * words)1034 fts_words_free(
1035 /*===========*/
1036 ib_rbt_t* words) /*!< in: rb tree of words */
1037 {
1038 const ib_rbt_node_t* rbt_node;
1039
1040 /* Free the resources held by a word. */
1041 for (rbt_node = rbt_first(words);
1042 rbt_node != NULL;
1043 rbt_node = rbt_first(words)) {
1044
1045 ulint i;
1046 fts_tokenizer_word_t* word;
1047
1048 word = rbt_value(fts_tokenizer_word_t, rbt_node);
1049
1050 /* Free the ilists of this word. */
1051 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1052
1053 fts_node_t* fts_node = static_cast<fts_node_t*>(
1054 ib_vector_get(word->nodes, i));
1055
1056 ut_free(fts_node->ilist);
1057 fts_node->ilist = NULL;
1058 }
1059
1060 /* NOTE: We are responsible for free'ing the node */
1061 ut_free(rbt_remove_node(words, rbt_node));
1062 }
1063 }
1064
1065 /** Clear cache.
1066 @param[in,out] cache fts cache */
1067 void
fts_cache_clear(fts_cache_t * cache)1068 fts_cache_clear(
1069 fts_cache_t* cache)
1070 {
1071 ulint i;
1072
1073 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1074 ulint j;
1075 fts_index_cache_t* index_cache;
1076
1077 index_cache = static_cast<fts_index_cache_t*>(
1078 ib_vector_get(cache->indexes, i));
1079
1080 fts_words_free(index_cache->words);
1081
1082 rbt_free(index_cache->words);
1083
1084 index_cache->words = NULL;
1085
1086 for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1087
1088 if (index_cache->ins_graph[j] != NULL) {
1089
1090 fts_que_graph_free_check_lock(
1091 NULL, index_cache,
1092 index_cache->ins_graph[j]);
1093
1094 index_cache->ins_graph[j] = NULL;
1095 }
1096
1097 if (index_cache->sel_graph[j] != NULL) {
1098
1099 fts_que_graph_free_check_lock(
1100 NULL, index_cache,
1101 index_cache->sel_graph[j]);
1102
1103 index_cache->sel_graph[j] = NULL;
1104 }
1105 }
1106
1107 index_cache->doc_stats = NULL;
1108 }
1109
1110 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1111 cache->sync_heap->arg = NULL;
1112
1113 fts_need_sync = false;
1114
1115 cache->total_size = 0;
1116
1117 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1118 cache->deleted_doc_ids = NULL;
1119 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1120 }
1121
1122 /*********************************************************************//**
1123 Search the index specific cache for a particular FTS index.
1124 @return the index cache else NULL */
1125 UNIV_INLINE
1126 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1127 fts_get_index_cache(
1128 /*================*/
1129 fts_cache_t* cache, /*!< in: cache to search */
1130 const dict_index_t* index) /*!< in: index to search for */
1131 {
1132 ulint i;
1133
1134 ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1135 || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1136
1137 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1138 fts_index_cache_t* index_cache;
1139
1140 index_cache = static_cast<fts_index_cache_t*>(
1141 ib_vector_get(cache->indexes, i));
1142
1143 if (index_cache->index == index) {
1144
1145 return(index_cache);
1146 }
1147 }
1148
1149 return(NULL);
1150 }
1151
#ifdef FTS_DEBUG
/*********************************************************************//**
Look up the get_doc structure whose index cache was built for the given
index. Debug builds assert that the caller owns cache->init_lock in X mode.
@return the fts_get_doc_t item, or NULL when there is none */
static
fts_get_doc_t*
fts_get_index_get_doc(
/*==================*/
	fts_cache_t*		cache,	/*!< in: cache to search */
	const dict_index_t*	index)	/*!< in: index to search for */
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	fts_get_doc_t*	match = NULL;
	const ulint	n_get_docs = ib_vector_size(cache->get_docs);

	/* Linear scan; stop at the first entry that refers to the index. */
	for (ulint pos = 0; pos < n_get_docs && match == NULL; ++pos) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			match = candidate;
		}
	}

	return(match);
}
#endif /* FTS_DEBUG */
1182
1183 /**********************************************************************//**
1184 Free the FTS cache. */
1185 void
fts_cache_destroy(fts_cache_t * cache)1186 fts_cache_destroy(
1187 /*==============*/
1188 fts_cache_t* cache) /*!< in: cache*/
1189 {
1190 rw_lock_free(&cache->lock);
1191 rw_lock_free(&cache->init_lock);
1192 mutex_free(&cache->optimize_lock);
1193 mutex_free(&cache->deleted_lock);
1194 mutex_free(&cache->doc_id_lock);
1195 os_event_destroy(cache->sync->event);
1196
1197 if (cache->stopword_info.cached_stopword) {
1198 rbt_free(cache->stopword_info.cached_stopword);
1199 }
1200
1201 if (cache->sync_heap->arg) {
1202 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1203 }
1204
1205 mem_heap_free(cache->cache_heap);
1206 }
1207
1208 /**********************************************************************//**
1209 Find an existing word, or if not found, create one and return it.
1210 @return specified word token */
1211 static
1212 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1213 fts_tokenizer_word_get(
1214 /*===================*/
1215 fts_cache_t* cache, /*!< in: cache */
1216 fts_index_cache_t*
1217 index_cache, /*!< in: index cache */
1218 fts_string_t* text) /*!< in: node text */
1219 {
1220 fts_tokenizer_word_t* word;
1221 ib_rbt_bound_t parent;
1222
1223 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1224
1225 ut_ad(current_thd != NULL);
1226 /* If it is a stopword, do not index it */
1227 if (!fts_check_token(text,
1228 cache->stopword_info.cached_stopword,
1229 index_cache->index->is_ngram,
1230 index_cache->charset,
1231 thd_has_ft_ignore_stopwords(current_thd))) {
1232
1233 return(NULL);
1234 }
1235
1236 /* Check if we found a match, if not then add word to tree. */
1237 if (rbt_search(index_cache->words, &parent, text) != 0) {
1238 mem_heap_t* heap;
1239 fts_tokenizer_word_t new_word;
1240
1241 heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1242
1243 new_word.nodes = ib_vector_create(
1244 cache->sync_heap, sizeof(fts_node_t), 4);
1245
1246 fts_string_dup(&new_word.text, text, heap);
1247
1248 parent.last = rbt_add_node(
1249 index_cache->words, &parent, &new_word);
1250
1251 /* Take into account the RB tree memory use and the vector. */
1252 cache->total_size += sizeof(new_word)
1253 + sizeof(ib_rbt_node_t)
1254 + text->f_len
1255 + (sizeof(fts_node_t) * 4)
1256 + sizeof(*new_word.nodes);
1257
1258 ut_ad(rbt_validate(index_cache->words));
1259 }
1260
1261 word = rbt_value(fts_tokenizer_word_t, parent.last);
1262
1263 return(word);
1264 }
1265
1266 /**********************************************************************//**
1267 Add the given doc_id/word positions to the given node's ilist. */
1268 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1269 fts_cache_node_add_positions(
1270 /*=========================*/
1271 fts_cache_t* cache, /*!< in: cache */
1272 fts_node_t* node, /*!< in: word node */
1273 doc_id_t doc_id, /*!< in: doc id */
1274 ib_vector_t* positions) /*!< in: fts_token_t::positions */
1275 {
1276 ulint i;
1277 byte* ptr;
1278 byte* ilist;
1279 ulint enc_len;
1280 ulint last_pos;
1281 byte* ptr_start;
1282 ulint doc_id_delta;
1283
1284 #ifdef UNIV_DEBUG
1285 if (cache) {
1286 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1287 }
1288 #endif /* UNIV_DEBUG */
1289
1290 ut_ad(doc_id >= node->last_doc_id);
1291
1292 /* Calculate the space required to store the ilist. */
1293 doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1294 enc_len = fts_get_encoded_len(doc_id_delta);
1295
1296 last_pos = 0;
1297 for (i = 0; i < ib_vector_size(positions); i++) {
1298 ulint pos = *(static_cast<ulint*>(
1299 ib_vector_get(positions, i)));
1300
1301 ut_ad(last_pos == 0 || pos > last_pos);
1302
1303 enc_len += fts_get_encoded_len(pos - last_pos);
1304 last_pos = pos;
1305 }
1306
1307 /* The 0x00 byte at the end of the token positions list. */
1308 enc_len++;
1309
1310 if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1311 /* No need to allocate more space, we can fit in the new
1312 data at the end of the old one. */
1313 ilist = NULL;
1314 ptr = node->ilist + node->ilist_size;
1315 } else {
1316 ulint new_size = node->ilist_size + enc_len;
1317
1318 /* Over-reserve space by a fixed size for small lengths and
1319 by 20% for lengths >= 48 bytes. */
1320 if (new_size < 16) {
1321 new_size = 16;
1322 } else if (new_size < 32) {
1323 new_size = 32;
1324 } else if (new_size < 48) {
1325 new_size = 48;
1326 } else {
1327 new_size = (ulint)(1.2 * new_size);
1328 }
1329
1330 ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1331 ptr = ilist + node->ilist_size;
1332
1333 node->ilist_size_alloc = new_size;
1334 if (cache) {
1335 cache->total_size += new_size;
1336 }
1337 }
1338
1339 ptr_start = ptr;
1340
1341 /* Encode the new fragment. */
1342 ptr += fts_encode_int(doc_id_delta, ptr);
1343
1344 last_pos = 0;
1345 for (i = 0; i < ib_vector_size(positions); i++) {
1346 ulint pos = *(static_cast<ulint*>(
1347 ib_vector_get(positions, i)));
1348
1349 ptr += fts_encode_int(pos - last_pos, ptr);
1350 last_pos = pos;
1351 }
1352
1353 *ptr++ = 0;
1354
1355 ut_a(enc_len == (ulint)(ptr - ptr_start));
1356
1357 if (ilist) {
1358 /* Copy old ilist to the start of the new one and switch the
1359 new one into place in the node. */
1360 if (node->ilist_size > 0) {
1361 memcpy(ilist, node->ilist, node->ilist_size);
1362 ut_free(node->ilist);
1363 if (cache) {
1364 cache->total_size -= node->ilist_size;
1365 }
1366 }
1367
1368 node->ilist = ilist;
1369 }
1370
1371 node->ilist_size += enc_len;
1372
1373 if (node->first_doc_id == FTS_NULL_DOC_ID) {
1374 node->first_doc_id = doc_id;
1375 }
1376
1377 node->last_doc_id = doc_id;
1378 ++node->doc_count;
1379 }
1380
1381 /**********************************************************************//**
1382 Add document to the cache. */
1383 static
1384 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1385 fts_cache_add_doc(
1386 /*==============*/
1387 fts_cache_t* cache, /*!< in: cache */
1388 fts_index_cache_t*
1389 index_cache, /*!< in: index cache */
1390 doc_id_t doc_id, /*!< in: doc id to add */
1391 ib_rbt_t* tokens) /*!< in: document tokens */
1392 {
1393 const ib_rbt_node_t* node;
1394 ulint n_words;
1395 fts_doc_stats_t* doc_stats;
1396
1397 if (!tokens) {
1398 return;
1399 }
1400
1401 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1402
1403 n_words = rbt_size(tokens);
1404
1405 for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1406
1407 fts_tokenizer_word_t* word;
1408 fts_node_t* fts_node = NULL;
1409 fts_token_t* token = rbt_value(fts_token_t, node);
1410
1411 /* Find and/or add token to the cache. */
1412 word = fts_tokenizer_word_get(
1413 cache, index_cache, &token->text);
1414
1415 if (!word) {
1416 ut_free(rbt_remove_node(tokens, node));
1417 continue;
1418 }
1419
1420 if (ib_vector_size(word->nodes) > 0) {
1421 fts_node = static_cast<fts_node_t*>(
1422 ib_vector_last(word->nodes));
1423 }
1424
1425 if (fts_node == NULL || fts_node->synced
1426 || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1427 || doc_id < fts_node->last_doc_id) {
1428
1429 fts_node = static_cast<fts_node_t*>(
1430 ib_vector_push(word->nodes, NULL));
1431
1432 memset(fts_node, 0x0, sizeof(*fts_node));
1433
1434 cache->total_size += sizeof(*fts_node);
1435 }
1436
1437 fts_cache_node_add_positions(
1438 cache, fts_node, doc_id, token->positions);
1439
1440 ut_free(rbt_remove_node(tokens, node));
1441 }
1442
1443 ut_a(rbt_empty(tokens));
1444
1445 /* Add to doc ids processed so far. */
1446 doc_stats = static_cast<fts_doc_stats_t*>(
1447 ib_vector_push(index_cache->doc_stats, NULL));
1448
1449 doc_stats->doc_id = doc_id;
1450 doc_stats->word_count = n_words;
1451
1452 /* Add the doc stats memory usage too. */
1453 cache->total_size += sizeof(*doc_stats);
1454
1455 if (doc_id > cache->sync->max_doc_id) {
1456 cache->sync->max_doc_id = doc_id;
1457 }
1458 }
1459
1460 /****************************************************************//**
1461 Drops a table. If the table can't be found we return a SUCCESS code.
1462 @return DB_SUCCESS or error code */
1463 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1464 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1465 fts_drop_table(
1466 /*===========*/
1467 trx_t* trx, /*!< in: transaction */
1468 const char* table_name) /*!< in: table to drop */
1469 {
1470 dict_table_t* table;
1471 dberr_t error = DB_SUCCESS;
1472
1473 /* Check that the table exists in our data dictionary.
1474 Similar to regular drop table case, we will open table with
1475 DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1476 table = dict_table_open_on_name(
1477 table_name, TRUE, FALSE,
1478 static_cast<dict_err_ignore_t>(
1479 DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1480
1481 if (table != 0) {
1482
1483 dict_table_close(table, TRUE, FALSE);
1484
1485 /* Pass nonatomic=false (dont allow data dict unlock),
1486 because the transaction may hold locks on SYS_* tables from
1487 previous calls to fts_drop_table(). */
1488 error = row_drop_table_for_mysql(table_name, trx, true, false);
1489
1490 if (error != DB_SUCCESS) {
1491 ib::error() << "Unable to drop FTS index aux table "
1492 << table_name << ": " << ut_strerr(error);
1493 }
1494 } else {
1495 error = DB_FAIL;
1496 }
1497
1498 return(error);
1499 }
1500
1501 /****************************************************************//**
1502 Rename a single auxiliary table due to database name change.
1503 @return DB_SUCCESS or error code */
1504 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1505 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1506 fts_rename_one_aux_table(
1507 /*=====================*/
1508 const char* new_name, /*!< in: new parent tbl name */
1509 const char* fts_table_old_name, /*!< in: old aux tbl name */
1510 trx_t* trx) /*!< in: transaction */
1511 {
1512 char fts_table_new_name[MAX_TABLE_NAME_LEN];
1513 ulint new_db_name_len = dict_get_db_name_len(new_name);
1514 ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1515 ulint table_new_name_len = strlen(fts_table_old_name)
1516 + new_db_name_len - old_db_name_len;
1517
1518 /* Check if the new and old database names are the same, if so,
1519 nothing to do */
1520 ut_ad((new_db_name_len != old_db_name_len)
1521 || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1522
1523 /* Get the database name from "new_name", and table name
1524 from the fts_table_old_name */
1525 strncpy(fts_table_new_name, new_name, new_db_name_len);
1526 strncpy(fts_table_new_name + new_db_name_len,
1527 strchr(fts_table_old_name, '/'),
1528 table_new_name_len - new_db_name_len);
1529 fts_table_new_name[table_new_name_len] = 0;
1530
1531 return(row_rename_table_for_mysql(
1532 fts_table_old_name, fts_table_new_name, trx, false));
1533 }
1534
1535 /****************************************************************//**
1536 Rename auxiliary tables for all fts index for a table. This(rename)
1537 is due to database name change
1538 @return DB_SUCCESS or error code */
1539 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1540 fts_rename_aux_tables(
1541 /*==================*/
1542 dict_table_t* table, /*!< in: user Table */
1543 const char* new_name, /*!< in: new table name */
1544 trx_t* trx) /*!< in: transaction */
1545 {
1546 ulint i;
1547 fts_table_t fts_table;
1548
1549 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1550
1551 /* Rename common auxiliary tables */
1552 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1553 char old_table_name[MAX_FULL_NAME_LEN];
1554 dberr_t err = DB_SUCCESS;
1555
1556 fts_table.suffix = fts_common_tables[i];
1557
1558 fts_get_table_name(&fts_table, old_table_name);
1559
1560 err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1561
1562 if (err != DB_SUCCESS) {
1563 return(err);
1564 }
1565 }
1566
1567 fts_t* fts = table->fts;
1568
1569 /* Rename index specific auxiliary tables */
1570 for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1571 ++i) {
1572 dict_index_t* index;
1573
1574 index = static_cast<dict_index_t*>(
1575 ib_vector_getp(fts->indexes, i));
1576
1577 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1578
1579 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1580 dberr_t err;
1581 char old_table_name[MAX_FULL_NAME_LEN];
1582
1583 fts_table.suffix = fts_get_suffix(j);
1584
1585 fts_get_table_name(&fts_table, old_table_name);
1586
1587 err = fts_rename_one_aux_table(
1588 new_name, old_table_name, trx);
1589
1590 DBUG_EXECUTE_IF("fts_rename_failure",
1591 err = DB_DEADLOCK;
1592 fts_sql_rollback(trx););
1593
1594 if (err != DB_SUCCESS) {
1595 return(err);
1596 }
1597 }
1598 }
1599
1600 return(DB_SUCCESS);
1601 }
1602
1603 /****************************************************************//**
1604 Drops the common ancillary tables needed for supporting an FTS index
1605 on the given table. row_mysql_lock_data_dictionary must have been called
1606 before this.
1607 @return DB_SUCCESS or error code */
1608 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1609 dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table)1610 fts_drop_common_tables(
1611 /*===================*/
1612 trx_t* trx, /*!< in: transaction */
1613 fts_table_t* fts_table) /*!< in: table with an FTS
1614 index */
1615 {
1616 ulint i;
1617 dberr_t error = DB_SUCCESS;
1618
1619 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1620 dberr_t err;
1621 char table_name[MAX_FULL_NAME_LEN];
1622
1623 fts_table->suffix = fts_common_tables[i];
1624
1625 fts_get_table_name(fts_table, table_name);
1626
1627 err = fts_drop_table(trx, table_name);
1628
1629 /* We only return the status of the last error. */
1630 if (err != DB_SUCCESS && err != DB_FAIL) {
1631 error = err;
1632 }
1633 }
1634
1635 return(error);
1636 }
1637
1638 /****************************************************************//**
1639 Since we do a horizontal split on the index table, we need to drop
1640 all the split tables.
1641 @return DB_SUCCESS or error code */
1642 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1643 fts_drop_index_split_tables(
1644 /*========================*/
1645 trx_t* trx, /*!< in: transaction */
1646 dict_index_t* index) /*!< in: fts instance */
1647
1648 {
1649 ulint i;
1650 fts_table_t fts_table;
1651 dberr_t error = DB_SUCCESS;
1652
1653 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1654
1655 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1656 dberr_t err;
1657 char table_name[MAX_FULL_NAME_LEN];
1658
1659 fts_table.suffix = fts_get_suffix(i);
1660
1661 fts_get_table_name(&fts_table, table_name);
1662
1663 err = fts_drop_table(trx, table_name);
1664
1665 /* We only return the status of the last error. */
1666 if (err != DB_SUCCESS && err != DB_FAIL) {
1667 error = err;
1668 }
1669 }
1670
1671 return(error);
1672 }
1673
1674 /****************************************************************//**
1675 Drops FTS auxiliary tables for an FTS index
1676 @return DB_SUCCESS or error code */
1677 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1678 fts_drop_index_tables(
1679 /*==================*/
1680 trx_t* trx, /*!< in: transaction */
1681 dict_index_t* index) /*!< in: Index to drop */
1682 {
1683 dberr_t error = DB_SUCCESS;
1684
1685 #ifdef FTS_DOC_STATS_DEBUG
1686 fts_table_t fts_table;
1687 static const char* index_tables[] = {
1688 "DOC_ID",
1689 NULL
1690 };
1691 #endif /* FTS_DOC_STATS_DEBUG */
1692
1693 dberr_t err = fts_drop_index_split_tables(trx, index);
1694
1695 /* We only return the status of the last error. */
1696 if (err != DB_SUCCESS) {
1697 error = err;
1698 }
1699
1700 #ifdef FTS_DOC_STATS_DEBUG
1701 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1702
1703 for (ulint i = 0; index_tables[i] != NULL; ++i) {
1704 char table_name[MAX_FULL_NAME_LEN];
1705
1706 fts_table.suffix = index_tables[i];
1707
1708 fts_get_table_name(&fts_table, table_name);
1709
1710 err = fts_drop_table(trx, table_name);
1711
1712 /* We only return the status of the last error. */
1713 if (err != DB_SUCCESS && err != DB_FAIL) {
1714 error = err;
1715 }
1716 }
1717 #endif /* FTS_DOC_STATS_DEBUG */
1718
1719 return(error);
1720 }
1721
1722 /****************************************************************//**
1723 Drops FTS ancillary tables needed for supporting an FTS index
1724 on the given table. row_mysql_lock_data_dictionary must have been called
1725 before this.
1726 @return DB_SUCCESS or error code */
1727 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1728 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1729 fts_drop_all_index_tables(
1730 /*======================*/
1731 trx_t* trx, /*!< in: transaction */
1732 fts_t* fts) /*!< in: fts instance */
1733 {
1734 dberr_t error = DB_SUCCESS;
1735
1736 for (ulint i = 0;
1737 fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1738 ++i) {
1739
1740 dberr_t err;
1741 dict_index_t* index;
1742
1743 index = static_cast<dict_index_t*>(
1744 ib_vector_getp(fts->indexes, i));
1745
1746 err = fts_drop_index_tables(trx, index);
1747
1748 if (err != DB_SUCCESS) {
1749 error = err;
1750 }
1751 }
1752
1753 return(error);
1754 }
1755
1756 /*********************************************************************//**
1757 Drops the ancillary tables needed for supporting an FTS index on a
1758 given table. row_mysql_lock_data_dictionary must have been called before
1759 this.
1760 @return DB_SUCCESS or error code */
1761 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1762 fts_drop_tables(
1763 /*============*/
1764 trx_t* trx, /*!< in: transaction */
1765 dict_table_t* table) /*!< in: table has the FTS index */
1766 {
1767 dberr_t error;
1768 fts_table_t fts_table;
1769
1770 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1771
1772 /* TODO: This is not atomic and can cause problems during recovery. */
1773
1774 error = fts_drop_common_tables(trx, &fts_table);
1775
1776 if (error == DB_SUCCESS) {
1777 error = fts_drop_all_index_tables(trx, table->fts);
1778 }
1779
1780 return(error);
1781 }
1782
1783 /** Extract only the required flags from table->flags2 for FTS Aux
1784 tables.
1785 @param[in] in_flags2 Table flags2
1786 @return extracted flags2 for FTS aux tables */
1787 static inline
1788 ulint
fts_get_table_flags2_for_aux_tables(ulint flags2)1789 fts_get_table_flags2_for_aux_tables(
1790 ulint flags2)
1791 {
1792 /* Extract the file_per_table flag, temporary file flag and
1793 encryption flag from the main FTS table flags2 */
1794 return((flags2 & DICT_TF2_USE_FILE_PER_TABLE) |
1795 (flags2 & DICT_TF2_ENCRYPTION) |
1796 (flags2 & DICT_TF2_TEMPORARY));
1797 }
1798
1799 /** Create dict_table_t object for FTS Aux tables.
1800 @param[in] aux_table_name FTS Aux table name
1801 @param[in] table table object of FTS Index
1802 @param[in] n_cols number of columns for FTS Aux table
1803 @return table object for FTS Aux table */
1804 static
1805 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1806 fts_create_in_mem_aux_table(
1807 const char* aux_table_name,
1808 const dict_table_t* table,
1809 ulint n_cols)
1810 {
1811 dict_table_t* new_table = dict_mem_table_create(
1812 aux_table_name, table->space, n_cols, 0, table->flags,
1813 fts_get_table_flags2_for_aux_tables(table->flags2));
1814
1815 if (DICT_TF_HAS_SHARED_SPACE(table->flags)) {
1816 ut_ad(table->space == fil_space_get_id_by_name(
1817 table->tablespace()));
1818 new_table->tablespace = mem_heap_strdup(
1819 new_table->heap, table->tablespace);
1820 }
1821
1822 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1823 ut_ad(table->data_dir_path != NULL);
1824 new_table->data_dir_path = mem_heap_strdup(
1825 new_table->heap, table->data_dir_path);
1826 }
1827
1828 return(new_table);
1829 }
1830
1831 /** Function to create on FTS common table.
1832 @param[in,out] trx InnoDB transaction
1833 @param[in] table Table that has FTS Index
1834 @param[in] fts_table_name FTS AUX table name
1835 @param[in] fts_suffix FTS AUX table suffix
1836 @param[in] heap heap
1837 @return table object if created, else NULL */
1838 static
1839 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1840 fts_create_one_common_table(
1841 trx_t* trx,
1842 const dict_table_t* table,
1843 const char* fts_table_name,
1844 const char* fts_suffix,
1845 mem_heap_t* heap)
1846 {
1847 dict_table_t* new_table = NULL;
1848 dberr_t error;
1849 bool is_config = strcmp(fts_suffix, "CONFIG") == 0;
1850
1851 if (!is_config) {
1852
1853 new_table = fts_create_in_mem_aux_table(
1854 fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1855
1856 dict_mem_table_add_col(
1857 new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1858 FTS_DELETED_TABLE_COL_LEN);
1859 } else {
1860 /* Config table has different schema. */
1861 new_table = fts_create_in_mem_aux_table(
1862 fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1863
1864 dict_mem_table_add_col(
1865 new_table, heap, "key", DATA_VARCHAR, 0,
1866 FTS_CONFIG_TABLE_KEY_COL_LEN);
1867
1868 dict_mem_table_add_col(
1869 new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1870 FTS_CONFIG_TABLE_VALUE_COL_LEN);
1871 }
1872
1873 error = row_create_table_for_mysql(new_table, NULL, trx, false,
1874 FIL_ENCRYPTION_DEFAULT, CreateInfoEncryptionKeyId());
1875
1876 if (error == DB_SUCCESS) {
1877
1878 dict_index_t* index = dict_mem_index_create(
1879 fts_table_name, "FTS_COMMON_TABLE_IND",
1880 new_table->space, DICT_UNIQUE|DICT_CLUSTERED, 1);
1881
1882 if (!is_config) {
1883 dict_mem_index_add_field(index, "doc_id", 0);
1884 } else {
1885 dict_mem_index_add_field(index, "key", 0);
1886 }
1887
1888 /* We save and restore trx->dict_operation because
1889 row_create_index_for_mysql() changes the operation to
1890 TRX_DICT_OP_TABLE. */
1891 trx_dict_op_t op = trx_get_dict_operation(trx);
1892
1893 error = row_create_index_for_mysql(index, trx, NULL, NULL);
1894
1895 trx->dict_operation = op;
1896 }
1897
1898 if (error != DB_SUCCESS) {
1899 trx->error_state = error;
1900 dict_mem_table_free(new_table);
1901 new_table = NULL;
1902 ib::warn() << "Failed to create FTS common table "
1903 << fts_table_name;
1904 }
1905 return(new_table);
1906 }
1907
1908 /** Creates the common auxiliary tables needed for supporting an FTS index
1909 on the given table. row_mysql_lock_data_dictionary must have been called
1910 before this.
1911 The following tables are created.
1912 CREATE TABLE $FTS_PREFIX_DELETED
1913 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1914 CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1915 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1916 CREATE TABLE $FTS_PREFIX_BEING_DELETED
1917 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1918 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1919 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1920 CREATE TABLE $FTS_PREFIX_CONFIG
1921 (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1922 @param[in,out] trx transaction
1923 @param[in] table table with FTS index
1924 @param[in] name table name normalized
1925 @param[in] skip_doc_id_index Skip index on doc id
1926 @return DB_SUCCESS if succeed */
1927 dberr_t
fts_create_common_tables(trx_t * trx,const dict_table_t * table,const char * name,bool skip_doc_id_index)1928 fts_create_common_tables(
1929 trx_t* trx,
1930 const dict_table_t* table,
1931 const char* name,
1932 bool skip_doc_id_index)
1933 {
1934 dberr_t error;
1935 que_t* graph;
1936 fts_table_t fts_table;
1937 mem_heap_t* heap = mem_heap_create(1024);
1938 pars_info_t* info;
1939 char fts_name[MAX_FULL_NAME_LEN];
1940 char full_name[sizeof(fts_common_tables) / sizeof(char*)]
1941 [MAX_FULL_NAME_LEN];
1942
1943 dict_index_t* index = NULL;
1944 trx_dict_op_t op;
1945 /* common_tables vector is used for dropping FTS common tables
1946 on error condition. */
1947 std::vector<dict_table_t*> common_tables;
1948 std::vector<dict_table_t*>::const_iterator it;
1949
1950 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1951
1952 error = fts_drop_common_tables(trx, &fts_table);
1953
1954 if (error != DB_SUCCESS) {
1955
1956 goto func_exit;
1957 }
1958
1959 /* Create the FTS tables that are common to an FTS index. */
1960 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
1961
1962 fts_table.suffix = fts_common_tables[i];
1963 fts_get_table_name(&fts_table, full_name[i]);
1964 dict_table_t* common_table = fts_create_one_common_table(
1965 trx, table, full_name[i], fts_table.suffix, heap);
1966
1967 if (common_table == NULL) {
1968 error = DB_ERROR;
1969 goto func_exit;
1970 } else {
1971 common_tables.push_back(common_table);
1972 }
1973
1974 DBUG_EXECUTE_IF("ib_fts_aux_table_error",
1975 /* Return error after creating FTS_AUX_CONFIG table. */
1976 if (i == 4) {
1977 error = DB_ERROR;
1978 goto func_exit;
1979 }
1980 );
1981
1982 }
1983
1984 /* Write the default settings to the config table. */
1985 info = pars_info_create();
1986
1987 fts_table.suffix = "CONFIG";
1988 fts_get_table_name(&fts_table, fts_name);
1989 pars_info_bind_id(info, true, "config_table", fts_name);
1990
1991 graph = fts_parse_sql_no_dict_lock(
1992 &fts_table, info, fts_config_table_insert_values_sql);
1993
1994 error = fts_eval_sql(trx, graph);
1995
1996 que_graph_free(graph);
1997
1998 if (error != DB_SUCCESS || skip_doc_id_index) {
1999
2000 goto func_exit;
2001 }
2002
2003 index = dict_mem_index_create(
2004 name, FTS_DOC_ID_INDEX_NAME, table->space,
2005 DICT_UNIQUE, 1);
2006 dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
2007
2008 op = trx_get_dict_operation(trx);
2009
2010 error = row_create_index_for_mysql(index, trx, NULL, NULL);
2011
2012 trx->dict_operation = op;
2013
2014 func_exit:
2015 if (error != DB_SUCCESS) {
2016
2017 for (it = common_tables.begin(); it != common_tables.end();
2018 ++it) {
2019 row_drop_table_for_mysql(
2020 (*it)->name.m_name, trx, FALSE);
2021 }
2022 }
2023
2024 common_tables.clear();
2025 mem_heap_free(heap);
2026
2027 return(error);
2028 }
2029 /** Creates one FTS auxiliary index table for an FTS index.
2030 @param[in,out] trx transaction
2031 @param[in] index the index instance
2032 @param[in] fts_table fts_table structure
2033 @param[in] heap memory heap
2034 @return DB_SUCCESS or error code */
2035 static
2036 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)2037 fts_create_one_index_table(
2038 trx_t* trx,
2039 const dict_index_t* index,
2040 fts_table_t* fts_table,
2041 mem_heap_t* heap)
2042 {
2043 dict_field_t* field;
2044 dict_table_t* new_table = NULL;
2045 char table_name[MAX_FULL_NAME_LEN];
2046 dberr_t error;
2047 CHARSET_INFO* charset;
2048
2049 ut_ad(index->type & DICT_FTS);
2050
2051 fts_get_table_name(fts_table, table_name);
2052
2053 new_table = fts_create_in_mem_aux_table(
2054 table_name, fts_table->table,
2055 FTS_AUX_INDEX_TABLE_NUM_COLS);
2056
2057 field = dict_index_get_nth_field(index, 0);
2058 charset = fts_get_charset(field->col->prtype);
2059
2060 dict_mem_table_add_col(new_table, heap, "word",
2061 charset == &my_charset_latin1
2062 ? DATA_VARCHAR : DATA_VARMYSQL,
2063 field->col->prtype,
2064 FTS_INDEX_WORD_LEN);
2065
2066 dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
2067 DATA_NOT_NULL | DATA_UNSIGNED,
2068 FTS_INDEX_FIRST_DOC_ID_LEN);
2069
2070 dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2071 DATA_NOT_NULL | DATA_UNSIGNED,
2072 FTS_INDEX_LAST_DOC_ID_LEN);
2073
2074 dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2075 DATA_NOT_NULL | DATA_UNSIGNED,
2076 FTS_INDEX_DOC_COUNT_LEN);
2077
2078 /* The precise type calculation is as follows:
2079 least signficiant byte: MySQL type code (not applicable for sys cols)
2080 second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2081 third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2082
2083 dict_mem_table_add_col(
2084 new_table, heap, "ilist", DATA_BLOB,
2085 (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2086 FTS_INDEX_ILIST_LEN);
2087
2088 error = row_create_table_for_mysql(new_table, NULL, trx, false,
2089 FIL_ENCRYPTION_DEFAULT, CreateInfoEncryptionKeyId());
2090
2091 if (error == DB_SUCCESS) {
2092 dict_index_t* index = dict_mem_index_create(
2093 table_name, "FTS_INDEX_TABLE_IND", new_table->space,
2094 DICT_UNIQUE|DICT_CLUSTERED, 2);
2095 dict_mem_index_add_field(index, "word", 0);
2096 dict_mem_index_add_field(index, "first_doc_id", 0);
2097
2098 trx_dict_op_t op = trx_get_dict_operation(trx);
2099
2100 error = row_create_index_for_mysql(index, trx, NULL, NULL);
2101
2102 trx->dict_operation = op;
2103 }
2104
2105 if (error != DB_SUCCESS) {
2106 trx->error_state = error;
2107 dict_mem_table_free(new_table);
2108 new_table = NULL;
2109 ib::warn() << "Failed to create FTS index table "
2110 << table_name;
2111 }
2112
2113 return(new_table);
2114 }
2115
2116 /** Create auxiliary index tables for an FTS index.
2117 @param[in,out] trx transaction
2118 @param[in] index the index instance
2119 @param[in] table_name table name
2120 @param[in] table_id the table id
2121 @return DB_SUCCESS or error code */
2122 dberr_t
fts_create_index_tables_low(trx_t * trx,const dict_index_t * index,const char * table_name,table_id_t table_id)2123 fts_create_index_tables_low(
2124 trx_t* trx,
2125 const dict_index_t* index,
2126 const char* table_name,
2127 table_id_t table_id)
2128 {
2129 ulint i;
2130 fts_table_t fts_table;
2131 dberr_t error = DB_SUCCESS;
2132 mem_heap_t* heap = mem_heap_create(1024);
2133
2134 fts_table.type = FTS_INDEX_TABLE;
2135 fts_table.index_id = index->id;
2136 fts_table.table_id = table_id;
2137 fts_table.parent = table_name;
2138 fts_table.table = index->table;
2139
2140 #ifdef FTS_DOC_STATS_DEBUG
2141 /* Create the FTS auxiliary tables that are specific
2142 to an FTS index. */
2143 info = pars_info_create();
2144
2145 fts_table.suffix = "DOC_ID";
2146 fts_get_table_name(&fts_table, fts_name);
2147
2148 pars_info_bind_id(info, true, "doc_id_table", fts_name);
2149
2150 graph = fts_parse_sql_no_dict_lock(NULL, info,
2151 fts_create_index_tables_sql);
2152
2153 error = fts_eval_sql(trx, graph);
2154 que_graph_free(graph);
2155 #endif /* FTS_DOC_STATS_DEBUG */
2156
2157 /* aux_idx_tables vector is used for dropping FTS AUX INDEX
2158 tables on error condition. */
2159 std::vector<dict_table_t*> aux_idx_tables;
2160 std::vector<dict_table_t*>::const_iterator it;
2161
2162 for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2163 dict_table_t* new_table;
2164
2165 /* Create the FTS auxiliary tables that are specific
2166 to an FTS index. We need to preserve the table_id %s
2167 which fts_parse_sql_no_dict_lock() will fill in for us. */
2168 fts_table.suffix = fts_get_suffix(i);
2169
2170 new_table = fts_create_one_index_table(
2171 trx, index, &fts_table, heap);
2172
2173 if (new_table == NULL) {
2174 error = DB_FAIL;
2175 break;
2176 } else {
2177 aux_idx_tables.push_back(new_table);
2178 }
2179
2180 DBUG_EXECUTE_IF("ib_fts_index_table_error",
2181 /* Return error after creating FTS_INDEX_5
2182 aux table. */
2183 if (i == 4) {
2184 error = DB_FAIL;
2185 break;
2186 }
2187 );
2188 }
2189
2190 if (error != DB_SUCCESS) {
2191
2192 for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2193 ++it) {
2194 row_drop_table_for_mysql(
2195 (*it)->name.m_name, trx, FALSE);
2196 }
2197 }
2198
2199 aux_idx_tables.clear();
2200 mem_heap_free(heap);
2201
2202 return(error);
2203 }
2204
2205 /** Creates the column specific ancillary tables needed for supporting an
2206 FTS index on the given table. row_mysql_lock_data_dictionary must have
2207 been called before this.
2208
2209 All FTS AUX Index tables have the following schema.
2210 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2211 word VARCHAR(FTS_MAX_WORD_LEN),
2212 first_doc_id INT NOT NULL,
2213 last_doc_id UNSIGNED NOT NULL,
2214 doc_count UNSIGNED INT NOT NULL,
2215 ilist VARBINARY NOT NULL,
2216 UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2217 @param[in,out] trx transaction
2218 @param[in] index index instance
2219 @return DB_SUCCESS or error code */
2220 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index)2221 fts_create_index_tables(
2222 trx_t* trx,
2223 const dict_index_t* index)
2224 {
2225 dberr_t err;
2226 dict_table_t* table;
2227
2228 table = dict_table_get_low(index->table_name);
2229 ut_a(table != NULL);
2230
2231 err = fts_create_index_tables_low(
2232 trx, index, table->name.m_name, table->id);
2233
2234 if (err == DB_SUCCESS) {
2235 trx_commit(trx);
2236 }
2237
2238 return(err);
2239 }
#if 0
/******************************************************************//**
Map a row state to a printable name. This helper is currently compiled
out.
@return string representation of state */
static
const char*
fts_get_state_str(
/*==============*/
	fts_row_state	state)	/*!< in: state */
{
	const char*	label;

	switch (state) {
	case FTS_INSERT:
		label = "INSERT";
		break;

	case FTS_MODIFY:
		label = "MODIFY";
		break;

	case FTS_DELETE:
		label = "DELETE";
		break;

	case FTS_NOTHING:
		label = "NOTHING";
		break;

	case FTS_INVALID:
		label = "INVALID";
		break;

	default:
		label = "UNKNOWN";
		break;
	}

	return(label);
}
#endif
2271
2272 /******************************************************************//**
2273 Calculate the new state of a row given the existing state and a new event.
2274 @return new state of row */
2275 static
2276 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2277 fts_trx_row_get_new_state(
2278 /*======================*/
2279 fts_row_state old_state, /*!< in: existing state of row */
2280 fts_row_state event) /*!< in: new event */
2281 {
2282 /* The rules for transforming states:
2283
2284 I = inserted
2285 M = modified
2286 D = deleted
2287 N = nothing
2288
2289 M+D -> D:
2290
2291 If the row existed before the transaction started and it is modified
2292 during the transaction, followed by a deletion of the row, only the
2293 deletion will be signaled.
2294
2295 M+ -> M:
2296
2297 If the row existed before the transaction started and it is modified
2298 more than once during the transaction, only the last modification
2299 will be signaled.
2300
2301 IM*D -> N:
2302
2303 If a new row is added during the transaction (and possibly modified
2304 after its initial insertion) but it is deleted before the end of the
2305 transaction, nothing will be signaled.
2306
2307 IM* -> I:
2308
2309 If a new row is added during the transaction and modified after its
2310 initial insertion, only the addition will be signaled.
2311
2312 M*DI -> M:
2313
2314 If the row existed before the transaction started and it is deleted,
2315 then re-inserted, only a modification will be signaled. Note that
2316 this case is only possible if the table is using the row's primary
2317 key for FTS row ids, since those can be re-inserted by the user,
2318 which is not true for InnoDB generated row ids.
2319
2320 It is easily seen that the above rules decompose such that we do not
2321 need to store the row's entire history of events. Instead, we can
2322 store just one state for the row and update that when new events
2323 arrive. Then we can implement the above rules as a two-dimensional
2324 look-up table, and get checking of invalid combinations "for free"
2325 in the process. */
2326
2327 /* The lookup table for transforming states. old_state is the
2328 Y-axis, event is the X-axis. */
2329 static const fts_row_state table[4][4] = {
2330 /* I M D N */
2331 /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
2332 /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
2333 /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
2334 /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2335 };
2336
2337 fts_row_state result;
2338
2339 ut_a(old_state < FTS_INVALID);
2340 ut_a(event < FTS_INVALID);
2341
2342 result = table[(int) old_state][(int) event];
2343 ut_a(result != FTS_INVALID);
2344
2345 return(result);
2346 }
2347
2348 /******************************************************************//**
2349 Create a savepoint instance.
2350 @return savepoint instance */
2351 static
2352 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2353 fts_savepoint_create(
2354 /*=================*/
2355 ib_vector_t* savepoints, /*!< out: InnoDB transaction */
2356 const char* name, /*!< in: savepoint name */
2357 mem_heap_t* heap) /*!< in: heap */
2358 {
2359 fts_savepoint_t* savepoint;
2360
2361 savepoint = static_cast<fts_savepoint_t*>(
2362 ib_vector_push(savepoints, NULL));
2363
2364 memset(savepoint, 0x0, sizeof(*savepoint));
2365
2366 if (name) {
2367 savepoint->name = mem_heap_strdup(heap, name);
2368 }
2369
2370 savepoint->tables = rbt_create(
2371 sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2372
2373 return(savepoint);
2374 }
2375
2376 /******************************************************************//**
2377 Create an FTS trx.
2378 @return FTS trx */
2379 static
2380 fts_trx_t*
fts_trx_create(trx_t * trx)2381 fts_trx_create(
2382 /*===========*/
2383 trx_t* trx) /*!< in/out: InnoDB
2384 transaction */
2385 {
2386 fts_trx_t* ftt;
2387 ib_alloc_t* heap_alloc;
2388 mem_heap_t* heap = mem_heap_create(1024);
2389 trx_named_savept_t* savep;
2390
2391 ut_a(trx->fts_trx == NULL);
2392
2393 ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2394 ftt->trx = trx;
2395 ftt->heap = heap;
2396
2397 heap_alloc = ib_heap_allocator_create(heap);
2398
2399 ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2400 heap_alloc, sizeof(fts_savepoint_t), 4));
2401
2402 ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2403 heap_alloc, sizeof(fts_savepoint_t), 4));
2404
2405 /* Default instance has no name and no heap. */
2406 fts_savepoint_create(ftt->savepoints, NULL, NULL);
2407 fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2408
2409 /* Copy savepoints that already set before. */
2410 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2411 savep != NULL;
2412 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2413
2414 fts_savepoint_take(trx, ftt, savep->name);
2415 }
2416
2417 return(ftt);
2418 }
2419
2420 /******************************************************************//**
2421 Create an FTS trx table.
2422 @return FTS trx table */
2423 static
2424 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2425 fts_trx_table_create(
2426 /*=================*/
2427 fts_trx_t* fts_trx, /*!< in: FTS trx */
2428 dict_table_t* table) /*!< in: table */
2429 {
2430 fts_trx_table_t* ftt;
2431
2432 ftt = static_cast<fts_trx_table_t*>(
2433 mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2434
2435 if (ftt != NULL) {
2436 memset(ftt, 0x0, sizeof(*ftt));
2437 }
2438
2439 ftt->table = table;
2440 ftt->fts_trx = fts_trx;
2441
2442 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2443
2444 return(ftt);
2445 }
2446
2447 /******************************************************************//**
2448 Clone an FTS trx table.
2449 @return FTS trx table */
2450 static
2451 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2452 fts_trx_table_clone(
2453 /*=================*/
2454 const fts_trx_table_t* ftt_src) /*!< in: FTS trx */
2455 {
2456 fts_trx_table_t* ftt;
2457
2458 ftt = static_cast<fts_trx_table_t*>(
2459 mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2460
2461 memset(ftt, 0x0, sizeof(*ftt));
2462
2463 ftt->table = ftt_src->table;
2464 ftt->fts_trx = ftt_src->fts_trx;
2465
2466 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2467
2468 /* Copy the rb tree values to the new savepoint. */
2469 rbt_merge_uniq(ftt->rows, ftt_src->rows);
2470
2471 /* These are only added on commit. At this stage we only have
2472 the updated row state. */
2473 ut_a(ftt_src->added_doc_ids == NULL);
2474
2475 return(ftt);
2476 }
2477
2478 /******************************************************************//**
2479 Initialize the FTS trx instance.
2480 @return FTS trx instance */
2481 static
2482 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2483 fts_trx_init(
2484 /*=========*/
2485 trx_t* trx, /*!< in: transaction */
2486 dict_table_t* table, /*!< in: FTS table instance */
2487 ib_vector_t* savepoints) /*!< in: Savepoints */
2488 {
2489 fts_trx_table_t* ftt;
2490 ib_rbt_bound_t parent;
2491 ib_rbt_t* tables;
2492 fts_savepoint_t* savepoint;
2493
2494 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2495
2496 tables = savepoint->tables;
2497 rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2498
2499 if (parent.result == 0) {
2500 fts_trx_table_t** fttp;
2501
2502 fttp = rbt_value(fts_trx_table_t*, parent.last);
2503 ftt = *fttp;
2504 } else {
2505 ftt = fts_trx_table_create(trx->fts_trx, table);
2506 rbt_add_node(tables, &parent, &ftt);
2507 }
2508
2509 ut_a(ftt->table == table);
2510
2511 return(ftt);
2512 }
2513
2514 /******************************************************************//**
2515 Notify the FTS system about an operation on an FTS-indexed table. */
2516 static
2517 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2518 fts_trx_table_add_op(
2519 /*=================*/
2520 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2521 doc_id_t doc_id, /*!< in: doc id */
2522 fts_row_state state, /*!< in: state of the row */
2523 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
2524 {
2525 ib_rbt_t* rows;
2526 ib_rbt_bound_t parent;
2527
2528 rows = ftt->rows;
2529 rbt_search(rows, &parent, &doc_id);
2530
2531 /* Row id found, update state, and if new state is FTS_NOTHING,
2532 we delete the row from our tree. */
2533 if (parent.result == 0) {
2534 fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
2535
2536 row->state = fts_trx_row_get_new_state(row->state, state);
2537
2538 if (row->state == FTS_NOTHING) {
2539 if (row->fts_indexes) {
2540 ib_vector_free(row->fts_indexes);
2541 }
2542
2543 ut_free(rbt_remove_node(rows, parent.last));
2544 row = NULL;
2545 } else if (row->fts_indexes != NULL) {
2546 ib_vector_free(row->fts_indexes);
2547 row->fts_indexes = fts_indexes;
2548 }
2549
2550 } else { /* Row-id not found, create a new one. */
2551 fts_trx_row_t row;
2552
2553 row.doc_id = doc_id;
2554 row.state = state;
2555 row.fts_indexes = fts_indexes;
2556
2557 rbt_add_node(rows, &parent, &row);
2558 }
2559 }
2560
2561 /******************************************************************//**
2562 Notify the FTS system about an operation on an FTS-indexed table. */
2563 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2564 fts_trx_add_op(
2565 /*===========*/
2566 trx_t* trx, /*!< in: InnoDB transaction */
2567 dict_table_t* table, /*!< in: table */
2568 doc_id_t doc_id, /*!< in: new doc id */
2569 fts_row_state state, /*!< in: state of the row */
2570 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
2571 (NULL=all) */
2572 {
2573 fts_trx_table_t* tran_ftt;
2574 fts_trx_table_t* stmt_ftt;
2575
2576 if (!trx->fts_trx) {
2577 trx->fts_trx = fts_trx_create(trx);
2578 }
2579
2580 tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2581 stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2582
2583 fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2584 fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2585 }
2586
2587 /******************************************************************//**
2588 Fetch callback that converts a textual document id to a binary value and
2589 stores it in the given place.
2590 @return always returns NULL */
2591 static
2592 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2593 fts_fetch_store_doc_id(
2594 /*===================*/
2595 void* row, /*!< in: sel_node_t* */
2596 void* user_arg) /*!< in: doc_id_t* to store
2597 doc_id in */
2598 {
2599 int n_parsed;
2600 sel_node_t* node = static_cast<sel_node_t*>(row);
2601 doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
2602 dfield_t* dfield = que_node_get_val(node->select_list);
2603 dtype_t* type = dfield_get_type(dfield);
2604 ulint len = dfield_get_len(dfield);
2605
2606 char buf[32];
2607
2608 ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2609 ut_a(len > 0 && len < sizeof(buf));
2610
2611 memcpy(buf, dfield_get_data(dfield), len);
2612 buf[len] = '\0';
2613
2614 n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2615 ut_a(n_parsed == 1);
2616
2617 return(FALSE);
2618 }
2619
#ifdef FTS_CACHE_SIZE_DEBUG
/******************************************************************//**
Get the max cache size in bytes. The value is read, in megabytes, from
the FTS_MAX_CACHE_SIZE_IN_MB config entry and clamped to the range
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB]
with a warning. If there is an error reading the value we simply print
an error message here and return the default value (the lower limit) to
the caller.
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
/*===================*/
	trx_t*		trx,		/*!< in: transaction */
	fts_table_t*	fts_table)	/*!< in: table instance */
{
	dberr_t		error;
	fts_string_t	value;
	ulint		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	/* The cast matches fts_get_total_word_count(); the allocation
	is one byte larger than f_len to leave room for the terminator
	written below. */
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (error == DB_SUCCESS) {

		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if (cache_size_in_mb
			   < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << ut_strerr(error) << ") reading max"
			" cache config value from config table";
	}

	ut_free(value.f_str);

	/* Convert megabytes to bytes. */
	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2689
#ifdef FTS_DOC_STATS_DEBUG
/*********************************************************************//**
Get the total number of words in the FTS for a particular FTS index.
The count is read from the FTS_TOTAL_WORD_COUNT config entry of the
index; *total is left at 0 when the entry cannot be read.
@return DB_SUCCESS if all OK else error code */
dberr_t
fts_get_total_word_count(
/*=====================*/
	trx_t*		trx,	/*!< in: transaction */
	dict_index_t*	index,	/*!< in: for this index */
	ulint*		total)	/* out: total words */
{
	fts_string_t	config_value;

	*total = 0;

	/* The length of the value is set to the max bytes it can hold.
	This information is used by the callback that reads the value. */
	config_value.f_n_char = 0;
	config_value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	config_value.f_str = static_cast<byte*>(
		ut_malloc_nokey(config_value.f_len + 1));

	const dberr_t	error = fts_config_get_index_value(
		trx, index, FTS_TOTAL_WORD_COUNT, &config_value);

	if (error != DB_SUCCESS) {
		ib::error() << "(" << ut_strerr(error) << ") reading total"
			" words value from config table";
	} else {
		/* Terminate the text before parsing it as a number. */
		config_value.f_str[config_value.f_len] = 0;
		*total = strtoul((char*) config_value.f_str, NULL, 10);
	}

	ut_free(config_value.f_str);

	return(error);
}
#endif /* FTS_DOC_STATS_DEBUG */
2729
2730 /*********************************************************************//**
2731 Update the next and last Doc ID in the CONFIG table to be the input
2732 "doc_id" value (+ 1). We would do so after each FTS index build or
2733 table truncate */
2734 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2735 fts_update_next_doc_id(
2736 /*===================*/
2737 trx_t* trx, /*!< in/out: transaction */
2738 const dict_table_t* table, /*!< in: table */
2739 const char* table_name, /*!< in: table name, or NULL */
2740 doc_id_t doc_id) /*!< in: DOC ID to set */
2741 {
2742 table->fts->cache->synced_doc_id = doc_id;
2743 table->fts->cache->next_doc_id = doc_id + 1;
2744
2745 table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2746
2747 fts_update_sync_doc_id(
2748 table, table_name, table->fts->cache->synced_doc_id, trx);
2749
2750 }
2751
2752 /*********************************************************************//**
2753 Get the next available document id.
2754 @return DB_SUCCESS if OK */
2755 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2756 fts_get_next_doc_id(
2757 /*================*/
2758 const dict_table_t* table, /*!< in: table */
2759 doc_id_t* doc_id) /*!< out: new document id */
2760 {
2761 fts_cache_t* cache = table->fts->cache;
2762
2763 /* If the Doc ID system has not yet been initialized, we
2764 will consult the CONFIG table and user table to re-establish
2765 the initial value of the Doc ID */
2766 if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2767 fts_init_doc_id(table);
2768 }
2769
2770 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2771 *doc_id = FTS_NULL_DOC_ID;
2772 return(DB_SUCCESS);
2773 }
2774
2775 mutex_enter(&cache->doc_id_lock);
2776 *doc_id = ++cache->next_doc_id;
2777 mutex_exit(&cache->doc_id_lock);
2778
2779 return(DB_SUCCESS);
2780 }
2781
2782 /*********************************************************************//**
2783 This function fetch the Doc ID from CONFIG table, and compare with
2784 the Doc ID supplied. And store the larger one to the CONFIG table.
2785 @return DB_SUCCESS if OK */
2786 static MY_ATTRIBUTE((nonnull))
2787 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2788 fts_cmp_set_sync_doc_id(
2789 /*====================*/
2790 const dict_table_t* table, /*!< in: table */
2791 doc_id_t doc_id_cmp, /*!< in: Doc ID to compare */
2792 ibool read_only, /*!< in: TRUE if read the
2793 synced_doc_id only */
2794 doc_id_t* doc_id) /*!< out: larger document id
2795 after comparing "doc_id_cmp"
2796 to the one stored in CONFIG
2797 table */
2798 {
2799 trx_t* trx;
2800 pars_info_t* info;
2801 dberr_t error;
2802 fts_table_t fts_table;
2803 que_t* graph = NULL;
2804 fts_cache_t* cache = table->fts->cache;
2805 char table_name[MAX_FULL_NAME_LEN];
2806 retry:
2807 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2808
2809 fts_table.suffix = "CONFIG";
2810 fts_table.table_id = table->id;
2811 fts_table.type = FTS_COMMON_TABLE;
2812 fts_table.table = table;
2813
2814 fts_table.parent = table->name.m_name;
2815
2816 trx = trx_allocate_for_background();
2817
2818 trx->op_info = "update the next FTS document id";
2819
2820 info = pars_info_create();
2821
2822 pars_info_bind_function(
2823 info, "my_func", fts_fetch_store_doc_id, doc_id);
2824
2825 fts_get_table_name(&fts_table, table_name);
2826 pars_info_bind_id(info, true, "config_table", table_name);
2827
2828 graph = fts_parse_sql(
2829 &fts_table, info,
2830 "DECLARE FUNCTION my_func;\n"
2831 "DECLARE CURSOR c IS SELECT value FROM $config_table"
2832 " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2833 "BEGIN\n"
2834 ""
2835 "OPEN c;\n"
2836 "WHILE 1 = 1 LOOP\n"
2837 " FETCH c INTO my_func();\n"
2838 " IF c % NOTFOUND THEN\n"
2839 " EXIT;\n"
2840 " END IF;\n"
2841 "END LOOP;\n"
2842 "CLOSE c;");
2843
2844 *doc_id = 0;
2845
2846 error = fts_eval_sql(trx, graph);
2847
2848 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2849
2850 // FIXME: We need to retry deadlock errors
2851 if (error != DB_SUCCESS) {
2852 goto func_exit;
2853 }
2854
2855 if (read_only) {
2856 goto func_exit;
2857 }
2858
2859 if (doc_id_cmp == 0 && *doc_id) {
2860 cache->synced_doc_id = *doc_id - 1;
2861 } else {
2862 cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2863 }
2864
2865 mutex_enter(&cache->doc_id_lock);
2866 /* For each sync operation, we will add next_doc_id by 1,
2867 so to mark a sync operation */
2868 if (cache->next_doc_id < cache->synced_doc_id + 1) {
2869 cache->next_doc_id = cache->synced_doc_id + 1;
2870 }
2871 mutex_exit(&cache->doc_id_lock);
2872
2873 if (doc_id_cmp > *doc_id) {
2874 error = fts_update_sync_doc_id(
2875 table, table->name.m_name, cache->synced_doc_id, trx);
2876 }
2877
2878 *doc_id = cache->next_doc_id;
2879
2880 func_exit:
2881
2882 if (error == DB_SUCCESS) {
2883 fts_sql_commit(trx);
2884 } else {
2885 *doc_id = 0;
2886
2887 ib::error() << "(" << ut_strerr(error) << ") while getting"
2888 " next doc id.";
2889 fts_sql_rollback(trx);
2890
2891 if (error == DB_DEADLOCK) {
2892 os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2893 goto retry;
2894 }
2895 }
2896
2897 trx_free_for_background(trx);
2898
2899 return(error);
2900 }
2901
2902 /*********************************************************************//**
2903 Update the last document id. This function could create a new
2904 transaction to update the last document id.
2905 @return DB_SUCCESS if OK */
2906 static
2907 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2908 fts_update_sync_doc_id(
2909 /*===================*/
2910 const dict_table_t* table, /*!< in: table */
2911 const char* table_name, /*!< in: table name, or NULL */
2912 doc_id_t doc_id, /*!< in: last document id */
2913 trx_t* trx) /*!< in: update trx, or NULL */
2914 {
2915 byte id[FTS_MAX_ID_LEN];
2916 pars_info_t* info;
2917 fts_table_t fts_table;
2918 ulint id_len;
2919 que_t* graph = NULL;
2920 dberr_t error;
2921 ibool local_trx = FALSE;
2922 fts_cache_t* cache = table->fts->cache;
2923 char fts_name[MAX_FULL_NAME_LEN];
2924
2925 fts_table.suffix = "CONFIG";
2926 fts_table.table_id = table->id;
2927 fts_table.type = FTS_COMMON_TABLE;
2928 fts_table.table = table;
2929 if (table_name) {
2930 fts_table.parent = table_name;
2931 } else {
2932 fts_table.parent = table->name.m_name;
2933 }
2934
2935 if (!trx) {
2936 trx = trx_allocate_for_background();
2937
2938 trx->op_info = "setting last FTS document id";
2939 local_trx = TRUE;
2940 }
2941
2942 info = pars_info_create();
2943
2944 id_len = ut_snprintf(
2945 (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2946
2947 pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2948
2949 fts_get_table_name(&fts_table, fts_name);
2950 pars_info_bind_id(info, true, "table_name", fts_name);
2951
2952 graph = fts_parse_sql(
2953 &fts_table, info,
2954 "BEGIN"
2955 " UPDATE $table_name SET value = :doc_id"
2956 " WHERE key = 'synced_doc_id';");
2957
2958 error = fts_eval_sql(trx, graph);
2959
2960 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2961
2962 if (local_trx) {
2963 if (error == DB_SUCCESS) {
2964 fts_sql_commit(trx);
2965 cache->synced_doc_id = doc_id;
2966 } else {
2967
2968 ib::error() << "(" << ut_strerr(error) << ") while"
2969 " updating last doc id.";
2970
2971 fts_sql_rollback(trx);
2972 }
2973 trx_free_for_background(trx);
2974 }
2975
2976 return(error);
2977 }
2978
2979 /*********************************************************************//**
2980 Create a new fts_doc_ids_t.
2981 @return new fts_doc_ids_t */
2982 fts_doc_ids_t*
fts_doc_ids_create(void)2983 fts_doc_ids_create(void)
2984 /*====================*/
2985 {
2986 fts_doc_ids_t* fts_doc_ids;
2987 mem_heap_t* heap = mem_heap_create(512);
2988
2989 fts_doc_ids = static_cast<fts_doc_ids_t*>(
2990 mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2991
2992 fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2993
2994 fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2995 fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2996
2997 return(fts_doc_ids);
2998 }
2999
3000 /*********************************************************************//**
3001 Free a fts_doc_ids_t. */
3002 void
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)3003 fts_doc_ids_free(
3004 /*=============*/
3005 fts_doc_ids_t* fts_doc_ids)
3006 {
3007 mem_heap_t* heap = static_cast<mem_heap_t*>(
3008 fts_doc_ids->self_heap->arg);
3009
3010 memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
3011
3012 mem_heap_free(heap);
3013 }
3014
3015 /*********************************************************************//**
3016 Do commit-phase steps necessary for the insertion of a new row.
3017 @return DB_SUCCESS or error code */
3018 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)3019 fts_add(
3020 /*====*/
3021 fts_trx_table_t*ftt, /*!< in: FTS trx table */
3022 fts_trx_row_t* row) /*!< in: row */
3023 {
3024 dict_table_t* table = ftt->table;
3025 doc_id_t doc_id = row->doc_id;
3026
3027 ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
3028
3029 fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
3030
3031 mutex_enter(&table->fts->cache->deleted_lock);
3032 ++table->fts->cache->added;
3033 mutex_exit(&table->fts->cache->deleted_lock);
3034
3035 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
3036 && doc_id >= table->fts->cache->next_doc_id) {
3037 table->fts->cache->next_doc_id = doc_id + 1;
3038 }
3039 }
3040
3041 /*********************************************************************//**
3042 Do commit-phase steps necessary for the deletion of a row.
3043 @return DB_SUCCESS or error code */
3044 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3045 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)3046 fts_delete(
3047 /*=======*/
3048 fts_trx_table_t*ftt, /*!< in: FTS trx table */
3049 fts_trx_row_t* row) /*!< in: row */
3050 {
3051 que_t* graph;
3052 fts_table_t fts_table;
3053 dberr_t error = DB_SUCCESS;
3054 doc_id_t write_doc_id;
3055 dict_table_t* table = ftt->table;
3056 doc_id_t doc_id = row->doc_id;
3057 trx_t* trx = ftt->fts_trx->trx;
3058 pars_info_t* info = pars_info_create();
3059 fts_cache_t* cache = table->fts->cache;
3060
3061 /* we do not index Documents whose Doc ID value is 0 */
3062 if (doc_id == FTS_NULL_DOC_ID) {
3063 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
3064 return(error);
3065 }
3066
3067 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3068
3069 FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
3070
3071 /* Convert to "storage" byte order. */
3072 fts_write_doc_id((byte*) &write_doc_id, doc_id);
3073 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3074
3075 /* It is possible we update a record that has not yet been sync-ed
3076 into cache from last crash (delete Doc will not initialize the
3077 sync). Avoid any added counter accounting until the FTS cache
3078 is re-established and sync-ed */
3079 if (table->fts->fts_status & ADDED_TABLE_SYNCED
3080 && doc_id > cache->synced_doc_id) {
3081 mutex_enter(&table->fts->cache->deleted_lock);
3082
3083 /* The Doc ID could belong to those left in
3084 ADDED table from last crash. So need to check
3085 if it is less than first_doc_id when we initialize
3086 the Doc ID system after reboot */
3087 if (doc_id >= table->fts->cache->first_doc_id
3088 && table->fts->cache->added > 0) {
3089 --table->fts->cache->added;
3090 }
3091
3092 mutex_exit(&table->fts->cache->deleted_lock);
3093
3094 /* Only if the row was really deleted. */
3095 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3096 }
3097
3098 /* Note the deleted document for OPTIMIZE to purge. */
3099 if (error == DB_SUCCESS) {
3100 char table_name[MAX_FULL_NAME_LEN];
3101
3102 trx->op_info = "adding doc id to FTS DELETED";
3103
3104 info->graph_owns_us = TRUE;
3105
3106 fts_table.suffix = "DELETED";
3107
3108 fts_get_table_name(&fts_table, table_name);
3109 pars_info_bind_id(info, true, "deleted", table_name);
3110
3111 graph = fts_parse_sql(
3112 &fts_table,
3113 info,
3114 "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
3115
3116 error = fts_eval_sql(trx, graph);
3117
3118 fts_que_graph_free(graph);
3119 } else {
3120 pars_info_free(info);
3121 }
3122
3123 /* Increment the total deleted count, this is used to calculate the
3124 number of documents indexed. */
3125 if (error == DB_SUCCESS) {
3126 mutex_enter(&table->fts->cache->deleted_lock);
3127
3128 ++table->fts->cache->deleted;
3129
3130 mutex_exit(&table->fts->cache->deleted_lock);
3131 }
3132
3133 return(error);
3134 }
3135
3136 /*********************************************************************//**
3137 Do commit-phase steps necessary for the modification of a row.
3138 @return DB_SUCCESS or error code */
3139 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3140 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3141 fts_modify(
3142 /*=======*/
3143 fts_trx_table_t* ftt, /*!< in: FTS trx table */
3144 fts_trx_row_t* row) /*!< in: row */
3145 {
3146 dberr_t error;
3147
3148 ut_a(row->state == FTS_MODIFY);
3149
3150 error = fts_delete(ftt, row);
3151
3152 if (error == DB_SUCCESS) {
3153 fts_add(ftt, row);
3154 }
3155
3156 return(error);
3157 }
3158
3159 /*********************************************************************//**
3160 Create a new document id.
3161 @return DB_SUCCESS if all went well else error */
3162 dberr_t
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3163 fts_create_doc_id(
3164 /*==============*/
3165 dict_table_t* table, /*!< in: row is of this table. */
3166 dtuple_t* row, /* in/out: add doc id value to this
3167 row. This is the current row that is
3168 being inserted. */
3169 mem_heap_t* heap) /*!< in: heap */
3170 {
3171 doc_id_t doc_id;
3172 dberr_t error = DB_SUCCESS;
3173
3174 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3175
3176 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3177 if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3178 error = fts_get_next_doc_id(table, &doc_id);
3179 }
3180 return(error);
3181 }
3182
3183 error = fts_get_next_doc_id(table, &doc_id);
3184
3185 if (error == DB_SUCCESS) {
3186 dfield_t* dfield;
3187 doc_id_t* write_doc_id;
3188
3189 ut_a(doc_id > 0);
3190
3191 dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3192 write_doc_id = static_cast<doc_id_t*>(
3193 mem_heap_alloc(heap, sizeof(*write_doc_id)));
3194
3195 ut_a(doc_id != FTS_NULL_DOC_ID);
3196 ut_a(sizeof(doc_id) == dfield->type.len);
3197 fts_write_doc_id((byte*) write_doc_id, doc_id);
3198
3199 dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3200 }
3201
3202 return(error);
3203 }
3204
3205 /*********************************************************************//**
3206 The given transaction is about to be committed; do whatever is necessary
3207 from the FTS system's POV.
3208 @return DB_SUCCESS or error code */
3209 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3210 dberr_t
fts_commit_table(fts_trx_table_t * ftt)3211 fts_commit_table(
3212 /*=============*/
3213 fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
3214 {
3215 const ib_rbt_node_t* node;
3216 ib_rbt_t* rows;
3217 dberr_t error = DB_SUCCESS;
3218 fts_cache_t* cache = ftt->table->fts->cache;
3219 trx_t* trx = trx_allocate_for_background();
3220
3221 rows = ftt->rows;
3222
3223 ftt->fts_trx->trx = trx;
3224
3225 if (cache->get_docs == NULL) {
3226 rw_lock_x_lock(&cache->init_lock);
3227 if (cache->get_docs == NULL) {
3228 cache->get_docs = fts_get_docs_create(cache);
3229 }
3230 rw_lock_x_unlock(&cache->init_lock);
3231 }
3232
3233 for (node = rbt_first(rows);
3234 node != NULL && error == DB_SUCCESS;
3235 node = rbt_next(rows, node)) {
3236
3237 fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
3238
3239 switch (row->state) {
3240 case FTS_INSERT:
3241 fts_add(ftt, row);
3242 break;
3243
3244 case FTS_MODIFY:
3245 error = fts_modify(ftt, row);
3246 break;
3247
3248 case FTS_DELETE:
3249 error = fts_delete(ftt, row);
3250 break;
3251
3252 default:
3253 ut_error;
3254 }
3255 }
3256
3257 fts_sql_commit(trx);
3258
3259 trx_free_for_background(trx);
3260
3261 return(error);
3262 }
3263
3264 /*********************************************************************//**
3265 The given transaction is about to be committed; do whatever is necessary
3266 from the FTS system's POV.
3267 @return DB_SUCCESS or error code */
3268 dberr_t
fts_commit(trx_t * trx)3269 fts_commit(
3270 /*=======*/
3271 trx_t* trx) /*!< in: transaction */
3272 {
3273 const ib_rbt_node_t* node;
3274 dberr_t error;
3275 ib_rbt_t* tables;
3276 fts_savepoint_t* savepoint;
3277
3278 savepoint = static_cast<fts_savepoint_t*>(
3279 ib_vector_last(trx->fts_trx->savepoints));
3280 tables = savepoint->tables;
3281
3282 for (node = rbt_first(tables), error = DB_SUCCESS;
3283 node != NULL && error == DB_SUCCESS;
3284 node = rbt_next(tables, node)) {
3285
3286 fts_trx_table_t** ftt;
3287
3288 ftt = rbt_value(fts_trx_table_t*, node);
3289
3290 error = fts_commit_table(*ftt);
3291 }
3292
3293 return(error);
3294 }
3295
3296 /*********************************************************************//**
3297 Initialize a document. */
3298 void
fts_doc_init(fts_doc_t * doc)3299 fts_doc_init(
3300 /*=========*/
3301 fts_doc_t* doc) /*!< in: doc to initialize */
3302 {
3303 mem_heap_t* heap = mem_heap_create(32);
3304
3305 memset(doc, 0, sizeof(*doc));
3306
3307 doc->self_heap = ib_heap_allocator_create(heap);
3308 }
3309
3310 /*********************************************************************//**
3311 Free document. */
3312 void
fts_doc_free(fts_doc_t * doc)3313 fts_doc_free(
3314 /*=========*/
3315 fts_doc_t* doc) /*!< in: document */
3316 {
3317 mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3318
3319 if (doc->tokens) {
3320 rbt_free(doc->tokens);
3321 }
3322
3323 ut_d(memset(doc, 0, sizeof(*doc)));
3324
3325 mem_heap_free(heap);
3326 }
3327
3328 /*********************************************************************//**
3329 Callback function for fetch that stores a row id to the location pointed.
3330 The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8.
3331 @return always returns NULL */
3332 void*
fts_fetch_row_id(void * row,void * user_arg)3333 fts_fetch_row_id(
3334 /*=============*/
3335 void* row, /*!< in: sel_node_t* */
3336 void* user_arg) /*!< in: data pointer */
3337 {
3338 sel_node_t* node = static_cast<sel_node_t*>(row);
3339
3340 dfield_t* dfield = que_node_get_val(node->select_list);
3341 dtype_t* type = dfield_get_type(dfield);
3342 ulint len = dfield_get_len(dfield);
3343
3344 ut_a(dtype_get_mtype(type) == DATA_FIXBINARY);
3345 ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE);
3346 ut_a(len == 8);
3347
3348 memcpy(user_arg, dfield_get_data(dfield), 8);
3349
3350 return(NULL);
3351 }
3352
3353 /*********************************************************************//**
3354 Callback function for fetch that stores the text of an FTS document,
3355 converting each column to UTF-16.
3356 @return always FALSE */
3357 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3358 fts_query_expansion_fetch_doc(
3359 /*==========================*/
3360 void* row, /*!< in: sel_node_t* */
3361 void* user_arg) /*!< in: fts_doc_t* */
3362 {
3363 que_node_t* exp;
3364 sel_node_t* node = static_cast<sel_node_t*>(row);
3365 fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
3366 dfield_t* dfield;
3367 ulint len;
3368 ulint doc_len;
3369 fts_doc_t doc;
3370 CHARSET_INFO* doc_charset = NULL;
3371 ulint field_no = 0;
3372
3373 len = 0;
3374
3375 fts_doc_init(&doc);
3376 doc.found = TRUE;
3377
3378 exp = node->select_list;
3379 doc_len = 0;
3380
3381 doc_charset = result_doc->charset;
3382
3383 /* Copy each indexed column content into doc->text.f_str */
3384 while (exp) {
3385 dfield = que_node_get_val(exp);
3386 len = dfield_get_len(dfield);
3387
3388 /* NULL column */
3389 if (len == UNIV_SQL_NULL) {
3390 exp = que_node_get_next(exp);
3391 continue;
3392 }
3393
3394 if (!doc_charset) {
3395 doc_charset = fts_get_charset(dfield->type.prtype);
3396 }
3397
3398 doc.charset = doc_charset;
3399 doc.is_ngram = result_doc->is_ngram;
3400
3401 if (dfield_is_ext(dfield)) {
3402 /* We ignore columns that are stored externally, this
3403 could result in too many words to search */
3404 exp = que_node_get_next(exp);
3405 continue;
3406 } else {
3407 doc.text.f_n_char = 0;
3408
3409 doc.text.f_str = static_cast<byte*>(
3410 dfield_get_data(dfield));
3411
3412 doc.text.f_len = len;
3413 }
3414
3415 if (field_no == 0) {
3416 fts_tokenize_document(&doc, result_doc,
3417 result_doc->parser);
3418 } else {
3419 fts_tokenize_document_next(&doc, doc_len, result_doc,
3420 result_doc->parser);
3421 }
3422
3423 exp = que_node_get_next(exp);
3424
3425 doc_len += (exp) ? len + 1 : len;
3426
3427 field_no++;
3428 }
3429
3430 ut_ad(doc_charset);
3431
3432 if (!result_doc->charset) {
3433 result_doc->charset = doc_charset;
3434 }
3435
3436 fts_doc_free(&doc);
3437
3438 return(FALSE);
3439 }
3440
3441 /*********************************************************************//**
3442 fetch and tokenize the document. */
3443 static
3444 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3445 fts_fetch_doc_from_rec(
3446 /*===================*/
3447 fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
3448 dict_index_t* clust_index, /*!< in: cluster index */
3449 btr_pcur_t* pcur, /*!< in: cursor whose position
3450 has been stored */
3451 ulint* offsets, /*!< in: offsets */
3452 fts_doc_t* doc) /*!< out: fts doc to hold parsed
3453 documents */
3454 {
3455 dict_index_t* index;
3456 dict_table_t* table;
3457 const rec_t* clust_rec;
3458 ulint num_field;
3459 const dict_field_t* ifield;
3460 const dict_col_t* col;
3461 ulint clust_pos;
3462 ulint i;
3463 ulint doc_len = 0;
3464 ulint processed_doc = 0;
3465 st_mysql_ftparser* parser;
3466
3467 if (!get_doc) {
3468 return;
3469 }
3470
3471 index = get_doc->index_cache->index;
3472 table = get_doc->index_cache->index->table;
3473 parser = get_doc->index_cache->index->parser;
3474
3475 clust_rec = btr_pcur_get_rec(pcur);
3476
3477 num_field = dict_index_get_n_fields(index);
3478
3479 for (i = 0; i < num_field; i++) {
3480 ifield = dict_index_get_nth_field(index, i);
3481 col = dict_field_get_col(ifield);
3482 clust_pos = dict_col_get_clust_pos(col, clust_index);
3483
3484 if (!get_doc->index_cache->charset) {
3485 get_doc->index_cache->charset = fts_get_charset(
3486 ifield->col->prtype);
3487 }
3488
3489 if (rec_offs_nth_extern(offsets, clust_pos)) {
3490 doc->text.f_str =
3491 btr_rec_copy_externally_stored_field(
3492 clust_rec, offsets,
3493 dict_table_page_size(table),
3494 clust_pos, &doc->text.f_len,
3495 static_cast<mem_heap_t*>(
3496 doc->self_heap->arg));
3497 } else {
3498 doc->text.f_str = (byte*) rec_get_nth_field(
3499 clust_rec, offsets, clust_pos,
3500 &doc->text.f_len);
3501 }
3502
3503 doc->found = TRUE;
3504 doc->charset = get_doc->index_cache->charset;
3505 doc->is_ngram = index->is_ngram;
3506
3507 /* Null Field */
3508 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3509 continue;
3510 }
3511
3512 if (processed_doc == 0) {
3513 fts_tokenize_document(doc, NULL, parser);
3514 } else {
3515 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3516 }
3517
3518 processed_doc++;
3519 doc_len += doc->text.f_len + 1;
3520 }
3521 }
3522
/*********************************************************************//**
This function fetches the document inserted during the committing
transaction, and tokenize the inserted text data and insert into
FTS auxiliary table and its cache.
The row is located through the table's fts_doc_id_index; when that index
is not the clustered index, the clustered record is looked up with a
second cursor. The document is then tokenized once per entry of
cache->get_docs and the tokens are added to the cache under the cache
lock. A sync of the table is requested when the cache has grown enough.
@return always TRUE (also when the row is not found or is delete-marked) */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)))
					/*!< in: affected fts indexes */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t	temp_doc_id;
	dict_index_t*	clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* Holds the search tuples and, if needed, the record offsets;
	freed at func_exit. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = table->fts_doc_id_index;

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key is the Doc ID in its 8 byte stored format. */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		ulint*		offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The Doc ID index is the clustered index: the
			record just found already holds the document. */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			dtuple_t*	clust_ref;
			ulint		n_fields;

			/* Build a reference to the clustered index record
			from the secondary index record and position a
			second cursor on it. */
			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL, NULL);

			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index,
					  NULL, ULINT_UNDEFINED, &heap);

		/* Tokenize the row once for every entry in get_docs. */
		for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t	doc;
			/* Note: these two deliberately or accidentally
			shadow the function-level "table" and "get_doc". */
			dict_table_t*   table;
			fts_get_doc_t*  get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Remember the cursor position and end the
				mini-transaction before taking the cache
				lock below. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
				rw_lock_x_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL, NULL,
							  NULL, TRUE, TRUE);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Ask for a sync when the cache has grown by
				more than a tenth of fts_max_cache_size since
				total_size_before_sync was last recorded, or
				when fts_need_sync is set, unless a sync is
				already in progress. */
				bool	need_sync = false;
				if ((cache->total_size -
				     cache->total_size_before_sync >
				     fts_max_cache_size / 10 || fts_need_sync)
				    && !cache->sync->in_progress) {
					need_sync = true;
					cache->total_size_before_sync =
						cache->total_size;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				/* Debug-only fault/timing injection points. */
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_cache_wait",
					srv_fatal_semaphore_wait_threshold = 25;
					fts_max_cache_size = 100;
					fts_sync(cache->sync, true, true, false);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true, false);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				/* The request is issued after the cache lock
				has been released. */
				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				/* Restart the mini-transaction; func_exit
				always commits it. */
				mtr_start(&mtr);

				/* The cursor only needs to be repositioned
				if another index still has to read the row. */
				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		/* pcur itself is closed at func_exit; only the separate
		clustered index cursor needs closing here. */
		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}
3739
3740
3741 /*********************************************************************//**
3742 Callback function to read a single ulint column.
3743 return always returns TRUE */
3744 static
3745 ibool
fts_read_ulint(void * row,void * user_arg)3746 fts_read_ulint(
3747 /*===========*/
3748 void* row, /*!< in: sel_node_t* */
3749 void* user_arg) /*!< in: pointer to ulint */
3750 {
3751 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
3752 ulint* value = static_cast<ulint*>(user_arg);
3753 que_node_t* exp = sel_node->select_list;
3754 dfield_t* dfield = que_node_get_val(exp);
3755 void* data = dfield_get_data(dfield);
3756
3757 *value = static_cast<ulint>(mach_read_from_4(
3758 static_cast<const byte*>(data)));
3759
3760 return(TRUE);
3761 }
3762
3763 /*********************************************************************//**
3764 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3765 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3766 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3767 fts_get_max_doc_id(
3768 /*===============*/
3769 dict_table_t* table) /*!< in: user table */
3770 {
3771 dict_index_t* index;
3772 dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL;
3773 doc_id_t doc_id = 0;
3774 mtr_t mtr;
3775 btr_pcur_t pcur;
3776
3777 index = table->fts_doc_id_index;
3778
3779 if (!index) {
3780 return(0);
3781 }
3782
3783 dfield = dict_index_get_nth_field(index, 0);
3784
3785 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3786 ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3787 #endif
3788
3789 mtr_start(&mtr);
3790
3791 /* fetch the largest indexes value */
3792 btr_pcur_open_at_index_side(
3793 false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3794
3795 if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3796 const rec_t* rec = NULL;
3797 ulint offsets_[REC_OFFS_NORMAL_SIZE];
3798 ulint* offsets = offsets_;
3799 mem_heap_t* heap = NULL;
3800 ulint len;
3801 const void* data;
3802
3803 rec_offs_init(offsets_);
3804
3805 do {
3806 rec = btr_pcur_get_rec(&pcur);
3807
3808 if (page_rec_is_user_rec(rec)) {
3809 break;
3810 }
3811 } while (btr_pcur_move_to_prev(&pcur, &mtr));
3812
3813 if (!rec) {
3814 goto func_exit;
3815 }
3816
3817 offsets = rec_get_offsets(
3818 rec, index, offsets, ULINT_UNDEFINED, &heap);
3819
3820 data = rec_get_nth_field(rec, offsets, 0, &len);
3821
3822 doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3823 static_cast<const byte*>(data)));
3824 }
3825
3826 func_exit:
3827 btr_pcur_close(&pcur);
3828 mtr_commit(&mtr);
3829 return(doc_id);
3830 }
3831
3832 /*********************************************************************//**
3833 Fetch document with the given document id.
3834 @return DB_SUCCESS if OK else error */
3835 dberr_t
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3836 fts_doc_fetch_by_doc_id(
3837 /*====================*/
3838 fts_get_doc_t* get_doc, /*!< in: state */
3839 doc_id_t doc_id, /*!< in: id of document to
3840 fetch */
3841 dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
3842 or NULL */
3843 ulint option, /*!< in: search option, if it is
3844 greater than doc_id or equal */
3845 fts_sql_callback
3846 callback, /*!< in: callback to read */
3847 void* arg) /*!< in: callback arg */
3848 {
3849 pars_info_t* info;
3850 dberr_t error;
3851 const char* select_str;
3852 doc_id_t write_doc_id;
3853 dict_index_t* index;
3854 trx_t* trx = trx_allocate_for_background();
3855 que_t* graph;
3856
3857 trx->op_info = "fetching indexed FTS document";
3858
3859 /* The FTS index can be supplied by caller directly with
3860 "index_to_use", otherwise, get it from "get_doc" */
3861 index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3862
3863 if (get_doc && get_doc->get_document_graph) {
3864 info = get_doc->get_document_graph->info;
3865 } else {
3866 info = pars_info_create();
3867 }
3868
3869 /* Convert to "storage" byte order. */
3870 fts_write_doc_id((byte*) &write_doc_id, doc_id);
3871 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3872 pars_info_bind_function(info, "my_func", callback, arg);
3873
3874 select_str = fts_get_select_columns_str(index, info, info->heap);
3875 pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3876
3877 if (!get_doc || !get_doc->get_document_graph) {
3878 if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3879 graph = fts_parse_sql(
3880 NULL,
3881 info,
3882 mem_heap_printf(info->heap,
3883 "DECLARE FUNCTION my_func;\n"
3884 "DECLARE CURSOR c IS"
3885 " SELECT %s FROM $table_name"
3886 " WHERE %s = :doc_id;\n"
3887 "BEGIN\n"
3888 ""
3889 "OPEN c;\n"
3890 "WHILE 1 = 1 LOOP\n"
3891 " FETCH c INTO my_func();\n"
3892 " IF c %% NOTFOUND THEN\n"
3893 " EXIT;\n"
3894 " END IF;\n"
3895 "END LOOP;\n"
3896 "CLOSE c;",
3897 select_str, FTS_DOC_ID_COL_NAME));
3898 } else {
3899 ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3900
3901 /* This is used for crash recovery of table with
3902 hidden DOC ID or FTS indexes. We will scan the table
3903 to re-processing user table rows whose DOC ID or
3904 FTS indexed documents have not been sync-ed to disc
3905 during recent crash.
3906 In the case that all fulltext indexes are dropped
3907 for a table, we will keep the "hidden" FTS_DOC_ID
3908 column, and this scan is to retreive the largest
3909 DOC ID being used in the table to determine the
3910 appropriate next DOC ID.
3911 In the case of there exists fulltext index(es), this
3912 operation will re-tokenize any docs that have not
3913 been sync-ed to the disk, and re-prime the FTS
3914 cached */
3915 graph = fts_parse_sql(
3916 NULL,
3917 info,
3918 mem_heap_printf(info->heap,
3919 "DECLARE FUNCTION my_func;\n"
3920 "DECLARE CURSOR c IS"
3921 " SELECT %s, %s FROM $table_name"
3922 " WHERE %s > :doc_id;\n"
3923 "BEGIN\n"
3924 ""
3925 "OPEN c;\n"
3926 "WHILE 1 = 1 LOOP\n"
3927 " FETCH c INTO my_func();\n"
3928 " IF c %% NOTFOUND THEN\n"
3929 " EXIT;\n"
3930 " END IF;\n"
3931 "END LOOP;\n"
3932 "CLOSE c;",
3933 FTS_DOC_ID_COL_NAME,
3934 select_str, FTS_DOC_ID_COL_NAME));
3935 }
3936 if (get_doc) {
3937 get_doc->get_document_graph = graph;
3938 }
3939 } else {
3940 graph = get_doc->get_document_graph;
3941 }
3942
3943 error = fts_eval_sql(trx, graph);
3944
3945 if (error == DB_SUCCESS) {
3946 fts_sql_commit(trx);
3947 } else {
3948 fts_sql_rollback(trx);
3949 }
3950
3951 trx_free_for_background(trx);
3952
3953 if (!get_doc) {
3954 fts_que_graph_free(graph);
3955 }
3956
3957 return(error);
3958 }
3959
3960 /*********************************************************************//**
3961 Write out a single word's data as new entry/entries in the INDEX table.
3962 @return DB_SUCCESS if all OK. */
3963 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3964 fts_write_node(
3965 /*===========*/
3966 trx_t* trx, /*!< in: transaction */
3967 que_t** graph, /*!< in: query graph */
3968 fts_table_t* fts_table, /*!< in: aux table */
3969 fts_string_t* word, /*!< in: word in UTF-8 */
3970 fts_node_t* node) /*!< in: node columns */
3971 {
3972 pars_info_t* info;
3973 dberr_t error;
3974 ib_uint32_t doc_count;
3975 ib_time_monotonic_t start_time;
3976 doc_id_t last_doc_id;
3977 doc_id_t first_doc_id;
3978 char table_name[MAX_FULL_NAME_LEN];
3979
3980 ut_a(node->ilist != NULL);
3981
3982 if (*graph) {
3983 info = (*graph)->info;
3984 } else {
3985 info = pars_info_create();
3986
3987 fts_get_table_name(fts_table, table_name);
3988 pars_info_bind_id(info, true, "index_table_name", table_name);
3989 }
3990
3991 pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3992
3993 /* Convert to "storage" byte order. */
3994 fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3995 fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3996
3997 /* Convert to "storage" byte order. */
3998 fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3999 fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
4000
4001 ut_a(node->last_doc_id >= node->first_doc_id);
4002
4003 /* Convert to "storage" byte order. */
4004 mach_write_to_4((byte*) &doc_count, node->doc_count);
4005 pars_info_bind_int4_literal(
4006 info, "doc_count", (const ib_uint32_t*) &doc_count);
4007
4008 /* Set copy_name to FALSE since it's a static. */
4009 pars_info_bind_literal(
4010 info, "ilist", node->ilist, node->ilist_size,
4011 DATA_BLOB, DATA_BINARY_TYPE);
4012
4013 if (!*graph) {
4014
4015 *graph = fts_parse_sql(
4016 fts_table,
4017 info,
4018 "BEGIN\n"
4019 "INSERT INTO $index_table_name VALUES"
4020 " (:token, :first_doc_id,"
4021 " :last_doc_id, :doc_count, :ilist);");
4022 }
4023
4024 start_time = ut_time_monotonic();
4025 error = fts_eval_sql(trx, *graph);
4026 elapsed_time += ut_time_monotonic() - start_time;
4027 ++n_nodes;
4028
4029 return(error);
4030 }
4031
4032 /*********************************************************************//**
4033 Add rows to the DELETED_CACHE table.
4034 @return DB_SUCCESS if all went well else error code*/
4035 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4036 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)4037 fts_sync_add_deleted_cache(
4038 /*=======================*/
4039 fts_sync_t* sync, /*!< in: sync state */
4040 ib_vector_t* doc_ids) /*!< in: doc ids to add */
4041 {
4042 ulint i;
4043 pars_info_t* info;
4044 que_t* graph;
4045 fts_table_t fts_table;
4046 char table_name[MAX_FULL_NAME_LEN];
4047 doc_id_t dummy = 0;
4048 dberr_t error = DB_SUCCESS;
4049 ulint n_elems = ib_vector_size(doc_ids);
4050
4051 ut_a(ib_vector_size(doc_ids) > 0);
4052
4053 ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
4054
4055 info = pars_info_create();
4056
4057 fts_bind_doc_id(info, "doc_id", &dummy);
4058
4059 FTS_INIT_FTS_TABLE(
4060 &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
4061
4062 fts_get_table_name(&fts_table, table_name);
4063 pars_info_bind_id(info, true, "table_name", table_name);
4064
4065 graph = fts_parse_sql(
4066 &fts_table,
4067 info,
4068 "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
4069
4070 for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
4071 fts_update_t* update;
4072 doc_id_t write_doc_id;
4073
4074 update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i));
4075
4076 /* Convert to "storage" byte order. */
4077 fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
4078 fts_bind_doc_id(info, "doc_id", &write_doc_id);
4079
4080 error = fts_eval_sql(sync->trx, graph);
4081 }
4082
4083 fts_que_graph_free(graph);
4084
4085 return(error);
4086 }
4087
/** Write the words and ilist to disk.
Every word in index_cache->words is visited and each of its nodes that
has not been marked as synced is written with fts_write_node(). Once a
write has failed no further nodes are written, but the iteration still
continues and the remaining nodes are still marked as synced.
@param[in,out]	trx		transaction
@param[in]	index_cache	index cache
@param[in]	unlock_cache	whether unlock cache when write node
				Also set this to true if sync takes
				very long
@param[in]	sync_start_time	Holds the timestamp of start of sync
				for deducing the length of sync time
@return DB_SUCCESS if all went well else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_write_words(
	trx_t*			trx,
	fts_index_cache_t*	index_cache,
	bool			unlock_cache,
	ib_time_t		sync_start_time)
{
	fts_table_t	fts_table;
	ulint		n_nodes = 0;
	ulint		n_words = 0;
	const ib_rbt_node_t* rbt_node;
	dberr_t		error = DB_SUCCESS;
	ibool		print_error = FALSE;
	dict_table_t*	table = index_cache->index->table;
	/* We use this to deduce threshold value of time
	that we can let sync to go on holding cache lock */
	const float cutoff = 0.98;
	ulint		lock_threshold =
		(srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION)
		* cutoff;
	/* True while this call has raised
	srv_fatal_semaphore_wait_threshold by one extension. */
	bool		timeout_extended = false;
#ifdef FTS_DOC_STATS_DEBUG
	ulint		n_new_words = 0;
#endif /* FTS_DOC_STATS_DEBUG */

	FTS_INIT_INDEX_TABLE(
		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);

	n_words = rbt_size(index_cache->words);

	/* We iterate over the entire tree, even if there is an error,
	since we want to free the memory used during caching. */
	for (rbt_node = rbt_first(index_cache->words);
	     rbt_node;
	     rbt_node = rbt_next(index_cache->words, rbt_node)) {

		ulint			i;
		ulint			selected;
		fts_tokenizer_word_t*	word;

		word = rbt_value(fts_tokenizer_word_t, rbt_node);

		DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
				os_thread_sleep(300000););

		/* Pick the auxiliary index table for this word and point
		fts_table at it through its suffix. */
		selected = fts_select_index(
			index_cache->charset, word->text.f_str,
			word->text.f_len);

		fts_table.suffix = fts_get_suffix(selected);

#ifdef FTS_DOC_STATS_DEBUG
		/* Check if the word exists in the FTS index and if not
		then we need to increment the total word count stats. */
		if (error == DB_SUCCESS && fts_enable_diag_print) {
			ibool	found = FALSE;

			error = fts_is_word_in_index(
				trx,
				&index_cache->sel_graph[selected],
				&fts_table,
				&word->text, &found);

			if (error == DB_SUCCESS && !found) {

				++n_new_words;
			}
		}
#endif /* FTS_DOC_STATS_DEBUG */

		/* We iterate over all the nodes even if there was an error */
		for (i = 0; i < ib_vector_size(word->nodes); ++i) {

			fts_node_t* fts_node = static_cast<fts_node_t*>(
				ib_vector_get(word->nodes, i));

			/* Skip nodes already marked as synced; mark the
			others now, before the write is attempted. */
			if (fts_node->synced) {
				continue;
			} else {
				fts_node->synced = true;
			}

			/*FIXME: we need to handle the error properly. */
			if (error == DB_SUCCESS) {
				DEBUG_SYNC_C("fts_instrument_sync");
				DBUG_EXECUTE_IF("fts_instrument_sync",
					os_thread_sleep(10000000););
				/* While the cache lock is being held for
				the whole sync, watch how long that has
				lasted. The first time the threshold is
				exceeded the fatal semaphore wait threshold
				is extended; the second time the extension
				is taken back and the function switches to
				releasing the cache lock around each write. */
				if (!unlock_cache) {
					ulint cache_lock_time = ut_time_monotonic() - sync_start_time;
					if (cache_lock_time > lock_threshold) {
						if (!timeout_extended) {
							os_atomic_increment_ulint(
							&srv_fatal_semaphore_wait_threshold,
							SRV_SEMAPHORE_WAIT_EXTENSION);
							timeout_extended = true;
							lock_threshold +=
							SRV_SEMAPHORE_WAIT_EXTENSION;
						} else {
							unlock_cache = true;
							os_atomic_decrement_ulint(
							&srv_fatal_semaphore_wait_threshold,
							SRV_SEMAPHORE_WAIT_EXTENSION);
							timeout_extended = false;

						}
					}
				}

				/* The cache lock is dropped only for the
				duration of the write and re-taken below. */
				if (unlock_cache) {
					rw_lock_x_unlock(
						&table->fts->cache->lock);
				}

				error = fts_write_node(
					trx,
					&index_cache->ins_graph[selected],
					&fts_table, &word->text, fts_node);
				DBUG_EXECUTE_IF("fts_instrument_sync",
					os_thread_sleep(15000000););

				DEBUG_SYNC_C("fts_write_node");
				DBUG_EXECUTE_IF("fts_write_node_crash",
						DBUG_SUICIDE(););

				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
						os_thread_sleep(1000000);
						);

				if (unlock_cache) {
					rw_lock_x_lock(
						&table->fts->cache->lock);
				}
			}
		}

		/* Counts every node of the word, not only those written
		by this call. */
		n_nodes += ib_vector_size(word->nodes);

		/* Report the first failure only once. */
		if (error != DB_SUCCESS && !print_error) {
			ib::error() << "(" << ut_strerr(error) << ") writing"
				" word node to FTS auxiliary index table.";
			print_error = TRUE;
		}
	}

	/* NOTE(review): if the loop ends while timeout_extended is still
	true, nothing in this function takes the extension of
	srv_fatal_semaphore_wait_threshold back -- confirm whether a caller
	compensates for that. */

#ifdef FTS_DOC_STATS_DEBUG
	if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
		fts_table_t	fts_table;

		FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);

		/* Increment the total number of words in the FTS index */
		error = fts_config_increment_index_value(
			trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
			n_new_words);
	}
#endif /* FTS_DOC_STATS_DEBUG */

	if (fts_enable_diag_print) {
		printf("Avg number of nodes: %lf\n",
		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
	}

	return(error);
}
4262
4263 #ifdef FTS_DOC_STATS_DEBUG
4264 /*********************************************************************//**
4265 Write a single documents statistics to disk.
4266 @return DB_SUCCESS if all went well else error code */
4267 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4268 dberr_t
fts_sync_write_doc_stat(trx_t * trx,dict_index_t * index,que_t ** graph,const fts_doc_stats_t * doc_stat)4269 fts_sync_write_doc_stat(
4270 /*====================*/
4271 trx_t* trx, /*!< in: transaction */
4272 dict_index_t* index, /*!< in: index */
4273 que_t** graph, /* out: query graph */
4274 const fts_doc_stats_t* doc_stat) /*!< in: doc stats to write */
4275 {
4276 pars_info_t* info;
4277 doc_id_t doc_id;
4278 dberr_t error = DB_SUCCESS;
4279 ib_uint32_t word_count;
4280 char table_name[MAX_FULL_NAME_LEN];
4281
4282 if (*graph) {
4283 info = (*graph)->info;
4284 } else {
4285 info = pars_info_create();
4286 }
4287
4288 /* Convert to "storage" byte order. */
4289 mach_write_to_4((byte*) &word_count, doc_stat->word_count);
4290 pars_info_bind_int4_literal(
4291 info, "count", (const ib_uint32_t*) &word_count);
4292
4293 /* Convert to "storage" byte order. */
4294 fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id);
4295 fts_bind_doc_id(info, "doc_id", &doc_id);
4296
4297 if (!*graph) {
4298 fts_table_t fts_table;
4299
4300 FTS_INIT_INDEX_TABLE(
4301 &fts_table, "DOC_ID", FTS_INDEX_TABLE, index);
4302
4303 fts_get_table_name(&fts_table, table_name);
4304
4305 pars_info_bind_id(info, true, "doc_id_table", table_name);
4306
4307 *graph = fts_parse_sql(
4308 &fts_table,
4309 info,
4310 "BEGIN"
4311 " INSERT INTO $doc_id_table VALUES (:doc_id, :count);");
4312 }
4313
4314 for (;;) {
4315 error = fts_eval_sql(trx, *graph);
4316
4317 if (error == DB_SUCCESS) {
4318
4319 break; /* Exit the loop. */
4320 } else {
4321
4322 if (error == DB_LOCK_WAIT_TIMEOUT) {
4323 ib::warn() << "Lock wait timeout writing to"
4324 " FTS doc_id. Retrying!";
4325
4326 trx->error_state = DB_SUCCESS;
4327 } else {
4328 ib::error() << "(" << ut_strerr(error)
4329 << ") while writing to FTS doc_id.";
4330
4331 break; /* Exit the loop. */
4332 }
4333 }
4334 }
4335
4336 return(error);
4337 }
4338
4339 /*********************************************************************//**
4340 Write document statistics to disk.
4341 @return DB_SUCCESS if all OK */
4342 static
4343 ulint
fts_sync_write_doc_stats(trx_t * trx,const fts_index_cache_t * index_cache)4344 fts_sync_write_doc_stats(
4345 /*=====================*/
4346 trx_t* trx, /*!< in: transaction */
4347 const fts_index_cache_t*index_cache) /*!< in: index cache */
4348 {
4349 dberr_t error = DB_SUCCESS;
4350 que_t* graph = NULL;
4351 fts_doc_stats_t* doc_stat;
4352
4353 if (ib_vector_is_empty(index_cache->doc_stats)) {
4354 return(DB_SUCCESS);
4355 }
4356
4357 doc_stat = static_cast<ts_doc_stats_t*>(
4358 ib_vector_pop(index_cache->doc_stats));
4359
4360 while (doc_stat) {
4361 error = fts_sync_write_doc_stat(
4362 trx, index_cache->index, &graph, doc_stat);
4363
4364 if (error != DB_SUCCESS) {
4365 break;
4366 }
4367
4368 if (ib_vector_is_empty(index_cache->doc_stats)) {
4369 break;
4370 }
4371
4372 doc_stat = static_cast<ts_doc_stats_t*>(
4373 ib_vector_pop(index_cache->doc_stats));
4374 }
4375
4376 if (graph != NULL) {
4377 fts_que_graph_free_check_lock(NULL, index_cache, graph);
4378 }
4379
4380 return(error);
4381 }
4382
4383 /*********************************************************************//**
4384 Callback to check the existince of a word.
4385 @return always return NULL */
4386 static
4387 ibool
fts_lookup_word(void * row,void * user_arg)4388 fts_lookup_word(
4389 /*============*/
4390 void* row, /*!< in: sel_node_t* */
4391 void* user_arg) /*!< in: fts_doc_t* */
4392 {
4393
4394 que_node_t* exp;
4395 sel_node_t* node = static_cast<sel_node_t*>(row);
4396 ibool* found = static_cast<ibool*>(user_arg);
4397
4398 exp = node->select_list;
4399
4400 while (exp) {
4401 dfield_t* dfield = que_node_get_val(exp);
4402 ulint len = dfield_get_len(dfield);
4403
4404 if (len != UNIV_SQL_NULL && len != 0) {
4405 *found = TRUE;
4406 }
4407
4408 exp = que_node_get_next(exp);
4409 }
4410
4411 return(FALSE);
4412 }
4413
4414 /*********************************************************************//**
4415 Check whether a particular word (term) exists in the FTS index.
4416 @return DB_SUCCESS if all went well else error code */
4417 static
4418 dberr_t
fts_is_word_in_index(trx_t * trx,que_t ** graph,fts_table_t * fts_table,const fts_string_t * word,ibool * found)4419 fts_is_word_in_index(
4420 /*=================*/
4421 trx_t* trx, /*!< in: FTS query state */
4422 que_t** graph, /* out: Query graph */
4423 fts_table_t* fts_table, /*!< in: table instance */
4424 const fts_string_t*
4425 word, /*!< in: the word to check */
4426 ibool* found) /* out: TRUE if exists */
4427 {
4428 pars_info_t* info;
4429 dberr_t error;
4430 char table_name[MAX_FULL_NAME_LEN];
4431
4432 trx->op_info = "looking up word in FTS index";
4433
4434 if (*graph) {
4435 info = (*graph)->info;
4436 } else {
4437 info = pars_info_create();
4438 }
4439
4440 fts_get_table_name(fts_table, table_name);
4441 pars_info_bind_id(info, true, "table_name", table_name);
4442 pars_info_bind_function(info, "my_func", fts_lookup_word, found);
4443 pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
4444
4445 if (*graph == NULL) {
4446 *graph = fts_parse_sql(
4447 fts_table,
4448 info,
4449 "DECLARE FUNCTION my_func;\n"
4450 "DECLARE CURSOR c IS"
4451 " SELECT doc_count\n"
4452 " FROM $table_name\n"
4453 " WHERE word = :word"
4454 " ORDER BY first_doc_id;\n"
4455 "BEGIN\n"
4456 "\n"
4457 "OPEN c;\n"
4458 "WHILE 1 = 1 LOOP\n"
4459 " FETCH c INTO my_func();\n"
4460 " IF c % NOTFOUND THEN\n"
4461 " EXIT;\n"
4462 " END IF;\n"
4463 "END LOOP;\n"
4464 "CLOSE c;");
4465 }
4466
4467 for (;;) {
4468 error = fts_eval_sql(trx, *graph);
4469
4470 if (error == DB_SUCCESS) {
4471
4472 break; /* Exit the loop. */
4473 } else {
4474
4475 if (error == DB_LOCK_WAIT_TIMEOUT) {
4476 ib::warn() << "Lock wait timeout reading"
4477 " FTS index. Retrying!";
4478
4479 trx->error_state = DB_SUCCESS;
4480 } else {
4481 ib::error() << "(" << ut_strerr(error)
4482 << ") while reading FTS index.";
4483
4484 break; /* Exit the loop. */
4485 }
4486 }
4487 }
4488
4489 return(error);
4490 }
4491 #endif /* FTS_DOC_STATS_DEBUG */
4492
4493 /*********************************************************************//**
4494 Begin Sync, create transaction, acquire locks, etc. */
4495 static
4496 void
fts_sync_begin(fts_sync_t * sync)4497 fts_sync_begin(
4498 /*===========*/
4499 fts_sync_t* sync) /*!< in: sync state */
4500 {
4501 fts_cache_t* cache = sync->table->fts->cache;
4502
4503 n_nodes = 0;
4504 elapsed_time = 0;
4505
4506 sync->start_time = ut_time_monotonic();
4507
4508 sync->trx = trx_allocate_for_background();
4509
4510 if (fts_enable_diag_print) {
4511 ib::info() << "FTS SYNC for table " << sync->table->name
4512 << ", deleted count: "
4513 << ib_vector_size(cache->deleted_doc_ids)
4514 << " size: " << cache->total_size << " bytes";
4515 }
4516 }
4517
4518 /*********************************************************************//**
4519 Run SYNC on the table, i.e., write out data from the index specific
4520 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4521 @return DB_SUCCESS if all OK */
4522 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4523 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4524 fts_sync_index(
4525 /*===========*/
4526 fts_sync_t* sync, /*!< in: sync state */
4527 fts_index_cache_t* index_cache) /*!< in: index cache */
4528 {
4529 trx_t* trx = sync->trx;
4530 dberr_t error = DB_SUCCESS;
4531
4532 trx->op_info = "doing SYNC index";
4533
4534 if (fts_enable_diag_print) {
4535 ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4536 }
4537
4538 ut_ad(rbt_validate(index_cache->words));
4539
4540 error = fts_sync_write_words(trx, index_cache, sync->unlock_cache,
4541 sync->start_time);
4542
4543 #ifdef FTS_DOC_STATS_DEBUG
4544 /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
4545 is not used currently for ranking. We disable fts_sync_write_doc_stats()
4546 for now */
4547 /* Write the per doc statistics that will be used for ranking. */
4548 if (error == DB_SUCCESS) {
4549
4550 error = fts_sync_write_doc_stats(trx, index_cache);
4551 }
4552 #endif /* FTS_DOC_STATS_DEBUG */
4553
4554 return(error);
4555 }
4556
4557 /** Check if index cache has been synced completely
4558 @param[in,out] index_cache index cache
4559 @return true if index is synced, otherwise false. */
4560 static
4561 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4562 fts_sync_index_check(
4563 fts_index_cache_t* index_cache)
4564 {
4565 const ib_rbt_node_t* rbt_node;
4566
4567 for (rbt_node = rbt_first(index_cache->words);
4568 rbt_node != NULL;
4569 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4570
4571 fts_tokenizer_word_t* word;
4572 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4573
4574 fts_node_t* fts_node;
4575 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4576
4577 if (!fts_node->synced) {
4578 return(false);
4579 }
4580 }
4581
4582 return(true);
4583 }
4584
4585 /** Reset synced flag in index cache when rollback
4586 @param[in,out] index_cache index cache */
4587 static
4588 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4589 fts_sync_index_reset(
4590 fts_index_cache_t* index_cache)
4591 {
4592 const ib_rbt_node_t* rbt_node;
4593
4594 for (rbt_node = rbt_first(index_cache->words);
4595 rbt_node != NULL;
4596 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4597
4598 fts_tokenizer_word_t* word;
4599 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4600
4601 fts_node_t* fts_node;
4602 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4603
4604 fts_node->synced = false;
4605 }
4606 }
4607
4608 /** Commit the SYNC, change state of processed doc ids etc.
4609 @param[in,out] sync sync state
4610 @return DB_SUCCESS if all OK */
4611 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4612 dberr_t
fts_sync_commit(fts_sync_t * sync)4613 fts_sync_commit(
4614 fts_sync_t* sync)
4615 {
4616 dberr_t error;
4617 trx_t* trx = sync->trx;
4618 fts_cache_t* cache = sync->table->fts->cache;
4619 doc_id_t last_doc_id;
4620
4621 trx->op_info = "doing SYNC commit";
4622
4623 /* After each Sync, update the CONFIG table about the max doc id
4624 we just sync-ed to index table */
4625 error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4626 &last_doc_id);
4627
4628 /* Get the list of deleted documents that are either in the
4629 cache or were headed there but were deleted before the add
4630 thread got to them. */
4631
4632 if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4633
4634 error = fts_sync_add_deleted_cache(
4635 sync, cache->deleted_doc_ids);
4636 }
4637
4638 /* We need to do this within the deleted lock since fts_delete() can
4639 attempt to add a deleted doc id to the cache deleted id array. */
4640 fts_cache_clear(cache);
4641 DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4642 fts_cache_init(cache);
4643 rw_lock_x_unlock(&cache->lock);
4644
4645 if (error == DB_SUCCESS) {
4646
4647 fts_sql_commit(trx);
4648
4649 } else if (error != DB_SUCCESS) {
4650
4651 fts_sql_rollback(trx);
4652
4653 ib::error() << "(" << ut_strerr(error) << ") during SYNC.";
4654 }
4655
4656 if (fts_enable_diag_print && elapsed_time) {
4657 ib::info() << "SYNC for table " << sync->table->name
4658 << ": SYNC time: "
4659 << (ut_time_monotonic() - sync->start_time)
4660 << " secs: elapsed "
4661 << (double) n_nodes / elapsed_time
4662 << " ins/sec";
4663 }
4664
4665 /* Avoid assertion in trx_free(). */
4666 trx->dict_operation_lock_mode = 0;
4667 trx_free_for_background(trx);
4668
4669 return(error);
4670 }
4671
4672 /*********************************************************************//**
4673 Rollback a sync operation */
4674 static
4675 void
fts_sync_rollback(fts_sync_t * sync)4676 fts_sync_rollback(
4677 /*==============*/
4678 fts_sync_t* sync) /*!< in: sync state */
4679 {
4680 trx_t* trx = sync->trx;
4681 fts_cache_t* cache = sync->table->fts->cache;
4682
4683 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4684 ulint j;
4685 fts_index_cache_t* index_cache;
4686
4687 index_cache = static_cast<fts_index_cache_t*>(
4688 ib_vector_get(cache->indexes, i));
4689
4690 /* Reset synced flag so nodes will not be skipped
4691 in the next sync, see fts_sync_write_words(). */
4692 fts_sync_index_reset(index_cache);
4693
4694 for (j = 0; fts_index_selector[j].value; ++j) {
4695
4696 if (index_cache->ins_graph[j] != NULL) {
4697
4698 fts_que_graph_free_check_lock(
4699 NULL, index_cache,
4700 index_cache->ins_graph[j]);
4701
4702 index_cache->ins_graph[j] = NULL;
4703 }
4704
4705 if (index_cache->sel_graph[j] != NULL) {
4706
4707 fts_que_graph_free_check_lock(
4708 NULL, index_cache,
4709 index_cache->sel_graph[j]);
4710
4711 index_cache->sel_graph[j] = NULL;
4712 }
4713 }
4714 }
4715
4716 rw_lock_x_unlock(&cache->lock);
4717
4718 fts_sql_rollback(trx);
4719
4720 /* Avoid assertion in trx_free(). */
4721 trx->dict_operation_lock_mode = 0;
4722 trx_free_for_background(trx);
4723 }
4724
4725 /** Check that all indexes are synced.
4726 @param[in,out] sync sync state
4727 @return true if all indexes are synced, false otherwise. */
4728 static
4729 bool
fts_check_all_indexes_synced(fts_sync_t * sync)4730 fts_check_all_indexes_synced(
4731 fts_sync_t* sync)
4732 {
4733 ulint i;
4734 fts_cache_t* cache = sync->table->fts->cache;
4735
4736 /* Make sure all the caches are synced. */
4737 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4738 fts_index_cache_t* index_cache;
4739
4740 index_cache = static_cast<fts_index_cache_t*>(
4741 ib_vector_get(cache->indexes, i));
4742
4743 if (index_cache->index->to_be_dropped
4744 || index_cache->index->table->to_be_dropped
4745 || fts_sync_index_check(index_cache)) {
4746 continue;
4747 }
4748
4749 return false;
4750 }
4751
4752 return true;
4753 }
4754
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
Only one SYNC runs at a time per cache: sync->in_progress is set under the
cache X-lock and cleared, with sync->event signalled, once the SYNC is done.
If another SYNC is in progress and wait is false, the function returns
DB_SUCCESS immediately without syncing anything.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@param[in]	has_dict_lock	whether has dict operation lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict_lock)
{
	ulint		i;
	dberr_t		error = DB_SUCCESS;
	fts_cache_t*	cache = sync->table->fts->cache;

	rw_lock_x_lock(&cache->lock);

	/* Check if cache is being synced.
	Note: we release cache lock in fts_sync_write_words() to
	avoid long wait for the lock by other threads. */
	while (sync->in_progress) {
		/* Never wait for the other SYNC while holding the
		cache lock. */
		rw_lock_x_unlock(&cache->lock);

		if (wait) {
			os_event_wait(sync->event);
		} else {
			/* Not an error: the running SYNC is left to
			do the work. */
			return(DB_SUCCESS);
		}

		/* Re-take the lock before re-checking in_progress. */
		rw_lock_x_lock(&cache->lock);
	}

	sync->unlock_cache = unlock_cache;
	sync->in_progress = true;

	DEBUG_SYNC_C("fts_sync_begin");
	/* Allocates sync->trx and records the start time. */
	fts_sync_begin(sync);

	if (has_dict_lock) {
		/* If lock is already taken mark that in transaction
		so rollback will not try to take it again. */
		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
	}

	/* Repeat until every index cache reports itself as synced, or
	an error stops the SYNC. */
	do {
		if (cache->total_size > fts_max_cache_size) {
			/* Avoid the case: sync never finish when
			insert/update keeps coming. From now on the
			cache lock is kept while writing nodes. */
			ut_ad(sync->unlock_cache);
			sync->unlock_cache = false;
		}

		for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
			fts_index_cache_t*	index_cache;

			index_cache = static_cast<fts_index_cache_t*>(
				ib_vector_get(cache->indexes, i));

			/* Skip indexes that are being dropped or whose
			table is being dropped. */
			if (index_cache->index->to_be_dropped
			    || index_cache->index->table->to_be_dropped) {
				continue;
			}

			DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
					os_thread_sleep(300000););

			/* Flag the index as being synced; the flag is
			cleared again at the end of this function. */
			index_cache->index->index_fts_syncing = true;

			error = fts_sync_index(sync, index_cache);

			if (error != DB_SUCCESS) {
				break;
			}
		}

		/* Debug-only injection of an interrupted SYNC. */
		DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
				sync->interrupted = true;
				error = DB_INTERRUPTED;
		);

		if (error != DB_SUCCESS) {
			break;
		}
	} while (!fts_check_all_indexes_synced(sync));

	/* Both fts_sync_commit() and fts_sync_rollback() release the
	cache X-lock and free sync->trx. */
	if (error == DB_SUCCESS && !sync->interrupted) {
		error = fts_sync_commit(sync);
	} else {
		fts_sync_rollback(sync);
	}

	/* The lock was released by commit/rollback above; take it again
	to update the sync state. */
	rw_lock_x_lock(&cache->lock);
	/* Clear fts syncing flags of any indexes in case sync is
	interrupted */
	DEBUG_SYNC_C("fts_instrument_sync");
	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
		fts_index_cache_t*	index_cache;
		index_cache = static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i));
		if (index_cache->index->index_fts_syncing == true) {
			index_cache->index->index_fts_syncing = false;
		}
	}
	sync->interrupted = false;
	sync->in_progress = false;
	/* Wake up any thread waiting for this SYNC to finish. */
	os_event_set(sync->event);
	rw_lock_x_unlock(&cache->lock);

	/* We need to check whether an optimize is required, for that
	we make copies of the two variables that control the trigger. These
	variables can change behind our back and we don't want to hold the
	lock for longer than is needed.
	NOTE(review): the code below only resets the two counters under
	deleted_lock; no copies are made and no optimize check is visible
	here, so the comment above looks out of date - confirm. */
	mutex_enter(&cache->deleted_lock);

	cache->added = 0;
	cache->deleted = 0;

	mutex_exit(&cache->deleted_lock);

	return(error);
}
4881
4882 /** Run SYNC on the table, i.e., write out data from the cache to the
4883 FTS auxiliary INDEX table and clear the cache at the end.
4884 @param[in,out] table fts table
4885 @param[in] unlock_cache whether unlock cache when write node
4886 @param[in] wait whether wait for existing sync to finish
4887 @param[in] has_dict whether has dict operation lock
4888 @return DB_SUCCESS on success, error code on failure. */
4889 dberr_t
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4890 fts_sync_table(
4891 dict_table_t* table,
4892 bool unlock_cache,
4893 bool wait,
4894 bool has_dict)
4895 {
4896 dberr_t err = DB_SUCCESS;
4897
4898 ut_ad(table->fts);
4899
4900 if (!dict_table_is_discarded(table) && table->fts->cache
4901 && !dict_table_is_corrupted(table)) {
4902 err = fts_sync(table->fts->cache->sync,
4903 unlock_cache, wait, has_dict);
4904 }
4905
4906 return(err);
4907 }
4908
4909 /** Check fts token
4910 1. for ngram token, check whether the token contains any words in stopwords
4911 2. for non-ngram token, check if it's stopword or less than fts_min_token_size
4912 or greater than fts_max_token_size.
4913 @param[in] token token string
4914 @param[in] stopwords stopwords rb tree
4915 @param[in] is_ngram is ngram parser
4916 @param[in] cs token charset
4917 @param[in] skip true if the check should be skipped
4918 @retval true if it is not stopword and length in range
4919 @retval false if it is stopword or lenght not in range */
4920 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,bool is_ngram,const CHARSET_INFO * cs,bool skip)4921 fts_check_token(
4922 const fts_string_t* token,
4923 const ib_rbt_t* stopwords,
4924 bool is_ngram,
4925 const CHARSET_INFO* cs,
4926 bool skip)
4927 {
4928 ut_ad(cs != NULL || stopwords == NULL);
4929
4930 if (skip) {
4931 return(true);
4932 }
4933
4934 if (!is_ngram) {
4935 ib_rbt_bound_t parent;
4936
4937 if (token->f_n_char < fts_min_token_size
4938 || token->f_n_char > fts_max_token_size
4939 || (stopwords != NULL
4940 && rbt_search(stopwords, &parent, token) == 0)) {
4941 return(false);
4942 } else {
4943 return(true);
4944 }
4945 }
4946
4947 /* Check token for ngram. */
4948 DBUG_EXECUTE_IF(
4949 "fts_instrument_ignore_ngram_check",
4950 return(true);
4951 );
4952
4953 /* We ignore fts_min_token_size when ngram */
4954 ut_ad(token->f_n_char > 0
4955 && token->f_n_char <= fts_max_token_size);
4956
4957 if (stopwords == NULL) {
4958 return(true);
4959 }
4960
4961 /*Ngram checks whether the token contains any words in stopwords.
4962 We can't simply use CONTAIN to search in stopwords, because it's
4963 built on COMPARE. So we need to tokenize the token into words
4964 from unigram to f_n_char, and check them separately. */
4965 for (ulint ngram_token_size = 1; ngram_token_size <= token->f_n_char;
4966 ngram_token_size ++) {
4967 const char* start;
4968 const char* next;
4969 const char* end;
4970 ulint char_len;
4971 ulint n_chars;
4972
4973 start = reinterpret_cast<char*>(token->f_str);
4974 next = start;
4975 end = start + token->f_len;
4976 n_chars = 0;
4977
4978 while (next < end) {
4979 char_len = my_mbcharlen_ptr(cs, next, end);
4980
4981 if (next + char_len > end || char_len == 0) {
4982 break;
4983 } else {
4984 /* Skip SPACE */
4985 if (char_len == 1 && *next == ' ') {
4986 start = next + 1;
4987 next = start;
4988 n_chars = 0;
4989
4990 continue;
4991 }
4992
4993 next += char_len;
4994 n_chars++;
4995 }
4996
4997 if (n_chars == ngram_token_size) {
4998 fts_string_t ngram_token;
4999 ngram_token.f_str =
5000 reinterpret_cast<byte*>(
5001 const_cast<char*>(start));
5002 ngram_token.f_len = next - start;
5003 ngram_token.f_n_char = ngram_token_size;
5004
5005 ib_rbt_bound_t parent;
5006 if (rbt_search(stopwords, &parent,
5007 &ngram_token) == 0) {
5008 return(false);
5009 }
5010
5011 /* Move a char forward */
5012 start += my_mbcharlen_ptr(cs, start, end);
5013 n_chars = ngram_token_size - 1;
5014 }
5015 }
5016 }
5017
5018 return(true);
5019 }
5020
5021 /** Add the token and its start position to the token's list of positions.
5022 @param[in,out] result_doc result doc rb tree
5023 @param[in] str token string
5024 @param[in] position token position */
5025 static
5026 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)5027 fts_add_token(
5028 fts_doc_t* result_doc,
5029 fts_string_t str,
5030 ulint position)
5031 {
5032 /* Ignore string whose character number is less than
5033 "fts_min_token_size" or more than "fts_max_token_size" */
5034
5035 ut_ad(current_thd != NULL);
5036 if (fts_check_token(&str, NULL, result_doc->is_ngram,
5037 result_doc->charset,
5038 thd_has_ft_ignore_stopwords(current_thd))) {
5039
5040 mem_heap_t* heap;
5041 fts_string_t t_str;
5042 fts_token_t* token;
5043 ib_rbt_bound_t parent;
5044 ulint newlen;
5045
5046 heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
5047
5048 t_str.f_n_char = str.f_n_char;
5049
5050 t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
5051
5052 t_str.f_str = static_cast<byte*>(
5053 mem_heap_alloc(heap, t_str.f_len));
5054
5055 /* For binary collations, a case sensitive search is
5056 performed. Hence don't convert to lower case. */
5057 if (my_binary_compare(result_doc->charset)) {
5058 memcpy(t_str.f_str, str.f_str, str.f_len);
5059 t_str.f_str[str.f_len]= 0;
5060 newlen= str.f_len;
5061 } else {
5062 newlen = innobase_fts_casedn_str(
5063 result_doc->charset, (char*) str.f_str, str.f_len,
5064 (char*) t_str.f_str, t_str.f_len);
5065 }
5066
5067 t_str.f_len = newlen;
5068 t_str.f_str[newlen] = 0;
5069
5070 /* Add the word to the document statistics. If the word
5071 hasn't been seen before we create a new entry for it. */
5072 if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
5073 fts_token_t new_token;
5074
5075 new_token.text.f_len = newlen;
5076 new_token.text.f_str = t_str.f_str;
5077 new_token.text.f_n_char = t_str.f_n_char;
5078
5079 new_token.positions = ib_vector_create(
5080 result_doc->self_heap, sizeof(ulint), 32);
5081
5082 parent.last = rbt_add_node(
5083 result_doc->tokens, &parent, &new_token);
5084
5085 ut_ad(rbt_validate(result_doc->tokens));
5086 }
5087
5088 token = rbt_value(fts_token_t, parent.last);
5089 ib_vector_push(token->positions, &position);
5090 }
5091 }
5092
5093 /********************************************************************
5094 Process next token from document starting at the given position, i.e., add
5095 the token's start position to the token's list of positions.
5096 @return number of characters handled in this call */
5097 static
5098 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)5099 fts_process_token(
5100 /*==============*/
5101 fts_doc_t* doc, /* in/out: document to
5102 tokenize */
5103 fts_doc_t* result, /* out: if provided, save
5104 result here */
5105 ulint start_pos, /*!< in: start position in text */
5106 ulint add_pos) /*!< in: add this position to all
5107 tokens from this tokenization */
5108 {
5109 ulint ret;
5110 fts_string_t str;
5111 ulint position;
5112 fts_doc_t* result_doc;
5113 byte buf[FTS_MAX_WORD_LEN + 1];
5114
5115 str.f_str = buf;
5116
5117 /* Determine where to save the result. */
5118 result_doc = (result != NULL) ? result : doc;
5119
5120 /* The length of a string in characters is set here only. */
5121
5122 ret = innobase_mysql_fts_get_token(
5123 doc->charset, doc->text.f_str + start_pos,
5124 doc->text.f_str + doc->text.f_len, false, &str);
5125
5126 position = start_pos + ret - str.f_len + add_pos;
5127
5128 fts_add_token(result_doc, str, position);
5129
5130 return(ret);
5131 }
5132
5133 /*************************************************************//**
5134 Get token char size by charset
5135 @return token size */
5136 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)5137 fts_get_token_size(
5138 /*===============*/
5139 const CHARSET_INFO* cs, /*!< in: Character set */
5140 const char* token, /*!< in: token */
5141 ulint len) /*!< in: token length */
5142 {
5143 char* start;
5144 char* end;
5145 ulint size = 0;
5146
5147 /* const_cast is for reinterpret_cast below, or it will fail. */
5148 start = const_cast<char*>(token);
5149 end = start + len;
5150 while (start < end) {
5151 int ctype;
5152 int mbl;
5153
5154 mbl = cs->cset->ctype(
5155 cs, &ctype,
5156 reinterpret_cast<uchar*>(start),
5157 reinterpret_cast<uchar*>(end));
5158
5159 size++;
5160
5161 start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
5162 }
5163
5164 return(size);
5165 }
5166
5167 /*************************************************************//**
5168 FTS plugin parser 'myql_parser' callback function for document tokenize.
5169 Refer to 'st_mysql_ftparser_param' for more detail.
5170 @return always returns 0 */
5171 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,char * doc,int len)5172 fts_tokenize_document_internal(
5173 /*===========================*/
5174 MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
5175 char* doc, /*!< in/out: document */
5176 int len) /*!< in: document length */
5177 {
5178 fts_string_t str;
5179 byte buf[FTS_MAX_WORD_LEN + 1];
5180 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
5181 { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
5182
5183 ut_ad(len >= 0);
5184
5185 str.f_str = buf;
5186
5187 for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
5188 inc = innobase_mysql_fts_get_token(
5189 const_cast<CHARSET_INFO*>(param->cs),
5190 reinterpret_cast<byte*>(doc) + i,
5191 reinterpret_cast<byte*>(doc) + len, false,
5192 &str);
5193
5194 if (str.f_len > 0) {
5195 bool_info.position =
5196 static_cast<int>(i + inc - str.f_len);
5197 ut_ad(bool_info.position >= 0);
5198
5199 /* Stop when add word fails */
5200 if (param->mysql_add_word(
5201 param,
5202 reinterpret_cast<char*>(str.f_str),
5203 static_cast<int>(str.f_len),
5204 &bool_info)) {
5205 break;
5206 }
5207 }
5208 }
5209
5210 return(0);
5211 }
5212
5213 /******************************************************************//**
5214 FTS plugin parser 'myql_add_word' callback function for document tokenize.
5215 Refer to 'st_mysql_ftparser_param' for more detail.
5216 @return always returns 0 */
5217 static
5218 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)5219 fts_tokenize_add_word_for_parser(
5220 /*=============================*/
5221 MYSQL_FTPARSER_PARAM* param, /* in: parser paramter */
5222 char* word, /* in: token word */
5223 int word_len, /* in: word len */
5224 MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: word boolean info */
5225 {
5226 fts_string_t str;
5227 fts_tokenize_param_t* fts_param;
5228 fts_doc_t* result_doc;
5229 ulint position;
5230
5231 fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
5232 result_doc = fts_param->result_doc;
5233 ut_ad(result_doc != NULL);
5234
5235 str.f_str = reinterpret_cast<byte*>(word);
5236 str.f_len = word_len;
5237 str.f_n_char = fts_get_token_size(
5238 const_cast<CHARSET_INFO*>(param->cs), word, word_len);
5239
5240 ut_ad(boolean_info->position >= 0);
5241 position = boolean_info->position + fts_param->add_pos;
5242
5243 fts_add_token(result_doc, str, position);
5244
5245 return(0);
5246 }
5247
5248 /******************************************************************//**
5249 Parse a document using an external / user supplied parser */
5250 static
5251 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)5252 fts_tokenize_by_parser(
5253 /*===================*/
5254 fts_doc_t* doc, /* in/out: document to tokenize */
5255 st_mysql_ftparser* parser, /* in: plugin fts parser */
5256 fts_tokenize_param_t* fts_param) /* in: fts tokenize param */
5257 {
5258 MYSQL_FTPARSER_PARAM param;
5259
5260 ut_a(parser);
5261
5262 /* Set paramters for param */
5263 param.mysql_parse = fts_tokenize_document_internal;
5264 param.mysql_add_word = fts_tokenize_add_word_for_parser;
5265 param.mysql_ftparam = fts_param;
5266 param.cs = doc->charset;
5267 param.doc = reinterpret_cast<char*>(doc->text.f_str);
5268 param.length = static_cast<int>(doc->text.f_len);
5269 param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
5270
5271 PARSER_INIT(parser, ¶m);
5272 parser->parse(¶m);
5273 PARSER_DEINIT(parser, ¶m);
5274 }
5275
5276 /******************************************************************//**
5277 Tokenize a document. */
5278 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)5279 fts_tokenize_document(
5280 /*==================*/
5281 fts_doc_t* doc, /* in/out: document to
5282 tokenize */
5283 fts_doc_t* result, /* out: if provided, save
5284 the result token here */
5285 st_mysql_ftparser* parser) /* in: plugin fts parser */
5286 {
5287 ut_a(!doc->tokens);
5288 ut_a(doc->charset);
5289
5290 doc->tokens = rbt_create_arg_cmp(
5291 sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
5292
5293 if (parser != NULL) {
5294 fts_tokenize_param_t fts_param;
5295
5296 fts_param.result_doc = (result != NULL) ? result : doc;
5297 fts_param.add_pos = 0;
5298
5299 fts_tokenize_by_parser(doc, parser, &fts_param);
5300 } else {
5301 ulint inc;
5302
5303 for (ulint i = 0; i < doc->text.f_len; i += inc) {
5304 inc = fts_process_token(doc, result, i, 0);
5305 ut_a(inc > 0);
5306 }
5307 }
5308 }
5309
5310 /******************************************************************//**
5311 Continue to tokenize a document. */
5312 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)5313 fts_tokenize_document_next(
5314 /*=======================*/
5315 fts_doc_t* doc, /*!< in/out: document to
5316 tokenize */
5317 ulint add_pos, /*!< in: add this position to all
5318 tokens from this tokenization */
5319 fts_doc_t* result, /*!< out: if provided, save
5320 the result token here */
5321 st_mysql_ftparser* parser) /* in: plugin fts parser */
5322 {
5323 ut_a(doc->tokens);
5324
5325 if (parser) {
5326 fts_tokenize_param_t fts_param;
5327
5328 fts_param.result_doc = (result != NULL) ? result : doc;
5329 fts_param.add_pos = add_pos;
5330
5331 fts_tokenize_by_parser(doc, parser, &fts_param);
5332 } else {
5333 ulint inc;
5334
5335 for (ulint i = 0; i < doc->text.f_len; i += inc) {
5336 inc = fts_process_token(doc, result, i, add_pos);
5337 ut_a(inc > 0);
5338 }
5339 }
5340 }
5341
5342 /********************************************************************
5343 Create the vector of fts_get_doc_t instances. */
5344 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)5345 fts_get_docs_create(
5346 /*================*/
5347 /* out: vector of
5348 fts_get_doc_t instances */
5349 fts_cache_t* cache) /*!< in: fts cache */
5350 {
5351 ib_vector_t* get_docs;
5352
5353 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
5354
5355 /* We need one instance of fts_get_doc_t per index. */
5356 get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
5357
5358 /* Create the get_doc instance, we need one of these
5359 per FTS index. */
5360 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
5361
5362 dict_index_t** index;
5363 fts_get_doc_t* get_doc;
5364
5365 index = static_cast<dict_index_t**>(
5366 ib_vector_get(cache->indexes, i));
5367
5368 get_doc = static_cast<fts_get_doc_t*>(
5369 ib_vector_push(get_docs, NULL));
5370
5371 memset(get_doc, 0x0, sizeof(*get_doc));
5372
5373 get_doc->index_cache = fts_get_index_cache(cache, *index);
5374 get_doc->cache = cache;
5375
5376 /* Must find the index cache. */
5377 ut_a(get_doc->index_cache != NULL);
5378 }
5379
5380 return(get_docs);
5381 }
5382
5383 /********************************************************************
5384 Release any resources held by the fts_get_doc_t instances. */
5385 static
5386 void
fts_get_docs_clear(ib_vector_t * get_docs)5387 fts_get_docs_clear(
5388 /*===============*/
5389 ib_vector_t* get_docs) /*!< in: Doc retrieval vector */
5390 {
5391 ulint i;
5392
5393 /* Release the get doc graphs if any. */
5394 for (i = 0; i < ib_vector_size(get_docs); ++i) {
5395
5396 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(
5397 ib_vector_get(get_docs, i));
5398
5399 if (get_doc->get_document_graph != NULL) {
5400
5401 ut_a(get_doc->index_cache);
5402
5403 fts_que_graph_free(get_doc->get_document_graph);
5404 get_doc->get_document_graph = NULL;
5405 }
5406 }
5407 }
5408
5409 /*********************************************************************//**
5410 Get the initial Doc ID by consulting the CONFIG table
5411 @return initial Doc ID */
5412 doc_id_t
fts_init_doc_id(const dict_table_t * table)5413 fts_init_doc_id(
5414 /*============*/
5415 const dict_table_t* table) /*!< in: table */
5416 {
5417 doc_id_t max_doc_id = 0;
5418
5419 rw_lock_x_lock(&table->fts->cache->lock);
5420
5421 /* Return if the table is already initialized for DOC ID */
5422 if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
5423 rw_lock_x_unlock(&table->fts->cache->lock);
5424 return(0);
5425 }
5426
5427 DEBUG_SYNC_C("fts_initialize_doc_id");
5428
5429 /* Then compare this value with the ID value stored in the CONFIG
5430 table. The larger one will be our new initial Doc ID */
5431 fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
5432
5433 /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
5434 creating index (and add doc id column. No need to recovery
5435 documents */
5436 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
5437 fts_init_index((dict_table_t*) table, TRUE);
5438 }
5439
5440 table->fts->fts_status |= ADDED_TABLE_SYNCED;
5441
5442 table->fts->cache->first_doc_id = max_doc_id;
5443
5444 rw_lock_x_unlock(&table->fts->cache->lock);
5445
5446 ut_ad(max_doc_id > 0);
5447
5448 return(max_doc_id);
5449 }
5450
5451 #ifdef FTS_MULT_INDEX
5452 /*********************************************************************//**
5453 Check if the index is in the affected set.
5454 @return TRUE if index is updated */
5455 static
5456 ibool
fts_is_index_updated(const ib_vector_t * fts_indexes,const fts_get_doc_t * get_doc)5457 fts_is_index_updated(
5458 /*=================*/
5459 const ib_vector_t* fts_indexes, /*!< in: affected FTS indexes */
5460 const fts_get_doc_t* get_doc) /*!< in: info for reading
5461 document */
5462 {
5463 ulint i;
5464 dict_index_t* index = get_doc->index_cache->index;
5465
5466 for (i = 0; i < ib_vector_size(fts_indexes); ++i) {
5467 const dict_index_t* updated_fts_index;
5468
5469 updated_fts_index = static_cast<const dict_index_t*>(
5470 ib_vector_getp_const(fts_indexes, i));
5471
5472 ut_a(updated_fts_index != NULL);
5473
5474 if (updated_fts_index == index) {
5475 return(TRUE);
5476 }
5477 }
5478
5479 return(FALSE);
5480 }
5481 #endif
5482
5483 /*********************************************************************//**
5484 Fetch COUNT(*) from specified table.
5485 @return the number of rows in the table */
5486 ulint
fts_get_rows_count(fts_table_t * fts_table)5487 fts_get_rows_count(
5488 /*===============*/
5489 fts_table_t* fts_table) /*!< in: fts table to read */
5490 {
5491 trx_t* trx;
5492 pars_info_t* info;
5493 que_t* graph;
5494 dberr_t error;
5495 ulint count = 0;
5496 char table_name[MAX_FULL_NAME_LEN];
5497
5498 trx = trx_allocate_for_background();
5499
5500 trx->op_info = "fetching FT table rows count";
5501
5502 info = pars_info_create();
5503
5504 pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
5505
5506 fts_get_table_name(fts_table, table_name);
5507 pars_info_bind_id(info, true, "table_name", table_name);
5508
5509 graph = fts_parse_sql(
5510 fts_table,
5511 info,
5512 "DECLARE FUNCTION my_func;\n"
5513 "DECLARE CURSOR c IS"
5514 " SELECT COUNT(*)"
5515 " FROM $table_name;\n"
5516 "BEGIN\n"
5517 "\n"
5518 "OPEN c;\n"
5519 "WHILE 1 = 1 LOOP\n"
5520 " FETCH c INTO my_func();\n"
5521 " IF c % NOTFOUND THEN\n"
5522 " EXIT;\n"
5523 " END IF;\n"
5524 "END LOOP;\n"
5525 "CLOSE c;");
5526
5527 for (;;) {
5528 error = fts_eval_sql(trx, graph);
5529
5530 if (error == DB_SUCCESS) {
5531 fts_sql_commit(trx);
5532
5533 break; /* Exit the loop. */
5534 } else {
5535 fts_sql_rollback(trx);
5536
5537 if (error == DB_LOCK_WAIT_TIMEOUT) {
5538 ib::warn() << "lock wait timeout reading"
5539 " FTS table. Retrying!";
5540
5541 trx->error_state = DB_SUCCESS;
5542 } else {
5543 ib::error() << "(" << ut_strerr(error)
5544 << ") while reading FTS table.";
5545
5546 break; /* Exit the loop. */
5547 }
5548 }
5549 }
5550
5551 fts_que_graph_free(graph);
5552
5553 trx_free_for_background(trx);
5554
5555 return(count);
5556 }
5557
5558 #ifdef FTS_CACHE_SIZE_DEBUG
5559 /*********************************************************************//**
5560 Read the max cache size parameter from the config table. */
5561 static
5562 void
fts_update_max_cache_size(fts_sync_t * sync)5563 fts_update_max_cache_size(
5564 /*======================*/
5565 fts_sync_t* sync) /*!< in: sync state */
5566 {
5567 trx_t* trx;
5568 fts_table_t fts_table;
5569
5570 trx = trx_allocate_for_background();
5571
5572 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);
5573
5574 /* The size returned is in bytes. */
5575 sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table);
5576
5577 fts_sql_commit(trx);
5578
5579 trx_free_for_background(trx);
5580 }
5581 #endif /* FTS_CACHE_SIZE_DEBUG */
5582
5583 /*********************************************************************//**
5584 Free the modified rows of a table. */
5585 UNIV_INLINE
5586 void
fts_trx_table_rows_free(ib_rbt_t * rows)5587 fts_trx_table_rows_free(
5588 /*====================*/
5589 ib_rbt_t* rows) /*!< in: rbt of rows to free */
5590 {
5591 const ib_rbt_node_t* node;
5592
5593 for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5594 fts_trx_row_t* row;
5595
5596 row = rbt_value(fts_trx_row_t, node);
5597
5598 if (row->fts_indexes != NULL) {
5599 /* This vector shouldn't be using the
5600 heap allocator. */
5601 ut_a(row->fts_indexes->allocator->arg == NULL);
5602
5603 ib_vector_free(row->fts_indexes);
5604 row->fts_indexes = NULL;
5605 }
5606
5607 ut_free(rbt_remove_node(rows, node));
5608 }
5609
5610 ut_a(rbt_empty(rows));
5611 rbt_free(rows);
5612 }
5613
5614 /*********************************************************************//**
5615 Free an FTS savepoint instance. */
5616 UNIV_INLINE
5617 void
fts_savepoint_free(fts_savepoint_t * savepoint)5618 fts_savepoint_free(
5619 /*===============*/
5620 fts_savepoint_t* savepoint) /*!< in: savepoint instance */
5621 {
5622 const ib_rbt_node_t* node;
5623 ib_rbt_t* tables = savepoint->tables;
5624
5625 /* Nothing to free! */
5626 if (tables == NULL) {
5627 return;
5628 }
5629
5630 for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5631 fts_trx_table_t* ftt;
5632 fts_trx_table_t** fttp;
5633
5634 fttp = rbt_value(fts_trx_table_t*, node);
5635 ftt = *fttp;
5636
5637 /* This can be NULL if a savepoint was released. */
5638 if (ftt->rows != NULL) {
5639 fts_trx_table_rows_free(ftt->rows);
5640 ftt->rows = NULL;
5641 }
5642
5643 /* This can be NULL if a savepoint was released. */
5644 if (ftt->added_doc_ids != NULL) {
5645 fts_doc_ids_free(ftt->added_doc_ids);
5646 ftt->added_doc_ids = NULL;
5647 }
5648
5649 /* The default savepoint name must be NULL. */
5650 if (ftt->docs_added_graph) {
5651 fts_que_graph_free(ftt->docs_added_graph);
5652 }
5653
5654 /* NOTE: We are responsible for free'ing the node */
5655 ut_free(rbt_remove_node(tables, node));
5656 }
5657
5658 ut_a(rbt_empty(tables));
5659 rbt_free(tables);
5660 savepoint->tables = NULL;
5661 }
5662
5663 /*********************************************************************//**
5664 Free an FTS trx. */
5665 void
fts_trx_free(fts_trx_t * fts_trx)5666 fts_trx_free(
5667 /*=========*/
5668 fts_trx_t* fts_trx) /* in, own: FTS trx */
5669 {
5670 ulint i;
5671
5672 for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5673 fts_savepoint_t* savepoint;
5674
5675 savepoint = static_cast<fts_savepoint_t*>(
5676 ib_vector_get(fts_trx->savepoints, i));
5677
5678 /* The default savepoint name must be NULL. */
5679 if (i == 0) {
5680 ut_a(savepoint->name == NULL);
5681 }
5682
5683 fts_savepoint_free(savepoint);
5684 }
5685
5686 for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5687 fts_savepoint_t* savepoint;
5688
5689 savepoint = static_cast<fts_savepoint_t*>(
5690 ib_vector_get(fts_trx->last_stmt, i));
5691
5692 /* The default savepoint name must be NULL. */
5693 if (i == 0) {
5694 ut_a(savepoint->name == NULL);
5695 }
5696
5697 fts_savepoint_free(savepoint);
5698 }
5699
5700 if (fts_trx->heap) {
5701 mem_heap_free(fts_trx->heap);
5702 }
5703 }
5704
5705 /*********************************************************************//**
5706 Extract the doc id from the FTS hidden column.
5707 @return doc id that was extracted from rec */
5708 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5709 fts_get_doc_id_from_row(
5710 /*====================*/
5711 dict_table_t* table, /*!< in: table */
5712 dtuple_t* row) /*!< in: row whose FTS doc id we
5713 want to extract.*/
5714 {
5715 dfield_t* field;
5716 doc_id_t doc_id = 0;
5717
5718 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5719
5720 field = dtuple_get_nth_field(row, table->fts->doc_col);
5721
5722 ut_a(dfield_get_len(field) == sizeof(doc_id));
5723 ut_a(dfield_get_type(field)->mtype == DATA_INT);
5724
5725 doc_id = fts_read_doc_id(
5726 static_cast<const byte*>(dfield_get_data(field)));
5727
5728 return(doc_id);
5729 }
5730
5731 /** Extract the doc id from the record that belongs to index.
5732 @param[in] table table
5733 @param[in] rec record contains FTS_DOC_ID
5734 @param[in] index index of rec
5735 @param[in] heap heap memory
5736 @return doc id that was extracted from rec */
5737 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5738 fts_get_doc_id_from_rec(
5739 dict_table_t* table,
5740 const rec_t* rec,
5741 const dict_index_t* index,
5742 mem_heap_t* heap)
5743 {
5744 ulint len;
5745 const byte* data;
5746 ulint col_no;
5747 doc_id_t doc_id = 0;
5748 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5749 ulint* offsets = offsets_;
5750 mem_heap_t* my_heap = heap;
5751
5752 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5753
5754 rec_offs_init(offsets_);
5755
5756 offsets = rec_get_offsets(
5757 rec, index, offsets, ULINT_UNDEFINED, &my_heap);
5758
5759 col_no = dict_col_get_index_pos(
5760 &table->cols[table->fts->doc_col], index);
5761
5762 ut_ad(col_no != ULINT_UNDEFINED);
5763
5764 data = rec_get_nth_field(rec, offsets, col_no, &len);
5765
5766 ut_a(len == 8);
5767 ut_ad(8 == sizeof(doc_id));
5768 doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5769
5770 if (my_heap && !heap) {
5771 mem_heap_free(my_heap);
5772 }
5773
5774 return(doc_id);
5775 }
5776
5777 /*********************************************************************//**
5778 Search the index specific cache for a particular FTS index.
5779 @return the index specific cache else NULL */
5780 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5781 fts_find_index_cache(
5782 /*=================*/
5783 const fts_cache_t* cache, /*!< in: cache to search */
5784 const dict_index_t* index) /*!< in: index to search for */
5785 {
5786 /* We cast away the const because our internal function, takes
5787 non-const cache arg and returns a non-const pointer. */
5788 return(static_cast<fts_index_cache_t*>(
5789 fts_get_index_cache((fts_cache_t*) cache, index)));
5790 }
5791
5792 /*********************************************************************//**
5793 Search cache for word.
5794 @return the word node vector if found else NULL */
5795 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5796 fts_cache_find_word(
5797 /*================*/
5798 const fts_index_cache_t*index_cache, /*!< in: cache to search */
5799 const fts_string_t* text) /*!< in: word to search for */
5800 {
5801 ib_rbt_bound_t parent;
5802 const ib_vector_t* nodes = NULL;
5803 #ifdef UNIV_DEBUG
5804 dict_table_t* table = index_cache->index->table;
5805 fts_cache_t* cache = table->fts->cache;
5806
5807 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5808 #endif /* UNIV_DEBUG */
5809
5810 /* Lookup the word in the rb tree */
5811 if (rbt_search(index_cache->words, &parent, text) == 0) {
5812 const fts_tokenizer_word_t* word;
5813
5814 word = rbt_value(fts_tokenizer_word_t, parent.last);
5815
5816 nodes = word->nodes;
5817 }
5818
5819 return(nodes);
5820 }
5821
5822 /*********************************************************************//**
5823 Check cache for deleted doc id.
5824 @return TRUE if deleted */
5825 ibool
fts_cache_is_deleted_doc_id(const fts_cache_t * cache,doc_id_t doc_id)5826 fts_cache_is_deleted_doc_id(
5827 /*========================*/
5828 const fts_cache_t* cache, /*!< in: cache ito search */
5829 doc_id_t doc_id) /*!< in: doc id to search for */
5830 {
5831 ut_ad(mutex_own(&cache->deleted_lock));
5832
5833 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5834 const fts_update_t* update;
5835
5836 update = static_cast<const fts_update_t*>(
5837 ib_vector_get_const(cache->deleted_doc_ids, i));
5838
5839 if (doc_id == update->doc_id) {
5840
5841 return(TRUE);
5842 }
5843 }
5844
5845 return(FALSE);
5846 }
5847
5848 /*********************************************************************//**
5849 Append deleted doc ids to vector. */
5850 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5851 fts_cache_append_deleted_doc_ids(
5852 /*=============================*/
5853 const fts_cache_t* cache, /*!< in: cache to use */
5854 ib_vector_t* vector) /*!< in: append to this vector */
5855 {
5856 mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5857
5858 if (cache->deleted_doc_ids == NULL) {
5859 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5860 return;
5861 }
5862
5863
5864 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5865 fts_update_t* update;
5866
5867 update = static_cast<fts_update_t*>(
5868 ib_vector_get(cache->deleted_doc_ids, i));
5869
5870 ib_vector_push(vector, &update->doc_id);
5871 }
5872
5873 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5874 }
5875
5876 /*********************************************************************//**
5877 Wait for the background thread to start. We poll to detect change
5878 of state, which is acceptable, since the wait should happen only
5879 once during startup.
5880 @return true if the thread started else FALSE (i.e timed out) */
5881 ibool
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5882 fts_wait_for_background_thread_to_start(
5883 /*====================================*/
5884 dict_table_t* table, /*!< in: table to which the thread
5885 is attached */
5886 ulint max_wait) /*!< in: time in microseconds, if
5887 set to 0 then it disables
5888 timeout checking */
5889 {
5890 ulint count = 0;
5891 ibool done = FALSE;
5892
5893 ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5894
5895 for (;;) {
5896 fts_t* fts = table->fts;
5897
5898 mutex_enter(&fts->bg_threads_mutex);
5899
5900 if (fts->fts_status & BG_THREAD_READY) {
5901
5902 done = TRUE;
5903 }
5904
5905 mutex_exit(&fts->bg_threads_mutex);
5906
5907 if (!done) {
5908 os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5909
5910 if (max_wait > 0) {
5911
5912 max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5913
5914 /* We ignore the residual value. */
5915 if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5916 break;
5917 }
5918 }
5919
5920 ++count;
5921 } else {
5922 break;
5923 }
5924
5925 if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5926 ib::error() << "The background thread for the FTS"
5927 " table " << table->name
5928 << " refuses to start";
5929
5930 count = 0;
5931 }
5932 }
5933
5934 return(done);
5935 }
5936
5937 /*********************************************************************//**
5938 Add the FTS document id hidden column. */
5939 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5940 fts_add_doc_id_column(
5941 /*==================*/
5942 dict_table_t* table, /*!< in/out: Table with FTS index */
5943 mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
5944 {
5945 dict_mem_table_add_col(
5946 table, heap,
5947 FTS_DOC_ID_COL_NAME,
5948 DATA_INT,
5949 dtype_form_prtype(
5950 DATA_NOT_NULL | DATA_UNSIGNED
5951 | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5952 sizeof(doc_id_t));
5953 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5954 }
5955
5956 /** Add new fts doc id to the update vector.
5957 @param[in] table the table that contains the FTS index.
5958 @param[in,out] ufield the fts doc id field in the update vector.
5959 No new memory is allocated for this in this
5960 function.
5961 @param[in,out] next_doc_id the fts doc id that has been added to the
5962 update vector. If 0, a new fts doc id is
5963 automatically generated. The memory provided
5964 for this argument will be used by the update
5965 vector. Ensure that the life time of this
5966 memory matches that of the update vector.
5967 @return the fts doc id used in the update vector */
5968 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5969 fts_update_doc_id(
5970 dict_table_t* table,
5971 upd_field_t* ufield,
5972 doc_id_t* next_doc_id)
5973 {
5974 doc_id_t doc_id;
5975 dberr_t error = DB_SUCCESS;
5976
5977 if (*next_doc_id) {
5978 doc_id = *next_doc_id;
5979 } else {
5980 /* Get the new document id that will be added. */
5981 error = fts_get_next_doc_id(table, &doc_id);
5982 }
5983
5984 if (error == DB_SUCCESS) {
5985 dict_index_t* clust_index;
5986 dict_col_t* col = dict_table_get_nth_col(
5987 table, table->fts->doc_col);
5988
5989 ufield->exp = NULL;
5990
5991 ufield->new_val.len = sizeof(doc_id);
5992
5993 clust_index = dict_table_get_first_index(table);
5994
5995 ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5996 dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5997
5998 /* It is possible we update record that has
5999 not yet be sync-ed from last crash. */
6000
6001 /* Convert to storage byte order. */
6002 ut_a(doc_id != FTS_NULL_DOC_ID);
6003 fts_write_doc_id((byte*) next_doc_id, doc_id);
6004
6005 ufield->new_val.data = next_doc_id;
6006 ufield->new_val.ext = 0;
6007 }
6008
6009 return(doc_id);
6010 }
6011
6012 /*********************************************************************//**
6013 Check if the table has an FTS index. This is the non-inline version
6014 of dict_table_has_fts_index().
6015 @return TRUE if table has an FTS index */
6016 ibool
fts_dict_table_has_fts_index(dict_table_t * table)6017 fts_dict_table_has_fts_index(
6018 /*=========================*/
6019 dict_table_t* table) /*!< in: table */
6020 {
6021 return(dict_table_has_fts_index(table));
6022 }
6023
6024 /** fts_t constructor.
6025 @param[in] table table with FTS indexes
6026 @param[in,out] heap memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)6027 fts_t::fts_t(
6028 const dict_table_t* table,
6029 mem_heap_t* heap)
6030 :
6031 bg_threads(0),
6032 fts_status(0),
6033 add_wq(NULL),
6034 cache(NULL),
6035 doc_col(ULINT_UNDEFINED),
6036 fts_heap(heap)
6037 {
6038 ut_a(table->fts == NULL);
6039
6040 mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
6041
6042 ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap);
6043
6044 indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
6045
6046 dict_table_get_all_fts_indexes(table, indexes);
6047 }
6048
6049 /** fts_t destructor. */
~fts_t()6050 fts_t::~fts_t()
6051 {
6052 mutex_free(&bg_threads_mutex);
6053
6054 ut_ad(add_wq == NULL);
6055
6056 if (cache != NULL) {
6057 fts_cache_clear(cache);
6058 fts_cache_destroy(cache);
6059 cache = NULL;
6060 }
6061
6062 /* There is no need to call ib_vector_free() on this->indexes
6063 because it is stored in this->fts_heap. */
6064 }
6065
6066 /*********************************************************************//**
6067 Create an instance of fts_t.
6068 @return instance of fts_t */
6069 fts_t*
fts_create(dict_table_t * table)6070 fts_create(
6071 /*=======*/
6072 dict_table_t* table) /*!< in/out: table with FTS indexes */
6073 {
6074 fts_t* fts;
6075 mem_heap_t* heap;
6076
6077 heap = mem_heap_create(512);
6078
6079 fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
6080
6081 new(fts) fts_t(table, heap);
6082
6083 return(fts);
6084 }
6085
6086 /*********************************************************************//**
6087 Free the FTS resources. */
6088 void
fts_free(dict_table_t * table)6089 fts_free(
6090 /*=====*/
6091 dict_table_t* table) /*!< in/out: table with FTS indexes */
6092 {
6093 fts_t* fts = table->fts;
6094
6095 fts->~fts_t();
6096
6097 mem_heap_free(fts->fts_heap);
6098
6099 table->fts = NULL;
6100 }
6101
6102 /*********************************************************************//**
6103 Signal FTS threads to initiate shutdown. */
6104 void
fts_start_shutdown(dict_table_t * table,fts_t * fts)6105 fts_start_shutdown(
6106 /*===============*/
6107 dict_table_t* table, /*!< in: table with FTS indexes */
6108 fts_t* fts) /*!< in: fts instance that needs
6109 to be informed about shutdown */
6110 {
6111 mutex_enter(&fts->bg_threads_mutex);
6112
6113 fts->fts_status |= BG_THREAD_STOP;
6114
6115 mutex_exit(&fts->bg_threads_mutex);
6116
6117 }
6118
6119 /*********************************************************************//**
6120 Wait for FTS threads to shutdown. */
6121 void
fts_shutdown(dict_table_t * table,fts_t * fts)6122 fts_shutdown(
6123 /*=========*/
6124 dict_table_t* table, /*!< in: table with FTS indexes */
6125 fts_t* fts) /*!< in: fts instance to shutdown */
6126 {
6127 mutex_enter(&fts->bg_threads_mutex);
6128
6129 ut_a(fts->fts_status & BG_THREAD_STOP);
6130
6131 dict_table_wait_for_bg_threads_to_exit(table, 20000);
6132
6133 mutex_exit(&fts->bg_threads_mutex);
6134 }
6135
6136 /*********************************************************************//**
6137 Take a FTS savepoint. */
6138 UNIV_INLINE
6139 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)6140 fts_savepoint_copy(
6141 /*===============*/
6142 const fts_savepoint_t* src, /*!< in: source savepoint */
6143 fts_savepoint_t* dst) /*!< out: destination savepoint */
6144 {
6145 const ib_rbt_node_t* node;
6146 const ib_rbt_t* tables;
6147
6148 tables = src->tables;
6149
6150 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
6151
6152 fts_trx_table_t* ftt_dst;
6153 const fts_trx_table_t** ftt_src;
6154
6155 ftt_src = rbt_value(const fts_trx_table_t*, node);
6156
6157 ftt_dst = fts_trx_table_clone(*ftt_src);
6158
6159 rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
6160 }
6161 }
6162
6163 /*********************************************************************//**
6164 Take a FTS savepoint. */
6165 void
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)6166 fts_savepoint_take(
6167 /*===============*/
6168 trx_t* trx, /*!< in: transaction */
6169 fts_trx_t* fts_trx, /*!< in: fts transaction */
6170 const char* name) /*!< in: savepoint name */
6171 {
6172 mem_heap_t* heap;
6173 fts_savepoint_t* savepoint;
6174 fts_savepoint_t* last_savepoint;
6175
6176 ut_a(name != NULL);
6177
6178 heap = fts_trx->heap;
6179
6180 /* The implied savepoint must exist. */
6181 ut_a(ib_vector_size(fts_trx->savepoints) > 0);
6182
6183 last_savepoint = static_cast<fts_savepoint_t*>(
6184 ib_vector_last(fts_trx->savepoints));
6185 savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
6186
6187 if (last_savepoint->tables != NULL) {
6188 fts_savepoint_copy(last_savepoint, savepoint);
6189 }
6190 }
6191
6192 /*********************************************************************//**
6193 Lookup a savepoint instance by name.
6194 @return ULINT_UNDEFINED if not found */
6195 UNIV_INLINE
6196 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)6197 fts_savepoint_lookup(
6198 /*==================*/
6199 ib_vector_t* savepoints, /*!< in: savepoints */
6200 const char* name) /*!< in: savepoint name */
6201 {
6202 ulint i;
6203
6204 ut_a(ib_vector_size(savepoints) > 0);
6205
6206 for (i = 1; i < ib_vector_size(savepoints); ++i) {
6207 fts_savepoint_t* savepoint;
6208
6209 savepoint = static_cast<fts_savepoint_t*>(
6210 ib_vector_get(savepoints, i));
6211
6212 if (strcmp(name, savepoint->name) == 0) {
6213 return(i);
6214 }
6215 }
6216
6217 return(ULINT_UNDEFINED);
6218 }
6219
6220 /*********************************************************************//**
6221 Release the savepoint data identified by name. All savepoints created
6222 after the named savepoint are kept.
6223 @return DB_SUCCESS or error code */
6224 void
fts_savepoint_release(trx_t * trx,const char * name)6225 fts_savepoint_release(
6226 /*==================*/
6227 trx_t* trx, /*!< in: transaction */
6228 const char* name) /*!< in: savepoint name */
6229 {
6230 ut_a(name != NULL);
6231
6232 ib_vector_t* savepoints = trx->fts_trx->savepoints;
6233
6234 ut_a(ib_vector_size(savepoints) > 0);
6235
6236 ulint i = fts_savepoint_lookup(savepoints, name);
6237 if (i != ULINT_UNDEFINED) {
6238 ut_a(i >= 1);
6239
6240 fts_savepoint_t* savepoint;
6241 savepoint = static_cast<fts_savepoint_t*>(
6242 ib_vector_get(savepoints, i));
6243
6244 if (i == ib_vector_size(savepoints) - 1) {
6245 /* If the savepoint is the last, we save its
6246 tables to the previous savepoint. */
6247 fts_savepoint_t* prev_savepoint;
6248 prev_savepoint = static_cast<fts_savepoint_t*>(
6249 ib_vector_get(savepoints, i - 1));
6250
6251 ib_rbt_t* tables = savepoint->tables;
6252 savepoint->tables = prev_savepoint->tables;
6253 prev_savepoint->tables = tables;
6254 }
6255
6256 fts_savepoint_free(savepoint);
6257 ib_vector_remove(savepoints, *(void**)savepoint);
6258
6259 /* Make sure we don't delete the implied savepoint. */
6260 ut_a(ib_vector_size(savepoints) > 0);
6261 }
6262 }
6263
6264 /**********************************************************************//**
6265 Refresh last statement savepoint. */
6266 void
fts_savepoint_laststmt_refresh(trx_t * trx)6267 fts_savepoint_laststmt_refresh(
6268 /*===========================*/
6269 trx_t* trx) /*!< in: transaction */
6270 {
6271
6272 fts_trx_t* fts_trx;
6273 fts_savepoint_t* savepoint;
6274
6275 fts_trx = trx->fts_trx;
6276
6277 savepoint = static_cast<fts_savepoint_t*>(
6278 ib_vector_pop(fts_trx->last_stmt));
6279 fts_savepoint_free(savepoint);
6280
6281 ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
6282 savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
6283 }
6284
6285 /********************************************************************
6286 Undo the Doc ID add/delete operations in last stmt */
6287 static
6288 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)6289 fts_undo_last_stmt(
6290 /*===============*/
6291 fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
6292 fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
6293 {
6294 ib_rbt_t* s_rows;
6295 ib_rbt_t* l_rows;
6296 const ib_rbt_node_t* node;
6297
6298 l_rows = l_ftt->rows;
6299 s_rows = s_ftt->rows;
6300
6301 for (node = rbt_first(l_rows);
6302 node;
6303 node = rbt_next(l_rows, node)) {
6304 fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
6305 ib_rbt_bound_t parent;
6306
6307 rbt_search(s_rows, &parent, &(l_row->doc_id));
6308
6309 if (parent.result == 0) {
6310 fts_trx_row_t* s_row = rbt_value(
6311 fts_trx_row_t, parent.last);
6312
6313 switch (l_row->state) {
6314 case FTS_INSERT:
6315 ut_free(rbt_remove_node(s_rows, parent.last));
6316 break;
6317
6318 case FTS_DELETE:
6319 if (s_row->state == FTS_NOTHING) {
6320 s_row->state = FTS_INSERT;
6321 } else if (s_row->state == FTS_DELETE) {
6322 ut_free(rbt_remove_node(
6323 s_rows, parent.last));
6324 }
6325 break;
6326
6327 /* FIXME: Check if FTS_MODIFY need to be addressed */
6328 case FTS_MODIFY:
6329 case FTS_NOTHING:
6330 break;
6331 default:
6332 ut_error;
6333 }
6334 }
6335 }
6336 }
6337
6338 /**********************************************************************//**
6339 Rollback to savepoint indentified by name.
6340 @return DB_SUCCESS or error code */
6341 void
fts_savepoint_rollback_last_stmt(trx_t * trx)6342 fts_savepoint_rollback_last_stmt(
6343 /*=============================*/
6344 trx_t* trx) /*!< in: transaction */
6345 {
6346 ib_vector_t* savepoints;
6347 fts_savepoint_t* savepoint;
6348 fts_savepoint_t* last_stmt;
6349 fts_trx_t* fts_trx;
6350 ib_rbt_bound_t parent;
6351 const ib_rbt_node_t* node;
6352 ib_rbt_t* l_tables;
6353 ib_rbt_t* s_tables;
6354
6355 fts_trx = trx->fts_trx;
6356 savepoints = fts_trx->savepoints;
6357
6358 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
6359 last_stmt = static_cast<fts_savepoint_t*>(
6360 ib_vector_last(fts_trx->last_stmt));
6361
6362 l_tables = last_stmt->tables;
6363 s_tables = savepoint->tables;
6364
6365 for (node = rbt_first(l_tables);
6366 node;
6367 node = rbt_next(l_tables, node)) {
6368
6369 fts_trx_table_t** l_ftt;
6370
6371 l_ftt = rbt_value(fts_trx_table_t*, node);
6372
6373 rbt_search_cmp(
6374 s_tables, &parent, &(*l_ftt)->table->id,
6375 fts_trx_table_id_cmp, NULL);
6376
6377 if (parent.result == 0) {
6378 fts_trx_table_t** s_ftt;
6379
6380 s_ftt = rbt_value(fts_trx_table_t*, parent.last);
6381
6382 fts_undo_last_stmt(*s_ftt, *l_ftt);
6383 }
6384 }
6385 }
6386
6387 /**********************************************************************//**
6388 Rollback to savepoint indentified by name.
6389 @return DB_SUCCESS or error code */
6390 void
fts_savepoint_rollback(trx_t * trx,const char * name)6391 fts_savepoint_rollback(
6392 /*===================*/
6393 trx_t* trx, /*!< in: transaction */
6394 const char* name) /*!< in: savepoint name */
6395 {
6396 ulint i;
6397 ib_vector_t* savepoints;
6398
6399 ut_a(name != NULL);
6400
6401 savepoints = trx->fts_trx->savepoints;
6402
6403 /* We pop all savepoints from the the top of the stack up to
6404 and including the instance that was found. */
6405 i = fts_savepoint_lookup(savepoints, name);
6406
6407 if (i != ULINT_UNDEFINED) {
6408 fts_savepoint_t* savepoint;
6409
6410 ut_a(i > 0);
6411
6412 while (ib_vector_size(savepoints) > i) {
6413 fts_savepoint_t* savepoint;
6414
6415 savepoint = static_cast<fts_savepoint_t*>(
6416 ib_vector_pop(savepoints));
6417
6418 if (savepoint->name != NULL) {
6419 /* Since name was allocated on the heap, the
6420 memory will be released when the transaction
6421 completes. */
6422 savepoint->name = NULL;
6423
6424 fts_savepoint_free(savepoint);
6425 }
6426 }
6427
6428 /* Pop all a elements from the top of the stack that may
6429 have been released. We have to be careful that we don't
6430 delete the implied savepoint. */
6431
6432 for (savepoint = static_cast<fts_savepoint_t*>(
6433 ib_vector_last(savepoints));
6434 ib_vector_size(savepoints) > 1
6435 && savepoint->name == NULL;
6436 savepoint = static_cast<fts_savepoint_t*>(
6437 ib_vector_last(savepoints))) {
6438
6439 ib_vector_pop(savepoints);
6440 }
6441
6442 /* Make sure we don't delete the implied savepoint. */
6443 ut_a(ib_vector_size(savepoints) > 0);
6444
6445 /* Restore the savepoint. */
6446 fts_savepoint_take(trx, trx->fts_trx, name);
6447 }
6448 }
6449
6450 /** Check if a table is an FTS auxiliary table name.
6451 @param[out] table FTS table info
6452 @param[in] name Table name
6453 @param[in] len Length of table name
6454 @return true if the name matches an auxiliary table name pattern */
6455 static
6456 bool
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)6457 fts_is_aux_table_name(
6458 fts_aux_table_t* table,
6459 const char* name,
6460 ulint len)
6461 {
6462 const char* ptr;
6463 char* end;
6464 char my_name[MAX_FULL_NAME_LEN + 1];
6465
6466 ut_ad(len <= MAX_FULL_NAME_LEN);
6467 ut_memcpy(my_name, name, len);
6468 my_name[len] = 0;
6469 end = my_name + len;
6470
6471 ptr = static_cast<const char*>(memchr(my_name, '/', len));
6472
6473 if (ptr != NULL) {
6474 /* We will start the match after the '/' */
6475 ++ptr;
6476 len = end - ptr;
6477 }
6478
6479 /* All auxiliary tables are prefixed with "FTS_" and the name
6480 length will be at the very least greater than 20 bytes. */
6481 if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
6482 ulint i;
6483
6484 /* Skip the prefix. */
6485 ptr += 4;
6486 len -= 4;
6487
6488 /* Try and read the table id. */
6489 if (!fts_read_object_id(&table->parent_id, ptr)) {
6490 return(false);
6491 }
6492
6493 /* Skip the table id. */
6494 ptr = static_cast<const char*>(memchr(ptr, '_', len));
6495
6496 if (ptr == NULL) {
6497 return(false);
6498 }
6499
6500 /* Skip the underscore. */
6501 ++ptr;
6502 ut_a(end > ptr);
6503 len = end - ptr;
6504
6505 /* First search the common table suffix array. */
6506 for (i = 0; fts_common_tables[i] != NULL; ++i) {
6507
6508 if (strncmp(ptr, fts_common_tables[i], len) == 0) {
6509 return(true);
6510 }
6511 }
6512
6513 /* Could be obsolete common tables. */
6514 if (strncmp(ptr, "ADDED", len) == 0
6515 || strncmp(ptr, "STOPWORDS", len) == 0) {
6516 return(true);
6517 }
6518
6519 /* Try and read the index id. */
6520 if (!fts_read_object_id(&table->index_id, ptr)) {
6521 return(false);
6522 }
6523
6524 /* Skip the table id. */
6525 ptr = static_cast<const char*>(memchr(ptr, '_', len));
6526
6527 if (ptr == NULL) {
6528 return(false);
6529 }
6530
6531 /* Skip the underscore. */
6532 ++ptr;
6533 ut_a(end > ptr);
6534 len = end - ptr;
6535
6536 /* Search the FT index specific array. */
6537 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
6538
6539 if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
6540 return(true);
6541 }
6542 }
6543
6544 /* Other FT index specific table(s). */
6545 if (strncmp(ptr, "DOC_ID", len) == 0) {
6546 return(true);
6547 }
6548 }
6549
6550 return(false);
6551 }
6552
6553 /**********************************************************************//**
6554 Callback function to read a single table ID column.
6555 @return Always return TRUE */
6556 static
6557 ibool
fts_read_tables(void * row,void * user_arg)6558 fts_read_tables(
6559 /*============*/
6560 void* row, /*!< in: sel_node_t* */
6561 void* user_arg) /*!< in: pointer to ib_vector_t */
6562 {
6563 int i;
6564 fts_aux_table_t*table;
6565 mem_heap_t* heap;
6566 ibool done = FALSE;
6567 ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg);
6568 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
6569 que_node_t* exp = sel_node->select_list;
6570
6571 /* Must be a heap allocated vector. */
6572 ut_a(tables->allocator->arg != NULL);
6573
6574 /* We will use this heap for allocating strings. */
6575 heap = static_cast<mem_heap_t*>(tables->allocator->arg);
6576 table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
6577
6578 memset(table, 0x0, sizeof(*table));
6579
6580 /* Iterate over the columns and read the values. */
6581 for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
6582
6583 dfield_t* dfield = que_node_get_val(exp);
6584 void* data = dfield_get_data(dfield);
6585 ulint len = dfield_get_len(dfield);
6586
6587 ut_a(len != UNIV_SQL_NULL);
6588
6589 /* Note: The column numbers below must match the SELECT */
6590 switch (i) {
6591 case 0: /* NAME */
6592
6593 if (!fts_is_aux_table_name(
6594 table, static_cast<const char*>(data), len)) {
6595 ib_vector_pop(tables);
6596 done = TRUE;
6597 break;
6598 }
6599
6600 table->name = static_cast<char*>(
6601 mem_heap_alloc(heap, len + 1));
6602 memcpy(table->name, data, len);
6603 table->name[len] = 0;
6604 break;
6605
6606 case 1: /* ID */
6607 ut_a(len == 8);
6608 table->id = mach_read_from_8(
6609 static_cast<const byte*>(data));
6610 break;
6611
6612 default:
6613 ut_error;
6614 }
6615 }
6616
6617 return(TRUE);
6618 }
6619
6620 /******************************************************************//**
6621 Callback that sets a hex formatted FTS table's flags2 in
6622 SYS_TABLES. The flags is stored in MIX_LEN column.
6623 @return FALSE if all OK */
6624 static
6625 ibool
fts_set_hex_format(void * row,void * user_arg)6626 fts_set_hex_format(
6627 /*===============*/
6628 void* row, /*!< in: sel_node_t* */
6629 void* user_arg) /*!< in: bool set/unset flag */
6630 {
6631 sel_node_t* node = static_cast<sel_node_t*>(row);
6632 dfield_t* dfield = que_node_get_val(node->select_list);
6633
6634 ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
6635 ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t));
6636 /* There should be at most one matching record. So the value
6637 must be the default value. */
6638 ut_ad(mach_read_from_4(static_cast<byte*>(user_arg))
6639 == ULINT32_UNDEFINED);
6640
6641 ulint flags2 = mach_read_from_4(
6642 static_cast<byte*>(dfield_get_data(dfield)));
6643
6644 flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
6645
6646 mach_write_to_4(static_cast<byte*>(user_arg), flags2);
6647
6648 return(FALSE);
6649 }
6650
6651 /*****************************************************************//**
6652 Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
6653 @return DB_SUCCESS or error code. */
6654 dberr_t
fts_update_hex_format_flag(trx_t * trx,table_id_t table_id,bool dict_locked)6655 fts_update_hex_format_flag(
6656 /*=======================*/
6657 trx_t* trx, /*!< in/out: transaction that
6658 covers the update */
6659 table_id_t table_id, /*!< in: Table for which we want
6660 to set the root table->flags2 */
6661 bool dict_locked) /*!< in: set to true if the
6662 caller already owns the
6663 dict_sys_t::mutex. */
6664 {
6665 pars_info_t* info;
6666 ib_uint32_t flags2;
6667
6668 static const char sql[] =
6669 "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
6670 "DECLARE FUNCTION my_func;\n"
6671 "DECLARE CURSOR c IS\n"
6672 " SELECT MIX_LEN"
6673 " FROM SYS_TABLES"
6674 " WHERE ID = :table_id FOR UPDATE;"
6675 "\n"
6676 "BEGIN\n"
6677 "OPEN c;\n"
6678 "WHILE 1 = 1 LOOP\n"
6679 " FETCH c INTO my_func();\n"
6680 " IF c % NOTFOUND THEN\n"
6681 " EXIT;\n"
6682 " END IF;\n"
6683 "END LOOP;\n"
6684 "UPDATE SYS_TABLES"
6685 " SET MIX_LEN = :flags2"
6686 " WHERE ID = :table_id;\n"
6687 "CLOSE c;\n"
6688 "END;\n";
6689
6690 flags2 = ULINT32_UNDEFINED;
6691
6692 info = pars_info_create();
6693
6694 pars_info_add_ull_literal(info, "table_id", table_id);
6695 pars_info_bind_int4_literal(info, "flags2", &flags2);
6696
6697 pars_info_bind_function(
6698 info, "my_func", fts_set_hex_format, &flags2);
6699
6700 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6701 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6702 }
6703
6704 dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
6705
6706 ut_a(flags2 != ULINT32_UNDEFINED);
6707
6708 return(err);
6709 }
6710
6711 /*********************************************************************//**
6712 Rename an aux table to HEX format. It's called when "%016llu" is used
6713 to format an object id in table name, which only happens in Windows. */
6714 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6715 dberr_t
fts_rename_one_aux_table_to_hex_format(trx_t * trx,const fts_aux_table_t * aux_table,const dict_table_t * parent_table)6716 fts_rename_one_aux_table_to_hex_format(
6717 /*===================================*/
6718 trx_t* trx, /*!< in: transaction */
6719 const fts_aux_table_t* aux_table, /*!< in: table info */
6720 const dict_table_t* parent_table) /*!< in: parent table name */
6721 {
6722 const char* ptr;
6723 fts_table_t fts_table;
6724 char new_name[MAX_FULL_NAME_LEN];
6725 dberr_t error;
6726
6727 ptr = strchr(aux_table->name, '/');
6728 ut_a(ptr != NULL);
6729 ++ptr;
6730 /* Skip "FTS_", table id and underscore */
6731 for (ulint i = 0; i < 2; ++i) {
6732 ptr = strchr(ptr, '_');
6733 ut_a(ptr != NULL);
6734 ++ptr;
6735 }
6736
6737 fts_table.suffix = NULL;
6738 if (aux_table->index_id == 0) {
6739 fts_table.type = FTS_COMMON_TABLE;
6740
6741 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
6742 if (strcmp(ptr, fts_common_tables[i]) == 0) {
6743 fts_table.suffix = fts_common_tables[i];
6744 break;
6745 }
6746 }
6747 } else {
6748 fts_table.type = FTS_INDEX_TABLE;
6749
6750 /* Skip index id and underscore */
6751 ptr = strchr(ptr, '_');
6752 ut_a(ptr != NULL);
6753 ++ptr;
6754
6755 for (ulint i = 0; fts_index_selector[i].value; ++i) {
6756 if (strcmp(ptr, fts_get_suffix(i)) == 0) {
6757 fts_table.suffix = fts_get_suffix(i);
6758 break;
6759 }
6760 }
6761 }
6762
6763 ut_a(fts_table.suffix != NULL);
6764
6765 fts_table.parent = parent_table->name.m_name;
6766 fts_table.table_id = aux_table->parent_id;
6767 fts_table.index_id = aux_table->index_id;
6768 fts_table.table = parent_table;
6769
6770 fts_get_table_name(&fts_table, new_name);
6771 ut_ad(strcmp(new_name, aux_table->name) != 0);
6772
6773 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6774 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6775 }
6776
6777 error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
6778 FALSE);
6779
6780 if (error != DB_SUCCESS) {
6781 ib::warn() << "Failed to rename aux table '"
6782 << aux_table->name << "' to new format '"
6783 << new_name << "'.";
6784 } else {
6785 ib::info() << "Renamed aux table '" << aux_table->name
6786 << "' to '" << new_name << "'.";
6787 }
6788
6789 return(error);
6790 }
6791
/**********************************************************************//**
Rename all aux tables of a parent table to HEX format. Also set aux tables'
flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
It's called when "%016llu" is used to format an object id in table name,
which only happens in Windows.
Note the ids in tables are correct but the names are old ambiguous ones.

This function should make sure that either all the parent table and aux tables
are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set.

On any failure trx is rolled back inside this function; the renames done
so far are then reverted one by one in separate background transactions
and the in-memory flag of the parent table is cleared again.
@return DB_SUCCESS, or the error code of the step that failed */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_rename_aux_tables_to_hex_format_low(
/*====================================*/
	trx_t*		trx,		/*!< in: transaction */
	dict_table_t*	parent_table,	/*!< in: parent table */
	ib_vector_t*	tables)		/*!< in: aux tables to rename. */
{
	dberr_t		error;
	ulint		count;

	ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
	ut_ad(!ib_vector_is_empty(tables));

	/* Persist the flag for the parent table first. */
	error = fts_update_hex_format_flag(trx, parent_table->id, true);

	if (error != DB_SUCCESS) {
		ib::warn() << "Setting parent table " << parent_table->name
			<< " to hex format failed.";
		fts_sql_rollback(trx);
		return(error);
	}

	/* Mirror the persisted flag in the in-memory table object. */
	DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);

	/* Rename every aux table and persist its flag. On the first
	failure the loop is left with count pointing at the failed table. */
	for (count = 0; count < ib_vector_size(tables); ++count) {
		dict_table_t*		table;
		fts_aux_table_t*	aux_table;

		aux_table = static_cast<fts_aux_table_t*>(
			ib_vector_get(tables, count));

		table = dict_table_open_on_id(aux_table->id, TRUE,
					      DICT_TABLE_OP_NORMAL);

		ut_ad(table != NULL);
		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));

		/* Set HEX_NAME flag here to make sure we can get correct
		new table name in following function */
		DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
		error = fts_rename_one_aux_table_to_hex_format(trx,
				aux_table, parent_table);
		/* We will rollback the trx if the error != DB_SUCCESS,
		so setting the flag here is the same with setting it in
		row_rename_table_for_mysql */
		DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);

		if (error != DB_SUCCESS) {
			dict_table_close(table, TRUE, FALSE);

			ib::warn() << "Failed to rename one aux table "
				<< aux_table->name << ". Will revert"
				" all successful rename operations.";

			fts_sql_rollback(trx);
			break;
		}

		/* The rename succeeded; persist the flag of the aux table
		as well. */
		error = fts_update_hex_format_flag(trx, aux_table->id, true);
		dict_table_close(table, TRUE, FALSE);

		if (error != DB_SUCCESS) {
			ib::warn() << "Setting aux table " << aux_table->name
				<< " to hex format failed.";

			fts_sql_rollback(trx);
			break;
		}
	}

	if (error != DB_SUCCESS) {
		/* The loop above was left early, so count is a valid
		index: that of the table whose processing failed. */
		ut_ad(count != ib_vector_size(tables));

		/* If rename fails, the trx would be rolled back, we can't
		use it any more, we'll start a new background trx to do
		the reverting. */

		ut_ad(!trx_is_started(trx));

		/* Becomes true once a revert fails; from then on the
		remaining tables only get their in-memory flag cleared. */
		bool not_rename = false;

		/* Try to revert those successful rename operations
		in order to revert the ibd file rename. The table at
		index count is included. */
		for (ulint i = 0; i <= count; ++i) {
			dict_table_t*		table;
			fts_aux_table_t*	aux_table;
			trx_t*			trx_bg;
			dberr_t			err;

			aux_table = static_cast<fts_aux_table_t*>(
				ib_vector_get(tables, i));

			table = dict_table_open_on_id(aux_table->id, TRUE,
						      DICT_TABLE_OP_NORMAL);
			ut_ad(table != NULL);

			if (not_rename) {
				DICT_TF2_FLAG_UNSET(table,
						    DICT_TF2_FTS_AUX_HEX_NAME);
			}

			/* Tables without the flag are skipped; there is
			nothing to rename back for them. */
			if (!DICT_TF2_FLAG_IS_SET(table,
						  DICT_TF2_FTS_AUX_HEX_NAME)) {
				dict_table_close(table, TRUE, FALSE);
				continue;
			}

			/* Each revert runs in its own background
			transaction. */
			trx_bg = trx_allocate_for_background();
			trx_bg->op_info = "Revert half done rename";
			trx_bg->dict_operation_lock_mode = RW_X_LATCH;
			trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);

			/* Clear the flag, then rename the table from its
			current name back to the name recorded in aux_table. */
			DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
			err = row_rename_table_for_mysql(table->name.m_name,
							 aux_table->name,
							 trx_bg, FALSE);

			trx_bg->dict_operation_lock_mode = 0;
			dict_table_close(table, TRUE, FALSE);

			if (err != DB_SUCCESS) {
				ib::warn() << "Failed to revert table "
					<< table->name << ". Please revert"
					" manually.";
				fts_sql_rollback(trx_bg);
				trx_free_for_background(trx_bg);
				/* Continue to clear aux tables' flags2 */
				not_rename = true;
				continue;
			}

			fts_sql_commit(trx_bg);
			trx_free_for_background(trx_bg);
		}

		/* Finally undo the in-memory flag of the parent table. */
		DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
	}

	return(error);
}
6942
/**********************************************************************//**
Convert an id, which is actually a decimal number but was regard as a HEX
from a string, to its real value.
The id is printed with the UINT64PFx format (presumably zero padded
16-digit hex, given the length asserted below - confirm against the
macro definition) and the resulting text is parsed again as a decimal
number.
@return the re-parsed value; 0 if nothing could be parsed, since dec_id
keeps its initial value in that case */
static
ib_id_t
fts_fake_hex_to_dec(
/*================*/
	ib_id_t		id)		/*!< in: number to convert */
{
	ib_id_t		dec_id = 0;
	char		tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];

	/* The return values of sprintf() and sscanf() are only captured
	in debug builds, where ut_ad() checks them; the preprocessor
	blocks splice "ret =" in front of the calls. */
#ifdef UNIV_DEBUG
	int		ret =
#endif /* UNIV_DEBUG */
	sprintf(tmp_id, UINT64PFx, id);
	/* Exactly 16 characters are expected to have been written. */
	ut_ad(ret == 16);
#ifdef UNIV_DEBUG
	ret =
#endif /* UNIV_DEBUG */
#ifdef _WIN32
	sscanf(tmp_id, "%016llu", &dec_id);
#else
	sscanf(tmp_id, "%016" PRIu64, &dec_id);
#endif /* _WIN32 */
	/* Exactly one value is expected to have been converted. */
	ut_ad(ret == 1);

	return dec_id;
}
6972
6973 /*********************************************************************//**
6974 Compare two fts_aux_table_t parent_ids.
6975 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
6976 UNIV_INLINE
6977 int
fts_check_aux_table_parent_id_cmp(const void * p1,const void * p2)6978 fts_check_aux_table_parent_id_cmp(
6979 /*==============================*/
6980 const void* p1, /*!< in: id1 */
6981 const void* p2) /*!< in: id2 */
6982 {
6983 const fts_aux_table_t* fa1 = static_cast<const fts_aux_table_t*>(p1);
6984 const fts_aux_table_t* fa2 = static_cast<const fts_aux_table_t*>(p2);
6985
6986 return static_cast<int>(fa1->parent_id - fa2->parent_id);
6987 }
6988
6989 /** Mark all the fts index associated with the parent table as corrupted.
6990 @param[in] trx transaction
6991 @param[in, out] parent_table fts index associated with this parent table
6992 will be marked as corrupted. */
6993 static
6994 void
fts_parent_all_index_set_corrupt(trx_t * trx,dict_table_t * parent_table)6995 fts_parent_all_index_set_corrupt(
6996 trx_t* trx,
6997 dict_table_t* parent_table)
6998 {
6999 fts_t* fts = parent_table->fts;
7000
7001 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
7002 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
7003 }
7004
7005 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
7006 dict_index_t* index = static_cast<dict_index_t*>(
7007 ib_vector_getp_const(fts->indexes, j));
7008 dict_set_corrupted(index,
7009 trx, "DROP ORPHANED TABLE");
7010 }
7011 }
7012
7013 /** Mark the fts index which index id matches the id as corrupted.
7014 @param[in] trx transaction
7015 @param[in] id index id to search
7016 @param[in, out] parent_table parent table to check with all
7017 the index. */
7018 static
7019 void
fts_set_index_corrupt(trx_t * trx,index_id_t id,dict_table_t * table)7020 fts_set_index_corrupt(
7021 trx_t* trx,
7022 index_id_t id,
7023 dict_table_t* table)
7024 {
7025 fts_t* fts = table->fts;
7026
7027 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
7028 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
7029 }
7030
7031 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
7032 dict_index_t* index = static_cast<dict_index_t*>(
7033 ib_vector_getp_const(fts->indexes, j));
7034 if (index->id == id) {
7035 dict_set_corrupted(index, trx,
7036 "DROP ORPHANED TABLE");
7037 break;
7038 }
7039 }
7040 }
7041
7042 /** Check the index for the aux table is corrupted.
7043 @param[in] aux_table auxiliary table
7044 @retval nonzero if index is corrupted, zero for valid index */
7045 static
7046 ulint
fts_check_corrupt_index(fts_aux_table_t * aux_table)7047 fts_check_corrupt_index(
7048 fts_aux_table_t* aux_table)
7049 {
7050 dict_table_t* table;
7051 dict_index_t* index;
7052 table = dict_table_open_on_id(
7053 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7054
7055 if (table == NULL) {
7056 return(0);
7057 }
7058
7059 for (index = UT_LIST_GET_FIRST(table->indexes);
7060 index;
7061 index = UT_LIST_GET_NEXT(indexes, index)) {
7062 if (index->id == aux_table->index_id) {
7063 ut_ad(index->type & DICT_FTS);
7064 dict_table_close(table, true, false);
7065 return(dict_index_is_corrupted(index));
7066 }
7067 }
7068
7069 dict_table_close(table, true, false);
7070 return(0);
7071 }
7072
7073 /* Get parent table name if it's a fts aux table
7074 @param[in] aux_table_name aux table name
7075 @param[in] aux_table_len aux table length
7076 @return parent table name, or NULL */
7077 char*
fts_get_parent_table_name(const char * aux_table_name,ulint aux_table_len)7078 fts_get_parent_table_name(
7079 const char* aux_table_name,
7080 ulint aux_table_len)
7081 {
7082 fts_aux_table_t aux_table;
7083 char* parent_table_name = NULL;
7084
7085 if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
7086 dict_table_t* parent_table;
7087
7088 parent_table = dict_table_open_on_id(
7089 aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7090
7091 if (parent_table != NULL) {
7092 parent_table_name = mem_strdupl(
7093 parent_table->name.m_name,
7094 strlen(parent_table->name.m_name));
7095
7096 dict_table_close(parent_table, TRUE, FALSE);
7097 }
7098 }
7099
7100 return(parent_table_name);
7101 }
7102
7103 /** Check the validity of the parent table.
7104 @param[in] aux_table auxiliary table
7105 @return true if it is a valid table or false if it is not */
7106 static
7107 bool
fts_valid_parent_table(const fts_aux_table_t * aux_table)7108 fts_valid_parent_table(
7109 const fts_aux_table_t* aux_table)
7110 {
7111 dict_table_t* parent_table;
7112 bool valid = false;
7113
7114 parent_table = dict_table_open_on_id(
7115 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7116
7117 if (parent_table != NULL && parent_table->fts != NULL) {
7118 if (aux_table->index_id == 0) {
7119 valid = true;
7120 } else {
7121 index_id_t id = aux_table->index_id;
7122 dict_index_t* index;
7123
7124 /* Search for the FT index in the table's list. */
7125 for (index = UT_LIST_GET_FIRST(parent_table->indexes);
7126 index;
7127 index = UT_LIST_GET_NEXT(indexes, index)) {
7128 if (index->id == id) {
7129 valid = true;
7130 break;
7131 }
7132
7133 }
7134 }
7135 }
7136
7137 if (parent_table) {
7138 dict_table_close(parent_table, TRUE, FALSE);
7139 }
7140
7141 return(valid);
7142 }
7143
7144 /** Try to rename all aux tables of the specified parent table.
7145 @param[in] aux_tables aux_tables to be renamed
7146 @param[in] parent_table parent table of all aux
7147 tables stored in tables. */
7148 static
7149 void
fts_rename_aux_tables_to_hex_format(ib_vector_t * aux_tables,dict_table_t * parent_table)7150 fts_rename_aux_tables_to_hex_format(
7151 ib_vector_t* aux_tables,
7152 dict_table_t* parent_table)
7153 {
7154 dberr_t err;
7155 trx_t* trx_rename = trx_allocate_for_background();
7156 trx_rename->op_info = "Rename aux tables to hex format";
7157 trx_rename->dict_operation_lock_mode = RW_X_LATCH;
7158 trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
7159
7160 err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
7161 parent_table, aux_tables);
7162
7163 trx_rename->dict_operation_lock_mode = 0;
7164
7165 if (err != DB_SUCCESS) {
7166
7167 ib::warn() << "Rollback operations on all aux tables of "
7168 "table "<< parent_table->name << ". All the fts index "
7169 "associated with the table are marked as corrupted. "
7170 "Please rebuild the index again.";
7171
7172 /* Corrupting the fts index related to parent table. */
7173 trx_t* trx_corrupt;
7174 trx_corrupt = trx_allocate_for_background();
7175 trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
7176 trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
7177 fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
7178 trx_corrupt->dict_operation_lock_mode = 0;
7179 fts_sql_commit(trx_corrupt);
7180 trx_free_for_background(trx_corrupt);
7181 } else {
7182 fts_sql_commit(trx_rename);
7183 }
7184
7185 trx_free_for_background(trx_rename);
7186 ib_vector_reset(aux_tables);
7187 }
7188
7189 /** Set the hex format flag for the parent table.
7190 @param[in, out] parent_table parent table
7191 @param[in] trx transaction */
7192 static
7193 void
fts_set_parent_hex_format_flag(dict_table_t * parent_table,trx_t * trx)7194 fts_set_parent_hex_format_flag(
7195 dict_table_t* parent_table,
7196 trx_t* trx)
7197 {
7198 if (!DICT_TF2_FLAG_IS_SET(parent_table,
7199 DICT_TF2_FTS_AUX_HEX_NAME)) {
7200 DBUG_EXECUTE_IF("parent_table_flag_fail", DBUG_SUICIDE(););
7201
7202 dberr_t err = fts_update_hex_format_flag(
7203 trx, parent_table->id, true);
7204
7205 if (err != DB_SUCCESS) {
7206 ib::fatal() << "Setting parent table "
7207 << parent_table->name
7208 << "to hex format failed. Please try "
7209 << "to restart the server again, if it "
7210 << "doesn't work, the system tables "
7211 << "might be corrupted.";
7212 } else {
7213 DICT_TF2_FLAG_SET(
7214 parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
7215 }
7216 }
7217 }
7218
7219 /** Drop the obsolete auxilary table.
7220 @param[in] tables tables to be dropped. */
7221 static
7222 void
fts_drop_obsolete_aux_table_from_vector(ib_vector_t * tables)7223 fts_drop_obsolete_aux_table_from_vector(
7224 ib_vector_t* tables)
7225 {
7226 dberr_t err;
7227
7228 for (ulint count = 0; count < ib_vector_size(tables);
7229 ++count) {
7230
7231 fts_aux_table_t* aux_drop_table;
7232 aux_drop_table = static_cast<fts_aux_table_t*>(
7233 ib_vector_get(tables, count));
7234 trx_t* trx_drop = trx_allocate_for_background();
7235 trx_drop->op_info = "Drop obsolete aux tables";
7236 trx_drop->dict_operation_lock_mode = RW_X_LATCH;
7237 trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
7238
7239 err = row_drop_table_for_mysql(
7240 aux_drop_table->name, trx_drop, false, true);
7241
7242 trx_drop->dict_operation_lock_mode = 0;
7243
7244 if (err != DB_SUCCESS) {
7245 /* We don't need to worry about the
7246 failure, since server would try to
7247 drop it on next restart, even if
7248 the table was broken. */
7249 ib::warn() << "Failed to drop obsolete aux table "
7250 << aux_drop_table->name << ", which is "
7251 << "harmless. will try to drop it on next "
7252 << "restart.";
7253
7254 fts_sql_rollback(trx_drop);
7255 } else {
7256 ib::info() << "Dropped obsolete aux"
7257 " table '" << aux_drop_table->name
7258 << "'.";
7259
7260 fts_sql_commit(trx_drop);
7261 }
7262
7263 trx_free_for_background(trx_drop);
7264 }
7265 }
7266
7267 /** Drop all the auxiliary table present in the vector.
7268 @param[in] trx transaction
7269 @param[in] tables tables to be dropped */
7270 static
7271 void
fts_drop_aux_table_from_vector(trx_t * trx,ib_vector_t * tables)7272 fts_drop_aux_table_from_vector(
7273 trx_t* trx,
7274 ib_vector_t* tables)
7275 {
7276 for (ulint count = 0; count < ib_vector_size(tables);
7277 ++count) {
7278 fts_aux_table_t* aux_drop_table;
7279 aux_drop_table = static_cast<fts_aux_table_t*>(
7280 ib_vector_get(tables, count));
7281
7282 /* Check for the validity of the parent table */
7283 if (!fts_valid_parent_table(aux_drop_table)) {
7284
7285 ib::warn() << "Parent table of FTS auxiliary table "
7286 << aux_drop_table->name << " not found.";
7287
7288 dberr_t err = fts_drop_table(trx, aux_drop_table->name);
7289 if (err == DB_FAIL) {
7290
7291 char* path = fil_make_filepath(
7292 NULL, aux_drop_table->name, IBD, false);
7293
7294 if (path != NULL) {
7295 os_file_delete_if_exists(
7296 innodb_data_file_key,
7297 path , NULL);
7298 ut_free(path);
7299 }
7300 }
7301 }
7302 }
7303 }
7304
7305 /**********************************************************************//**
7306 Check and drop all orphaned FTS auxiliary tables, those that don't have
7307 a parent table or FTS index defined on them.
7308 @return DB_SUCCESS or error code */
7309 static MY_ATTRIBUTE((nonnull))
7310 void
fts_check_and_drop_orphaned_tables(trx_t * trx,ib_vector_t * tables)7311 fts_check_and_drop_orphaned_tables(
7312 /*===============================*/
7313 trx_t* trx, /*!< in: transaction */
7314 ib_vector_t* tables) /*!< in: tables to check */
7315 {
7316 mem_heap_t* heap;
7317 ib_vector_t* aux_tables_to_rename;
7318 ib_vector_t* invalid_aux_tables;
7319 ib_vector_t* valid_aux_tables;
7320 ib_vector_t* drop_aux_tables;
7321 ib_vector_t* obsolete_aux_tables;
7322 ib_alloc_t* heap_alloc;
7323
7324 heap = mem_heap_create(1024);
7325 heap_alloc = ib_heap_allocator_create(heap);
7326
7327 /* We store all aux tables belonging to the same parent table here,
7328 and rename all these tables in a batch mode. */
7329 aux_tables_to_rename = ib_vector_create(heap_alloc,
7330 sizeof(fts_aux_table_t), 128);
7331
7332 /* We store all fake auxiliary table and orphaned table here. */
7333 invalid_aux_tables = ib_vector_create(heap_alloc,
7334 sizeof(fts_aux_table_t), 128);
7335
7336 /* We store all valid aux tables. We use this to filter the
7337 fake auxiliary table from invalid auxiliary tables. */
7338 valid_aux_tables = ib_vector_create(heap_alloc,
7339 sizeof(fts_aux_table_t), 128);
7340
7341 /* We store all auxiliary tables to be dropped. */
7342 drop_aux_tables = ib_vector_create(heap_alloc,
7343 sizeof(fts_aux_table_t), 128);
7344
7345 /* We store all obsolete auxiliary tables to be dropped. */
7346 obsolete_aux_tables = ib_vector_create(heap_alloc,
7347 sizeof(fts_aux_table_t), 128);
7348
7349 /* Sort by parent_id first, in case rename will fail */
7350 ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
7351
7352 for (ulint i = 0; i < ib_vector_size(tables); ++i) {
7353 dict_table_t* parent_table;
7354 fts_aux_table_t* aux_table;
7355 bool drop = false;
7356 dict_table_t* table;
7357 fts_aux_table_t* next_aux_table = NULL;
7358 ib_id_t orig_parent_id = 0;
7359 ib_id_t orig_index_id = 0;
7360 bool rename = false;
7361
7362 aux_table = static_cast<fts_aux_table_t*>(
7363 ib_vector_get(tables, i));
7364
7365 table = dict_table_open_on_id(
7366 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7367 orig_parent_id = aux_table->parent_id;
7368 orig_index_id = aux_table->index_id;
7369
7370 if (table == NULL
7371 || strcmp(table->name.m_name, aux_table->name)) {
7372
7373 bool fake_aux = false;
7374
7375 if (table != NULL) {
7376 dict_table_close(table, TRUE, FALSE);
7377 }
7378
7379 if (i + 1 < ib_vector_size(tables)) {
7380 next_aux_table = static_cast<fts_aux_table_t*>(
7381 ib_vector_get(tables, i + 1));
7382 }
7383
7384 /* To know whether aux table is fake fts or
7385 orphan fts table. */
7386 for (ulint count = 0;
7387 count < ib_vector_size(valid_aux_tables);
7388 count++) {
7389 fts_aux_table_t* valid_aux;
7390 valid_aux = static_cast<fts_aux_table_t*>(
7391 ib_vector_get(valid_aux_tables, count));
7392 if (strcmp(valid_aux->name,
7393 aux_table->name) == 0) {
7394 fake_aux = true;
7395 break;
7396 }
7397 }
7398
7399 /* All aux tables of parent table, whose id is
7400 last_parent_id, have been checked, try to rename
7401 them if necessary. */
7402 if ((next_aux_table == NULL
7403 || orig_parent_id != next_aux_table->parent_id)
7404 && (!ib_vector_is_empty(aux_tables_to_rename))) {
7405
7406 ib_id_t parent_id = fts_fake_hex_to_dec(
7407 aux_table->parent_id);
7408
7409 parent_table = dict_table_open_on_id(
7410 parent_id, TRUE,
7411 DICT_TABLE_OP_NORMAL);
7412
7413 fts_rename_aux_tables_to_hex_format(
7414 aux_tables_to_rename, parent_table);
7415
7416 dict_table_close(parent_table, TRUE,
7417 FALSE);
7418 }
7419
7420 /* If the aux table is fake aux table. Skip it. */
7421 if (!fake_aux) {
7422 ib_vector_push(invalid_aux_tables, aux_table);
7423 }
7424
7425 continue;
7426 } else if (!DICT_TF2_FLAG_IS_SET(table,
7427 DICT_TF2_FTS_AUX_HEX_NAME)) {
7428
7429 aux_table->parent_id = fts_fake_hex_to_dec(
7430 aux_table->parent_id);
7431
7432 if (aux_table->index_id != 0) {
7433 aux_table->index_id = fts_fake_hex_to_dec(
7434 aux_table->index_id);
7435 }
7436
7437 ut_ad(aux_table->id > aux_table->parent_id);
7438
7439 /* Check whether parent table id and index id
7440 are stored as decimal format. */
7441 if (fts_valid_parent_table(aux_table)) {
7442
7443 parent_table = dict_table_open_on_id(
7444 aux_table->parent_id, true,
7445 DICT_TABLE_OP_NORMAL);
7446
7447 ut_ad(parent_table != NULL);
7448 ut_ad(parent_table->fts != NULL);
7449
7450 if (!DICT_TF2_FLAG_IS_SET(
7451 parent_table,
7452 DICT_TF2_FTS_AUX_HEX_NAME)) {
7453 rename = true;
7454 }
7455
7456 dict_table_close(parent_table, TRUE, FALSE);
7457 }
7458
7459 if (!rename) {
7460 /* Reassign the original value of
7461 aux table if it is not in decimal format */
7462 aux_table->parent_id = orig_parent_id;
7463 aux_table->index_id = orig_index_id;
7464 }
7465 }
7466
7467 if (table != NULL) {
7468 dict_table_close(table, TRUE, FALSE);
7469 }
7470
7471 if (!rename) {
7472 /* Check the validity of the parent table. */
7473 if (!fts_valid_parent_table(aux_table)) {
7474 drop = true;
7475 }
7476 }
7477
7478 /* Filter out the fake aux table by comparing with the
7479 current valid auxiliary table name. */
7480 for (ulint count = 0;
7481 count < ib_vector_size(invalid_aux_tables); count++) {
7482 fts_aux_table_t* invalid_aux;
7483 invalid_aux = static_cast<fts_aux_table_t*>(
7484 ib_vector_get(invalid_aux_tables, count));
7485 if (strcmp(invalid_aux->name, aux_table->name) == 0) {
7486 ib_vector_remove(
7487 invalid_aux_tables,
7488 *reinterpret_cast<void**>(invalid_aux));
7489 break;
7490 }
7491 }
7492
7493 ib_vector_push(valid_aux_tables, aux_table);
7494
7495 /* If the index associated with aux table is corrupted,
7496 skip it. */
7497 if (fts_check_corrupt_index(aux_table) > 0) {
7498
7499 if (i + 1 < ib_vector_size(tables)) {
7500 next_aux_table = static_cast<fts_aux_table_t*>(
7501 ib_vector_get(tables, i + 1));
7502 }
7503
7504 if (next_aux_table == NULL
7505 || orig_parent_id != next_aux_table->parent_id) {
7506
7507 parent_table = dict_table_open_on_id(
7508 aux_table->parent_id, TRUE,
7509 DICT_TABLE_OP_NORMAL);
7510
7511 if (!ib_vector_is_empty(aux_tables_to_rename)) {
7512 fts_rename_aux_tables_to_hex_format(
7513 aux_tables_to_rename, parent_table);
7514 } else {
7515 fts_set_parent_hex_format_flag(
7516 parent_table, trx);
7517 }
7518
7519 dict_table_close(parent_table, TRUE, FALSE);
7520 }
7521
7522 continue;
7523 }
7524
7525 parent_table = dict_table_open_on_id(
7526 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7527
7528 if (drop) {
7529 ib_vector_push(drop_aux_tables, aux_table);
7530 } else {
7531 if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
7532 ib_vector_push(obsolete_aux_tables, aux_table);
7533 continue;
7534 }
7535 }
7536
7537 /* If the aux table is in decimal format, we should
7538 rename it, so push it to aux_tables_to_rename */
7539 if (!drop && rename) {
7540 bool rename_table = true;
7541 for (ulint count = 0;
7542 count < ib_vector_size(aux_tables_to_rename);
7543 count++) {
7544 fts_aux_table_t* rename_aux =
7545 static_cast<fts_aux_table_t*>(
7546 ib_vector_get(aux_tables_to_rename,
7547 count));
7548 if (strcmp(rename_aux->name,
7549 aux_table->name) == 0) {
7550 rename_table = false;
7551 break;
7552 }
7553 }
7554
7555 if (rename_table) {
7556 ib_vector_push(aux_tables_to_rename,
7557 aux_table);
7558 }
7559 }
7560
7561 if (i + 1 < ib_vector_size(tables)) {
7562 next_aux_table = static_cast<fts_aux_table_t*>(
7563 ib_vector_get(tables, i + 1));
7564 }
7565
7566 if ((next_aux_table == NULL
7567 || orig_parent_id != next_aux_table->parent_id)
7568 && !ib_vector_is_empty(aux_tables_to_rename)) {
7569
7570 ut_ad(rename);
7571 ut_ad(!DICT_TF2_FLAG_IS_SET(
7572 parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
7573
7574 fts_rename_aux_tables_to_hex_format(
7575 aux_tables_to_rename,parent_table);
7576 }
7577
7578 /* The IDs are already in correct hex format. */
7579 if (!drop && !rename) {
7580 dict_table_t* table;
7581
7582 table = dict_table_open_on_id(
7583 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7584
7585 if (table != NULL
7586 && strcmp(table->name.m_name, aux_table->name)) {
7587 dict_table_close(table, TRUE, FALSE);
7588 table = NULL;
7589 }
7590
7591 if (table != NULL
7592 && !DICT_TF2_FLAG_IS_SET(
7593 table,
7594 DICT_TF2_FTS_AUX_HEX_NAME)) {
7595
7596 DBUG_EXECUTE_IF("aux_table_flag_fail",
7597 ib::warn() << "Setting aux table "
7598 << table->name << " to hex "
7599 "format failed.";
7600 fts_set_index_corrupt(
7601 trx, aux_table->index_id,
7602 parent_table);
7603 goto table_exit;);
7604
7605 dberr_t err = fts_update_hex_format_flag(
7606 trx, table->id, true);
7607
7608 if (err != DB_SUCCESS) {
7609 ib::warn() << "Setting aux table "
7610 << table->name << " to hex "
7611 "format failed.";
7612
7613 fts_set_index_corrupt(
7614 trx, aux_table->index_id,
7615 parent_table);
7616 } else {
7617 DICT_TF2_FLAG_SET(table,
7618 DICT_TF2_FTS_AUX_HEX_NAME);
7619 }
7620 }
7621 #ifndef NDEBUG
7622 table_exit:
7623 #endif /* !NDEBUG */
7624
7625 if (table != NULL) {
7626 dict_table_close(table, TRUE, FALSE);
7627 }
7628
7629 ut_ad(parent_table != NULL);
7630
7631 fts_set_parent_hex_format_flag(
7632 parent_table, trx);
7633 }
7634
7635 if (parent_table != NULL) {
7636 dict_table_close(parent_table, TRUE, FALSE);
7637 }
7638 }
7639
7640 fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
7641 fts_drop_aux_table_from_vector(trx, drop_aux_tables);
7642 fts_sql_commit(trx);
7643
7644 fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
7645
7646 /* Free the memory allocated at the beginning */
7647 if (heap != NULL) {
7648 mem_heap_free(heap);
7649 }
7650 }
7651
7652 /**********************************************************************//**
7653 Drop all orphaned FTS auxiliary tables, those that don't have a parent
7654 table or FTS index defined on them. */
7655 void
fts_drop_orphaned_tables(void)7656 fts_drop_orphaned_tables(void)
7657 /*==========================*/
7658 {
7659 trx_t* trx;
7660 pars_info_t* info;
7661 mem_heap_t* heap;
7662 que_t* graph;
7663 ib_vector_t* tables;
7664 ib_alloc_t* heap_alloc;
7665 space_name_list_t space_name_list;
7666 dberr_t error = DB_SUCCESS;
7667
7668 /* Note: We have to free the memory after we are done with the list. */
7669 error = fil_get_space_names(space_name_list);
7670
7671 if (error == DB_OUT_OF_MEMORY) {
7672 ib::fatal() << "Out of memory";
7673 }
7674
7675 heap = mem_heap_create(1024);
7676 heap_alloc = ib_heap_allocator_create(heap);
7677
7678 /* We store the table ids of all the FTS indexes that were found. */
7679 tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
7680
7681 /* Get the list of all known .ibd files and check for orphaned
7682 FTS auxiliary files in that list. We need to remove them because
7683 users can't map them back to table names and this will create
7684 unnecessary clutter. */
7685
7686 for (space_name_list_t::iterator it = space_name_list.begin();
7687 it != space_name_list.end();
7688 ++it) {
7689
7690 fts_aux_table_t* fts_aux_table;
7691
7692 fts_aux_table = static_cast<fts_aux_table_t*>(
7693 ib_vector_push(tables, NULL));
7694
7695 memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
7696
7697 if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
7698 ib_vector_pop(tables);
7699 } else {
7700 ulint len = strlen(*it);
7701
7702 fts_aux_table->id = fil_space_get_id_by_name(*it);
7703
7704 /* We got this list from fil0fil.cc. The tablespace
7705 with this name must exist. */
7706 ut_a(fts_aux_table->id != ULINT_UNDEFINED);
7707
7708 fts_aux_table->name = static_cast<char*>(
7709 mem_heap_dup(heap, *it, len + 1));
7710
7711 fts_aux_table->name[len] = 0;
7712 }
7713 }
7714
7715 trx = trx_allocate_for_background();
7716 trx->op_info = "dropping orphaned FTS tables";
7717 row_mysql_lock_data_dictionary(trx);
7718
7719 info = pars_info_create();
7720
7721 pars_info_bind_function(info, "my_func", fts_read_tables, tables);
7722
7723 graph = fts_parse_sql_no_dict_lock(
7724 NULL,
7725 info,
7726 "DECLARE FUNCTION my_func;\n"
7727 "DECLARE CURSOR c IS"
7728 " SELECT NAME, ID"
7729 " FROM SYS_TABLES;\n"
7730 "BEGIN\n"
7731 "\n"
7732 "OPEN c;\n"
7733 "WHILE 1 = 1 LOOP\n"
7734 " FETCH c INTO my_func();\n"
7735 " IF c % NOTFOUND THEN\n"
7736 " EXIT;\n"
7737 " END IF;\n"
7738 "END LOOP;\n"
7739 "CLOSE c;");
7740
7741 for (;;) {
7742 error = fts_eval_sql(trx, graph);
7743
7744 if (error == DB_SUCCESS) {
7745 fts_check_and_drop_orphaned_tables(trx, tables);
7746 break; /* Exit the loop. */
7747 } else {
7748 ib_vector_reset(tables);
7749
7750 fts_sql_rollback(trx);
7751
7752 if (error == DB_LOCK_WAIT_TIMEOUT) {
7753 ib::warn() << "lock wait timeout reading"
7754 " SYS_TABLES. Retrying!";
7755
7756 trx->error_state = DB_SUCCESS;
7757 } else {
7758 ib::error() << "(" << ut_strerr(error)
7759 << ") while reading SYS_TABLES.";
7760
7761 break; /* Exit the loop. */
7762 }
7763 }
7764 }
7765
7766 que_graph_free(graph);
7767
7768 row_mysql_unlock_data_dictionary(trx);
7769
7770 trx_free_for_background(trx);
7771
7772 if (heap != NULL) {
7773 mem_heap_free(heap);
7774 }
7775
7776 /** Free the memory allocated to store the .ibd names. */
7777 for (space_name_list_t::iterator it = space_name_list.begin();
7778 it != space_name_list.end();
7779 ++it) {
7780
7781 UT_DELETE_ARRAY(*it);
7782 }
7783 }
7784
7785 /**********************************************************************//**
7786 Check whether user supplied stopword table is of the right format.
7787 Caller is responsible to hold dictionary locks.
7788 @return the stopword column charset if qualifies */
7789 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)7790 fts_valid_stopword_table(
7791 /*=====================*/
7792 const char* stopword_table_name) /*!< in: Stopword table
7793 name */
7794 {
7795 dict_table_t* table;
7796 dict_col_t* col = NULL;
7797
7798 if (!stopword_table_name) {
7799 return(NULL);
7800 }
7801
7802 table = dict_table_get_low(stopword_table_name);
7803
7804 if (!table) {
7805 ib::error() << "User stopword table " << stopword_table_name
7806 << " does not exist.";
7807
7808 return(NULL);
7809 } else {
7810 const char* col_name;
7811
7812 col_name = dict_table_get_col_name(table, 0);
7813
7814 if (ut_strcmp(col_name, "value")) {
7815 ib::error() << "Invalid column name for stopword"
7816 " table " << stopword_table_name << ". Its"
7817 " first column must be named as 'value'.";
7818
7819 return(NULL);
7820 }
7821
7822 col = dict_table_get_nth_col(table, 0);
7823
7824 if (col->mtype != DATA_VARCHAR
7825 && col->mtype != DATA_VARMYSQL) {
7826 ib::error() << "Invalid column type for stopword"
7827 " table " << stopword_table_name << ". Its"
7828 " first column must be of varchar type";
7829
7830 return(NULL);
7831 }
7832 }
7833
7834 ut_ad(col);
7835
7836 return(fts_get_charset(col->prtype));
7837 }
7838
7839 /**********************************************************************//**
7840 This function loads the stopword into the FTS cache. It also
7841 records/fetches stopword configuration to/from FTS configure
7842 table, depending on whether we are creating or reloading the
7843 FTS.
7844 @return TRUE if load operation is successful */
7845 ibool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)7846 fts_load_stopword(
7847 /*==============*/
7848 const dict_table_t*
7849 table, /*!< in: Table with FTS */
7850 trx_t* trx, /*!< in: Transactions */
7851 const char* global_stopword_table, /*!< in: Global stopword table
7852 name */
7853 const char* session_stopword_table, /*!< in: Session stopword table
7854 name */
7855 ibool stopword_is_on, /*!< in: Whether stopword
7856 option is turned on/off */
7857 ibool reload) /*!< in: Whether it is
7858 for reloading FTS table */
7859 {
7860 fts_table_t fts_table;
7861 fts_string_t str;
7862 dberr_t error = DB_SUCCESS;
7863 ulint use_stopword;
7864 fts_cache_t* cache;
7865 const char* stopword_to_use = NULL;
7866 ibool new_trx = FALSE;
7867 byte str_buffer[MAX_FULL_NAME_LEN + 1];
7868
7869 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
7870
7871 cache = table->fts->cache;
7872
7873 if (!reload && !(cache->stopword_info.status
7874 & STOPWORD_NOT_INIT)) {
7875 return(TRUE);
7876 }
7877
7878 if (!trx) {
7879 trx = trx_allocate_for_background();
7880 trx->op_info = "upload FTS stopword";
7881 new_trx = TRUE;
7882 }
7883
7884 /* First check whether stopword filtering is turned off */
7885 if (reload) {
7886 error = fts_config_get_ulint(
7887 trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
7888 } else {
7889 use_stopword = (ulint) stopword_is_on;
7890
7891 error = fts_config_set_ulint(
7892 trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
7893 }
7894
7895 if (error != DB_SUCCESS) {
7896 goto cleanup;
7897 }
7898
7899 /* If stopword is turned off, no need to continue to load the
7900 stopword into cache, but still need to do initialization */
7901 if (!use_stopword) {
7902 cache->stopword_info.status = STOPWORD_OFF;
7903 goto cleanup;
7904 }
7905
7906 if (reload) {
7907 /* Fetch the stopword table name from FTS config
7908 table */
7909 str.f_n_char = 0;
7910 str.f_str = str_buffer;
7911 str.f_len = sizeof(str_buffer) - 1;
7912
7913 error = fts_config_get_value(
7914 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7915
7916 if (error != DB_SUCCESS) {
7917 goto cleanup;
7918 }
7919
7920 if (strlen((char*) str.f_str) > 0) {
7921 stopword_to_use = (const char*) str.f_str;
7922 }
7923 } else {
7924 stopword_to_use = (session_stopword_table)
7925 ? session_stopword_table : global_stopword_table;
7926 }
7927
7928 if (stopword_to_use
7929 && fts_load_user_stopword(table->fts, stopword_to_use,
7930 &cache->stopword_info)) {
7931 /* Save the stopword table name to the configure
7932 table */
7933 if (!reload) {
7934 str.f_n_char = 0;
7935 str.f_str = (byte*) stopword_to_use;
7936 str.f_len = ut_strlen(stopword_to_use);
7937
7938 error = fts_config_set_value(
7939 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7940 }
7941 } else {
7942 /* Load system default stopword list */
7943 fts_load_default_stopword(&cache->stopword_info);
7944 }
7945
7946 cleanup:
7947 if (new_trx) {
7948 if (error == DB_SUCCESS) {
7949 fts_sql_commit(trx);
7950 } else {
7951 fts_sql_rollback(trx);
7952 }
7953
7954 trx_free_for_background(trx);
7955 }
7956
7957 if (!cache->stopword_info.cached_stopword) {
7958 cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
7959 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
7960 &my_charset_latin1);
7961 }
7962
7963 return(error == DB_SUCCESS);
7964 }
7965
7966 /**********************************************************************//**
7967 Callback function when we initialize the FTS at the start up
7968 time. It recovers the maximum Doc IDs presented in the current table.
7969 @return: always returns TRUE */
7970 static
7971 ibool
fts_init_get_doc_id(void * row,void * user_arg)7972 fts_init_get_doc_id(
7973 /*================*/
7974 void* row, /*!< in: sel_node_t* */
7975 void* user_arg) /*!< in: fts cache */
7976 {
7977 doc_id_t doc_id = FTS_NULL_DOC_ID;
7978 sel_node_t* node = static_cast<sel_node_t*>(row);
7979 que_node_t* exp = node->select_list;
7980 fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
7981
7982 ut_ad(ib_vector_is_empty(cache->get_docs));
7983
7984 /* Copy each indexed column content into doc->text.f_str */
7985 if (exp) {
7986 dfield_t* dfield = que_node_get_val(exp);
7987 dtype_t* type = dfield_get_type(dfield);
7988 void* data = dfield_get_data(dfield);
7989
7990 ut_a(dtype_get_mtype(type) == DATA_INT);
7991
7992 doc_id = static_cast<doc_id_t>(mach_read_from_8(
7993 static_cast<const byte*>(data)));
7994
7995 if (doc_id >= cache->next_doc_id) {
7996 cache->next_doc_id = doc_id + 1;
7997 }
7998 }
7999
8000 return(TRUE);
8001 }
8002
8003 /**********************************************************************//**
8004 Callback function when we initialize the FTS at the start up
8005 time. It recovers Doc IDs that have not sync-ed to the auxiliary
8006 table, and require to bring them back into FTS index.
8007 @return: always returns TRUE */
8008 static
8009 ibool
fts_init_recover_doc(void * row,void * user_arg)8010 fts_init_recover_doc(
8011 /*=================*/
8012 void* row, /*!< in: sel_node_t* */
8013 void* user_arg) /*!< in: fts cache */
8014 {
8015
8016 fts_doc_t doc;
8017 ulint doc_len = 0;
8018 ulint field_no = 0;
8019 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
8020 doc_id_t doc_id = FTS_NULL_DOC_ID;
8021 sel_node_t* node = static_cast<sel_node_t*>(row);
8022 que_node_t* exp = node->select_list;
8023 fts_cache_t* cache = get_doc->cache;
8024 st_mysql_ftparser* parser = get_doc->index_cache->index->parser;
8025
8026 fts_doc_init(&doc);
8027 doc.found = TRUE;
8028
8029 ut_ad(cache);
8030
8031 /* Copy each indexed column content into doc->text.f_str */
8032 while (exp) {
8033 dfield_t* dfield = que_node_get_val(exp);
8034 ulint len = dfield_get_len(dfield);
8035
8036 if (field_no == 0) {
8037 dtype_t* type = dfield_get_type(dfield);
8038 void* data = dfield_get_data(dfield);
8039
8040 ut_a(dtype_get_mtype(type) == DATA_INT);
8041
8042 doc_id = static_cast<doc_id_t>(mach_read_from_8(
8043 static_cast<const byte*>(data)));
8044
8045 field_no++;
8046 exp = que_node_get_next(exp);
8047 continue;
8048 }
8049
8050 if (len == UNIV_SQL_NULL) {
8051 exp = que_node_get_next(exp);
8052 continue;
8053 }
8054
8055 ut_ad(get_doc);
8056
8057 if (!get_doc->index_cache->charset) {
8058 get_doc->index_cache->charset = fts_get_charset(
8059 dfield->type.prtype);
8060 }
8061
8062 doc.charset = get_doc->index_cache->charset;
8063 doc.is_ngram = get_doc->index_cache->index->is_ngram;
8064
8065 if (dfield_is_ext(dfield)) {
8066 dict_table_t* table = cache->sync->table;
8067
8068 doc.text.f_str = btr_copy_externally_stored_field(
8069 &doc.text.f_len,
8070 static_cast<byte*>(dfield_get_data(dfield)),
8071 dict_table_page_size(table), len,
8072 static_cast<mem_heap_t*>(doc.self_heap->arg));
8073 } else {
8074 doc.text.f_str = static_cast<byte*>(
8075 dfield_get_data(dfield));
8076
8077 doc.text.f_len = len;
8078 }
8079
8080 if (field_no == 1) {
8081 fts_tokenize_document(&doc, NULL, parser);
8082 } else {
8083 fts_tokenize_document_next(&doc, doc_len, NULL, parser);
8084 }
8085
8086 exp = que_node_get_next(exp);
8087
8088 doc_len += (exp) ? len + 1 : len;
8089
8090 field_no++;
8091 }
8092
8093 fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
8094
8095 fts_doc_free(&doc);
8096
8097 cache->added++;
8098
8099 if (doc_id >= cache->next_doc_id) {
8100 cache->next_doc_id = doc_id + 1;
8101 }
8102
8103 return(TRUE);
8104 }
8105
8106 /**********************************************************************//**
8107 This function brings FTS index in sync when FTS index is first
8108 used. There are documents that have not yet sync-ed to auxiliary
8109 tables from last server abnormally shutdown, we will need to bring
8110 such document into FTS cache before any further operations
8111 @return TRUE if all OK */
8112 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)8113 fts_init_index(
8114 /*===========*/
8115 dict_table_t* table, /*!< in: Table with FTS */
8116 ibool has_cache_lock) /*!< in: Whether we already have
8117 cache lock */
8118 {
8119 dict_index_t* index;
8120 doc_id_t start_doc;
8121 fts_get_doc_t* get_doc = NULL;
8122 fts_cache_t* cache = table->fts->cache;
8123 bool need_init = false;
8124
8125 ut_ad(!mutex_own(&dict_sys->mutex));
8126
8127 /* First check cache->get_docs is initialized */
8128 if (!has_cache_lock) {
8129 rw_lock_x_lock(&cache->lock);
8130 }
8131
8132 rw_lock_x_lock(&cache->init_lock);
8133 if (cache->get_docs == NULL) {
8134 cache->get_docs = fts_get_docs_create(cache);
8135 }
8136 rw_lock_x_unlock(&cache->init_lock);
8137
8138 if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
8139 goto func_exit;
8140 }
8141
8142 need_init = true;
8143
8144 start_doc = cache->synced_doc_id;
8145
8146 if (!start_doc) {
8147 fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
8148 cache->synced_doc_id = start_doc;
8149 }
8150
8151 /* No FTS index, this is the case when previous FTS index
8152 dropped, and we re-initialize the Doc ID system for subsequent
8153 insertion */
8154 if (ib_vector_is_empty(cache->get_docs)) {
8155 index = table->fts_doc_id_index;
8156
8157 ut_a(index);
8158
8159 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8160 FTS_FETCH_DOC_BY_ID_LARGE,
8161 fts_init_get_doc_id, cache);
8162 } else {
8163 if (table->fts->cache->stopword_info.status
8164 & STOPWORD_NOT_INIT) {
8165 fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
8166 }
8167
8168 for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
8169 get_doc = static_cast<fts_get_doc_t*>(
8170 ib_vector_get(cache->get_docs, i));
8171
8172 index = get_doc->index_cache->index;
8173
8174 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8175 FTS_FETCH_DOC_BY_ID_LARGE,
8176 fts_init_recover_doc, get_doc);
8177 }
8178 }
8179
8180 table->fts->fts_status |= ADDED_TABLE_SYNCED;
8181
8182 fts_get_docs_clear(cache->get_docs);
8183
8184 func_exit:
8185 if (!has_cache_lock) {
8186 rw_lock_x_unlock(&cache->lock);
8187 }
8188
8189 if (need_init) {
8190 mutex_enter(&dict_sys->mutex);
8191 /* Register the table with the optimize thread. */
8192 fts_optimize_add_table(table);
8193 mutex_exit(&dict_sys->mutex);
8194 }
8195
8196 return(TRUE);
8197 }
8198
8199 /** Check if the all the auxillary tables associated with FTS index are in
8200 consistent state. For now consistency is check only by ensuring
8201 index->page_no != FIL_NULL
8202 @param[out] base_table table has host fts index
8203 @param[in,out] trx trx handler */
8204 void
fts_check_corrupt(dict_table_t * base_table,trx_t * trx)8205 fts_check_corrupt(
8206 dict_table_t* base_table,
8207 trx_t* trx)
8208 {
8209 bool sane = true;
8210 fts_table_t fts_table;
8211
8212 /* Iterate over the common table and check for their sanity. */
8213 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
8214
8215 for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
8216
8217 char table_name[MAX_FULL_NAME_LEN];
8218
8219 fts_table.suffix = fts_common_tables[i];
8220 fts_get_table_name(&fts_table, table_name);
8221
8222 dict_table_t* aux_table = dict_table_open_on_name(
8223 table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
8224
8225 if (aux_table == NULL) {
8226 dict_set_corrupted(
8227 dict_table_get_first_index(base_table),
8228 trx, "FTS_SANITY_CHECK");
8229 ut_ad(base_table->corrupted == TRUE);
8230 sane = false;
8231 continue;
8232 }
8233
8234 for (dict_index_t* aux_table_index =
8235 UT_LIST_GET_FIRST(aux_table->indexes);
8236 aux_table_index != NULL;
8237 aux_table_index =
8238 UT_LIST_GET_NEXT(indexes, aux_table_index)) {
8239
8240 /* Check if auxillary table needed for FTS is sane. */
8241 if (aux_table_index->page == FIL_NULL) {
8242 dict_set_corrupted(
8243 dict_table_get_first_index(base_table),
8244 trx, "FTS_SANITY_CHECK");
8245 ut_ad(base_table->corrupted == TRUE);
8246 sane = false;
8247 }
8248 }
8249
8250 dict_table_close(aux_table, FALSE, FALSE);
8251 }
8252 }
8253