1 /*****************************************************************************
2
3 Copyright (c) 2011, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file fts/fts0fts.cc
29 Full Text Search interface
30 ***********************************************************************/
31
32 #include "ha_prototypes.h"
33
34 #include "trx0roll.h"
35 #include "row0mysql.h"
36 #include "row0upd.h"
37 #include "dict0types.h"
38 #include "dict0stats_bg.h"
39 #include "row0sel.h"
40 #include "fts0fts.h"
41 #include "fts0priv.h"
42 #include "fts0types.h"
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "fts0plugin.h"
46 #include "dict0priv.h"
47 #include "dict0stats.h"
48 #include "btr0pcur.h"
49 #include "sync0sync.h"
50 #include "ut0new.h"
51
52 static const ulint FTS_MAX_ID_LEN = 32;
53
54 /** Column name from the FTS config table */
55 #define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
56
/** Check whether an auxiliary table name refers to an obsolete table,
by looking for the keywords used in obsolete table names. */
59 #define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \
60 (strstr((table_name), "DOC_ID") != NULL \
61 || strstr((table_name), "ADDED") != NULL \
62 || strstr((table_name), "STOPWORDS") != NULL)
63
64 /** This is maximum FTS cache for each table and would be
65 a configurable variable */
66 ulong fts_max_cache_size;
67
68 /** Whether the total memory used for FTS cache is exhausted, and we will
69 need a sync to free some memory */
70 bool fts_need_sync = false;
71
72 /** Variable specifying the total memory allocated for FTS cache */
73 ulong fts_max_total_cache_size;
74
75 /** This is FTS result cache limit for each query and would be
76 a configurable variable */
77 ulong fts_result_cache_limit;
78
/** Variable specifying the maximum FTS token size */
80 ulong fts_max_token_size;
81
/** Variable specifying the minimum FTS token size */
83 ulong fts_min_token_size;
84
85
86 // FIXME: testing
87 ib_time_monotonic_t elapsed_time = 0;
88 ulint n_nodes = 0;
89
90 #ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit (1MB) */
92 static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
93
94 /** The cache size permissible upper limit (1G) */
95 static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
96 #endif
97
98 /** Time to sleep after DEADLOCK error before retrying operation. */
99 static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
100
101 /** variable to record innodb_fts_internal_tbl_name for information
102 schema table INNODB_FTS_INSERTED etc. */
103 char* fts_internal_tbl_name = NULL;
104
/** InnoDB default stopword list:
There are different versions of stopword lists; the stopwords listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision. Note that "the" appears twice in the list below. */
111 const char *fts_default_stopword[] =
112 {
113 "a",
114 "about",
115 "an",
116 "are",
117 "as",
118 "at",
119 "be",
120 "by",
121 "com",
122 "de",
123 "en",
124 "for",
125 "from",
126 "how",
127 "i",
128 "in",
129 "is",
130 "it",
131 "la",
132 "of",
133 "on",
134 "or",
135 "that",
136 "the",
137 "this",
138 "to",
139 "was",
140 "what",
141 "when",
142 "where",
143 "who",
144 "will",
145 "with",
146 "und",
147 "the",
148 "www",
149 NULL
150 };
151
152 /** For storing table info when checking for orphaned tables. */
153 struct fts_aux_table_t {
154 table_id_t id; /*!< Table id */
155 table_id_t parent_id; /*!< Parent table id */
156 table_id_t index_id; /*!< Table FT index id */
157 char* name; /*!< Name of the table */
158 };
159
160 #ifdef FTS_DOC_STATS_DEBUG
161 /** Template for creating the FTS auxiliary index specific tables. This is
162 mainly designed for the statistics work in the future */
163 static const char* fts_create_index_tables_sql = {
164 "BEGIN\n"
165 ""
166 "CREATE TABLE $doc_id_table (\n"
167 " doc_id BIGINT UNSIGNED,\n"
168 " word_count INTEGER UNSIGNED NOT NULL\n"
169 ") COMPACT;\n"
170 "CREATE UNIQUE CLUSTERED INDEX IND ON $doc_id_table(doc_id);\n"
171 };
172 #endif
173
174 /** FTS auxiliary table suffixes that are common to all FT indexes. */
175 const char* fts_common_tables[] = {
176 "BEING_DELETED",
177 "BEING_DELETED_CACHE",
178 "CONFIG",
179 "DELETED",
180 "DELETED_CACHE",
181 NULL
182 };
183
184 /** FTS auxiliary INDEX split intervals. */
185 const fts_index_selector_t fts_index_selector[] = {
186 { 9, "INDEX_1" },
187 { 65, "INDEX_2" },
188 { 70, "INDEX_3" },
189 { 75, "INDEX_4" },
190 { 80, "INDEX_5" },
191 { 85, "INDEX_6" },
192 { 0 , NULL }
193 };
194
195 /** Default config values for FTS indexes on a table. */
196 static const char* fts_config_table_insert_values_sql =
197 "BEGIN\n"
198 "\n"
199 "INSERT INTO $config_table VALUES('"
200 FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
201 ""
202 "INSERT INTO $config_table VALUES('"
203 FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
204 ""
205 "INSERT INTO $config_table VALUES ('"
206 FTS_SYNCED_DOC_ID "', '0');\n"
207 ""
208 "INSERT INTO $config_table VALUES ('"
209 FTS_TOTAL_DELETED_COUNT "', '0');\n"
210 "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
211 "INSERT INTO $config_table VALUES ('"
212 FTS_TABLE_STATE "', '0');\n";
213
/** FTS tokenize parameter for plugin parser */
215 struct fts_tokenize_param_t {
216 fts_doc_t* result_doc; /*!< Result doc for tokens */
217 ulint add_pos; /*!< Added position for tokens */
218 };
219
220 /** Run SYNC on the table, i.e., write out data from the cache to the
221 FTS auxiliary INDEX table and clear the cache at the end.
222 @param[in,out] sync sync state
223 @param[in] unlock_cache whether unlock cache lock when write node
224 @param[in] wait whether wait when a sync is in progress
225 @param[in] has_dict_lock whether has dict operation lock
226 @return DB_SUCCESS if all OK */
227 static
228 dberr_t
229 fts_sync(
230 fts_sync_t* sync,
231 bool unlock_cache,
232 bool wait,
233 bool has_dict_lock);
234
235 /****************************************************************//**
Release all resources held by the words rb tree, e.g., the node ilist. */
237 static
238 void
239 fts_words_free(
240 /*===========*/
241 ib_rbt_t* words) /*!< in: rb tree of words */
242 MY_ATTRIBUTE((nonnull));
243 #ifdef FTS_CACHE_SIZE_DEBUG
244 /****************************************************************//**
245 Read the max cache size parameter from the config table. */
246 static
247 void
248 fts_update_max_cache_size(
249 /*======================*/
250 fts_sync_t* sync); /*!< in: sync state */
251 #endif
252
253 /*********************************************************************//**
254 This function fetches the document just inserted right before
255 we commit the transaction, and tokenize the inserted text data
256 and insert into FTS auxiliary table and its cache.
257 @return TRUE if successful */
258 static
259 ulint
260 fts_add_doc_by_id(
261 /*==============*/
262 fts_trx_table_t*ftt, /*!< in: FTS trx table */
263 doc_id_t doc_id, /*!< in: doc id */
264 ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)));
265 /*!< in: affected fts indexes */
266 #ifdef FTS_DOC_STATS_DEBUG
267 /****************************************************************//**
268 Check whether a particular word (term) exists in the FTS index.
269 @return DB_SUCCESS if all went fine */
270 static
271 dberr_t
272 fts_is_word_in_index(
273 /*=================*/
274 trx_t* trx, /*!< in: FTS query state */
275 que_t** graph, /*!< out: Query graph */
276 fts_table_t* fts_table, /*!< in: table instance */
277 const fts_string_t* word, /*!< in: the word to check */
278 ibool* found) /*!< out: TRUE if exists */
279 MY_ATTRIBUTE((nonnull, warn_unused_result));
280 #endif /* FTS_DOC_STATS_DEBUG */
281
282 /******************************************************************//**
283 Update the last document id. This function could create a new
284 transaction to update the last document id.
285 @return DB_SUCCESS if OK */
286 static
287 dberr_t
288 fts_update_sync_doc_id(
289 /*===================*/
290 const dict_table_t* table, /*!< in: table */
291 const char* table_name, /*!< in: table name, or NULL */
292 doc_id_t doc_id, /*!< in: last document id */
293 trx_t* trx) /*!< in: update trx, or NULL */
294 MY_ATTRIBUTE((nonnull(1)));
295
296 /** Get a character set based on precise type.
297 @param prtype precise type
298 @return the corresponding character set */
299 UNIV_INLINE
300 CHARSET_INFO*
fts_get_charset(ulint prtype)301 fts_get_charset(ulint prtype)
302 {
303 #ifdef UNIV_DEBUG
304 switch (prtype & DATA_MYSQL_TYPE_MASK) {
305 case MYSQL_TYPE_BIT:
306 case MYSQL_TYPE_STRING:
307 case MYSQL_TYPE_VAR_STRING:
308 case MYSQL_TYPE_TINY_BLOB:
309 case MYSQL_TYPE_MEDIUM_BLOB:
310 case MYSQL_TYPE_BLOB:
311 case MYSQL_TYPE_LONG_BLOB:
312 case MYSQL_TYPE_VARCHAR:
313 break;
314 default:
315 ut_error;
316 }
317 #endif /* UNIV_DEBUG */
318
319 uint cs_num = (uint) dtype_get_charset_coll(prtype);
320
321 if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
322 return(cs);
323 }
324
325 ib::fatal() << "Unable to find charset-collation " << cs_num;
326 return(NULL);
327 }
328
329 /****************************************************************//**
330 This function loads the default InnoDB stopword list */
331 static
332 void
fts_load_default_stopword(fts_stopword_t * stopword_info)333 fts_load_default_stopword(
334 /*======================*/
335 fts_stopword_t* stopword_info) /*!< in: stopword info */
336 {
337 fts_string_t str;
338 mem_heap_t* heap;
339 ib_alloc_t* allocator;
340 ib_rbt_t* stop_words;
341
342 allocator = stopword_info->heap;
343 heap = static_cast<mem_heap_t*>(allocator->arg);
344
345 if (!stopword_info->cached_stopword) {
346 stopword_info->cached_stopword = rbt_create_arg_cmp(
347 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
348 &my_charset_latin1);
349 }
350
351 stop_words = stopword_info->cached_stopword;
352
353 str.f_n_char = 0;
354
355 for (ulint i = 0; fts_default_stopword[i]; ++i) {
356 char* word;
357 fts_tokenizer_word_t new_word;
358
359 /* We are going to duplicate the value below. */
360 word = const_cast<char*>(fts_default_stopword[i]);
361
362 new_word.nodes = ib_vector_create(
363 allocator, sizeof(fts_node_t), 4);
364
365 str.f_len = ut_strlen(word);
366 str.f_str = reinterpret_cast<byte*>(word);
367
368 fts_string_dup(&new_word.text, &str, heap);
369
370 rbt_insert(stop_words, &new_word, &new_word);
371 }
372
373 stopword_info->status = STOPWORD_FROM_DEFAULT;
374 }
375
376 /****************************************************************//**
377 Callback function to read a single stopword value.
378 @return Always return TRUE */
379 static
380 ibool
fts_read_stopword(void * row,void * user_arg)381 fts_read_stopword(
382 /*==============*/
383 void* row, /*!< in: sel_node_t* */
384 void* user_arg) /*!< in: pointer to ib_vector_t */
385 {
386 ib_alloc_t* allocator;
387 fts_stopword_t* stopword_info;
388 sel_node_t* sel_node;
389 que_node_t* exp;
390 ib_rbt_t* stop_words;
391 dfield_t* dfield;
392 fts_string_t str;
393 mem_heap_t* heap;
394 ib_rbt_bound_t parent;
395
396 sel_node = static_cast<sel_node_t*>(row);
397 stopword_info = static_cast<fts_stopword_t*>(user_arg);
398
399 stop_words = stopword_info->cached_stopword;
400 allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
401 heap = static_cast<mem_heap_t*>(allocator->arg);
402
403 exp = sel_node->select_list;
404
405 /* We only need to read the first column */
406 dfield = que_node_get_val(exp);
407
408 str.f_n_char = 0;
409 str.f_str = static_cast<byte*>(dfield_get_data(dfield));
410 str.f_len = dfield_get_len(dfield);
411
412 /* Only create new node if it is a value not already existed */
413 if (str.f_len != UNIV_SQL_NULL
414 && rbt_search(stop_words, &parent, &str) != 0) {
415
416 fts_tokenizer_word_t new_word;
417
418 new_word.nodes = ib_vector_create(
419 allocator, sizeof(fts_node_t), 4);
420
421 new_word.text.f_str = static_cast<byte*>(
422 mem_heap_alloc(heap, str.f_len + 1));
423
424 memcpy(new_word.text.f_str, str.f_str, str.f_len);
425
426 new_word.text.f_n_char = 0;
427 new_word.text.f_len = str.f_len;
428 new_word.text.f_str[str.f_len] = 0;
429
430 rbt_insert(stop_words, &new_word, &new_word);
431 }
432
433 return(TRUE);
434 }
435
436 /******************************************************************//**
437 Load user defined stopword from designated user table
438 @return TRUE if load operation is successful */
439 static
440 ibool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)441 fts_load_user_stopword(
442 /*===================*/
443 fts_t* fts, /*!< in: FTS struct */
444 const char* stopword_table_name, /*!< in: Stopword table
445 name */
446 fts_stopword_t* stopword_info) /*!< in: Stopword info */
447 {
448 pars_info_t* info;
449 que_t* graph;
450 dberr_t error = DB_SUCCESS;
451 ibool ret = TRUE;
452 trx_t* trx;
453 ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED;
454
455 trx = trx_allocate_for_background();
456 trx->op_info = "Load user stopword table into FTS cache";
457
458 if (!has_lock) {
459 mutex_enter(&dict_sys->mutex);
460 }
461
462 /* Validate the user table existence and in the right
463 format */
464 stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
465 if (!stopword_info->charset) {
466 ret = FALSE;
467 goto cleanup;
468 } else if (!stopword_info->cached_stopword) {
469 /* Create the stopword RB tree with the stopword column
470 charset. All comparison will use this charset */
471 stopword_info->cached_stopword = rbt_create_arg_cmp(
472 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
473 stopword_info->charset);
474
475 }
476
477 info = pars_info_create();
478
479 pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
480
481 pars_info_bind_function(info, "my_func", fts_read_stopword,
482 stopword_info);
483
484 graph = fts_parse_sql_no_dict_lock(
485 NULL,
486 info,
487 "DECLARE FUNCTION my_func;\n"
488 "DECLARE CURSOR c IS"
489 " SELECT value"
490 " FROM $table_stopword;\n"
491 "BEGIN\n"
492 "\n"
493 "OPEN c;\n"
494 "WHILE 1 = 1 LOOP\n"
495 " FETCH c INTO my_func();\n"
496 " IF c % NOTFOUND THEN\n"
497 " EXIT;\n"
498 " END IF;\n"
499 "END LOOP;\n"
500 "CLOSE c;");
501
502 for (;;) {
503 error = fts_eval_sql(trx, graph);
504
505 if (error == DB_SUCCESS) {
506 fts_sql_commit(trx);
507 stopword_info->status = STOPWORD_USER_TABLE;
508 break;
509 } else {
510
511 fts_sql_rollback(trx);
512
513 if (error == DB_LOCK_WAIT_TIMEOUT) {
514 ib::warn() << "Lock wait timeout reading user"
515 " stopword table. Retrying!";
516
517 trx->error_state = DB_SUCCESS;
518 } else {
519 ib::error() << "Error '" << ut_strerr(error)
520 << "' while reading user stopword"
521 " table.";
522 ret = FALSE;
523 break;
524 }
525 }
526 }
527
528 que_graph_free(graph);
529
530 cleanup:
531 if (!has_lock) {
532 mutex_exit(&dict_sys->mutex);
533 }
534
535 trx_free_for_background(trx);
536 return(ret);
537 }
538
539 /******************************************************************//**
540 Initialize the index cache. */
541 static
542 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)543 fts_index_cache_init(
544 /*=================*/
545 ib_alloc_t* allocator, /*!< in: the allocator to use */
546 fts_index_cache_t* index_cache) /*!< in: index cache */
547 {
548 ulint i;
549
550 ut_a(index_cache->words == NULL);
551
552 index_cache->words = rbt_create_arg_cmp(
553 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
554 index_cache->charset);
555
556 ut_a(index_cache->doc_stats == NULL);
557
558 index_cache->doc_stats = ib_vector_create(
559 allocator, sizeof(fts_doc_stats_t), 4);
560
561 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
562 ut_a(index_cache->ins_graph[i] == NULL);
563 ut_a(index_cache->sel_graph[i] == NULL);
564 }
565 }
566
567 /*********************************************************************//**
568 Initialize FTS cache. */
569 void
fts_cache_init(fts_cache_t * cache)570 fts_cache_init(
571 /*===========*/
572 fts_cache_t* cache) /*!< in: cache to initialize */
573 {
574 ulint i;
575
576 /* Just to make sure */
577 ut_a(cache->sync_heap->arg == NULL);
578
579 cache->sync_heap->arg = mem_heap_create(1024);
580
581 cache->total_size = 0;
582
583 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
584 cache->deleted_doc_ids = ib_vector_create(
585 cache->sync_heap, sizeof(fts_update_t), 4);
586 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
587
588 /* Reset the cache data for all the FTS indexes. */
589 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
590 fts_index_cache_t* index_cache;
591
592 index_cache = static_cast<fts_index_cache_t*>(
593 ib_vector_get(cache->indexes, i));
594
595 fts_index_cache_init(cache->sync_heap, index_cache);
596 }
597 }
598
599 /****************************************************************//**
600 Create a FTS cache. */
601 fts_cache_t*
fts_cache_create(dict_table_t * table)602 fts_cache_create(
603 /*=============*/
604 dict_table_t* table) /*!< in: table owns the FTS cache */
605 {
606 mem_heap_t* heap;
607 fts_cache_t* cache;
608
609 heap = static_cast<mem_heap_t*>(mem_heap_create(512));
610
611 cache = static_cast<fts_cache_t*>(
612 mem_heap_zalloc(heap, sizeof(*cache)));
613
614 cache->cache_heap = heap;
615
616 rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
617
618 rw_lock_create(
619 fts_cache_init_rw_lock_key, &cache->init_lock,
620 SYNC_FTS_CACHE_INIT);
621
622 mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
623
624 mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
625
626 mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
627
628 /* This is the heap used to create the cache itself. */
629 cache->self_heap = ib_heap_allocator_create(heap);
630
631 /* This is a transient heap, used for storing sync data. */
632 cache->sync_heap = ib_heap_allocator_create(heap);
633 cache->sync_heap->arg = NULL;
634
635 cache->sync = static_cast<fts_sync_t*>(
636 mem_heap_zalloc(heap, sizeof(fts_sync_t)));
637
638 cache->sync->table = table;
639 cache->sync->event = os_event_create(0);
640
641 /* Create the index cache vector that will hold the inverted indexes. */
642 cache->indexes = ib_vector_create(
643 cache->self_heap, sizeof(fts_index_cache_t), 2);
644
645 fts_cache_init(cache);
646
647 cache->stopword_info.cached_stopword = NULL;
648 cache->stopword_info.charset = NULL;
649
650 cache->stopword_info.heap = cache->self_heap;
651
652 cache->stopword_info.status = STOPWORD_NOT_INIT;
653
654 return(cache);
655 }
656
657 /*******************************************************************//**
658 Add a newly create index into FTS cache */
659 void
fts_add_index(dict_index_t * index,dict_table_t * table)660 fts_add_index(
661 /*==========*/
662 dict_index_t* index, /*!< FTS index to be added */
663 dict_table_t* table) /*!< table */
664 {
665 fts_t* fts = table->fts;
666 fts_cache_t* cache;
667 fts_index_cache_t* index_cache;
668
669 ut_ad(fts);
670 cache = table->fts->cache;
671
672 rw_lock_x_lock(&cache->init_lock);
673
674 ib_vector_push(fts->indexes, &index);
675
676 index_cache = fts_find_index_cache(cache, index);
677
678 if (!index_cache) {
679 /* Add new index cache structure */
680 index_cache = fts_cache_index_cache_create(table, index);
681 }
682
683 rw_lock_x_unlock(&cache->init_lock);
684 }
685
686 /*******************************************************************//**
687 recalibrate get_doc structure after index_cache in cache->indexes changed */
688 static
689 void
fts_reset_get_doc(fts_cache_t * cache)690 fts_reset_get_doc(
691 /*==============*/
692 fts_cache_t* cache) /*!< in: FTS index cache */
693 {
694 fts_get_doc_t* get_doc;
695 ulint i;
696
697 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
698
699 ib_vector_reset(cache->get_docs);
700
701 for (i = 0; i < ib_vector_size(cache->indexes); i++) {
702 fts_index_cache_t* ind_cache;
703
704 ind_cache = static_cast<fts_index_cache_t*>(
705 ib_vector_get(cache->indexes, i));
706
707 get_doc = static_cast<fts_get_doc_t*>(
708 ib_vector_push(cache->get_docs, NULL));
709
710 memset(get_doc, 0x0, sizeof(*get_doc));
711
712 get_doc->index_cache = ind_cache;
713 }
714
715 ut_ad(ib_vector_size(cache->get_docs)
716 == ib_vector_size(cache->indexes));
717 }
718
719 /*******************************************************************//**
720 Check an index is in the table->indexes list
721 @return TRUE if it exists */
722 static
723 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)724 fts_in_dict_index(
725 /*==============*/
726 dict_table_t* table, /*!< in: Table */
727 dict_index_t* index_check) /*!< in: index to be checked */
728 {
729 dict_index_t* index;
730
731 for (index = dict_table_get_first_index(table);
732 index != NULL;
733 index = dict_table_get_next_index(index)) {
734
735 if (index == index_check) {
736 return(TRUE);
737 }
738 }
739
740 return(FALSE);
741 }
742
743 /*******************************************************************//**
744 Check an index is in the fts->cache->indexes list
745 @return TRUE if it exists */
746 static
747 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)748 fts_in_index_cache(
749 /*===============*/
750 dict_table_t* table, /*!< in: Table */
751 dict_index_t* index) /*!< in: index to be checked */
752 {
753 ulint i;
754
755 for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
756 fts_index_cache_t* index_cache;
757
758 index_cache = static_cast<fts_index_cache_t*>(
759 ib_vector_get(table->fts->cache->indexes, i));
760
761 if (index_cache->index == index) {
762 return(TRUE);
763 }
764 }
765
766 return(FALSE);
767 }
768
769 /*******************************************************************//**
770 Check indexes in the fts->indexes is also present in index cache and
771 table->indexes list
772 @return TRUE if all indexes match */
773 ibool
fts_check_cached_index(dict_table_t * table)774 fts_check_cached_index(
775 /*===================*/
776 dict_table_t* table) /*!< in: Table where indexes are dropped */
777 {
778 ulint i;
779
780 if (!table->fts || !table->fts->cache) {
781 return(TRUE);
782 }
783
784 ut_a(ib_vector_size(table->fts->indexes)
785 == ib_vector_size(table->fts->cache->indexes));
786
787 for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
788 dict_index_t* index;
789
790 index = static_cast<dict_index_t*>(
791 ib_vector_getp(table->fts->indexes, i));
792
793 if (!fts_in_index_cache(table, index)) {
794 return(FALSE);
795 }
796
797 if (!fts_in_dict_index(table, index)) {
798 return(FALSE);
799 }
800 }
801
802 return(TRUE);
803 }
804
805 /*******************************************************************//**
806 Drop auxiliary tables related to an FTS index
807 @return DB_SUCCESS or error number */
808 dberr_t
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx)809 fts_drop_index(
810 /*===========*/
811 dict_table_t* table, /*!< in: Table where indexes are dropped */
812 dict_index_t* index, /*!< in: Index to be dropped */
813 trx_t* trx) /*!< in: Transaction for the drop */
814 {
815 ib_vector_t* indexes = table->fts->indexes;
816 dberr_t err = DB_SUCCESS;
817
818 ut_a(indexes);
819
820 if ((ib_vector_size(indexes) == 1
821 && (index == static_cast<dict_index_t*>(
822 ib_vector_getp(table->fts->indexes, 0))))
823 || ib_vector_is_empty(indexes)) {
824 doc_id_t current_doc_id;
825 doc_id_t first_doc_id;
826
827 /* If we are dropping the only FTS index of the table,
828 remove it from optimize thread */
829 fts_optimize_remove_table(table);
830
831 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
832
833 /* If Doc ID column is not added internally by FTS index,
834 we can drop all FTS auxiliary tables. Otherwise, we will
835 need to keep some common table such as CONFIG table, so
836 as to keep track of incrementing Doc IDs */
837 if (!DICT_TF2_FLAG_IS_SET(
838 table, DICT_TF2_FTS_HAS_DOC_ID)) {
839
840 err = fts_drop_tables(trx, table);
841
842 err = fts_drop_index_tables(trx, index);
843
844 while (index->index_fts_syncing
845 && !trx_is_interrupted(trx)) {
846 DICT_BG_YIELD(trx);
847 }
848
849 fts_free(table);
850
851 return(err);
852 }
853
854 while (index->index_fts_syncing
855 && !trx_is_interrupted(trx)) {
856 DICT_BG_YIELD(trx);
857 }
858
859 current_doc_id = table->fts->cache->next_doc_id;
860 first_doc_id = table->fts->cache->first_doc_id;
861 fts_cache_clear(table->fts->cache);
862 fts_cache_destroy(table->fts->cache);
863 table->fts->cache = fts_cache_create(table);
864 table->fts->cache->next_doc_id = current_doc_id;
865 table->fts->cache->first_doc_id = first_doc_id;
866
867 } else {
868 fts_cache_t* cache = table->fts->cache;
869 fts_index_cache_t* index_cache;
870
871 rw_lock_x_lock(&cache->init_lock);
872
873 index_cache = fts_find_index_cache(cache, index);
874
875 if (index_cache != NULL) {
876 while (index->index_fts_syncing
877 && !trx_is_interrupted(trx)) {
878 DICT_BG_YIELD(trx);
879 }
880
881 if (index_cache->words) {
882 fts_words_free(index_cache->words);
883 rbt_free(index_cache->words);
884 }
885
886 ib_vector_remove(cache->indexes, *(void**) index_cache);
887 }
888
889 if (cache->get_docs) {
890 fts_reset_get_doc(cache);
891 }
892
893 rw_lock_x_unlock(&cache->init_lock);
894 }
895
896 err = fts_drop_index_tables(trx, index);
897
898 ib_vector_remove(indexes, (const void*) index);
899
900 return(err);
901 }
902
903 /****************************************************************//**
904 Free the query graph but check whether dict_sys->mutex is already
905 held */
906 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)907 fts_que_graph_free_check_lock(
908 /*==========================*/
909 fts_table_t* fts_table, /*!< in: FTS table */
910 const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
911 que_t* graph) /*!< in: query graph */
912 {
913 ibool has_dict = FALSE;
914
915 if (fts_table && fts_table->table) {
916 ut_ad(fts_table->table->fts);
917
918 has_dict = fts_table->table->fts->fts_status
919 & TABLE_DICT_LOCKED;
920 } else if (index_cache) {
921 ut_ad(index_cache->index->table->fts);
922
923 has_dict = index_cache->index->table->fts->fts_status
924 & TABLE_DICT_LOCKED;
925 }
926
927 if (!has_dict) {
928 mutex_enter(&dict_sys->mutex);
929 }
930
931 ut_ad(mutex_own(&dict_sys->mutex));
932
933 que_graph_free(graph);
934
935 if (!has_dict) {
936 mutex_exit(&dict_sys->mutex);
937 }
938 }
939
940 /****************************************************************//**
941 Create an FTS index cache. */
942 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)943 fts_index_get_charset(
944 /*==================*/
945 dict_index_t* index) /*!< in: FTS index */
946 {
947 CHARSET_INFO* charset = NULL;
948 dict_field_t* field;
949 ulint prtype;
950
951 field = dict_index_get_nth_field(index, 0);
952 prtype = field->col->prtype;
953
954 charset = fts_get_charset(prtype);
955
956 #ifdef FTS_DEBUG
957 /* Set up charset info for this index. Please note all
958 field of the FTS index should have the same charset */
959 for (i = 1; i < index->n_fields; i++) {
960 CHARSET_INFO* fld_charset;
961
962 field = dict_index_get_nth_field(index, i);
963 prtype = field->col->prtype;
964
965 fld_charset = fts_get_charset(prtype);
966
967 /* All FTS columns should have the same charset */
968 if (charset) {
969 ut_a(charset == fld_charset);
970 } else {
971 charset = fld_charset;
972 }
973 }
974 #endif
975
976 return(charset);
977
978 }
979 /****************************************************************//**
980 Create an FTS index cache.
981 @return Index Cache */
982 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)983 fts_cache_index_cache_create(
984 /*=========================*/
985 dict_table_t* table, /*!< in: table with FTS index */
986 dict_index_t* index) /*!< in: FTS index */
987 {
988 ulint n_bytes;
989 fts_index_cache_t* index_cache;
990 fts_cache_t* cache = table->fts->cache;
991
992 ut_a(cache != NULL);
993
994 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
995
996 /* Must not already exist in the cache vector. */
997 ut_a(fts_find_index_cache(cache, index) == NULL);
998
999 index_cache = static_cast<fts_index_cache_t*>(
1000 ib_vector_push(cache->indexes, NULL));
1001
1002 memset(index_cache, 0x0, sizeof(*index_cache));
1003
1004 index_cache->index = index;
1005
1006 index_cache->charset = fts_index_get_charset(index);
1007
1008 n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1009
1010 index_cache->ins_graph = static_cast<que_t**>(
1011 mem_heap_zalloc(static_cast<mem_heap_t*>(
1012 cache->self_heap->arg), n_bytes));
1013
1014 index_cache->sel_graph = static_cast<que_t**>(
1015 mem_heap_zalloc(static_cast<mem_heap_t*>(
1016 cache->self_heap->arg), n_bytes));
1017
1018 fts_index_cache_init(cache->sync_heap, index_cache);
1019
1020 if (cache->get_docs) {
1021 fts_reset_get_doc(cache);
1022 }
1023
1024 return(index_cache);
1025 }
1026
1027 /****************************************************************//**
1028 Release all resources help by the words rb tree e.g., the node ilist. */
1029 static
1030 void
fts_words_free(ib_rbt_t * words)1031 fts_words_free(
1032 /*===========*/
1033 ib_rbt_t* words) /*!< in: rb tree of words */
1034 {
1035 const ib_rbt_node_t* rbt_node;
1036
1037 /* Free the resources held by a word. */
1038 for (rbt_node = rbt_first(words);
1039 rbt_node != NULL;
1040 rbt_node = rbt_first(words)) {
1041
1042 ulint i;
1043 fts_tokenizer_word_t* word;
1044
1045 word = rbt_value(fts_tokenizer_word_t, rbt_node);
1046
1047 /* Free the ilists of this word. */
1048 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1049
1050 fts_node_t* fts_node = static_cast<fts_node_t*>(
1051 ib_vector_get(word->nodes, i));
1052
1053 ut_free(fts_node->ilist);
1054 fts_node->ilist = NULL;
1055 }
1056
1057 /* NOTE: We are responsible for free'ing the node */
1058 ut_free(rbt_remove_node(words, rbt_node));
1059 }
1060 }
1061
1062 /** Clear cache.
1063 @param[in,out] cache fts cache */
1064 void
fts_cache_clear(fts_cache_t * cache)1065 fts_cache_clear(
1066 fts_cache_t* cache)
1067 {
1068 ulint i;
1069
1070 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1071 ulint j;
1072 fts_index_cache_t* index_cache;
1073
1074 index_cache = static_cast<fts_index_cache_t*>(
1075 ib_vector_get(cache->indexes, i));
1076
1077 fts_words_free(index_cache->words);
1078
1079 rbt_free(index_cache->words);
1080
1081 index_cache->words = NULL;
1082
1083 for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1084
1085 if (index_cache->ins_graph[j] != NULL) {
1086
1087 fts_que_graph_free_check_lock(
1088 NULL, index_cache,
1089 index_cache->ins_graph[j]);
1090
1091 index_cache->ins_graph[j] = NULL;
1092 }
1093
1094 if (index_cache->sel_graph[j] != NULL) {
1095
1096 fts_que_graph_free_check_lock(
1097 NULL, index_cache,
1098 index_cache->sel_graph[j]);
1099
1100 index_cache->sel_graph[j] = NULL;
1101 }
1102 }
1103
1104 index_cache->doc_stats = NULL;
1105 }
1106
1107 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1108 cache->sync_heap->arg = NULL;
1109
1110 fts_need_sync = false;
1111
1112 cache->total_size = 0;
1113
1114 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1115 cache->deleted_doc_ids = NULL;
1116 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1117 }
1118
1119 /*********************************************************************//**
1120 Search the index specific cache for a particular FTS index.
1121 @return the index cache else NULL */
1122 UNIV_INLINE
1123 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1124 fts_get_index_cache(
1125 /*================*/
1126 fts_cache_t* cache, /*!< in: cache to search */
1127 const dict_index_t* index) /*!< in: index to search for */
1128 {
1129 ulint i;
1130
1131 ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1132 || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1133
1134 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1135 fts_index_cache_t* index_cache;
1136
1137 index_cache = static_cast<fts_index_cache_t*>(
1138 ib_vector_get(cache->indexes, i));
1139
1140 if (index_cache->index == index) {
1141
1142 return(index_cache);
1143 }
1144 }
1145
1146 return(NULL);
1147 }
1148
#ifdef FTS_DEBUG
/*********************************************************************//**
Search the cache's get_docs vector for the get_doc structure whose index
cache belongs to the given index. Only compiled when FTS_DEBUG is defined.
The caller must hold cache->init_lock in X mode; this is asserted in debug
builds.
@return the fts_get_doc_t item else NULL */
static
fts_get_doc_t*
fts_get_index_get_doc(
/*==================*/
	fts_cache_t*		cache,	/*!< in: cache to search */
	const dict_index_t*	index)	/*!< in: index to search for */
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	ulint	pos = 0;

	while (pos < ib_vector_size(cache->get_docs)) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			/* First hit wins. */
			return(candidate);
		}

		++pos;
	}

	return(NULL);
}
#endif /* FTS_DEBUG */
1179
1180 /**********************************************************************//**
1181 Free the FTS cache. */
1182 void
fts_cache_destroy(fts_cache_t * cache)1183 fts_cache_destroy(
1184 /*==============*/
1185 fts_cache_t* cache) /*!< in: cache*/
1186 {
1187 rw_lock_free(&cache->lock);
1188 rw_lock_free(&cache->init_lock);
1189 mutex_free(&cache->optimize_lock);
1190 mutex_free(&cache->deleted_lock);
1191 mutex_free(&cache->doc_id_lock);
1192 os_event_destroy(cache->sync->event);
1193
1194 if (cache->stopword_info.cached_stopword) {
1195 rbt_free(cache->stopword_info.cached_stopword);
1196 }
1197
1198 if (cache->sync_heap->arg) {
1199 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1200 }
1201
1202 mem_heap_free(cache->cache_heap);
1203 }
1204
1205 /**********************************************************************//**
1206 Find an existing word, or if not found, create one and return it.
1207 @return specified word token */
1208 static
1209 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1210 fts_tokenizer_word_get(
1211 /*===================*/
1212 fts_cache_t* cache, /*!< in: cache */
1213 fts_index_cache_t*
1214 index_cache, /*!< in: index cache */
1215 fts_string_t* text) /*!< in: node text */
1216 {
1217 fts_tokenizer_word_t* word;
1218 ib_rbt_bound_t parent;
1219
1220 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1221
1222 /* If it is a stopword, do not index it */
1223 if (!fts_check_token(text,
1224 cache->stopword_info.cached_stopword,
1225 index_cache->index->is_ngram,
1226 index_cache->charset)) {
1227
1228 return(NULL);
1229 }
1230
1231 /* Check if we found a match, if not then add word to tree. */
1232 if (rbt_search(index_cache->words, &parent, text) != 0) {
1233 mem_heap_t* heap;
1234 fts_tokenizer_word_t new_word;
1235
1236 heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1237
1238 new_word.nodes = ib_vector_create(
1239 cache->sync_heap, sizeof(fts_node_t), 4);
1240
1241 fts_string_dup(&new_word.text, text, heap);
1242
1243 parent.last = rbt_add_node(
1244 index_cache->words, &parent, &new_word);
1245
1246 /* Take into account the RB tree memory use and the vector. */
1247 cache->total_size += sizeof(new_word)
1248 + sizeof(ib_rbt_node_t)
1249 + text->f_len
1250 + (sizeof(fts_node_t) * 4)
1251 + sizeof(*new_word.nodes);
1252
1253 ut_ad(rbt_validate(index_cache->words));
1254 }
1255
1256 word = rbt_value(fts_tokenizer_word_t, parent.last);
1257
1258 return(word);
1259 }
1260
1261 /**********************************************************************//**
1262 Add the given doc_id/word positions to the given node's ilist. */
1263 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1264 fts_cache_node_add_positions(
1265 /*=========================*/
1266 fts_cache_t* cache, /*!< in: cache */
1267 fts_node_t* node, /*!< in: word node */
1268 doc_id_t doc_id, /*!< in: doc id */
1269 ib_vector_t* positions) /*!< in: fts_token_t::positions */
1270 {
1271 ulint i;
1272 byte* ptr;
1273 byte* ilist;
1274 ulint enc_len;
1275 ulint last_pos;
1276 byte* ptr_start;
1277 ulint doc_id_delta;
1278
1279 #ifdef UNIV_DEBUG
1280 if (cache) {
1281 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1282 }
1283 #endif /* UNIV_DEBUG */
1284
1285 ut_ad(doc_id >= node->last_doc_id);
1286
1287 /* Calculate the space required to store the ilist. */
1288 doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1289 enc_len = fts_get_encoded_len(doc_id_delta);
1290
1291 last_pos = 0;
1292 for (i = 0; i < ib_vector_size(positions); i++) {
1293 ulint pos = *(static_cast<ulint*>(
1294 ib_vector_get(positions, i)));
1295
1296 ut_ad(last_pos == 0 || pos > last_pos);
1297
1298 enc_len += fts_get_encoded_len(pos - last_pos);
1299 last_pos = pos;
1300 }
1301
1302 /* The 0x00 byte at the end of the token positions list. */
1303 enc_len++;
1304
1305 if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1306 /* No need to allocate more space, we can fit in the new
1307 data at the end of the old one. */
1308 ilist = NULL;
1309 ptr = node->ilist + node->ilist_size;
1310 } else {
1311 ulint new_size = node->ilist_size + enc_len;
1312
1313 /* Over-reserve space by a fixed size for small lengths and
1314 by 20% for lengths >= 48 bytes. */
1315 if (new_size < 16) {
1316 new_size = 16;
1317 } else if (new_size < 32) {
1318 new_size = 32;
1319 } else if (new_size < 48) {
1320 new_size = 48;
1321 } else {
1322 new_size = (ulint)(1.2 * new_size);
1323 }
1324
1325 ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1326 ptr = ilist + node->ilist_size;
1327
1328 node->ilist_size_alloc = new_size;
1329 }
1330
1331 ptr_start = ptr;
1332
1333 /* Encode the new fragment. */
1334 ptr += fts_encode_int(doc_id_delta, ptr);
1335
1336 last_pos = 0;
1337 for (i = 0; i < ib_vector_size(positions); i++) {
1338 ulint pos = *(static_cast<ulint*>(
1339 ib_vector_get(positions, i)));
1340
1341 ptr += fts_encode_int(pos - last_pos, ptr);
1342 last_pos = pos;
1343 }
1344
1345 *ptr++ = 0;
1346
1347 ut_a(enc_len == (ulint)(ptr - ptr_start));
1348
1349 if (ilist) {
1350 /* Copy old ilist to the start of the new one and switch the
1351 new one into place in the node. */
1352 if (node->ilist_size > 0) {
1353 memcpy(ilist, node->ilist, node->ilist_size);
1354 ut_free(node->ilist);
1355 }
1356
1357 node->ilist = ilist;
1358 }
1359
1360 node->ilist_size += enc_len;
1361
1362 if (cache) {
1363 cache->total_size += enc_len;
1364 }
1365
1366 if (node->first_doc_id == FTS_NULL_DOC_ID) {
1367 node->first_doc_id = doc_id;
1368 }
1369
1370 node->last_doc_id = doc_id;
1371 ++node->doc_count;
1372 }
1373
1374 /**********************************************************************//**
1375 Add document to the cache. */
1376 static
1377 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1378 fts_cache_add_doc(
1379 /*==============*/
1380 fts_cache_t* cache, /*!< in: cache */
1381 fts_index_cache_t*
1382 index_cache, /*!< in: index cache */
1383 doc_id_t doc_id, /*!< in: doc id to add */
1384 ib_rbt_t* tokens) /*!< in: document tokens */
1385 {
1386 const ib_rbt_node_t* node;
1387 ulint n_words;
1388 fts_doc_stats_t* doc_stats;
1389
1390 if (!tokens) {
1391 return;
1392 }
1393
1394 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1395
1396 n_words = rbt_size(tokens);
1397
1398 for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1399
1400 fts_tokenizer_word_t* word;
1401 fts_node_t* fts_node = NULL;
1402 fts_token_t* token = rbt_value(fts_token_t, node);
1403
1404 /* Find and/or add token to the cache. */
1405 word = fts_tokenizer_word_get(
1406 cache, index_cache, &token->text);
1407
1408 if (!word) {
1409 ut_free(rbt_remove_node(tokens, node));
1410 continue;
1411 }
1412
1413 if (ib_vector_size(word->nodes) > 0) {
1414 fts_node = static_cast<fts_node_t*>(
1415 ib_vector_last(word->nodes));
1416 }
1417
1418 if (fts_node == NULL || fts_node->synced
1419 || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1420 || doc_id < fts_node->last_doc_id) {
1421
1422 fts_node = static_cast<fts_node_t*>(
1423 ib_vector_push(word->nodes, NULL));
1424
1425 memset(fts_node, 0x0, sizeof(*fts_node));
1426
1427 cache->total_size += sizeof(*fts_node);
1428 }
1429
1430 fts_cache_node_add_positions(
1431 cache, fts_node, doc_id, token->positions);
1432
1433 ut_free(rbt_remove_node(tokens, node));
1434 }
1435
1436 ut_a(rbt_empty(tokens));
1437
1438 /* Add to doc ids processed so far. */
1439 doc_stats = static_cast<fts_doc_stats_t*>(
1440 ib_vector_push(index_cache->doc_stats, NULL));
1441
1442 doc_stats->doc_id = doc_id;
1443 doc_stats->word_count = n_words;
1444
1445 /* Add the doc stats memory usage too. */
1446 cache->total_size += sizeof(*doc_stats);
1447
1448 if (doc_id > cache->sync->max_doc_id) {
1449 cache->sync->max_doc_id = doc_id;
1450 }
1451 }
1452
1453 /****************************************************************//**
1454 Drops a table. If the table can't be found we return a SUCCESS code.
1455 @return DB_SUCCESS or error code */
1456 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1457 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1458 fts_drop_table(
1459 /*===========*/
1460 trx_t* trx, /*!< in: transaction */
1461 const char* table_name) /*!< in: table to drop */
1462 {
1463 dict_table_t* table;
1464 dberr_t error = DB_SUCCESS;
1465
1466 /* Check that the table exists in our data dictionary.
1467 Similar to regular drop table case, we will open table with
1468 DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1469 table = dict_table_open_on_name(
1470 table_name, TRUE, FALSE,
1471 static_cast<dict_err_ignore_t>(
1472 DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1473
1474 if (table != 0) {
1475
1476 dict_table_close(table, TRUE, FALSE);
1477
1478 /* Pass nonatomic=false (dont allow data dict unlock),
1479 because the transaction may hold locks on SYS_* tables from
1480 previous calls to fts_drop_table(). */
1481 error = row_drop_table_for_mysql(table_name, trx, true, false);
1482
1483 if (error != DB_SUCCESS) {
1484 ib::error() << "Unable to drop FTS index aux table "
1485 << table_name << ": " << ut_strerr(error);
1486 }
1487 } else {
1488 error = DB_FAIL;
1489 }
1490
1491 return(error);
1492 }
1493
1494 /****************************************************************//**
1495 Rename a single auxiliary table due to database name change.
1496 @return DB_SUCCESS or error code */
1497 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1498 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1499 fts_rename_one_aux_table(
1500 /*=====================*/
1501 const char* new_name, /*!< in: new parent tbl name */
1502 const char* fts_table_old_name, /*!< in: old aux tbl name */
1503 trx_t* trx) /*!< in: transaction */
1504 {
1505 char fts_table_new_name[MAX_TABLE_NAME_LEN];
1506 ulint new_db_name_len = dict_get_db_name_len(new_name);
1507 ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1508 ulint table_new_name_len = strlen(fts_table_old_name)
1509 + new_db_name_len - old_db_name_len;
1510
1511 /* Check if the new and old database names are the same, if so,
1512 nothing to do */
1513 ut_ad((new_db_name_len != old_db_name_len)
1514 || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1515
1516 /* Get the database name from "new_name", and table name
1517 from the fts_table_old_name */
1518 strncpy(fts_table_new_name, new_name, new_db_name_len);
1519 strncpy(fts_table_new_name + new_db_name_len,
1520 strchr(fts_table_old_name, '/'),
1521 table_new_name_len - new_db_name_len);
1522 fts_table_new_name[table_new_name_len] = 0;
1523
1524 return(row_rename_table_for_mysql(
1525 fts_table_old_name, fts_table_new_name, trx, false));
1526 }
1527
1528 /****************************************************************//**
1529 Rename auxiliary tables for all fts index for a table. This(rename)
1530 is due to database name change
1531 @return DB_SUCCESS or error code */
1532 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1533 fts_rename_aux_tables(
1534 /*==================*/
1535 dict_table_t* table, /*!< in: user Table */
1536 const char* new_name, /*!< in: new table name */
1537 trx_t* trx) /*!< in: transaction */
1538 {
1539 ulint i;
1540 fts_table_t fts_table;
1541
1542 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1543
1544 /* Rename common auxiliary tables */
1545 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1546 char old_table_name[MAX_FULL_NAME_LEN];
1547 dberr_t err = DB_SUCCESS;
1548
1549 fts_table.suffix = fts_common_tables[i];
1550
1551 fts_get_table_name(&fts_table, old_table_name);
1552
1553 err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1554
1555 if (err != DB_SUCCESS) {
1556 return(err);
1557 }
1558 }
1559
1560 fts_t* fts = table->fts;
1561
1562 /* Rename index specific auxiliary tables */
1563 for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1564 ++i) {
1565 dict_index_t* index;
1566
1567 index = static_cast<dict_index_t*>(
1568 ib_vector_getp(fts->indexes, i));
1569
1570 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1571
1572 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1573 dberr_t err;
1574 char old_table_name[MAX_FULL_NAME_LEN];
1575
1576 fts_table.suffix = fts_get_suffix(j);
1577
1578 fts_get_table_name(&fts_table, old_table_name);
1579
1580 err = fts_rename_one_aux_table(
1581 new_name, old_table_name, trx);
1582
1583 DBUG_EXECUTE_IF("fts_rename_failure",
1584 err = DB_DEADLOCK;
1585 fts_sql_rollback(trx););
1586
1587 if (err != DB_SUCCESS) {
1588 return(err);
1589 }
1590 }
1591 }
1592
1593 return(DB_SUCCESS);
1594 }
1595
1596 /****************************************************************//**
1597 Drops the common ancillary tables needed for supporting an FTS index
1598 on the given table. row_mysql_lock_data_dictionary must have been called
1599 before this.
1600 @return DB_SUCCESS or error code */
1601 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1602 dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table)1603 fts_drop_common_tables(
1604 /*===================*/
1605 trx_t* trx, /*!< in: transaction */
1606 fts_table_t* fts_table) /*!< in: table with an FTS
1607 index */
1608 {
1609 ulint i;
1610 dberr_t error = DB_SUCCESS;
1611
1612 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1613 dberr_t err;
1614 char table_name[MAX_FULL_NAME_LEN];
1615
1616 fts_table->suffix = fts_common_tables[i];
1617
1618 fts_get_table_name(fts_table, table_name);
1619
1620 err = fts_drop_table(trx, table_name);
1621
1622 /* We only return the status of the last error. */
1623 if (err != DB_SUCCESS && err != DB_FAIL) {
1624 error = err;
1625 }
1626 }
1627
1628 return(error);
1629 }
1630
1631 /****************************************************************//**
1632 Since we do a horizontal split on the index table, we need to drop
1633 all the split tables.
1634 @return DB_SUCCESS or error code */
1635 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1636 fts_drop_index_split_tables(
1637 /*========================*/
1638 trx_t* trx, /*!< in: transaction */
1639 dict_index_t* index) /*!< in: fts instance */
1640
1641 {
1642 ulint i;
1643 fts_table_t fts_table;
1644 dberr_t error = DB_SUCCESS;
1645
1646 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1647
1648 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1649 dberr_t err;
1650 char table_name[MAX_FULL_NAME_LEN];
1651
1652 fts_table.suffix = fts_get_suffix(i);
1653
1654 fts_get_table_name(&fts_table, table_name);
1655
1656 err = fts_drop_table(trx, table_name);
1657
1658 /* We only return the status of the last error. */
1659 if (err != DB_SUCCESS && err != DB_FAIL) {
1660 error = err;
1661 }
1662 }
1663
1664 return(error);
1665 }
1666
1667 /****************************************************************//**
1668 Drops FTS auxiliary tables for an FTS index
1669 @return DB_SUCCESS or error code */
1670 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1671 fts_drop_index_tables(
1672 /*==================*/
1673 trx_t* trx, /*!< in: transaction */
1674 dict_index_t* index) /*!< in: Index to drop */
1675 {
1676 dberr_t error = DB_SUCCESS;
1677
1678 #ifdef FTS_DOC_STATS_DEBUG
1679 fts_table_t fts_table;
1680 static const char* index_tables[] = {
1681 "DOC_ID",
1682 NULL
1683 };
1684 #endif /* FTS_DOC_STATS_DEBUG */
1685
1686 dberr_t err = fts_drop_index_split_tables(trx, index);
1687
1688 /* We only return the status of the last error. */
1689 if (err != DB_SUCCESS) {
1690 error = err;
1691 }
1692
1693 #ifdef FTS_DOC_STATS_DEBUG
1694 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1695
1696 for (ulint i = 0; index_tables[i] != NULL; ++i) {
1697 char table_name[MAX_FULL_NAME_LEN];
1698
1699 fts_table.suffix = index_tables[i];
1700
1701 fts_get_table_name(&fts_table, table_name);
1702
1703 err = fts_drop_table(trx, table_name);
1704
1705 /* We only return the status of the last error. */
1706 if (err != DB_SUCCESS && err != DB_FAIL) {
1707 error = err;
1708 }
1709 }
1710 #endif /* FTS_DOC_STATS_DEBUG */
1711
1712 return(error);
1713 }
1714
1715 /****************************************************************//**
1716 Drops FTS ancillary tables needed for supporting an FTS index
1717 on the given table. row_mysql_lock_data_dictionary must have been called
1718 before this.
1719 @return DB_SUCCESS or error code */
1720 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1721 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1722 fts_drop_all_index_tables(
1723 /*======================*/
1724 trx_t* trx, /*!< in: transaction */
1725 fts_t* fts) /*!< in: fts instance */
1726 {
1727 dberr_t error = DB_SUCCESS;
1728
1729 for (ulint i = 0;
1730 fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1731 ++i) {
1732
1733 dberr_t err;
1734 dict_index_t* index;
1735
1736 index = static_cast<dict_index_t*>(
1737 ib_vector_getp(fts->indexes, i));
1738
1739 err = fts_drop_index_tables(trx, index);
1740
1741 if (err != DB_SUCCESS) {
1742 error = err;
1743 }
1744 }
1745
1746 return(error);
1747 }
1748
1749 /*********************************************************************//**
1750 Drops the ancillary tables needed for supporting an FTS index on a
1751 given table. row_mysql_lock_data_dictionary must have been called before
1752 this.
1753 @return DB_SUCCESS or error code */
1754 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1755 fts_drop_tables(
1756 /*============*/
1757 trx_t* trx, /*!< in: transaction */
1758 dict_table_t* table) /*!< in: table has the FTS index */
1759 {
1760 dberr_t error;
1761 fts_table_t fts_table;
1762
1763 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1764
1765 /* TODO: This is not atomic and can cause problems during recovery. */
1766
1767 error = fts_drop_common_tables(trx, &fts_table);
1768
1769 if (error == DB_SUCCESS) {
1770 error = fts_drop_all_index_tables(trx, table->fts);
1771 }
1772
1773 return(error);
1774 }
1775
1776 /** Extract only the required flags from table->flags2 for FTS Aux
1777 tables.
1778 @param[in] in_flags2 Table flags2
1779 @return extracted flags2 for FTS aux tables */
1780 static inline
1781 ulint
fts_get_table_flags2_for_aux_tables(ulint flags2)1782 fts_get_table_flags2_for_aux_tables(
1783 ulint flags2)
1784 {
1785 /* Extract the file_per_table flag, temporary file flag and
1786 encryption flag from the main FTS table flags2 */
1787 return((flags2 & DICT_TF2_USE_FILE_PER_TABLE) |
1788 (flags2 & DICT_TF2_ENCRYPTION) |
1789 (flags2 & DICT_TF2_TEMPORARY));
1790 }
1791
1792 /** Create dict_table_t object for FTS Aux tables.
1793 @param[in] aux_table_name FTS Aux table name
1794 @param[in] table table object of FTS Index
1795 @param[in] n_cols number of columns for FTS Aux table
1796 @return table object for FTS Aux table */
1797 static
1798 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1799 fts_create_in_mem_aux_table(
1800 const char* aux_table_name,
1801 const dict_table_t* table,
1802 ulint n_cols)
1803 {
1804 dict_table_t* new_table = dict_mem_table_create(
1805 aux_table_name, table->space, n_cols, 0, table->flags,
1806 fts_get_table_flags2_for_aux_tables(table->flags2));
1807
1808 if (DICT_TF_HAS_SHARED_SPACE(table->flags)) {
1809 ut_ad(table->space == fil_space_get_id_by_name(
1810 table->tablespace()));
1811 new_table->tablespace = mem_heap_strdup(
1812 new_table->heap, table->tablespace);
1813 }
1814
1815 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1816 ut_ad(table->data_dir_path != NULL);
1817 new_table->data_dir_path = mem_heap_strdup(
1818 new_table->heap, table->data_dir_path);
1819 }
1820
1821 return(new_table);
1822 }
1823
1824 /** Function to create on FTS common table.
1825 @param[in,out] trx InnoDB transaction
1826 @param[in] table Table that has FTS Index
1827 @param[in] fts_table_name FTS AUX table name
1828 @param[in] fts_suffix FTS AUX table suffix
1829 @param[in] heap heap
1830 @return table object if created, else NULL */
1831 static
1832 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1833 fts_create_one_common_table(
1834 trx_t* trx,
1835 const dict_table_t* table,
1836 const char* fts_table_name,
1837 const char* fts_suffix,
1838 mem_heap_t* heap)
1839 {
1840 dict_table_t* new_table = NULL;
1841 dberr_t error;
1842 bool is_config = strcmp(fts_suffix, "CONFIG") == 0;
1843
1844 if (!is_config) {
1845
1846 new_table = fts_create_in_mem_aux_table(
1847 fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1848
1849 dict_mem_table_add_col(
1850 new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1851 FTS_DELETED_TABLE_COL_LEN);
1852 } else {
1853 /* Config table has different schema. */
1854 new_table = fts_create_in_mem_aux_table(
1855 fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1856
1857 dict_mem_table_add_col(
1858 new_table, heap, "key", DATA_VARCHAR, 0,
1859 FTS_CONFIG_TABLE_KEY_COL_LEN);
1860
1861 dict_mem_table_add_col(
1862 new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1863 FTS_CONFIG_TABLE_VALUE_COL_LEN);
1864 }
1865
1866 error = row_create_table_for_mysql(new_table, NULL, trx, false);
1867
1868 if (error == DB_SUCCESS) {
1869
1870 dict_index_t* index = dict_mem_index_create(
1871 fts_table_name, "FTS_COMMON_TABLE_IND",
1872 new_table->space, DICT_UNIQUE|DICT_CLUSTERED, 1);
1873
1874 if (!is_config) {
1875 dict_mem_index_add_field(index, "doc_id", 0);
1876 } else {
1877 dict_mem_index_add_field(index, "key", 0);
1878 }
1879
1880 /* We save and restore trx->dict_operation because
1881 row_create_index_for_mysql() changes the operation to
1882 TRX_DICT_OP_TABLE. */
1883 trx_dict_op_t op = trx_get_dict_operation(trx);
1884
1885 error = row_create_index_for_mysql(index, trx, NULL, NULL);
1886
1887 trx->dict_operation = op;
1888 }
1889
1890 if (error != DB_SUCCESS) {
1891 trx->error_state = error;
1892 dict_mem_table_free(new_table);
1893 new_table = NULL;
1894 ib::warn() << "Failed to create FTS common table "
1895 << fts_table_name;
1896 }
1897 return(new_table);
1898 }
1899
1900 /** Creates the common auxiliary tables needed for supporting an FTS index
1901 on the given table. row_mysql_lock_data_dictionary must have been called
1902 before this.
1903 The following tables are created.
1904 CREATE TABLE $FTS_PREFIX_DELETED
1905 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1906 CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1907 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1908 CREATE TABLE $FTS_PREFIX_BEING_DELETED
1909 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1910 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1911 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1912 CREATE TABLE $FTS_PREFIX_CONFIG
1913 (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1914 @param[in,out] trx transaction
1915 @param[in] table table with FTS index
1916 @param[in] name table name normalized
1917 @param[in] skip_doc_id_index Skip index on doc id
1918 @return DB_SUCCESS if succeed */
1919 dberr_t
fts_create_common_tables(trx_t * trx,const dict_table_t * table,const char * name,bool skip_doc_id_index)1920 fts_create_common_tables(
1921 trx_t* trx,
1922 const dict_table_t* table,
1923 const char* name,
1924 bool skip_doc_id_index)
1925 {
1926 dberr_t error;
1927 que_t* graph;
1928 fts_table_t fts_table;
1929 mem_heap_t* heap = mem_heap_create(1024);
1930 pars_info_t* info;
1931 char fts_name[MAX_FULL_NAME_LEN];
1932 char full_name[sizeof(fts_common_tables) / sizeof(char*)]
1933 [MAX_FULL_NAME_LEN];
1934
1935 dict_index_t* index = NULL;
1936 trx_dict_op_t op;
1937 /* common_tables vector is used for dropping FTS common tables
1938 on error condition. */
1939 std::vector<dict_table_t*> common_tables;
1940 std::vector<dict_table_t*>::const_iterator it;
1941
1942 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1943
1944 error = fts_drop_common_tables(trx, &fts_table);
1945
1946 if (error != DB_SUCCESS) {
1947
1948 goto func_exit;
1949 }
1950
1951 /* Create the FTS tables that are common to an FTS index. */
1952 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
1953
1954 fts_table.suffix = fts_common_tables[i];
1955 fts_get_table_name(&fts_table, full_name[i]);
1956 dict_table_t* common_table = fts_create_one_common_table(
1957 trx, table, full_name[i], fts_table.suffix, heap);
1958
1959 if (common_table == NULL) {
1960 error = DB_ERROR;
1961 goto func_exit;
1962 } else {
1963 common_tables.push_back(common_table);
1964 }
1965
1966 DBUG_EXECUTE_IF("ib_fts_aux_table_error",
1967 /* Return error after creating FTS_AUX_CONFIG table. */
1968 if (i == 4) {
1969 error = DB_ERROR;
1970 goto func_exit;
1971 }
1972 );
1973
1974 }
1975
1976 /* Write the default settings to the config table. */
1977 info = pars_info_create();
1978
1979 fts_table.suffix = "CONFIG";
1980 fts_get_table_name(&fts_table, fts_name);
1981 pars_info_bind_id(info, true, "config_table", fts_name);
1982
1983 graph = fts_parse_sql_no_dict_lock(
1984 &fts_table, info, fts_config_table_insert_values_sql);
1985
1986 error = fts_eval_sql(trx, graph);
1987
1988 que_graph_free(graph);
1989
1990 if (error != DB_SUCCESS || skip_doc_id_index) {
1991
1992 goto func_exit;
1993 }
1994
1995 index = dict_mem_index_create(
1996 name, FTS_DOC_ID_INDEX_NAME, table->space,
1997 DICT_UNIQUE, 1);
1998 dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
1999
2000 op = trx_get_dict_operation(trx);
2001
2002 error = row_create_index_for_mysql(index, trx, NULL, NULL);
2003
2004 trx->dict_operation = op;
2005
2006 func_exit:
2007 if (error != DB_SUCCESS) {
2008
2009 for (it = common_tables.begin(); it != common_tables.end();
2010 ++it) {
2011 row_drop_table_for_mysql(
2012 (*it)->name.m_name, trx, FALSE);
2013 }
2014 }
2015
2016 common_tables.clear();
2017 mem_heap_free(heap);
2018
2019 return(error);
2020 }
2021 /** Creates one FTS auxiliary index table for an FTS index.
2022 @param[in,out] trx transaction
2023 @param[in] index the index instance
2024 @param[in] fts_table fts_table structure
2025 @param[in] heap memory heap
2026 @return DB_SUCCESS or error code */
2027 static
2028 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)2029 fts_create_one_index_table(
2030 trx_t* trx,
2031 const dict_index_t* index,
2032 fts_table_t* fts_table,
2033 mem_heap_t* heap)
2034 {
2035 dict_field_t* field;
2036 dict_table_t* new_table = NULL;
2037 char table_name[MAX_FULL_NAME_LEN];
2038 dberr_t error;
2039 CHARSET_INFO* charset;
2040
2041 ut_ad(index->type & DICT_FTS);
2042
2043 fts_get_table_name(fts_table, table_name);
2044
2045 new_table = fts_create_in_mem_aux_table(
2046 table_name, fts_table->table,
2047 FTS_AUX_INDEX_TABLE_NUM_COLS);
2048
2049 field = dict_index_get_nth_field(index, 0);
2050 charset = fts_get_charset(field->col->prtype);
2051
2052 dict_mem_table_add_col(new_table, heap, "word",
2053 charset == &my_charset_latin1
2054 ? DATA_VARCHAR : DATA_VARMYSQL,
2055 field->col->prtype,
2056 FTS_INDEX_WORD_LEN);
2057
2058 dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
2059 DATA_NOT_NULL | DATA_UNSIGNED,
2060 FTS_INDEX_FIRST_DOC_ID_LEN);
2061
2062 dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2063 DATA_NOT_NULL | DATA_UNSIGNED,
2064 FTS_INDEX_LAST_DOC_ID_LEN);
2065
2066 dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2067 DATA_NOT_NULL | DATA_UNSIGNED,
2068 FTS_INDEX_DOC_COUNT_LEN);
2069
2070 /* The precise type calculation is as follows:
2071 least signficiant byte: MySQL type code (not applicable for sys cols)
2072 second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2073 third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2074
2075 dict_mem_table_add_col(
2076 new_table, heap, "ilist", DATA_BLOB,
2077 (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2078 FTS_INDEX_ILIST_LEN);
2079
2080 error = row_create_table_for_mysql(new_table, NULL, trx, false);
2081
2082 if (error == DB_SUCCESS) {
2083 dict_index_t* index = dict_mem_index_create(
2084 table_name, "FTS_INDEX_TABLE_IND", new_table->space,
2085 DICT_UNIQUE|DICT_CLUSTERED, 2);
2086 dict_mem_index_add_field(index, "word", 0);
2087 dict_mem_index_add_field(index, "first_doc_id", 0);
2088
2089 trx_dict_op_t op = trx_get_dict_operation(trx);
2090
2091 error = row_create_index_for_mysql(index, trx, NULL, NULL);
2092
2093 trx->dict_operation = op;
2094 }
2095
2096 if (error != DB_SUCCESS) {
2097 trx->error_state = error;
2098 dict_mem_table_free(new_table);
2099 new_table = NULL;
2100 ib::warn() << "Failed to create FTS index table "
2101 << table_name;
2102 }
2103
2104 return(new_table);
2105 }
2106
2107 /** Create auxiliary index tables for an FTS index.
2108 @param[in,out] trx transaction
2109 @param[in] index the index instance
2110 @param[in] table_name table name
2111 @param[in] table_id the table id
2112 @return DB_SUCCESS or error code */
2113 dberr_t
fts_create_index_tables_low(trx_t * trx,const dict_index_t * index,const char * table_name,table_id_t table_id)2114 fts_create_index_tables_low(
2115 trx_t* trx,
2116 const dict_index_t* index,
2117 const char* table_name,
2118 table_id_t table_id)
2119 {
2120 ulint i;
2121 fts_table_t fts_table;
2122 dberr_t error = DB_SUCCESS;
2123 mem_heap_t* heap = mem_heap_create(1024);
2124
2125 fts_table.type = FTS_INDEX_TABLE;
2126 fts_table.index_id = index->id;
2127 fts_table.table_id = table_id;
2128 fts_table.parent = table_name;
2129 fts_table.table = index->table;
2130
2131 #ifdef FTS_DOC_STATS_DEBUG
2132 /* Create the FTS auxiliary tables that are specific
2133 to an FTS index. */
2134 info = pars_info_create();
2135
2136 fts_table.suffix = "DOC_ID";
2137 fts_get_table_name(&fts_table, fts_name);
2138
2139 pars_info_bind_id(info, true, "doc_id_table", fts_name);
2140
2141 graph = fts_parse_sql_no_dict_lock(NULL, info,
2142 fts_create_index_tables_sql);
2143
2144 error = fts_eval_sql(trx, graph);
2145 que_graph_free(graph);
2146 #endif /* FTS_DOC_STATS_DEBUG */
2147
2148 /* aux_idx_tables vector is used for dropping FTS AUX INDEX
2149 tables on error condition. */
2150 std::vector<dict_table_t*> aux_idx_tables;
2151 std::vector<dict_table_t*>::const_iterator it;
2152
2153 for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2154 dict_table_t* new_table;
2155
2156 /* Create the FTS auxiliary tables that are specific
2157 to an FTS index. We need to preserve the table_id %s
2158 which fts_parse_sql_no_dict_lock() will fill in for us. */
2159 fts_table.suffix = fts_get_suffix(i);
2160
2161 new_table = fts_create_one_index_table(
2162 trx, index, &fts_table, heap);
2163
2164 if (new_table == NULL) {
2165 error = DB_FAIL;
2166 break;
2167 } else {
2168 aux_idx_tables.push_back(new_table);
2169 }
2170
2171 DBUG_EXECUTE_IF("ib_fts_index_table_error",
2172 /* Return error after creating FTS_INDEX_5
2173 aux table. */
2174 if (i == 4) {
2175 error = DB_FAIL;
2176 break;
2177 }
2178 );
2179 }
2180
2181 if (error != DB_SUCCESS) {
2182
2183 for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2184 ++it) {
2185 row_drop_table_for_mysql(
2186 (*it)->name.m_name, trx, FALSE);
2187 }
2188 }
2189
2190 aux_idx_tables.clear();
2191 mem_heap_free(heap);
2192
2193 return(error);
2194 }
2195
2196 /** Creates the column specific ancillary tables needed for supporting an
2197 FTS index on the given table. row_mysql_lock_data_dictionary must have
2198 been called before this.
2199
2200 All FTS AUX Index tables have the following schema.
2201 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2202 word VARCHAR(FTS_MAX_WORD_LEN),
2203 first_doc_id INT NOT NULL,
2204 last_doc_id UNSIGNED NOT NULL,
2205 doc_count UNSIGNED INT NOT NULL,
2206 ilist VARBINARY NOT NULL,
2207 UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2208 @param[in,out] trx transaction
2209 @param[in] index index instance
2210 @return DB_SUCCESS or error code */
2211 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index)2212 fts_create_index_tables(
2213 trx_t* trx,
2214 const dict_index_t* index)
2215 {
2216 dberr_t err;
2217 dict_table_t* table;
2218
2219 table = dict_table_get_low(index->table_name);
2220 ut_a(table != NULL);
2221
2222 err = fts_create_index_tables_low(
2223 trx, index, table->name.m_name, table->id);
2224
2225 if (err == DB_SUCCESS) {
2226 trx_commit(trx);
2227 }
2228
2229 return(err);
2230 }
2231 #if 0
2232 /******************************************************************//**
2233 Return string representation of state. */
2234 static
2235 const char*
2236 fts_get_state_str(
2237 /*==============*/
2238 /* out: string representation of state */
2239 fts_row_state state) /*!< in: state */
2240 {
2241 switch (state) {
2242 case FTS_INSERT:
2243 return("INSERT");
2244
2245 case FTS_MODIFY:
2246 return("MODIFY");
2247
2248 case FTS_DELETE:
2249 return("DELETE");
2250
2251 case FTS_NOTHING:
2252 return("NOTHING");
2253
2254 case FTS_INVALID:
2255 return("INVALID");
2256
2257 default:
2258 return("UNKNOWN");
2259 }
2260 }
2261 #endif
2262
2263 /******************************************************************//**
2264 Calculate the new state of a row given the existing state and a new event.
2265 @return new state of row */
2266 static
2267 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2268 fts_trx_row_get_new_state(
2269 /*======================*/
2270 fts_row_state old_state, /*!< in: existing state of row */
2271 fts_row_state event) /*!< in: new event */
2272 {
2273 /* The rules for transforming states:
2274
2275 I = inserted
2276 M = modified
2277 D = deleted
2278 N = nothing
2279
2280 M+D -> D:
2281
2282 If the row existed before the transaction started and it is modified
2283 during the transaction, followed by a deletion of the row, only the
2284 deletion will be signaled.
2285
2286 M+ -> M:
2287
2288 If the row existed before the transaction started and it is modified
2289 more than once during the transaction, only the last modification
2290 will be signaled.
2291
2292 IM*D -> N:
2293
2294 If a new row is added during the transaction (and possibly modified
2295 after its initial insertion) but it is deleted before the end of the
2296 transaction, nothing will be signaled.
2297
2298 IM* -> I:
2299
2300 If a new row is added during the transaction and modified after its
2301 initial insertion, only the addition will be signaled.
2302
2303 M*DI -> M:
2304
2305 If the row existed before the transaction started and it is deleted,
2306 then re-inserted, only a modification will be signaled. Note that
2307 this case is only possible if the table is using the row's primary
2308 key for FTS row ids, since those can be re-inserted by the user,
2309 which is not true for InnoDB generated row ids.
2310
2311 It is easily seen that the above rules decompose such that we do not
2312 need to store the row's entire history of events. Instead, we can
2313 store just one state for the row and update that when new events
2314 arrive. Then we can implement the above rules as a two-dimensional
2315 look-up table, and get checking of invalid combinations "for free"
2316 in the process. */
2317
2318 /* The lookup table for transforming states. old_state is the
2319 Y-axis, event is the X-axis. */
2320 static const fts_row_state table[4][4] = {
2321 /* I M D N */
2322 /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
2323 /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
2324 /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
2325 /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2326 };
2327
2328 fts_row_state result;
2329
2330 ut_a(old_state < FTS_INVALID);
2331 ut_a(event < FTS_INVALID);
2332
2333 result = table[(int) old_state][(int) event];
2334 ut_a(result != FTS_INVALID);
2335
2336 return(result);
2337 }
2338
2339 /******************************************************************//**
2340 Create a savepoint instance.
2341 @return savepoint instance */
2342 static
2343 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2344 fts_savepoint_create(
2345 /*=================*/
2346 ib_vector_t* savepoints, /*!< out: InnoDB transaction */
2347 const char* name, /*!< in: savepoint name */
2348 mem_heap_t* heap) /*!< in: heap */
2349 {
2350 fts_savepoint_t* savepoint;
2351
2352 savepoint = static_cast<fts_savepoint_t*>(
2353 ib_vector_push(savepoints, NULL));
2354
2355 memset(savepoint, 0x0, sizeof(*savepoint));
2356
2357 if (name) {
2358 savepoint->name = mem_heap_strdup(heap, name);
2359 }
2360
2361 savepoint->tables = rbt_create(
2362 sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2363
2364 return(savepoint);
2365 }
2366
2367 /******************************************************************//**
2368 Create an FTS trx.
2369 @return FTS trx */
2370 static
2371 fts_trx_t*
fts_trx_create(trx_t * trx)2372 fts_trx_create(
2373 /*===========*/
2374 trx_t* trx) /*!< in/out: InnoDB
2375 transaction */
2376 {
2377 fts_trx_t* ftt;
2378 ib_alloc_t* heap_alloc;
2379 mem_heap_t* heap = mem_heap_create(1024);
2380 trx_named_savept_t* savep;
2381
2382 ut_a(trx->fts_trx == NULL);
2383
2384 ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2385 ftt->trx = trx;
2386 ftt->heap = heap;
2387
2388 heap_alloc = ib_heap_allocator_create(heap);
2389
2390 ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2391 heap_alloc, sizeof(fts_savepoint_t), 4));
2392
2393 ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2394 heap_alloc, sizeof(fts_savepoint_t), 4));
2395
2396 /* Default instance has no name and no heap. */
2397 fts_savepoint_create(ftt->savepoints, NULL, NULL);
2398 fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2399
2400 /* Copy savepoints that already set before. */
2401 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2402 savep != NULL;
2403 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2404
2405 fts_savepoint_take(trx, ftt, savep->name);
2406 }
2407
2408 return(ftt);
2409 }
2410
2411 /******************************************************************//**
2412 Create an FTS trx table.
2413 @return FTS trx table */
2414 static
2415 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2416 fts_trx_table_create(
2417 /*=================*/
2418 fts_trx_t* fts_trx, /*!< in: FTS trx */
2419 dict_table_t* table) /*!< in: table */
2420 {
2421 fts_trx_table_t* ftt;
2422
2423 ftt = static_cast<fts_trx_table_t*>(
2424 mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2425
2426 memset(ftt, 0x0, sizeof(*ftt));
2427
2428 ftt->table = table;
2429 ftt->fts_trx = fts_trx;
2430
2431 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2432
2433 return(ftt);
2434 }
2435
2436 /******************************************************************//**
2437 Clone an FTS trx table.
2438 @return FTS trx table */
2439 static
2440 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2441 fts_trx_table_clone(
2442 /*=================*/
2443 const fts_trx_table_t* ftt_src) /*!< in: FTS trx */
2444 {
2445 fts_trx_table_t* ftt;
2446
2447 ftt = static_cast<fts_trx_table_t*>(
2448 mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2449
2450 memset(ftt, 0x0, sizeof(*ftt));
2451
2452 ftt->table = ftt_src->table;
2453 ftt->fts_trx = ftt_src->fts_trx;
2454
2455 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2456
2457 /* Copy the rb tree values to the new savepoint. */
2458 rbt_merge_uniq(ftt->rows, ftt_src->rows);
2459
2460 /* These are only added on commit. At this stage we only have
2461 the updated row state. */
2462 ut_a(ftt_src->added_doc_ids == NULL);
2463
2464 return(ftt);
2465 }
2466
2467 /******************************************************************//**
2468 Initialize the FTS trx instance.
2469 @return FTS trx instance */
2470 static
2471 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2472 fts_trx_init(
2473 /*=========*/
2474 trx_t* trx, /*!< in: transaction */
2475 dict_table_t* table, /*!< in: FTS table instance */
2476 ib_vector_t* savepoints) /*!< in: Savepoints */
2477 {
2478 fts_trx_table_t* ftt;
2479 ib_rbt_bound_t parent;
2480 ib_rbt_t* tables;
2481 fts_savepoint_t* savepoint;
2482
2483 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2484
2485 tables = savepoint->tables;
2486 rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2487
2488 if (parent.result == 0) {
2489 fts_trx_table_t** fttp;
2490
2491 fttp = rbt_value(fts_trx_table_t*, parent.last);
2492 ftt = *fttp;
2493 } else {
2494 ftt = fts_trx_table_create(trx->fts_trx, table);
2495 rbt_add_node(tables, &parent, &ftt);
2496 }
2497
2498 ut_a(ftt->table == table);
2499
2500 return(ftt);
2501 }
2502
2503 /******************************************************************//**
2504 Notify the FTS system about an operation on an FTS-indexed table. */
2505 static
2506 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2507 fts_trx_table_add_op(
2508 /*=================*/
2509 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2510 doc_id_t doc_id, /*!< in: doc id */
2511 fts_row_state state, /*!< in: state of the row */
2512 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
2513 {
2514 ib_rbt_t* rows;
2515 ib_rbt_bound_t parent;
2516
2517 rows = ftt->rows;
2518 rbt_search(rows, &parent, &doc_id);
2519
2520 /* Row id found, update state, and if new state is FTS_NOTHING,
2521 we delete the row from our tree. */
2522 if (parent.result == 0) {
2523 fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
2524
2525 row->state = fts_trx_row_get_new_state(row->state, state);
2526
2527 if (row->state == FTS_NOTHING) {
2528 if (row->fts_indexes) {
2529 ib_vector_free(row->fts_indexes);
2530 }
2531
2532 ut_free(rbt_remove_node(rows, parent.last));
2533 row = NULL;
2534 } else if (row->fts_indexes != NULL) {
2535 ib_vector_free(row->fts_indexes);
2536 row->fts_indexes = fts_indexes;
2537 }
2538
2539 } else { /* Row-id not found, create a new one. */
2540 fts_trx_row_t row;
2541
2542 row.doc_id = doc_id;
2543 row.state = state;
2544 row.fts_indexes = fts_indexes;
2545
2546 rbt_add_node(rows, &parent, &row);
2547 }
2548 }
2549
2550 /******************************************************************//**
2551 Notify the FTS system about an operation on an FTS-indexed table. */
2552 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2553 fts_trx_add_op(
2554 /*===========*/
2555 trx_t* trx, /*!< in: InnoDB transaction */
2556 dict_table_t* table, /*!< in: table */
2557 doc_id_t doc_id, /*!< in: new doc id */
2558 fts_row_state state, /*!< in: state of the row */
2559 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
2560 (NULL=all) */
2561 {
2562 fts_trx_table_t* tran_ftt;
2563 fts_trx_table_t* stmt_ftt;
2564
2565 if (!trx->fts_trx) {
2566 trx->fts_trx = fts_trx_create(trx);
2567 }
2568
2569 tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2570 stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2571
2572 fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2573 fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2574 }
2575
2576 /******************************************************************//**
2577 Fetch callback that converts a textual document id to a binary value and
2578 stores it in the given place.
2579 @return always returns NULL */
2580 static
2581 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2582 fts_fetch_store_doc_id(
2583 /*===================*/
2584 void* row, /*!< in: sel_node_t* */
2585 void* user_arg) /*!< in: doc_id_t* to store
2586 doc_id in */
2587 {
2588 int n_parsed;
2589 sel_node_t* node = static_cast<sel_node_t*>(row);
2590 doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
2591 dfield_t* dfield = que_node_get_val(node->select_list);
2592 dtype_t* type = dfield_get_type(dfield);
2593 ulint len = dfield_get_len(dfield);
2594
2595 char buf[32];
2596
2597 ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2598 ut_a(len > 0 && len < sizeof(buf));
2599
2600 memcpy(buf, dfield_get_data(dfield), len);
2601 buf[len] = '\0';
2602
2603 n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2604 ut_a(n_parsed == 1);
2605
2606 return(FALSE);
2607 }
2608
2609 #ifdef FTS_CACHE_SIZE_DEBUG
2610 /******************************************************************//**
2611 Get the max cache size in bytes. If there is an error reading the
2612 value we simply print an error message here and return the default
2613 value to the caller.
2614 @return max cache size in bytes */
2615 static
2616 ulint
fts_get_max_cache_size(trx_t * trx,fts_table_t * fts_table)2617 fts_get_max_cache_size(
2618 /*===================*/
2619 trx_t* trx, /*!< in: transaction */
2620 fts_table_t* fts_table) /*!< in: table instance */
2621 {
2622 dberr_t error;
2623 fts_string_t value;
2624 ulint cache_size_in_mb;
2625
2626 /* Set to the default value. */
2627 cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
2628
2629 /* We set the length of value to the max bytes it can hold. This
2630 information is used by the callback that reads the value. */
2631 value.f_n_char = 0;
2632 value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
2633 value.f_str = ut_malloc_nokey(value.f_len + 1);
2634
2635 error = fts_config_get_value(
2636 trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);
2637
2638 if (error == DB_SUCCESS) {
2639
2640 value.f_str[value.f_len] = 0;
2641 cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);
2642
2643 if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {
2644
2645 ib::warn() << "FTS max cache size ("
2646 << cache_size_in_mb << ") out of range."
2647 " Minimum value is "
2648 << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
2649 << "MB and the maximum value is "
2650 << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
2651 << "MB, setting cache size to upper limit";
2652
2653 cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;
2654
2655 } else if (cache_size_in_mb
2656 < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {
2657
2658 ib::warn() << "FTS max cache size ("
2659 << cache_size_in_mb << ") out of range."
2660 " Minimum value is "
2661 << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
2662 << "MB and the maximum value is"
2663 << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
2664 << "MB, setting cache size to lower limit";
2665
2666 cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
2667 }
2668 } else {
2669 ib::error() << "(" << ut_strerr(error) << ") reading max"
2670 " cache config value from config table";
2671 }
2672
2673 ut_free(value.f_str);
2674
2675 return(cache_size_in_mb * 1024 * 1024);
2676 }
2677 #endif
2678
2679 #ifdef FTS_DOC_STATS_DEBUG
2680 /*********************************************************************//**
2681 Get the total number of words in the FTS for a particular FTS index.
2682 @return DB_SUCCESS if all OK else error code */
2683 dberr_t
fts_get_total_word_count(trx_t * trx,dict_index_t * index,ulint * total)2684 fts_get_total_word_count(
2685 /*=====================*/
2686 trx_t* trx, /*!< in: transaction */
2687 dict_index_t* index, /*!< in: for this index */
2688 ulint* total) /* out: total words */
2689 {
2690 dberr_t error;
2691 fts_string_t value;
2692
2693 *total = 0;
2694
2695 /* We set the length of value to the max bytes it can hold. This
2696 information is used by the callback that reads the value. */
2697 value.f_n_char = 0;
2698 value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
2699 value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
2700
2701 error = fts_config_get_index_value(
2702 trx, index, FTS_TOTAL_WORD_COUNT, &value);
2703
2704 if (error == DB_SUCCESS) {
2705
2706 value.f_str[value.f_len] = 0;
2707 *total = strtoul((char*) value.f_str, NULL, 10);
2708 } else {
2709 ib::error() << "(" << ut_strerr(error) << ") reading total"
2710 " words value from config table";
2711 }
2712
2713 ut_free(value.f_str);
2714
2715 return(error);
2716 }
2717 #endif /* FTS_DOC_STATS_DEBUG */
2718
2719 /*********************************************************************//**
2720 Update the next and last Doc ID in the CONFIG table to be the input
2721 "doc_id" value (+ 1). We would do so after each FTS index build or
2722 table truncate */
2723 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2724 fts_update_next_doc_id(
2725 /*===================*/
2726 trx_t* trx, /*!< in/out: transaction */
2727 const dict_table_t* table, /*!< in: table */
2728 const char* table_name, /*!< in: table name, or NULL */
2729 doc_id_t doc_id) /*!< in: DOC ID to set */
2730 {
2731 table->fts->cache->synced_doc_id = doc_id;
2732 table->fts->cache->next_doc_id = doc_id + 1;
2733
2734 table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2735
2736 fts_update_sync_doc_id(
2737 table, table_name, table->fts->cache->synced_doc_id, trx);
2738
2739 }
2740
2741 /*********************************************************************//**
2742 Get the next available document id.
2743 @return DB_SUCCESS if OK */
2744 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2745 fts_get_next_doc_id(
2746 /*================*/
2747 const dict_table_t* table, /*!< in: table */
2748 doc_id_t* doc_id) /*!< out: new document id */
2749 {
2750 fts_cache_t* cache = table->fts->cache;
2751
2752 /* If the Doc ID system has not yet been initialized, we
2753 will consult the CONFIG table and user table to re-establish
2754 the initial value of the Doc ID */
2755 if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2756 fts_init_doc_id(table);
2757 }
2758
2759 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2760 *doc_id = FTS_NULL_DOC_ID;
2761 return(DB_SUCCESS);
2762 }
2763
2764 mutex_enter(&cache->doc_id_lock);
2765 *doc_id = ++cache->next_doc_id;
2766 mutex_exit(&cache->doc_id_lock);
2767
2768 return(DB_SUCCESS);
2769 }
2770
2771 /*********************************************************************//**
2772 This function fetch the Doc ID from CONFIG table, and compare with
2773 the Doc ID supplied. And store the larger one to the CONFIG table.
2774 @return DB_SUCCESS if OK */
2775 static MY_ATTRIBUTE((nonnull))
2776 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2777 fts_cmp_set_sync_doc_id(
2778 /*====================*/
2779 const dict_table_t* table, /*!< in: table */
2780 doc_id_t doc_id_cmp, /*!< in: Doc ID to compare */
2781 ibool read_only, /*!< in: TRUE if read the
2782 synced_doc_id only */
2783 doc_id_t* doc_id) /*!< out: larger document id
2784 after comparing "doc_id_cmp"
2785 to the one stored in CONFIG
2786 table */
2787 {
2788 trx_t* trx;
2789 pars_info_t* info;
2790 dberr_t error;
2791 fts_table_t fts_table;
2792 que_t* graph = NULL;
2793 fts_cache_t* cache = table->fts->cache;
2794 char table_name[MAX_FULL_NAME_LEN];
2795 retry:
2796 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2797
2798 fts_table.suffix = "CONFIG";
2799 fts_table.table_id = table->id;
2800 fts_table.type = FTS_COMMON_TABLE;
2801 fts_table.table = table;
2802
2803 fts_table.parent = table->name.m_name;
2804
2805 trx = trx_allocate_for_background();
2806
2807 trx->op_info = "update the next FTS document id";
2808
2809 info = pars_info_create();
2810
2811 pars_info_bind_function(
2812 info, "my_func", fts_fetch_store_doc_id, doc_id);
2813
2814 fts_get_table_name(&fts_table, table_name);
2815 pars_info_bind_id(info, true, "config_table", table_name);
2816
2817 graph = fts_parse_sql(
2818 &fts_table, info,
2819 "DECLARE FUNCTION my_func;\n"
2820 "DECLARE CURSOR c IS SELECT value FROM $config_table"
2821 " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2822 "BEGIN\n"
2823 ""
2824 "OPEN c;\n"
2825 "WHILE 1 = 1 LOOP\n"
2826 " FETCH c INTO my_func();\n"
2827 " IF c % NOTFOUND THEN\n"
2828 " EXIT;\n"
2829 " END IF;\n"
2830 "END LOOP;\n"
2831 "CLOSE c;");
2832
2833 *doc_id = 0;
2834
2835 error = fts_eval_sql(trx, graph);
2836
2837 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2838
2839 // FIXME: We need to retry deadlock errors
2840 if (error != DB_SUCCESS) {
2841 goto func_exit;
2842 }
2843
2844 if (read_only) {
2845 goto func_exit;
2846 }
2847
2848 if (doc_id_cmp == 0 && *doc_id) {
2849 cache->synced_doc_id = *doc_id - 1;
2850 } else {
2851 cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2852 }
2853
2854 mutex_enter(&cache->doc_id_lock);
2855 /* For each sync operation, we will add next_doc_id by 1,
2856 so to mark a sync operation */
2857 if (cache->next_doc_id < cache->synced_doc_id + 1) {
2858 cache->next_doc_id = cache->synced_doc_id + 1;
2859 }
2860 mutex_exit(&cache->doc_id_lock);
2861
2862 if (doc_id_cmp > *doc_id) {
2863 error = fts_update_sync_doc_id(
2864 table, table->name.m_name, cache->synced_doc_id, trx);
2865 }
2866
2867 *doc_id = cache->next_doc_id;
2868
2869 func_exit:
2870
2871 if (error == DB_SUCCESS) {
2872 fts_sql_commit(trx);
2873 } else {
2874 *doc_id = 0;
2875
2876 ib::error() << "(" << ut_strerr(error) << ") while getting"
2877 " next doc id.";
2878 fts_sql_rollback(trx);
2879
2880 if (error == DB_DEADLOCK) {
2881 os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2882 goto retry;
2883 }
2884 }
2885
2886 trx_free_for_background(trx);
2887
2888 return(error);
2889 }
2890
2891 /*********************************************************************//**
2892 Update the last document id. This function could create a new
2893 transaction to update the last document id.
2894 @return DB_SUCCESS if OK */
2895 static
2896 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2897 fts_update_sync_doc_id(
2898 /*===================*/
2899 const dict_table_t* table, /*!< in: table */
2900 const char* table_name, /*!< in: table name, or NULL */
2901 doc_id_t doc_id, /*!< in: last document id */
2902 trx_t* trx) /*!< in: update trx, or NULL */
2903 {
2904 byte id[FTS_MAX_ID_LEN];
2905 pars_info_t* info;
2906 fts_table_t fts_table;
2907 ulint id_len;
2908 que_t* graph = NULL;
2909 dberr_t error;
2910 ibool local_trx = FALSE;
2911 fts_cache_t* cache = table->fts->cache;
2912 char fts_name[MAX_FULL_NAME_LEN];
2913
2914 fts_table.suffix = "CONFIG";
2915 fts_table.table_id = table->id;
2916 fts_table.type = FTS_COMMON_TABLE;
2917 fts_table.table = table;
2918 if (table_name) {
2919 fts_table.parent = table_name;
2920 } else {
2921 fts_table.parent = table->name.m_name;
2922 }
2923
2924 if (!trx) {
2925 trx = trx_allocate_for_background();
2926
2927 trx->op_info = "setting last FTS document id";
2928 local_trx = TRUE;
2929 }
2930
2931 info = pars_info_create();
2932
2933 id_len = ut_snprintf(
2934 (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2935
2936 pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2937
2938 fts_get_table_name(&fts_table, fts_name);
2939 pars_info_bind_id(info, true, "table_name", fts_name);
2940
2941 graph = fts_parse_sql(
2942 &fts_table, info,
2943 "BEGIN"
2944 " UPDATE $table_name SET value = :doc_id"
2945 " WHERE key = 'synced_doc_id';");
2946
2947 error = fts_eval_sql(trx, graph);
2948
2949 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2950
2951 if (local_trx) {
2952 if (error == DB_SUCCESS) {
2953 fts_sql_commit(trx);
2954 cache->synced_doc_id = doc_id;
2955 } else {
2956
2957 ib::error() << "(" << ut_strerr(error) << ") while"
2958 " updating last doc id.";
2959
2960 fts_sql_rollback(trx);
2961 }
2962 trx_free_for_background(trx);
2963 }
2964
2965 return(error);
2966 }
2967
2968 /*********************************************************************//**
2969 Create a new fts_doc_ids_t.
2970 @return new fts_doc_ids_t */
2971 fts_doc_ids_t*
fts_doc_ids_create(void)2972 fts_doc_ids_create(void)
2973 /*====================*/
2974 {
2975 fts_doc_ids_t* fts_doc_ids;
2976 mem_heap_t* heap = mem_heap_create(512);
2977
2978 fts_doc_ids = static_cast<fts_doc_ids_t*>(
2979 mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2980
2981 fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2982
2983 fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2984 fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2985
2986 return(fts_doc_ids);
2987 }
2988
2989 /*********************************************************************//**
2990 Free a fts_doc_ids_t. */
2991 void
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)2992 fts_doc_ids_free(
2993 /*=============*/
2994 fts_doc_ids_t* fts_doc_ids)
2995 {
2996 mem_heap_t* heap = static_cast<mem_heap_t*>(
2997 fts_doc_ids->self_heap->arg);
2998
2999 memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
3000
3001 mem_heap_free(heap);
3002 }
3003
3004 /*********************************************************************//**
3005 Do commit-phase steps necessary for the insertion of a new row.
3006 @return DB_SUCCESS or error code */
3007 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)3008 fts_add(
3009 /*====*/
3010 fts_trx_table_t*ftt, /*!< in: FTS trx table */
3011 fts_trx_row_t* row) /*!< in: row */
3012 {
3013 dict_table_t* table = ftt->table;
3014 doc_id_t doc_id = row->doc_id;
3015
3016 ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
3017
3018 fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
3019
3020 mutex_enter(&table->fts->cache->deleted_lock);
3021 ++table->fts->cache->added;
3022 mutex_exit(&table->fts->cache->deleted_lock);
3023
3024 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
3025 && doc_id >= table->fts->cache->next_doc_id) {
3026 table->fts->cache->next_doc_id = doc_id + 1;
3027 }
3028 }
3029
3030 /*********************************************************************//**
3031 Do commit-phase steps necessary for the deletion of a row.
3032 @return DB_SUCCESS or error code */
3033 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3034 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)3035 fts_delete(
3036 /*=======*/
3037 fts_trx_table_t*ftt, /*!< in: FTS trx table */
3038 fts_trx_row_t* row) /*!< in: row */
3039 {
3040 que_t* graph;
3041 fts_table_t fts_table;
3042 dberr_t error = DB_SUCCESS;
3043 doc_id_t write_doc_id;
3044 dict_table_t* table = ftt->table;
3045 doc_id_t doc_id = row->doc_id;
3046 trx_t* trx = ftt->fts_trx->trx;
3047 pars_info_t* info = pars_info_create();
3048 fts_cache_t* cache = table->fts->cache;
3049
3050 /* we do not index Documents whose Doc ID value is 0 */
3051 if (doc_id == FTS_NULL_DOC_ID) {
3052 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
3053 return(error);
3054 }
3055
3056 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3057
3058 FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
3059
3060 /* Convert to "storage" byte order. */
3061 fts_write_doc_id((byte*) &write_doc_id, doc_id);
3062 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3063
3064 /* It is possible we update a record that has not yet been sync-ed
3065 into cache from last crash (delete Doc will not initialize the
3066 sync). Avoid any added counter accounting until the FTS cache
3067 is re-established and sync-ed */
3068 if (table->fts->fts_status & ADDED_TABLE_SYNCED
3069 && doc_id > cache->synced_doc_id) {
3070 mutex_enter(&table->fts->cache->deleted_lock);
3071
3072 /* The Doc ID could belong to those left in
3073 ADDED table from last crash. So need to check
3074 if it is less than first_doc_id when we initialize
3075 the Doc ID system after reboot */
3076 if (doc_id >= table->fts->cache->first_doc_id
3077 && table->fts->cache->added > 0) {
3078 --table->fts->cache->added;
3079 }
3080
3081 mutex_exit(&table->fts->cache->deleted_lock);
3082
3083 /* Only if the row was really deleted. */
3084 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3085 }
3086
3087 /* Note the deleted document for OPTIMIZE to purge. */
3088 if (error == DB_SUCCESS) {
3089 char table_name[MAX_FULL_NAME_LEN];
3090
3091 trx->op_info = "adding doc id to FTS DELETED";
3092
3093 info->graph_owns_us = TRUE;
3094
3095 fts_table.suffix = "DELETED";
3096
3097 fts_get_table_name(&fts_table, table_name);
3098 pars_info_bind_id(info, true, "deleted", table_name);
3099
3100 graph = fts_parse_sql(
3101 &fts_table,
3102 info,
3103 "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
3104
3105 error = fts_eval_sql(trx, graph);
3106
3107 fts_que_graph_free(graph);
3108 } else {
3109 pars_info_free(info);
3110 }
3111
3112 /* Increment the total deleted count, this is used to calculate the
3113 number of documents indexed. */
3114 if (error == DB_SUCCESS) {
3115 mutex_enter(&table->fts->cache->deleted_lock);
3116
3117 ++table->fts->cache->deleted;
3118
3119 mutex_exit(&table->fts->cache->deleted_lock);
3120 }
3121
3122 return(error);
3123 }
3124
3125 /*********************************************************************//**
3126 Do commit-phase steps necessary for the modification of a row.
3127 @return DB_SUCCESS or error code */
3128 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3129 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3130 fts_modify(
3131 /*=======*/
3132 fts_trx_table_t* ftt, /*!< in: FTS trx table */
3133 fts_trx_row_t* row) /*!< in: row */
3134 {
3135 dberr_t error;
3136
3137 ut_a(row->state == FTS_MODIFY);
3138
3139 error = fts_delete(ftt, row);
3140
3141 if (error == DB_SUCCESS) {
3142 fts_add(ftt, row);
3143 }
3144
3145 return(error);
3146 }
3147
3148 /*********************************************************************//**
3149 Create a new document id.
3150 @return DB_SUCCESS if all went well else error */
3151 dberr_t
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3152 fts_create_doc_id(
3153 /*==============*/
3154 dict_table_t* table, /*!< in: row is of this table. */
3155 dtuple_t* row, /* in/out: add doc id value to this
3156 row. This is the current row that is
3157 being inserted. */
3158 mem_heap_t* heap) /*!< in: heap */
3159 {
3160 doc_id_t doc_id;
3161 dberr_t error = DB_SUCCESS;
3162
3163 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3164
3165 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3166 if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3167 error = fts_get_next_doc_id(table, &doc_id);
3168 }
3169 return(error);
3170 }
3171
3172 error = fts_get_next_doc_id(table, &doc_id);
3173
3174 if (error == DB_SUCCESS) {
3175 dfield_t* dfield;
3176 doc_id_t* write_doc_id;
3177
3178 ut_a(doc_id > 0);
3179
3180 dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3181 write_doc_id = static_cast<doc_id_t*>(
3182 mem_heap_alloc(heap, sizeof(*write_doc_id)));
3183
3184 ut_a(doc_id != FTS_NULL_DOC_ID);
3185 ut_a(sizeof(doc_id) == dfield->type.len);
3186 fts_write_doc_id((byte*) write_doc_id, doc_id);
3187
3188 dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3189 }
3190
3191 return(error);
3192 }
3193
3194 /*********************************************************************//**
3195 The given transaction is about to be committed; do whatever is necessary
3196 from the FTS system's POV.
3197 @return DB_SUCCESS or error code */
3198 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3199 dberr_t
fts_commit_table(fts_trx_table_t * ftt)3200 fts_commit_table(
3201 /*=============*/
3202 fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
3203 {
3204 const ib_rbt_node_t* node;
3205 ib_rbt_t* rows;
3206 dberr_t error = DB_SUCCESS;
3207 fts_cache_t* cache = ftt->table->fts->cache;
3208 trx_t* trx = trx_allocate_for_background();
3209
3210 rows = ftt->rows;
3211
3212 ftt->fts_trx->trx = trx;
3213
3214 if (cache->get_docs == NULL) {
3215 rw_lock_x_lock(&cache->init_lock);
3216 if (cache->get_docs == NULL) {
3217 cache->get_docs = fts_get_docs_create(cache);
3218 }
3219 rw_lock_x_unlock(&cache->init_lock);
3220 }
3221
3222 for (node = rbt_first(rows);
3223 node != NULL && error == DB_SUCCESS;
3224 node = rbt_next(rows, node)) {
3225
3226 fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
3227
3228 switch (row->state) {
3229 case FTS_INSERT:
3230 fts_add(ftt, row);
3231 break;
3232
3233 case FTS_MODIFY:
3234 error = fts_modify(ftt, row);
3235 break;
3236
3237 case FTS_DELETE:
3238 error = fts_delete(ftt, row);
3239 break;
3240
3241 default:
3242 ut_error;
3243 }
3244 }
3245
3246 fts_sql_commit(trx);
3247
3248 trx_free_for_background(trx);
3249
3250 return(error);
3251 }
3252
3253 /*********************************************************************//**
3254 The given transaction is about to be committed; do whatever is necessary
3255 from the FTS system's POV.
3256 @return DB_SUCCESS or error code */
3257 dberr_t
fts_commit(trx_t * trx)3258 fts_commit(
3259 /*=======*/
3260 trx_t* trx) /*!< in: transaction */
3261 {
3262 const ib_rbt_node_t* node;
3263 dberr_t error;
3264 ib_rbt_t* tables;
3265 fts_savepoint_t* savepoint;
3266
3267 savepoint = static_cast<fts_savepoint_t*>(
3268 ib_vector_last(trx->fts_trx->savepoints));
3269 tables = savepoint->tables;
3270
3271 for (node = rbt_first(tables), error = DB_SUCCESS;
3272 node != NULL && error == DB_SUCCESS;
3273 node = rbt_next(tables, node)) {
3274
3275 fts_trx_table_t** ftt;
3276
3277 ftt = rbt_value(fts_trx_table_t*, node);
3278
3279 error = fts_commit_table(*ftt);
3280 }
3281
3282 return(error);
3283 }
3284
3285 /*********************************************************************//**
3286 Initialize a document. */
3287 void
fts_doc_init(fts_doc_t * doc)3288 fts_doc_init(
3289 /*=========*/
3290 fts_doc_t* doc) /*!< in: doc to initialize */
3291 {
3292 mem_heap_t* heap = mem_heap_create(32);
3293
3294 memset(doc, 0, sizeof(*doc));
3295
3296 doc->self_heap = ib_heap_allocator_create(heap);
3297 }
3298
3299 /*********************************************************************//**
3300 Free document. */
3301 void
fts_doc_free(fts_doc_t * doc)3302 fts_doc_free(
3303 /*=========*/
3304 fts_doc_t* doc) /*!< in: document */
3305 {
3306 mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3307
3308 if (doc->tokens) {
3309 rbt_free(doc->tokens);
3310 }
3311
3312 ut_d(memset(doc, 0, sizeof(*doc)));
3313
3314 mem_heap_free(heap);
3315 }
3316
3317 /*********************************************************************//**
3318 Callback function for fetch that stores a row id to the location pointed.
3319 The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8.
3320 @return always returns NULL */
3321 void*
fts_fetch_row_id(void * row,void * user_arg)3322 fts_fetch_row_id(
3323 /*=============*/
3324 void* row, /*!< in: sel_node_t* */
3325 void* user_arg) /*!< in: data pointer */
3326 {
3327 sel_node_t* node = static_cast<sel_node_t*>(row);
3328
3329 dfield_t* dfield = que_node_get_val(node->select_list);
3330 dtype_t* type = dfield_get_type(dfield);
3331 ulint len = dfield_get_len(dfield);
3332
3333 ut_a(dtype_get_mtype(type) == DATA_FIXBINARY);
3334 ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE);
3335 ut_a(len == 8);
3336
3337 memcpy(user_arg, dfield_get_data(dfield), 8);
3338
3339 return(NULL);
3340 }
3341
3342 /*********************************************************************//**
3343 Callback function for fetch that stores the text of an FTS document,
3344 converting each column to UTF-16.
3345 @return always FALSE */
3346 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3347 fts_query_expansion_fetch_doc(
3348 /*==========================*/
3349 void* row, /*!< in: sel_node_t* */
3350 void* user_arg) /*!< in: fts_doc_t* */
3351 {
3352 que_node_t* exp;
3353 sel_node_t* node = static_cast<sel_node_t*>(row);
3354 fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
3355 dfield_t* dfield;
3356 ulint len;
3357 ulint doc_len;
3358 fts_doc_t doc;
3359 CHARSET_INFO* doc_charset = NULL;
3360 ulint field_no = 0;
3361
3362 len = 0;
3363
3364 fts_doc_init(&doc);
3365 doc.found = TRUE;
3366
3367 exp = node->select_list;
3368 doc_len = 0;
3369
3370 doc_charset = result_doc->charset;
3371
3372 /* Copy each indexed column content into doc->text.f_str */
3373 while (exp) {
3374 dfield = que_node_get_val(exp);
3375 len = dfield_get_len(dfield);
3376
3377 /* NULL column */
3378 if (len == UNIV_SQL_NULL) {
3379 exp = que_node_get_next(exp);
3380 continue;
3381 }
3382
3383 if (!doc_charset) {
3384 doc_charset = fts_get_charset(dfield->type.prtype);
3385 }
3386
3387 doc.charset = doc_charset;
3388 doc.is_ngram = result_doc->is_ngram;
3389
3390 if (dfield_is_ext(dfield)) {
3391 /* We ignore columns that are stored externally, this
3392 could result in too many words to search */
3393 exp = que_node_get_next(exp);
3394 continue;
3395 } else {
3396 doc.text.f_n_char = 0;
3397
3398 doc.text.f_str = static_cast<byte*>(
3399 dfield_get_data(dfield));
3400
3401 doc.text.f_len = len;
3402 }
3403
3404 if (field_no == 0) {
3405 fts_tokenize_document(&doc, result_doc,
3406 result_doc->parser);
3407 } else {
3408 fts_tokenize_document_next(&doc, doc_len, result_doc,
3409 result_doc->parser);
3410 }
3411
3412 exp = que_node_get_next(exp);
3413
3414 doc_len += (exp) ? len + 1 : len;
3415
3416 field_no++;
3417 }
3418
3419 ut_ad(doc_charset);
3420
3421 if (!result_doc->charset) {
3422 result_doc->charset = doc_charset;
3423 }
3424
3425 fts_doc_free(&doc);
3426
3427 return(FALSE);
3428 }
3429
3430 /*********************************************************************//**
3431 fetch and tokenize the document. */
3432 static
3433 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3434 fts_fetch_doc_from_rec(
3435 /*===================*/
3436 fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
3437 dict_index_t* clust_index, /*!< in: cluster index */
3438 btr_pcur_t* pcur, /*!< in: cursor whose position
3439 has been stored */
3440 ulint* offsets, /*!< in: offsets */
3441 fts_doc_t* doc) /*!< out: fts doc to hold parsed
3442 documents */
3443 {
3444 dict_index_t* index;
3445 dict_table_t* table;
3446 const rec_t* clust_rec;
3447 ulint num_field;
3448 const dict_field_t* ifield;
3449 const dict_col_t* col;
3450 ulint clust_pos;
3451 ulint i;
3452 ulint doc_len = 0;
3453 ulint processed_doc = 0;
3454 st_mysql_ftparser* parser;
3455
3456 if (!get_doc) {
3457 return;
3458 }
3459
3460 index = get_doc->index_cache->index;
3461 table = get_doc->index_cache->index->table;
3462 parser = get_doc->index_cache->index->parser;
3463
3464 clust_rec = btr_pcur_get_rec(pcur);
3465
3466 num_field = dict_index_get_n_fields(index);
3467
3468 for (i = 0; i < num_field; i++) {
3469 ifield = dict_index_get_nth_field(index, i);
3470 col = dict_field_get_col(ifield);
3471 clust_pos = dict_col_get_clust_pos(col, clust_index);
3472
3473 if (!get_doc->index_cache->charset) {
3474 get_doc->index_cache->charset = fts_get_charset(
3475 ifield->col->prtype);
3476 }
3477
3478 if (rec_offs_nth_extern(offsets, clust_pos)) {
3479 doc->text.f_str =
3480 btr_rec_copy_externally_stored_field(
3481 clust_rec, offsets,
3482 dict_table_page_size(table),
3483 clust_pos, &doc->text.f_len,
3484 static_cast<mem_heap_t*>(
3485 doc->self_heap->arg));
3486 } else {
3487 doc->text.f_str = (byte*) rec_get_nth_field(
3488 clust_rec, offsets, clust_pos,
3489 &doc->text.f_len);
3490 }
3491
3492 doc->found = TRUE;
3493 doc->charset = get_doc->index_cache->charset;
3494 doc->is_ngram = index->is_ngram;
3495
3496 /* Null Field */
3497 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3498 continue;
3499 }
3500
3501 if (processed_doc == 0) {
3502 fts_tokenize_document(doc, NULL, parser);
3503 } else {
3504 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3505 }
3506
3507 processed_doc++;
3508 doc_len += doc->text.f_len + 1;
3509 }
3510 }
3511
3512 /*********************************************************************//**
3513 This function fetches the document inserted during the committing
3514 transaction, and tokenize the inserted text data and insert into
3515 FTS auxiliary table and its cache.
The record is looked up through table->fts_doc_id_index; when that index is
not the clustered index, the clustered record is located via a row
reference. The record is then tokenized once per entry in cache->get_docs
and the tokens are added to the cache under the cache lock.
NOTE: every path through this function returns TRUE; a lookup miss (no
match, infimum record, delete-marked record) is not reported to the caller.
3516 @return TRUE if successful */
3517 static
3518 ulint
fts_add_doc_by_id(fts_trx_table_t * ftt,doc_id_t doc_id,ib_vector_t * fts_indexes MY_ATTRIBUTE ((unused)))3519 fts_add_doc_by_id(
3520 /*==============*/
3521 fts_trx_table_t*ftt, /*!< in: FTS trx table */
3522 doc_id_t doc_id, /*!< in: doc id */
3523 ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)))
3524 /*!< in: affected fts indexes */
3525 {
3526 mtr_t mtr;
3527 mem_heap_t* heap;
3528 btr_pcur_t pcur;
3529 dict_table_t* table;
3530 dtuple_t* tuple;
3531 dfield_t* dfield;
3532 fts_get_doc_t* get_doc;
3533 doc_id_t temp_doc_id;
3534 dict_index_t* clust_index;
3535 dict_index_t* fts_id_index;
3536 ibool is_id_cluster;
3537 fts_cache_t* cache = ftt->table->fts->cache;
3538
3539 ut_ad(cache->get_docs);
3540
3541 /* If Doc ID has been supplied by the user, then the table
3542 might not yet be sync-ed */
3543
3544 if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
3545 fts_init_index(ftt->table, FALSE);
3546 }
3547
3548 /* Get the first FTS index's get_doc */
3549 get_doc = static_cast<fts_get_doc_t*>(
3550 ib_vector_get(cache->get_docs, 0));
3551 ut_ad(get_doc);
3552
3553 table = get_doc->index_cache->index->table;
3554
/* Scratch heap for the search tuple, the row reference and the record
offsets; freed at func_exit. */
3555 heap = mem_heap_create(512);
3556
3557 clust_index = dict_table_get_first_index(table);
3558 fts_id_index = table->fts_doc_id_index;
3559
3560 /* Check whether the index on FTS_DOC_ID is cluster index */
3561 is_id_cluster = (clust_index == fts_id_index);
3562
3563 mtr_start(&mtr);
3564 btr_pcur_init(&pcur);
3565
3566 /* Search based on Doc ID. Here, we'll need to consider the case
3567 when there is no primary index on Doc ID */
3568 tuple = dtuple_create(heap, 1);
3569 dfield = dtuple_get_nth_field(tuple, 0);
3570 dfield->type.mtype = DATA_INT;
3571 dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;
3572
/* The search key is the doc id as written by mach_write_to_8(). */
3573 mach_write_to_8((byte*) &temp_doc_id, doc_id);
3574 dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));
3575
3576 btr_pcur_open_with_no_init(
3577 fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
3578 &pcur, 0, &mtr);
3579
3580 /* If we have a match, add the data to doc structure */
3581 if (btr_pcur_get_low_match(&pcur) == 1) {
3582 const rec_t* rec;
3583 btr_pcur_t* doc_pcur;
3584 const rec_t* clust_rec;
3585 btr_pcur_t clust_pcur;
3586 ulint* offsets = NULL;
3587 ulint num_idx = ib_vector_size(cache->get_docs);
3588
3589 rec = btr_pcur_get_rec(&pcur);
3590
3591 /* Doc could be deleted */
3592 if (page_rec_is_infimum(rec)
3593 || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
3594
3595 goto func_exit;
3596 }
3597
3598 if (is_id_cluster) {
3599 clust_rec = rec;
3600 doc_pcur = &pcur;
3601 } else {
/* The Doc ID index is a secondary index: build a row reference from
its record and position a second cursor on the clustered index. */
3602 dtuple_t* clust_ref;
3603 ulint n_fields;
3604
3605 btr_pcur_init(&clust_pcur);
3606 n_fields = dict_index_get_n_unique(clust_index);
3607
3608 clust_ref = dtuple_create(heap, n_fields);
3609 dict_index_copy_types(clust_ref, clust_index, n_fields);
3610
3611 row_build_row_ref_in_tuple(
3612 clust_ref, rec, fts_id_index, NULL, NULL);
3613
3614 btr_pcur_open_with_no_init(
3615 clust_index, clust_ref, PAGE_CUR_LE,
3616 BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);
3617
3618 doc_pcur = &clust_pcur;
3619 clust_rec = btr_pcur_get_rec(&clust_pcur);
3620
3621 }
3622
3623 offsets = rec_get_offsets(clust_rec, clust_index,
3624 NULL, ULINT_UNDEFINED, &heap);
3625
/* Tokenize the record once for each entry in cache->get_docs. */
3626 for (ulint i = 0; i < num_idx; ++i) {
3627 fts_doc_t doc;
/* Note: the next two declarations shadow the function-level
variables of the same names. */
3628 dict_table_t* table;
3629 fts_get_doc_t* get_doc;
3630
3631 get_doc = static_cast<fts_get_doc_t*>(
3632 ib_vector_get(cache->get_docs, i));
3633
3634 table = get_doc->index_cache->index->table;
3635
3636 fts_doc_init(&doc);
3637
3638 fts_fetch_doc_from_rec(
3639 get_doc, clust_index, doc_pcur, offsets, &doc);
3640
3641 if (doc.found) {
3642 ibool success MY_ATTRIBUTE((unused));
3643
/* Save the cursor position and commit the mini-transaction before
taking the cache lock; the position is restored further down when
another get_doc entry remains to be processed. */
3644 btr_pcur_store_position(doc_pcur, &mtr);
3645 mtr_commit(&mtr);
3646
3647 DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
3648 rw_lock_x_lock(&table->fts->cache->lock);
3649
3650 if (table->fts->cache->stopword_info.status
3651 & STOPWORD_NOT_INIT) {
3652 fts_load_stopword(table, NULL, NULL,
3653 NULL, TRUE, TRUE);
3654 }
3655
3656 fts_cache_add_doc(
3657 table->fts->cache,
3658 get_doc->index_cache,
3659 doc_id, doc.tokens);
3660
/* Decide under the cache lock whether a sync should be requested;
the request itself is issued after the lock has been released. */
3661 bool need_sync = false;
3662 if ((cache->total_size > fts_max_cache_size / 10
3663 || fts_need_sync)
3664 && !cache->sync->in_progress) {
3665 need_sync = true;
3666 }
3667
3668 rw_lock_x_unlock(&table->fts->cache->lock);
3669
3670 DBUG_EXECUTE_IF(
3671 "fts_instrument_sync_cache_wait",
3672 srv_fatal_semaphore_wait_threshold = 25;
3673 fts_max_cache_size = 100;
3674 fts_sync(cache->sync, true, true, false);
3675 );
3676
3677 DBUG_EXECUTE_IF(
3678 "fts_instrument_sync",
3679 fts_optimize_request_sync_table(table);
3680 os_event_wait(cache->sync->event);
3681 );
3682
3683 DBUG_EXECUTE_IF(
3684 "fts_instrument_sync_debug",
3685 fts_sync(cache->sync, true, true, false);
3686 );
3687
3688 DEBUG_SYNC_C("fts_instrument_sync_request");
3689 DBUG_EXECUTE_IF(
3690 "fts_instrument_sync_request",
3691 fts_optimize_request_sync_table(table);
3692 );
3693
3694 if (need_sync) {
3695 fts_optimize_request_sync_table(table);
3696 }
3697
/* Restart the mini-transaction; func_exit commits it
unconditionally, so one must be active from here on. */
3698 mtr_start(&mtr);
3699
/* The cursor position is only restored when a further iteration
will read the record again. */
3700 if (i < num_idx - 1) {
3701
3702 success = btr_pcur_restore_position(
3703 BTR_SEARCH_LEAF, doc_pcur,
3704 &mtr);
3705
3706 ut_ad(success);
3707 }
3708 }
3709
3710 fts_doc_free(&doc);
3711 }
3712
/* In the non-clustered case doc_pcur is the local clust_pcur; the
outer pcur is closed at func_exit. */
3713 if (!is_id_cluster) {
3714 btr_pcur_close(doc_pcur);
3715 }
3716 }
3717 func_exit:
3718 mtr_commit(&mtr);
3719
3720 btr_pcur_close(&pcur);
3721
3722 mem_heap_free(heap);
3723 return(TRUE);
3724 }
3725
3726
3727 /*********************************************************************//**
3728 Callback function to read a single ulint column.
3729 return always returns TRUE */
3730 static
3731 ibool
fts_read_ulint(void * row,void * user_arg)3732 fts_read_ulint(
3733 /*===========*/
3734 void* row, /*!< in: sel_node_t* */
3735 void* user_arg) /*!< in: pointer to ulint */
3736 {
3737 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
3738 ulint* value = static_cast<ulint*>(user_arg);
3739 que_node_t* exp = sel_node->select_list;
3740 dfield_t* dfield = que_node_get_val(exp);
3741 void* data = dfield_get_data(dfield);
3742
3743 *value = static_cast<ulint>(mach_read_from_4(
3744 static_cast<const byte*>(data)));
3745
3746 return(TRUE);
3747 }
3748
3749 /*********************************************************************//**
3750 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3751 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3752 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3753 fts_get_max_doc_id(
3754 /*===============*/
3755 dict_table_t* table) /*!< in: user table */
3756 {
3757 dict_index_t* index;
3758 dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL;
3759 doc_id_t doc_id = 0;
3760 mtr_t mtr;
3761 btr_pcur_t pcur;
3762
3763 index = table->fts_doc_id_index;
3764
3765 if (!index) {
3766 return(0);
3767 }
3768
3769 dfield = dict_index_get_nth_field(index, 0);
3770
3771 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3772 ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3773 #endif
3774
3775 mtr_start(&mtr);
3776
3777 /* fetch the largest indexes value */
3778 btr_pcur_open_at_index_side(
3779 false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3780
3781 if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3782 const rec_t* rec = NULL;
3783 ulint offsets_[REC_OFFS_NORMAL_SIZE];
3784 ulint* offsets = offsets_;
3785 mem_heap_t* heap = NULL;
3786 ulint len;
3787 const void* data;
3788
3789 rec_offs_init(offsets_);
3790
3791 do {
3792 rec = btr_pcur_get_rec(&pcur);
3793
3794 if (page_rec_is_user_rec(rec)) {
3795 break;
3796 }
3797 } while (btr_pcur_move_to_prev(&pcur, &mtr));
3798
3799 if (!rec) {
3800 goto func_exit;
3801 }
3802
3803 offsets = rec_get_offsets(
3804 rec, index, offsets, ULINT_UNDEFINED, &heap);
3805
3806 data = rec_get_nth_field(rec, offsets, 0, &len);
3807
3808 doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3809 static_cast<const byte*>(data)));
3810 }
3811
3812 func_exit:
3813 btr_pcur_close(&pcur);
3814 mtr_commit(&mtr);
3815 return(doc_id);
3816 }
3817
3818 /*********************************************************************//**
3819 Fetch document with the given document id.
3820 @return DB_SUCCESS if OK else error */
3821 dberr_t
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3822 fts_doc_fetch_by_doc_id(
3823 /*====================*/
3824 fts_get_doc_t* get_doc, /*!< in: state */
3825 doc_id_t doc_id, /*!< in: id of document to
3826 fetch */
3827 dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
3828 or NULL */
3829 ulint option, /*!< in: search option, if it is
3830 greater than doc_id or equal */
3831 fts_sql_callback
3832 callback, /*!< in: callback to read */
3833 void* arg) /*!< in: callback arg */
3834 {
3835 pars_info_t* info;
3836 dberr_t error;
3837 const char* select_str;
3838 doc_id_t write_doc_id;
3839 dict_index_t* index;
3840 trx_t* trx = trx_allocate_for_background();
3841 que_t* graph;
3842
3843 trx->op_info = "fetching indexed FTS document";
3844
3845 /* The FTS index can be supplied by caller directly with
3846 "index_to_use", otherwise, get it from "get_doc" */
3847 index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3848
3849 if (get_doc && get_doc->get_document_graph) {
3850 info = get_doc->get_document_graph->info;
3851 } else {
3852 info = pars_info_create();
3853 }
3854
3855 /* Convert to "storage" byte order. */
3856 fts_write_doc_id((byte*) &write_doc_id, doc_id);
3857 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3858 pars_info_bind_function(info, "my_func", callback, arg);
3859
3860 select_str = fts_get_select_columns_str(index, info, info->heap);
3861 pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3862
3863 if (!get_doc || !get_doc->get_document_graph) {
3864 if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3865 graph = fts_parse_sql(
3866 NULL,
3867 info,
3868 mem_heap_printf(info->heap,
3869 "DECLARE FUNCTION my_func;\n"
3870 "DECLARE CURSOR c IS"
3871 " SELECT %s FROM $table_name"
3872 " WHERE %s = :doc_id;\n"
3873 "BEGIN\n"
3874 ""
3875 "OPEN c;\n"
3876 "WHILE 1 = 1 LOOP\n"
3877 " FETCH c INTO my_func();\n"
3878 " IF c %% NOTFOUND THEN\n"
3879 " EXIT;\n"
3880 " END IF;\n"
3881 "END LOOP;\n"
3882 "CLOSE c;",
3883 select_str, FTS_DOC_ID_COL_NAME));
3884 } else {
3885 ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3886
3887 /* This is used for crash recovery of table with
3888 hidden DOC ID or FTS indexes. We will scan the table
3889 to re-processing user table rows whose DOC ID or
3890 FTS indexed documents have not been sync-ed to disc
3891 during recent crash.
3892 In the case that all fulltext indexes are dropped
3893 for a table, we will keep the "hidden" FTS_DOC_ID
3894 column, and this scan is to retreive the largest
3895 DOC ID being used in the table to determine the
3896 appropriate next DOC ID.
3897 In the case of there exists fulltext index(es), this
3898 operation will re-tokenize any docs that have not
3899 been sync-ed to the disk, and re-prime the FTS
3900 cached */
3901 graph = fts_parse_sql(
3902 NULL,
3903 info,
3904 mem_heap_printf(info->heap,
3905 "DECLARE FUNCTION my_func;\n"
3906 "DECLARE CURSOR c IS"
3907 " SELECT %s, %s FROM $table_name"
3908 " WHERE %s > :doc_id;\n"
3909 "BEGIN\n"
3910 ""
3911 "OPEN c;\n"
3912 "WHILE 1 = 1 LOOP\n"
3913 " FETCH c INTO my_func();\n"
3914 " IF c %% NOTFOUND THEN\n"
3915 " EXIT;\n"
3916 " END IF;\n"
3917 "END LOOP;\n"
3918 "CLOSE c;",
3919 FTS_DOC_ID_COL_NAME,
3920 select_str, FTS_DOC_ID_COL_NAME));
3921 }
3922 if (get_doc) {
3923 get_doc->get_document_graph = graph;
3924 }
3925 } else {
3926 graph = get_doc->get_document_graph;
3927 }
3928
3929 error = fts_eval_sql(trx, graph);
3930
3931 if (error == DB_SUCCESS) {
3932 fts_sql_commit(trx);
3933 } else {
3934 fts_sql_rollback(trx);
3935 }
3936
3937 trx_free_for_background(trx);
3938
3939 if (!get_doc) {
3940 fts_que_graph_free(graph);
3941 }
3942
3943 return(error);
3944 }
3945
3946 /*********************************************************************//**
3947 Write out a single word's data as new entry/entries in the INDEX table.
3948 @return DB_SUCCESS if all OK. */
3949 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3950 fts_write_node(
3951 /*===========*/
3952 trx_t* trx, /*!< in: transaction */
3953 que_t** graph, /*!< in: query graph */
3954 fts_table_t* fts_table, /*!< in: aux table */
3955 fts_string_t* word, /*!< in: word in UTF-8 */
3956 fts_node_t* node) /*!< in: node columns */
3957 {
3958 pars_info_t* info;
3959 dberr_t error;
3960 ib_uint32_t doc_count;
3961 ib_time_monotonic_t start_time;
3962 doc_id_t last_doc_id;
3963 doc_id_t first_doc_id;
3964 char table_name[MAX_FULL_NAME_LEN];
3965
3966 ut_a(node->ilist != NULL);
3967
3968 if (*graph) {
3969 info = (*graph)->info;
3970 } else {
3971 info = pars_info_create();
3972
3973 fts_get_table_name(fts_table, table_name);
3974 pars_info_bind_id(info, true, "index_table_name", table_name);
3975 }
3976
3977 pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3978
3979 /* Convert to "storage" byte order. */
3980 fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3981 fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3982
3983 /* Convert to "storage" byte order. */
3984 fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3985 fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3986
3987 ut_a(node->last_doc_id >= node->first_doc_id);
3988
3989 /* Convert to "storage" byte order. */
3990 mach_write_to_4((byte*) &doc_count, node->doc_count);
3991 pars_info_bind_int4_literal(
3992 info, "doc_count", (const ib_uint32_t*) &doc_count);
3993
3994 /* Set copy_name to FALSE since it's a static. */
3995 pars_info_bind_literal(
3996 info, "ilist", node->ilist, node->ilist_size,
3997 DATA_BLOB, DATA_BINARY_TYPE);
3998
3999 if (!*graph) {
4000
4001 *graph = fts_parse_sql(
4002 fts_table,
4003 info,
4004 "BEGIN\n"
4005 "INSERT INTO $index_table_name VALUES"
4006 " (:token, :first_doc_id,"
4007 " :last_doc_id, :doc_count, :ilist);");
4008 }
4009
4010 start_time = ut_time_monotonic();
4011 error = fts_eval_sql(trx, *graph);
4012 elapsed_time += ut_time_monotonic() - start_time;
4013 ++n_nodes;
4014
4015 return(error);
4016 }
4017
4018 /*********************************************************************//**
4019 Add rows to the DELETED_CACHE table.
4020 @return DB_SUCCESS if all went well else error code*/
4021 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4022 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)4023 fts_sync_add_deleted_cache(
4024 /*=======================*/
4025 fts_sync_t* sync, /*!< in: sync state */
4026 ib_vector_t* doc_ids) /*!< in: doc ids to add */
4027 {
4028 ulint i;
4029 pars_info_t* info;
4030 que_t* graph;
4031 fts_table_t fts_table;
4032 char table_name[MAX_FULL_NAME_LEN];
4033 doc_id_t dummy = 0;
4034 dberr_t error = DB_SUCCESS;
4035 ulint n_elems = ib_vector_size(doc_ids);
4036
4037 ut_a(ib_vector_size(doc_ids) > 0);
4038
4039 ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
4040
4041 info = pars_info_create();
4042
4043 fts_bind_doc_id(info, "doc_id", &dummy);
4044
4045 FTS_INIT_FTS_TABLE(
4046 &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
4047
4048 fts_get_table_name(&fts_table, table_name);
4049 pars_info_bind_id(info, true, "table_name", table_name);
4050
4051 graph = fts_parse_sql(
4052 &fts_table,
4053 info,
4054 "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
4055
4056 for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
4057 fts_update_t* update;
4058 doc_id_t write_doc_id;
4059
4060 update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i));
4061
4062 /* Convert to "storage" byte order. */
4063 fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
4064 fts_bind_doc_id(info, "doc_id", &write_doc_id);
4065
4066 error = fts_eval_sql(sync->trx, graph);
4067 }
4068
4069 fts_que_graph_free(graph);
4070
4071 return(error);
4072 }
4073
4074 /** Write the words and ilist to disk.
Every word in index_cache->words is visited; each of its nodes that is not
yet marked synced is marked synced and, as long as no error has occurred,
written with fts_write_node() into the auxiliary table selected by
fts_select_index(). When unlock_cache is true the cache lock is released
around each write and re-acquired afterwards.
4075 @param[in,out] trx transaction
4076 @param[in] index_cache index cache
4077 @param[in] unlock_cache whether unlock cache when write node
4078 Also set this to true if sync takes
4079 very long
4080 @param[in] sync_start_time Holds the timestamp of start of sync
4081 for deducing the length of sync time
4082 @return DB_SUCCESS if all went well else error code */
4083 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4084 dberr_t
fts_sync_write_words(trx_t * trx,fts_index_cache_t * index_cache,bool unlock_cache,ib_time_t sync_start_time)4085 fts_sync_write_words(
4086 trx_t* trx,
4087 fts_index_cache_t* index_cache,
4088 bool unlock_cache,
4089 ib_time_t sync_start_time)
4090 {
4091 fts_table_t fts_table;
4092 ulint n_nodes = 0;
4093 ulint n_words = 0;
4094 const ib_rbt_node_t* rbt_node;
4095 dberr_t error = DB_SUCCESS;
4096 ibool print_error = FALSE;
4097 dict_table_t* table = index_cache->index->table;
4098 /* We use this to deduce threshold value of time
4099 that we can let sync to go on holding cache lock */
4100 const float cutoff = 0.98;
/* The threshold is (srv_fatal_semaphore_wait_threshold modulo
SRV_SEMAPHORE_WAIT_EXTENSION) scaled by cutoff; the floating-point
product is converted to ulint on assignment. */
4101 ulint lock_threshold =
4102 (srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION)
4103 * cutoff;
/* True while this call has the global wait threshold extended. */
4104 bool timeout_extended = false;
4105 #ifdef FTS_DOC_STATS_DEBUG
4106 ulint n_new_words = 0;
4107 #endif /* FTS_DOC_STATS_DEBUG */
4108
4109 FTS_INIT_INDEX_TABLE(
4110 &fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
4111
4112 n_words = rbt_size(index_cache->words);
4113
4114 /* We iterate over the entire tree, even if there is an error,
4115 since we want to free the memory used during caching. */
4116 for (rbt_node = rbt_first(index_cache->words);
4117 rbt_node;
4118 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4119
4120 ulint i;
4121 ulint selected;
4122 fts_tokenizer_word_t* word;
4123
4124 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4125
4126 DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
4127 os_thread_sleep(300000););
4128
/* Pick the auxiliary index table for this word; "selected" also
indexes the per-table graphs (ins_graph / sel_graph) below. */
4129 selected = fts_select_index(
4130 index_cache->charset, word->text.f_str,
4131 word->text.f_len);
4132
4133 fts_table.suffix = fts_get_suffix(selected);
4134
4135 #ifdef FTS_DOC_STATS_DEBUG
4136 /* Check if the word exists in the FTS index and if not
4137 then we need to increment the total word count stats. */
4138 if (error == DB_SUCCESS && fts_enable_diag_print) {
4139 ibool found = FALSE;
4140
4141 error = fts_is_word_in_index(
4142 trx,
4143 &index_cache->sel_graph[selected],
4144 &fts_table,
4145 &word->text, &found);
4146
4147 if (error == DB_SUCCESS && !found) {
4148
4149 ++n_new_words;
4150 }
4151 }
4152 #endif /* FTS_DOC_STATS_DEBUG */
4153
4154 /* We iterate over all the nodes even if there was an error */
4155 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
4156
4157 fts_node_t* fts_node = static_cast<fts_node_t*>(
4158 ib_vector_get(word->nodes, i));
4159
/* Nodes already marked synced are skipped. A node is marked synced
here, before the write is attempted, so it is marked even when an
earlier error means it is not written below. */
4160 if (fts_node->synced) {
4161 continue;
4162 } else {
4163 fts_node->synced = true;
4164 }
4165
4166 /*FIXME: we need to handle the error properly. */
4167 if (error == DB_SUCCESS) {
4168 DEBUG_SYNC_C("fts_instrument_sync");
4169 DBUG_EXECUTE_IF("fts_instrument_sync",
4170 os_thread_sleep(10000000););
/* While the cache lock is being held across writes, watch the time
elapsed since sync_start_time. The first time it exceeds
lock_threshold, the global wait threshold and the local threshold
are both raised by SRV_SEMAPHORE_WAIT_EXTENSION. The next time, the
global extension is undone and the function switches to releasing
the cache lock around each write.
NOTE(review): if the function returns while timeout_extended is
still true, nothing in this function reverts the increment of
srv_fatal_semaphore_wait_threshold - confirm whether that is
intended or handled by the caller. */
4171 if (!unlock_cache) {
4172 ulint cache_lock_time = ut_time_monotonic() - sync_start_time;
4173 if (cache_lock_time > lock_threshold) {
4174 if (!timeout_extended) {
4175 os_atomic_increment_ulint(
4176 &srv_fatal_semaphore_wait_threshold,
4177 SRV_SEMAPHORE_WAIT_EXTENSION);
4178 timeout_extended = true;
4179 lock_threshold +=
4180 SRV_SEMAPHORE_WAIT_EXTENSION;
4181 } else {
4182 unlock_cache = true;
4183 os_atomic_decrement_ulint(
4184 &srv_fatal_semaphore_wait_threshold,
4185 SRV_SEMAPHORE_WAIT_EXTENSION);
4186 timeout_extended = false;
4187
4188 }
4189 }
4190 }
4191
/* Release the cache lock for the duration of the write; it is
re-acquired after the write, further down. */
4192 if (unlock_cache) {
4193 rw_lock_x_unlock(
4194 &table->fts->cache->lock);
4195 }
4196
4197 error = fts_write_node(
4198 trx,
4199 &index_cache->ins_graph[selected],
4200 &fts_table, &word->text, fts_node);
4201 DBUG_EXECUTE_IF("fts_instrument_sync",
4202 os_thread_sleep(15000000););
4203
4204 DEBUG_SYNC_C("fts_write_node");
4205 DBUG_EXECUTE_IF("fts_write_node_crash",
4206 DBUG_SUICIDE(););
4207
4208 DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
4209 os_thread_sleep(1000000);
4210 );
4211
4212 if (unlock_cache) {
4213 rw_lock_x_lock(
4214 &table->fts->cache->lock);
4215 }
4216 }
4217 }
4218
/* Counts every node of the word, including nodes that were skipped
above; only used for the diagnostic average printed at the end. */
4219 n_nodes += ib_vector_size(word->nodes);
4220
/* Report a write failure only once per call. */
4221 if (error != DB_SUCCESS && !print_error) {
4222 ib::error() << "(" << ut_strerr(error) << ") writing"
4223 " word node to FTS auxiliary index table.";
4224 print_error = TRUE;
4225 }
4226 }
4227
4228 #ifdef FTS_DOC_STATS_DEBUG
4229 if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
4230 fts_table_t fts_table;
4231
4232 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
4233
4234 /* Increment the total number of words in the FTS index */
4235 error = fts_config_increment_index_value(
4236 trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
4237 n_new_words);
4238 }
4239 #endif /* FTS_DOC_STATS_DEBUG */
4240
/* The divisor is clamped to at least 1 so that an empty word tree
does not cause a division by zero. */
4241 if (fts_enable_diag_print) {
4242 printf("Avg number of nodes: %lf\n",
4243 (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
4244 }
4245
4246 return(error);
4247 }
4248
4249 #ifdef FTS_DOC_STATS_DEBUG
4250 /*********************************************************************//**
4251 Write a single documents statistics to disk.
4252 @return DB_SUCCESS if all went well else error code */
4253 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4254 dberr_t
fts_sync_write_doc_stat(trx_t * trx,dict_index_t * index,que_t ** graph,const fts_doc_stats_t * doc_stat)4255 fts_sync_write_doc_stat(
4256 /*====================*/
4257 trx_t* trx, /*!< in: transaction */
4258 dict_index_t* index, /*!< in: index */
4259 que_t** graph, /* out: query graph */
4260 const fts_doc_stats_t* doc_stat) /*!< in: doc stats to write */
4261 {
4262 pars_info_t* info;
4263 doc_id_t doc_id;
4264 dberr_t error = DB_SUCCESS;
4265 ib_uint32_t word_count;
4266 char table_name[MAX_FULL_NAME_LEN];
4267
4268 if (*graph) {
4269 info = (*graph)->info;
4270 } else {
4271 info = pars_info_create();
4272 }
4273
4274 /* Convert to "storage" byte order. */
4275 mach_write_to_4((byte*) &word_count, doc_stat->word_count);
4276 pars_info_bind_int4_literal(
4277 info, "count", (const ib_uint32_t*) &word_count);
4278
4279 /* Convert to "storage" byte order. */
4280 fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id);
4281 fts_bind_doc_id(info, "doc_id", &doc_id);
4282
4283 if (!*graph) {
4284 fts_table_t fts_table;
4285
4286 FTS_INIT_INDEX_TABLE(
4287 &fts_table, "DOC_ID", FTS_INDEX_TABLE, index);
4288
4289 fts_get_table_name(&fts_table, table_name);
4290
4291 pars_info_bind_id(info, true, "doc_id_table", table_name);
4292
4293 *graph = fts_parse_sql(
4294 &fts_table,
4295 info,
4296 "BEGIN"
4297 " INSERT INTO $doc_id_table VALUES (:doc_id, :count);");
4298 }
4299
4300 for (;;) {
4301 error = fts_eval_sql(trx, *graph);
4302
4303 if (error == DB_SUCCESS) {
4304
4305 break; /* Exit the loop. */
4306 } else {
4307
4308 if (error == DB_LOCK_WAIT_TIMEOUT) {
4309 ib::warn() << "Lock wait timeout writing to"
4310 " FTS doc_id. Retrying!";
4311
4312 trx->error_state = DB_SUCCESS;
4313 } else {
4314 ib::error() << "(" << ut_strerr(error)
4315 << ") while writing to FTS doc_id.";
4316
4317 break; /* Exit the loop. */
4318 }
4319 }
4320 }
4321
4322 return(error);
4323 }
4324
4325 /*********************************************************************//**
4326 Write document statistics to disk.
4327 @return DB_SUCCESS if all OK */
4328 static
4329 ulint
fts_sync_write_doc_stats(trx_t * trx,const fts_index_cache_t * index_cache)4330 fts_sync_write_doc_stats(
4331 /*=====================*/
4332 trx_t* trx, /*!< in: transaction */
4333 const fts_index_cache_t*index_cache) /*!< in: index cache */
4334 {
4335 dberr_t error = DB_SUCCESS;
4336 que_t* graph = NULL;
4337 fts_doc_stats_t* doc_stat;
4338
4339 if (ib_vector_is_empty(index_cache->doc_stats)) {
4340 return(DB_SUCCESS);
4341 }
4342
4343 doc_stat = static_cast<ts_doc_stats_t*>(
4344 ib_vector_pop(index_cache->doc_stats));
4345
4346 while (doc_stat) {
4347 error = fts_sync_write_doc_stat(
4348 trx, index_cache->index, &graph, doc_stat);
4349
4350 if (error != DB_SUCCESS) {
4351 break;
4352 }
4353
4354 if (ib_vector_is_empty(index_cache->doc_stats)) {
4355 break;
4356 }
4357
4358 doc_stat = static_cast<ts_doc_stats_t*>(
4359 ib_vector_pop(index_cache->doc_stats));
4360 }
4361
4362 if (graph != NULL) {
4363 fts_que_graph_free_check_lock(NULL, index_cache, graph);
4364 }
4365
4366 return(error);
4367 }
4368
4369 /*********************************************************************//**
4370 Callback to check the existince of a word.
4371 @return always return NULL */
4372 static
4373 ibool
fts_lookup_word(void * row,void * user_arg)4374 fts_lookup_word(
4375 /*============*/
4376 void* row, /*!< in: sel_node_t* */
4377 void* user_arg) /*!< in: fts_doc_t* */
4378 {
4379
4380 que_node_t* exp;
4381 sel_node_t* node = static_cast<sel_node_t*>(row);
4382 ibool* found = static_cast<ibool*>(user_arg);
4383
4384 exp = node->select_list;
4385
4386 while (exp) {
4387 dfield_t* dfield = que_node_get_val(exp);
4388 ulint len = dfield_get_len(dfield);
4389
4390 if (len != UNIV_SQL_NULL && len != 0) {
4391 *found = TRUE;
4392 }
4393
4394 exp = que_node_get_next(exp);
4395 }
4396
4397 return(FALSE);
4398 }
4399
4400 /*********************************************************************//**
4401 Check whether a particular word (term) exists in the FTS index.
4402 @return DB_SUCCESS if all went well else error code */
4403 static
4404 dberr_t
fts_is_word_in_index(trx_t * trx,que_t ** graph,fts_table_t * fts_table,const fts_string_t * word,ibool * found)4405 fts_is_word_in_index(
4406 /*=================*/
4407 trx_t* trx, /*!< in: FTS query state */
4408 que_t** graph, /* out: Query graph */
4409 fts_table_t* fts_table, /*!< in: table instance */
4410 const fts_string_t*
4411 word, /*!< in: the word to check */
4412 ibool* found) /* out: TRUE if exists */
4413 {
4414 pars_info_t* info;
4415 dberr_t error;
4416 char table_name[MAX_FULL_NAME_LEN];
4417
4418 trx->op_info = "looking up word in FTS index";
4419
4420 if (*graph) {
4421 info = (*graph)->info;
4422 } else {
4423 info = pars_info_create();
4424 }
4425
4426 fts_get_table_name(fts_table, table_name);
4427 pars_info_bind_id(info, true, "table_name", table_name);
4428 pars_info_bind_function(info, "my_func", fts_lookup_word, found);
4429 pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
4430
4431 if (*graph == NULL) {
4432 *graph = fts_parse_sql(
4433 fts_table,
4434 info,
4435 "DECLARE FUNCTION my_func;\n"
4436 "DECLARE CURSOR c IS"
4437 " SELECT doc_count\n"
4438 " FROM $table_name\n"
4439 " WHERE word = :word"
4440 " ORDER BY first_doc_id;\n"
4441 "BEGIN\n"
4442 "\n"
4443 "OPEN c;\n"
4444 "WHILE 1 = 1 LOOP\n"
4445 " FETCH c INTO my_func();\n"
4446 " IF c % NOTFOUND THEN\n"
4447 " EXIT;\n"
4448 " END IF;\n"
4449 "END LOOP;\n"
4450 "CLOSE c;");
4451 }
4452
4453 for (;;) {
4454 error = fts_eval_sql(trx, *graph);
4455
4456 if (error == DB_SUCCESS) {
4457
4458 break; /* Exit the loop. */
4459 } else {
4460
4461 if (error == DB_LOCK_WAIT_TIMEOUT) {
4462 ib::warn() << "Lock wait timeout reading"
4463 " FTS index. Retrying!";
4464
4465 trx->error_state = DB_SUCCESS;
4466 } else {
4467 ib::error() << "(" << ut_strerr(error)
4468 << ") while reading FTS index.";
4469
4470 break; /* Exit the loop. */
4471 }
4472 }
4473 }
4474
4475 return(error);
4476 }
4477 #endif /* FTS_DOC_STATS_DEBUG */
4478
4479 /*********************************************************************//**
4480 Begin Sync, create transaction, acquire locks, etc. */
4481 static
4482 void
fts_sync_begin(fts_sync_t * sync)4483 fts_sync_begin(
4484 /*===========*/
4485 fts_sync_t* sync) /*!< in: sync state */
4486 {
4487 fts_cache_t* cache = sync->table->fts->cache;
4488
4489 n_nodes = 0;
4490 elapsed_time = 0;
4491
4492 sync->start_time = ut_time_monotonic();
4493
4494 sync->trx = trx_allocate_for_background();
4495
4496 if (fts_enable_diag_print) {
4497 ib::info() << "FTS SYNC for table " << sync->table->name
4498 << ", deleted count: "
4499 << ib_vector_size(cache->deleted_doc_ids)
4500 << " size: " << cache->total_size << " bytes";
4501 }
4502 }
4503
4504 /*********************************************************************//**
4505 Run SYNC on the table, i.e., write out data from the index specific
4506 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4507 @return DB_SUCCESS if all OK */
4508 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4509 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4510 fts_sync_index(
4511 /*===========*/
4512 fts_sync_t* sync, /*!< in: sync state */
4513 fts_index_cache_t* index_cache) /*!< in: index cache */
4514 {
4515 trx_t* trx = sync->trx;
4516 dberr_t error = DB_SUCCESS;
4517
4518 trx->op_info = "doing SYNC index";
4519
4520 if (fts_enable_diag_print) {
4521 ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4522 }
4523
4524 ut_ad(rbt_validate(index_cache->words));
4525
4526 error = fts_sync_write_words(trx, index_cache, sync->unlock_cache,
4527 sync->start_time);
4528
4529 #ifdef FTS_DOC_STATS_DEBUG
4530 /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
4531 is not used currently for ranking. We disable fts_sync_write_doc_stats()
4532 for now */
4533 /* Write the per doc statistics that will be used for ranking. */
4534 if (error == DB_SUCCESS) {
4535
4536 error = fts_sync_write_doc_stats(trx, index_cache);
4537 }
4538 #endif /* FTS_DOC_STATS_DEBUG */
4539
4540 return(error);
4541 }
4542
4543 /** Check if index cache has been synced completely
4544 @param[in,out] index_cache index cache
4545 @return true if index is synced, otherwise false. */
4546 static
4547 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4548 fts_sync_index_check(
4549 fts_index_cache_t* index_cache)
4550 {
4551 const ib_rbt_node_t* rbt_node;
4552
4553 for (rbt_node = rbt_first(index_cache->words);
4554 rbt_node != NULL;
4555 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4556
4557 fts_tokenizer_word_t* word;
4558 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4559
4560 fts_node_t* fts_node;
4561 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4562
4563 if (!fts_node->synced) {
4564 return(false);
4565 }
4566 }
4567
4568 return(true);
4569 }
4570
4571 /** Reset synced flag in index cache when rollback
4572 @param[in,out] index_cache index cache */
4573 static
4574 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4575 fts_sync_index_reset(
4576 fts_index_cache_t* index_cache)
4577 {
4578 const ib_rbt_node_t* rbt_node;
4579
4580 for (rbt_node = rbt_first(index_cache->words);
4581 rbt_node != NULL;
4582 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4583
4584 fts_tokenizer_word_t* word;
4585 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4586
4587 fts_node_t* fts_node;
4588 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4589
4590 fts_node->synced = false;
4591 }
4592 }
4593
4594 /** Commit the SYNC, change state of processed doc ids etc.
4595 @param[in,out] sync sync state
4596 @return DB_SUCCESS if all OK */
4597 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4598 dberr_t
fts_sync_commit(fts_sync_t * sync)4599 fts_sync_commit(
4600 fts_sync_t* sync)
4601 {
4602 dberr_t error;
4603 trx_t* trx = sync->trx;
4604 fts_cache_t* cache = sync->table->fts->cache;
4605 doc_id_t last_doc_id;
4606
4607 trx->op_info = "doing SYNC commit";
4608
4609 /* After each Sync, update the CONFIG table about the max doc id
4610 we just sync-ed to index table */
4611 error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4612 &last_doc_id);
4613
4614 /* Get the list of deleted documents that are either in the
4615 cache or were headed there but were deleted before the add
4616 thread got to them. */
4617
4618 if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4619
4620 error = fts_sync_add_deleted_cache(
4621 sync, cache->deleted_doc_ids);
4622 }
4623
4624 /* We need to do this within the deleted lock since fts_delete() can
4625 attempt to add a deleted doc id to the cache deleted id array. */
4626 fts_cache_clear(cache);
4627 DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4628 fts_cache_init(cache);
4629 rw_lock_x_unlock(&cache->lock);
4630
4631 if (error == DB_SUCCESS) {
4632
4633 fts_sql_commit(trx);
4634
4635 } else if (error != DB_SUCCESS) {
4636
4637 fts_sql_rollback(trx);
4638
4639 ib::error() << "(" << ut_strerr(error) << ") during SYNC.";
4640 }
4641
4642 if (fts_enable_diag_print && elapsed_time) {
4643 ib::info() << "SYNC for table " << sync->table->name
4644 << ": SYNC time: "
4645 << (ut_time_monotonic() - sync->start_time)
4646 << " secs: elapsed "
4647 << (double) n_nodes / elapsed_time
4648 << " ins/sec";
4649 }
4650
4651 /* Avoid assertion in trx_free(). */
4652 trx->dict_operation_lock_mode = 0;
4653 trx_free_for_background(trx);
4654
4655 return(error);
4656 }
4657
4658 /*********************************************************************//**
4659 Rollback a sync operation */
4660 static
4661 void
fts_sync_rollback(fts_sync_t * sync)4662 fts_sync_rollback(
4663 /*==============*/
4664 fts_sync_t* sync) /*!< in: sync state */
4665 {
4666 trx_t* trx = sync->trx;
4667 fts_cache_t* cache = sync->table->fts->cache;
4668
4669 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4670 ulint j;
4671 fts_index_cache_t* index_cache;
4672
4673 index_cache = static_cast<fts_index_cache_t*>(
4674 ib_vector_get(cache->indexes, i));
4675
4676 /* Reset synced flag so nodes will not be skipped
4677 in the next sync, see fts_sync_write_words(). */
4678 fts_sync_index_reset(index_cache);
4679
4680 for (j = 0; fts_index_selector[j].value; ++j) {
4681
4682 if (index_cache->ins_graph[j] != NULL) {
4683
4684 fts_que_graph_free_check_lock(
4685 NULL, index_cache,
4686 index_cache->ins_graph[j]);
4687
4688 index_cache->ins_graph[j] = NULL;
4689 }
4690
4691 if (index_cache->sel_graph[j] != NULL) {
4692
4693 fts_que_graph_free_check_lock(
4694 NULL, index_cache,
4695 index_cache->sel_graph[j]);
4696
4697 index_cache->sel_graph[j] = NULL;
4698 }
4699 }
4700 }
4701
4702 rw_lock_x_unlock(&cache->lock);
4703
4704 fts_sql_rollback(trx);
4705
4706 /* Avoid assertion in trx_free(). */
4707 trx->dict_operation_lock_mode = 0;
4708 trx_free_for_background(trx);
4709 }
4710
4711 /** Check that all indexes are synced.
4712 @param[in,out] sync sync state
4713 @return true if all indexes are synced, false otherwise. */
4714 static
4715 bool
fts_check_all_indexes_synced(fts_sync_t * sync)4716 fts_check_all_indexes_synced(
4717 fts_sync_t* sync)
4718 {
4719 ulint i;
4720 fts_cache_t* cache = sync->table->fts->cache;
4721
4722 /* Make sure all the caches are synced. */
4723 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4724 fts_index_cache_t* index_cache;
4725
4726 index_cache = static_cast<fts_index_cache_t*>(
4727 ib_vector_get(cache->indexes, i));
4728
4729 if (index_cache->index->to_be_dropped
4730 || index_cache->index->table->to_be_dropped
4731 || fts_sync_index_check(index_cache)) {
4732 continue;
4733 }
4734
4735 return false;
4736 }
4737
4738 return true;
4739 }
4740
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.

Only one sync runs at a time per cache: sync->in_progress is tested and
set under cache->lock. If a sync is already running, the call either
waits on sync->event (wait == true) or returns DB_SUCCESS immediately
without doing anything (wait == false).

cache->lock is taken here and released inside fts_sync_commit() or
fts_sync_rollback(); it is then re-taken briefly to clear the per-index
syncing flags and to publish completion.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@param[in]	has_dict_lock	whether has dict operation lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict_lock)
{
	ulint		i;
	dberr_t		error = DB_SUCCESS;
	fts_cache_t*	cache = sync->table->fts->cache;

	rw_lock_x_lock(&cache->lock);

	/* Check if cache is being synced.
	Note: we release cache lock in fts_sync_write_words() to
	avoid long wait for the lock by other threads. */
	while (sync->in_progress) {
		/* Never sleep on the event while holding the cache lock. */
		rw_lock_x_unlock(&cache->lock);

		if (wait) {
			os_event_wait(sync->event);
		} else {
			/* Caller does not want to wait: report success
			without having synced anything. */
			return(DB_SUCCESS);
		}

		/* Re-check in_progress under the lock. */
		rw_lock_x_lock(&cache->lock);
	}

	sync->unlock_cache = unlock_cache;
	sync->in_progress = true;

	DEBUG_SYNC_C("fts_sync_begin");
	fts_sync_begin(sync);

	if (has_dict_lock) {
		/* If lock is already taken mark that in transaction
		so rollback will not try to take it again. */
		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
	}

	/* Repeat full passes over all index caches until every index
	reports synced or an error/interrupt occurs. */
	do {
		if (cache->total_size > fts_max_cache_size) {
			/* Avoid the case: sync never finish when
			insert/update keeps coming. */
			ut_ad(sync->unlock_cache);
			sync->unlock_cache = false;
		}

		for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
			fts_index_cache_t*	index_cache;

			index_cache = static_cast<fts_index_cache_t*>(
				ib_vector_get(cache->indexes, i));

			/* Skip indexes (or tables) that are going away. */
			if (index_cache->index->to_be_dropped
			    || index_cache->index->table->to_be_dropped) {
				continue;
			}

			DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
					os_thread_sleep(300000););

			/* Flag is cleared again after commit/rollback
			below. */
			index_cache->index->index_fts_syncing = true;

			error = fts_sync_index(sync, index_cache);

			if (error != DB_SUCCESS) {
				break;
			}
		}

		DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
				sync->interrupted = true;
				error = DB_INTERRUPTED;
		);

		if (error != DB_SUCCESS) {
			break;
		}
	} while (!fts_check_all_indexes_synced(sync));

	/* Both paths below release cache->lock and free sync->trx. */
	if (error == DB_SUCCESS && !sync->interrupted) {
		error = fts_sync_commit(sync);
	} else {
		fts_sync_rollback(sync);
	}

	rw_lock_x_lock(&cache->lock);
	/* Clear fts syncing flags of any indexes in case sync is
	interrupted */
	DEBUG_SYNC_C("fts_instrument_sync");
	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
		fts_index_cache_t*	index_cache;
		index_cache = static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i));
		if (index_cache->index->index_fts_syncing == true) {
			index_cache->index->index_fts_syncing = false;
		}
	}
	/* Publish completion and wake up threads waiting in the
	in_progress loop above. */
	sync->interrupted = false;
	sync->in_progress = false;
	os_event_set(sync->event);
	rw_lock_x_unlock(&cache->lock);

	/* We need to check whether an optimize is required, for that
	we make copies of the two variables that control the trigger. These
	variables can change behind our back and we don't want to hold the
	lock for longer than is needed.
	NOTE(review): the code below only resets both counters to zero under
	deleted_lock; no copies are taken here - confirm the comment above
	is still accurate. */
	mutex_enter(&cache->deleted_lock);

	cache->added = 0;
	cache->deleted = 0;

	mutex_exit(&cache->deleted_lock);

	return(error);
}
4867
4868 /** Run SYNC on the table, i.e., write out data from the cache to the
4869 FTS auxiliary INDEX table and clear the cache at the end.
4870 @param[in,out] table fts table
4871 @param[in] unlock_cache whether unlock cache when write node
4872 @param[in] wait whether wait for existing sync to finish
4873 @param[in] has_dict whether has dict operation lock
4874 @return DB_SUCCESS on success, error code on failure. */
4875 dberr_t
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4876 fts_sync_table(
4877 dict_table_t* table,
4878 bool unlock_cache,
4879 bool wait,
4880 bool has_dict)
4881 {
4882 dberr_t err = DB_SUCCESS;
4883
4884 ut_ad(table->fts);
4885
4886 if (!dict_table_is_discarded(table) && table->fts->cache
4887 && !dict_table_is_corrupted(table)) {
4888 err = fts_sync(table->fts->cache->sync,
4889 unlock_cache, wait, has_dict);
4890 }
4891
4892 return(err);
4893 }
4894
4895 /** Check fts token
4896 1. for ngram token, check whether the token contains any words in stopwords
4897 2. for non-ngram token, check if it's stopword or less than fts_min_token_size
4898 or greater than fts_max_token_size.
4899 @param[in] token token string
4900 @param[in] stopwords stopwords rb tree
4901 @param[in] is_ngram is ngram parser
4902 @param[in] cs token charset
4903 @retval true if it is not stopword and length in range
4904 @retval false if it is stopword or lenght not in range */
4905 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,bool is_ngram,const CHARSET_INFO * cs)4906 fts_check_token(
4907 const fts_string_t* token,
4908 const ib_rbt_t* stopwords,
4909 bool is_ngram,
4910 const CHARSET_INFO* cs)
4911 {
4912 ut_ad(cs != NULL || stopwords == NULL);
4913
4914 if (!is_ngram) {
4915 ib_rbt_bound_t parent;
4916
4917 if (token->f_n_char < fts_min_token_size
4918 || token->f_n_char > fts_max_token_size
4919 || (stopwords != NULL
4920 && rbt_search(stopwords, &parent, token) == 0)) {
4921 return(false);
4922 } else {
4923 return(true);
4924 }
4925 }
4926
4927 /* Check token for ngram. */
4928 DBUG_EXECUTE_IF(
4929 "fts_instrument_ignore_ngram_check",
4930 return(true);
4931 );
4932
4933 /* We ignore fts_min_token_size when ngram */
4934 ut_ad(token->f_n_char > 0
4935 && token->f_n_char <= fts_max_token_size);
4936
4937 if (stopwords == NULL) {
4938 return(true);
4939 }
4940
4941 /*Ngram checks whether the token contains any words in stopwords.
4942 We can't simply use CONTAIN to search in stopwords, because it's
4943 built on COMPARE. So we need to tokenize the token into words
4944 from unigram to f_n_char, and check them separately. */
4945 for (ulint ngram_token_size = 1; ngram_token_size <= token->f_n_char;
4946 ngram_token_size ++) {
4947 const char* start;
4948 const char* next;
4949 const char* end;
4950 ulint char_len;
4951 ulint n_chars;
4952
4953 start = reinterpret_cast<char*>(token->f_str);
4954 next = start;
4955 end = start + token->f_len;
4956 n_chars = 0;
4957
4958 while (next < end) {
4959 char_len = my_mbcharlen_ptr(cs, next, end);
4960
4961 if (next + char_len > end || char_len == 0) {
4962 break;
4963 } else {
4964 /* Skip SPACE */
4965 if (char_len == 1 && *next == ' ') {
4966 start = next + 1;
4967 next = start;
4968 n_chars = 0;
4969
4970 continue;
4971 }
4972
4973 next += char_len;
4974 n_chars++;
4975 }
4976
4977 if (n_chars == ngram_token_size) {
4978 fts_string_t ngram_token;
4979 ngram_token.f_str =
4980 reinterpret_cast<byte*>(
4981 const_cast<char*>(start));
4982 ngram_token.f_len = next - start;
4983 ngram_token.f_n_char = ngram_token_size;
4984
4985 ib_rbt_bound_t parent;
4986 if (rbt_search(stopwords, &parent,
4987 &ngram_token) == 0) {
4988 return(false);
4989 }
4990
4991 /* Move a char forward */
4992 start += my_mbcharlen_ptr(cs, start, end);
4993 n_chars = ngram_token_size - 1;
4994 }
4995 }
4996 }
4997
4998 return(true);
4999 }
5000
5001 /** Add the token and its start position to the token's list of positions.
5002 @param[in,out] result_doc result doc rb tree
5003 @param[in] str token string
5004 @param[in] position token position */
5005 static
5006 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)5007 fts_add_token(
5008 fts_doc_t* result_doc,
5009 fts_string_t str,
5010 ulint position)
5011 {
5012 /* Ignore string whose character number is less than
5013 "fts_min_token_size" or more than "fts_max_token_size" */
5014
5015 if (fts_check_token(&str, NULL, result_doc->is_ngram,
5016 result_doc->charset)) {
5017
5018 mem_heap_t* heap;
5019 fts_string_t t_str;
5020 fts_token_t* token;
5021 ib_rbt_bound_t parent;
5022 ulint newlen;
5023
5024 heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
5025
5026 t_str.f_n_char = str.f_n_char;
5027
5028 t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
5029
5030 t_str.f_str = static_cast<byte*>(
5031 mem_heap_alloc(heap, t_str.f_len));
5032
5033 /* For binary collations, a case sensitive search is
5034 performed. Hence don't convert to lower case. */
5035 if (my_binary_compare(result_doc->charset)) {
5036 memcpy(t_str.f_str, str.f_str, str.f_len);
5037 t_str.f_str[str.f_len]= 0;
5038 newlen= str.f_len;
5039 } else {
5040 newlen = innobase_fts_casedn_str(
5041 result_doc->charset, (char*) str.f_str, str.f_len,
5042 (char*) t_str.f_str, t_str.f_len);
5043 }
5044
5045 t_str.f_len = newlen;
5046 t_str.f_str[newlen] = 0;
5047
5048 /* Add the word to the document statistics. If the word
5049 hasn't been seen before we create a new entry for it. */
5050 if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
5051 fts_token_t new_token;
5052
5053 new_token.text.f_len = newlen;
5054 new_token.text.f_str = t_str.f_str;
5055 new_token.text.f_n_char = t_str.f_n_char;
5056
5057 new_token.positions = ib_vector_create(
5058 result_doc->self_heap, sizeof(ulint), 32);
5059
5060 parent.last = rbt_add_node(
5061 result_doc->tokens, &parent, &new_token);
5062
5063 ut_ad(rbt_validate(result_doc->tokens));
5064 }
5065
5066 token = rbt_value(fts_token_t, parent.last);
5067 ib_vector_push(token->positions, &position);
5068 }
5069 }
5070
5071 /********************************************************************
5072 Process next token from document starting at the given position, i.e., add
5073 the token's start position to the token's list of positions.
5074 @return number of characters handled in this call */
5075 static
5076 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)5077 fts_process_token(
5078 /*==============*/
5079 fts_doc_t* doc, /* in/out: document to
5080 tokenize */
5081 fts_doc_t* result, /* out: if provided, save
5082 result here */
5083 ulint start_pos, /*!< in: start position in text */
5084 ulint add_pos) /*!< in: add this position to all
5085 tokens from this tokenization */
5086 {
5087 ulint ret;
5088 fts_string_t str;
5089 ulint position;
5090 fts_doc_t* result_doc;
5091 byte buf[FTS_MAX_WORD_LEN + 1];
5092
5093 str.f_str = buf;
5094
5095 /* Determine where to save the result. */
5096 result_doc = (result != NULL) ? result : doc;
5097
5098 /* The length of a string in characters is set here only. */
5099
5100 ret = innobase_mysql_fts_get_token(
5101 doc->charset, doc->text.f_str + start_pos,
5102 doc->text.f_str + doc->text.f_len, &str);
5103
5104 position = start_pos + ret - str.f_len + add_pos;
5105
5106 fts_add_token(result_doc, str, position);
5107
5108 return(ret);
5109 }
5110
5111 /*************************************************************//**
5112 Get token char size by charset
5113 @return token size */
5114 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)5115 fts_get_token_size(
5116 /*===============*/
5117 const CHARSET_INFO* cs, /*!< in: Character set */
5118 const char* token, /*!< in: token */
5119 ulint len) /*!< in: token length */
5120 {
5121 char* start;
5122 char* end;
5123 ulint size = 0;
5124
5125 /* const_cast is for reinterpret_cast below, or it will fail. */
5126 start = const_cast<char*>(token);
5127 end = start + len;
5128 while (start < end) {
5129 int ctype;
5130 int mbl;
5131
5132 mbl = cs->cset->ctype(
5133 cs, &ctype,
5134 reinterpret_cast<uchar*>(start),
5135 reinterpret_cast<uchar*>(end));
5136
5137 size++;
5138
5139 start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
5140 }
5141
5142 return(size);
5143 }
5144
5145 /*************************************************************//**
5146 FTS plugin parser 'myql_parser' callback function for document tokenize.
5147 Refer to 'st_mysql_ftparser_param' for more detail.
5148 @return always returns 0 */
5149 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,char * doc,int len)5150 fts_tokenize_document_internal(
5151 /*===========================*/
5152 MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
5153 char* doc, /*!< in/out: document */
5154 int len) /*!< in: document length */
5155 {
5156 fts_string_t str;
5157 byte buf[FTS_MAX_WORD_LEN + 1];
5158 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
5159 { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
5160
5161 ut_ad(len >= 0);
5162
5163 str.f_str = buf;
5164
5165 for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
5166 inc = innobase_mysql_fts_get_token(
5167 const_cast<CHARSET_INFO*>(param->cs),
5168 reinterpret_cast<byte*>(doc) + i,
5169 reinterpret_cast<byte*>(doc) + len,
5170 &str);
5171
5172 if (str.f_len > 0) {
5173 bool_info.position =
5174 static_cast<int>(i + inc - str.f_len);
5175 ut_ad(bool_info.position >= 0);
5176
5177 /* Stop when add word fails */
5178 if (param->mysql_add_word(
5179 param,
5180 reinterpret_cast<char*>(str.f_str),
5181 static_cast<int>(str.f_len),
5182 &bool_info)) {
5183 break;
5184 }
5185 }
5186 }
5187
5188 return(0);
5189 }
5190
5191 /******************************************************************//**
5192 FTS plugin parser 'myql_add_word' callback function for document tokenize.
5193 Refer to 'st_mysql_ftparser_param' for more detail.
5194 @return always returns 0 */
5195 static
5196 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)5197 fts_tokenize_add_word_for_parser(
5198 /*=============================*/
5199 MYSQL_FTPARSER_PARAM* param, /* in: parser paramter */
5200 char* word, /* in: token word */
5201 int word_len, /* in: word len */
5202 MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: word boolean info */
5203 {
5204 fts_string_t str;
5205 fts_tokenize_param_t* fts_param;
5206 fts_doc_t* result_doc;
5207 ulint position;
5208
5209 fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
5210 result_doc = fts_param->result_doc;
5211 ut_ad(result_doc != NULL);
5212
5213 str.f_str = reinterpret_cast<byte*>(word);
5214 str.f_len = word_len;
5215 str.f_n_char = fts_get_token_size(
5216 const_cast<CHARSET_INFO*>(param->cs), word, word_len);
5217
5218 ut_ad(boolean_info->position >= 0);
5219 position = boolean_info->position + fts_param->add_pos;
5220
5221 fts_add_token(result_doc, str, position);
5222
5223 return(0);
5224 }
5225
5226 /******************************************************************//**
5227 Parse a document using an external / user supplied parser */
5228 static
5229 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)5230 fts_tokenize_by_parser(
5231 /*===================*/
5232 fts_doc_t* doc, /* in/out: document to tokenize */
5233 st_mysql_ftparser* parser, /* in: plugin fts parser */
5234 fts_tokenize_param_t* fts_param) /* in: fts tokenize param */
5235 {
5236 MYSQL_FTPARSER_PARAM param;
5237
5238 ut_a(parser);
5239
5240 /* Set paramters for param */
5241 param.mysql_parse = fts_tokenize_document_internal;
5242 param.mysql_add_word = fts_tokenize_add_word_for_parser;
5243 param.mysql_ftparam = fts_param;
5244 param.cs = doc->charset;
5245 param.doc = reinterpret_cast<char*>(doc->text.f_str);
5246 param.length = static_cast<int>(doc->text.f_len);
5247 param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
5248
5249 PARSER_INIT(parser, ¶m);
5250 parser->parse(¶m);
5251 PARSER_DEINIT(parser, ¶m);
5252 }
5253
5254 /******************************************************************//**
5255 Tokenize a document. */
5256 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)5257 fts_tokenize_document(
5258 /*==================*/
5259 fts_doc_t* doc, /* in/out: document to
5260 tokenize */
5261 fts_doc_t* result, /* out: if provided, save
5262 the result token here */
5263 st_mysql_ftparser* parser) /* in: plugin fts parser */
5264 {
5265 ut_a(!doc->tokens);
5266 ut_a(doc->charset);
5267
5268 doc->tokens = rbt_create_arg_cmp(
5269 sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
5270
5271 if (parser != NULL) {
5272 fts_tokenize_param_t fts_param;
5273
5274 fts_param.result_doc = (result != NULL) ? result : doc;
5275 fts_param.add_pos = 0;
5276
5277 fts_tokenize_by_parser(doc, parser, &fts_param);
5278 } else {
5279 ulint inc;
5280
5281 for (ulint i = 0; i < doc->text.f_len; i += inc) {
5282 inc = fts_process_token(doc, result, i, 0);
5283 ut_a(inc > 0);
5284 }
5285 }
5286 }
5287
5288 /******************************************************************//**
5289 Continue to tokenize a document. */
5290 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)5291 fts_tokenize_document_next(
5292 /*=======================*/
5293 fts_doc_t* doc, /*!< in/out: document to
5294 tokenize */
5295 ulint add_pos, /*!< in: add this position to all
5296 tokens from this tokenization */
5297 fts_doc_t* result, /*!< out: if provided, save
5298 the result token here */
5299 st_mysql_ftparser* parser) /* in: plugin fts parser */
5300 {
5301 ut_a(doc->tokens);
5302
5303 if (parser) {
5304 fts_tokenize_param_t fts_param;
5305
5306 fts_param.result_doc = (result != NULL) ? result : doc;
5307 fts_param.add_pos = add_pos;
5308
5309 fts_tokenize_by_parser(doc, parser, &fts_param);
5310 } else {
5311 ulint inc;
5312
5313 for (ulint i = 0; i < doc->text.f_len; i += inc) {
5314 inc = fts_process_token(doc, result, i, add_pos);
5315 ut_a(inc > 0);
5316 }
5317 }
5318 }
5319
5320 /********************************************************************
5321 Create the vector of fts_get_doc_t instances. */
5322 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)5323 fts_get_docs_create(
5324 /*================*/
5325 /* out: vector of
5326 fts_get_doc_t instances */
5327 fts_cache_t* cache) /*!< in: fts cache */
5328 {
5329 ib_vector_t* get_docs;
5330
5331 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
5332
5333 /* We need one instance of fts_get_doc_t per index. */
5334 get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
5335
5336 /* Create the get_doc instance, we need one of these
5337 per FTS index. */
5338 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
5339
5340 dict_index_t** index;
5341 fts_get_doc_t* get_doc;
5342
5343 index = static_cast<dict_index_t**>(
5344 ib_vector_get(cache->indexes, i));
5345
5346 get_doc = static_cast<fts_get_doc_t*>(
5347 ib_vector_push(get_docs, NULL));
5348
5349 memset(get_doc, 0x0, sizeof(*get_doc));
5350
5351 get_doc->index_cache = fts_get_index_cache(cache, *index);
5352 get_doc->cache = cache;
5353
5354 /* Must find the index cache. */
5355 ut_a(get_doc->index_cache != NULL);
5356 }
5357
5358 return(get_docs);
5359 }
5360
5361 /********************************************************************
5362 Release any resources held by the fts_get_doc_t instances. */
5363 static
5364 void
fts_get_docs_clear(ib_vector_t * get_docs)5365 fts_get_docs_clear(
5366 /*===============*/
5367 ib_vector_t* get_docs) /*!< in: Doc retrieval vector */
5368 {
5369 ulint i;
5370
5371 /* Release the get doc graphs if any. */
5372 for (i = 0; i < ib_vector_size(get_docs); ++i) {
5373
5374 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(
5375 ib_vector_get(get_docs, i));
5376
5377 if (get_doc->get_document_graph != NULL) {
5378
5379 ut_a(get_doc->index_cache);
5380
5381 fts_que_graph_free(get_doc->get_document_graph);
5382 get_doc->get_document_graph = NULL;
5383 }
5384 }
5385 }
5386
5387 /*********************************************************************//**
5388 Get the initial Doc ID by consulting the CONFIG table
5389 @return initial Doc ID */
5390 doc_id_t
fts_init_doc_id(const dict_table_t * table)5391 fts_init_doc_id(
5392 /*============*/
5393 const dict_table_t* table) /*!< in: table */
5394 {
5395 doc_id_t max_doc_id = 0;
5396
5397 rw_lock_x_lock(&table->fts->cache->lock);
5398
5399 /* Return if the table is already initialized for DOC ID */
5400 if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
5401 rw_lock_x_unlock(&table->fts->cache->lock);
5402 return(0);
5403 }
5404
5405 DEBUG_SYNC_C("fts_initialize_doc_id");
5406
5407 /* Then compare this value with the ID value stored in the CONFIG
5408 table. The larger one will be our new initial Doc ID */
5409 fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
5410
5411 /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
5412 creating index (and add doc id column. No need to recovery
5413 documents */
5414 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
5415 fts_init_index((dict_table_t*) table, TRUE);
5416 }
5417
5418 table->fts->fts_status |= ADDED_TABLE_SYNCED;
5419
5420 table->fts->cache->first_doc_id = max_doc_id;
5421
5422 rw_lock_x_unlock(&table->fts->cache->lock);
5423
5424 ut_ad(max_doc_id > 0);
5425
5426 return(max_doc_id);
5427 }
5428
#ifdef FTS_MULT_INDEX
/*********************************************************************//**
Check if the index is in the affected set, i.e. whether the index that
get_doc reads from is one of the pointers stored in fts_indexes.
@return TRUE if index is updated */
static
ibool
fts_is_index_updated(
/*=================*/
	const ib_vector_t*	fts_indexes,	/*!< in: affected FTS indexes */
	const fts_get_doc_t*	get_doc)	/*!< in: info for reading
						document */
{
	const dict_index_t*	wanted = get_doc->index_cache->index;

	for (ulint n = 0; n < ib_vector_size(fts_indexes); ++n) {
		const dict_index_t*	candidate;

		candidate = static_cast<const dict_index_t*>(
			ib_vector_getp_const(fts_indexes, n));

		ut_a(candidate != NULL);

		/* Indexes are compared by identity (pointer value). */
		if (candidate == wanted) {
			return(TRUE);
		}
	}

	return(FALSE);
}
#endif
5460
5461 /*********************************************************************//**
5462 Fetch COUNT(*) from specified table.
5463 @return the number of rows in the table */
5464 ulint
fts_get_rows_count(fts_table_t * fts_table)5465 fts_get_rows_count(
5466 /*===============*/
5467 fts_table_t* fts_table) /*!< in: fts table to read */
5468 {
5469 trx_t* trx;
5470 pars_info_t* info;
5471 que_t* graph;
5472 dberr_t error;
5473 ulint count = 0;
5474 char table_name[MAX_FULL_NAME_LEN];
5475
5476 trx = trx_allocate_for_background();
5477
5478 trx->op_info = "fetching FT table rows count";
5479
5480 info = pars_info_create();
5481
5482 pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
5483
5484 fts_get_table_name(fts_table, table_name);
5485 pars_info_bind_id(info, true, "table_name", table_name);
5486
5487 graph = fts_parse_sql(
5488 fts_table,
5489 info,
5490 "DECLARE FUNCTION my_func;\n"
5491 "DECLARE CURSOR c IS"
5492 " SELECT COUNT(*)"
5493 " FROM $table_name;\n"
5494 "BEGIN\n"
5495 "\n"
5496 "OPEN c;\n"
5497 "WHILE 1 = 1 LOOP\n"
5498 " FETCH c INTO my_func();\n"
5499 " IF c % NOTFOUND THEN\n"
5500 " EXIT;\n"
5501 " END IF;\n"
5502 "END LOOP;\n"
5503 "CLOSE c;");
5504
5505 for (;;) {
5506 error = fts_eval_sql(trx, graph);
5507
5508 if (error == DB_SUCCESS) {
5509 fts_sql_commit(trx);
5510
5511 break; /* Exit the loop. */
5512 } else {
5513 fts_sql_rollback(trx);
5514
5515 if (error == DB_LOCK_WAIT_TIMEOUT) {
5516 ib::warn() << "lock wait timeout reading"
5517 " FTS table. Retrying!";
5518
5519 trx->error_state = DB_SUCCESS;
5520 } else {
5521 ib::error() << "(" << ut_strerr(error)
5522 << ") while reading FTS table.";
5523
5524 break; /* Exit the loop. */
5525 }
5526 }
5527 }
5528
5529 fts_que_graph_free(graph);
5530
5531 trx_free_for_background(trx);
5532
5533 return(count);
5534 }
5535
#ifdef FTS_CACHE_SIZE_DEBUG
/*********************************************************************//**
Read the max cache size parameter from the config table and store it
in sync->max_cache_size. A background transaction is used for the read. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync)	/*!< in/out: sync state */
{
	fts_table_t	config_table;
	trx_t*		bg_trx = trx_allocate_for_background();

	FTS_INIT_FTS_TABLE(&config_table, "CONFIG", FTS_COMMON_TABLE,
			   sync->table);

	/* The size returned is in bytes. */
	sync->max_cache_size = fts_get_max_cache_size(bg_trx, &config_table);

	fts_sql_commit(bg_trx);

	trx_free_for_background(bg_trx);
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5560
5561 /*********************************************************************//**
5562 Free the modified rows of a table. */
5563 UNIV_INLINE
5564 void
fts_trx_table_rows_free(ib_rbt_t * rows)5565 fts_trx_table_rows_free(
5566 /*====================*/
5567 ib_rbt_t* rows) /*!< in: rbt of rows to free */
5568 {
5569 const ib_rbt_node_t* node;
5570
5571 for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5572 fts_trx_row_t* row;
5573
5574 row = rbt_value(fts_trx_row_t, node);
5575
5576 if (row->fts_indexes != NULL) {
5577 /* This vector shouldn't be using the
5578 heap allocator. */
5579 ut_a(row->fts_indexes->allocator->arg == NULL);
5580
5581 ib_vector_free(row->fts_indexes);
5582 row->fts_indexes = NULL;
5583 }
5584
5585 ut_free(rbt_remove_node(rows, node));
5586 }
5587
5588 ut_a(rbt_empty(rows));
5589 rbt_free(rows);
5590 }
5591
5592 /*********************************************************************//**
5593 Free an FTS savepoint instance. */
5594 UNIV_INLINE
5595 void
fts_savepoint_free(fts_savepoint_t * savepoint)5596 fts_savepoint_free(
5597 /*===============*/
5598 fts_savepoint_t* savepoint) /*!< in: savepoint instance */
5599 {
5600 const ib_rbt_node_t* node;
5601 ib_rbt_t* tables = savepoint->tables;
5602
5603 /* Nothing to free! */
5604 if (tables == NULL) {
5605 return;
5606 }
5607
5608 for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5609 fts_trx_table_t* ftt;
5610 fts_trx_table_t** fttp;
5611
5612 fttp = rbt_value(fts_trx_table_t*, node);
5613 ftt = *fttp;
5614
5615 /* This can be NULL if a savepoint was released. */
5616 if (ftt->rows != NULL) {
5617 fts_trx_table_rows_free(ftt->rows);
5618 ftt->rows = NULL;
5619 }
5620
5621 /* This can be NULL if a savepoint was released. */
5622 if (ftt->added_doc_ids != NULL) {
5623 fts_doc_ids_free(ftt->added_doc_ids);
5624 ftt->added_doc_ids = NULL;
5625 }
5626
5627 /* The default savepoint name must be NULL. */
5628 if (ftt->docs_added_graph) {
5629 fts_que_graph_free(ftt->docs_added_graph);
5630 }
5631
5632 /* NOTE: We are responsible for free'ing the node */
5633 ut_free(rbt_remove_node(tables, node));
5634 }
5635
5636 ut_a(rbt_empty(tables));
5637 rbt_free(tables);
5638 savepoint->tables = NULL;
5639 }
5640
5641 /*********************************************************************//**
5642 Free an FTS trx. */
5643 void
fts_trx_free(fts_trx_t * fts_trx)5644 fts_trx_free(
5645 /*=========*/
5646 fts_trx_t* fts_trx) /* in, own: FTS trx */
5647 {
5648 ulint i;
5649
5650 for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5651 fts_savepoint_t* savepoint;
5652
5653 savepoint = static_cast<fts_savepoint_t*>(
5654 ib_vector_get(fts_trx->savepoints, i));
5655
5656 /* The default savepoint name must be NULL. */
5657 if (i == 0) {
5658 ut_a(savepoint->name == NULL);
5659 }
5660
5661 fts_savepoint_free(savepoint);
5662 }
5663
5664 for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5665 fts_savepoint_t* savepoint;
5666
5667 savepoint = static_cast<fts_savepoint_t*>(
5668 ib_vector_get(fts_trx->last_stmt, i));
5669
5670 /* The default savepoint name must be NULL. */
5671 if (i == 0) {
5672 ut_a(savepoint->name == NULL);
5673 }
5674
5675 fts_savepoint_free(savepoint);
5676 }
5677
5678 if (fts_trx->heap) {
5679 mem_heap_free(fts_trx->heap);
5680 }
5681 }
5682
5683 /*********************************************************************//**
5684 Extract the doc id from the FTS hidden column.
5685 @return doc id that was extracted from rec */
5686 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5687 fts_get_doc_id_from_row(
5688 /*====================*/
5689 dict_table_t* table, /*!< in: table */
5690 dtuple_t* row) /*!< in: row whose FTS doc id we
5691 want to extract.*/
5692 {
5693 dfield_t* field;
5694 doc_id_t doc_id = 0;
5695
5696 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5697
5698 field = dtuple_get_nth_field(row, table->fts->doc_col);
5699
5700 ut_a(dfield_get_len(field) == sizeof(doc_id));
5701 ut_a(dfield_get_type(field)->mtype == DATA_INT);
5702
5703 doc_id = fts_read_doc_id(
5704 static_cast<const byte*>(dfield_get_data(field)));
5705
5706 return(doc_id);
5707 }
5708
5709 /** Extract the doc id from the record that belongs to index.
5710 @param[in] table table
5711 @param[in] rec record contains FTS_DOC_ID
5712 @param[in] index index of rec
5713 @param[in] heap heap memory
5714 @return doc id that was extracted from rec */
5715 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5716 fts_get_doc_id_from_rec(
5717 dict_table_t* table,
5718 const rec_t* rec,
5719 const dict_index_t* index,
5720 mem_heap_t* heap)
5721 {
5722 ulint len;
5723 const byte* data;
5724 ulint col_no;
5725 doc_id_t doc_id = 0;
5726 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5727 ulint* offsets = offsets_;
5728 mem_heap_t* my_heap = heap;
5729
5730 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5731
5732 rec_offs_init(offsets_);
5733
5734 offsets = rec_get_offsets(
5735 rec, index, offsets, ULINT_UNDEFINED, &my_heap);
5736
5737 col_no = dict_col_get_index_pos(
5738 &table->cols[table->fts->doc_col], index);
5739
5740 ut_ad(col_no != ULINT_UNDEFINED);
5741
5742 data = rec_get_nth_field(rec, offsets, col_no, &len);
5743
5744 ut_a(len == 8);
5745 ut_ad(8 == sizeof(doc_id));
5746 doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5747
5748 if (my_heap && !heap) {
5749 mem_heap_free(my_heap);
5750 }
5751
5752 return(doc_id);
5753 }
5754
5755 /*********************************************************************//**
5756 Search the index specific cache for a particular FTS index.
5757 @return the index specific cache else NULL */
5758 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5759 fts_find_index_cache(
5760 /*=================*/
5761 const fts_cache_t* cache, /*!< in: cache to search */
5762 const dict_index_t* index) /*!< in: index to search for */
5763 {
5764 /* We cast away the const because our internal function, takes
5765 non-const cache arg and returns a non-const pointer. */
5766 return(static_cast<fts_index_cache_t*>(
5767 fts_get_index_cache((fts_cache_t*) cache, index)));
5768 }
5769
5770 /*********************************************************************//**
5771 Search cache for word.
5772 @return the word node vector if found else NULL */
5773 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5774 fts_cache_find_word(
5775 /*================*/
5776 const fts_index_cache_t*index_cache, /*!< in: cache to search */
5777 const fts_string_t* text) /*!< in: word to search for */
5778 {
5779 ib_rbt_bound_t parent;
5780 const ib_vector_t* nodes = NULL;
5781 #ifdef UNIV_DEBUG
5782 dict_table_t* table = index_cache->index->table;
5783 fts_cache_t* cache = table->fts->cache;
5784
5785 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5786 #endif /* UNIV_DEBUG */
5787
5788 /* Lookup the word in the rb tree */
5789 if (rbt_search(index_cache->words, &parent, text) == 0) {
5790 const fts_tokenizer_word_t* word;
5791
5792 word = rbt_value(fts_tokenizer_word_t, parent.last);
5793
5794 nodes = word->nodes;
5795 }
5796
5797 return(nodes);
5798 }
5799
5800 /*********************************************************************//**
5801 Check cache for deleted doc id.
5802 @return TRUE if deleted */
5803 ibool
fts_cache_is_deleted_doc_id(const fts_cache_t * cache,doc_id_t doc_id)5804 fts_cache_is_deleted_doc_id(
5805 /*========================*/
5806 const fts_cache_t* cache, /*!< in: cache ito search */
5807 doc_id_t doc_id) /*!< in: doc id to search for */
5808 {
5809 ut_ad(mutex_own(&cache->deleted_lock));
5810
5811 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5812 const fts_update_t* update;
5813
5814 update = static_cast<const fts_update_t*>(
5815 ib_vector_get_const(cache->deleted_doc_ids, i));
5816
5817 if (doc_id == update->doc_id) {
5818
5819 return(TRUE);
5820 }
5821 }
5822
5823 return(FALSE);
5824 }
5825
5826 /*********************************************************************//**
5827 Append deleted doc ids to vector. */
5828 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5829 fts_cache_append_deleted_doc_ids(
5830 /*=============================*/
5831 const fts_cache_t* cache, /*!< in: cache to use */
5832 ib_vector_t* vector) /*!< in: append to this vector */
5833 {
5834 mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5835
5836 if (cache->deleted_doc_ids == NULL) {
5837 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5838 return;
5839 }
5840
5841
5842 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5843 fts_update_t* update;
5844
5845 update = static_cast<fts_update_t*>(
5846 ib_vector_get(cache->deleted_doc_ids, i));
5847
5848 ib_vector_push(vector, &update->doc_id);
5849 }
5850
5851 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5852 }
5853
5854 /*********************************************************************//**
5855 Wait for the background thread to start. We poll to detect change
5856 of state, which is acceptable, since the wait should happen only
5857 once during startup.
5858 @return true if the thread started else FALSE (i.e timed out) */
5859 ibool
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5860 fts_wait_for_background_thread_to_start(
5861 /*====================================*/
5862 dict_table_t* table, /*!< in: table to which the thread
5863 is attached */
5864 ulint max_wait) /*!< in: time in microseconds, if
5865 set to 0 then it disables
5866 timeout checking */
5867 {
5868 ulint count = 0;
5869 ibool done = FALSE;
5870
5871 ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5872
5873 for (;;) {
5874 fts_t* fts = table->fts;
5875
5876 mutex_enter(&fts->bg_threads_mutex);
5877
5878 if (fts->fts_status & BG_THREAD_READY) {
5879
5880 done = TRUE;
5881 }
5882
5883 mutex_exit(&fts->bg_threads_mutex);
5884
5885 if (!done) {
5886 os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5887
5888 if (max_wait > 0) {
5889
5890 max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5891
5892 /* We ignore the residual value. */
5893 if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5894 break;
5895 }
5896 }
5897
5898 ++count;
5899 } else {
5900 break;
5901 }
5902
5903 if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5904 ib::error() << "The background thread for the FTS"
5905 " table " << table->name
5906 << " refuses to start";
5907
5908 count = 0;
5909 }
5910 }
5911
5912 return(done);
5913 }
5914
5915 /*********************************************************************//**
5916 Add the FTS document id hidden column. */
5917 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5918 fts_add_doc_id_column(
5919 /*==================*/
5920 dict_table_t* table, /*!< in/out: Table with FTS index */
5921 mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
5922 {
5923 dict_mem_table_add_col(
5924 table, heap,
5925 FTS_DOC_ID_COL_NAME,
5926 DATA_INT,
5927 dtype_form_prtype(
5928 DATA_NOT_NULL | DATA_UNSIGNED
5929 | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5930 sizeof(doc_id_t));
5931 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5932 }
5933
5934 /** Add new fts doc id to the update vector.
5935 @param[in] table the table that contains the FTS index.
5936 @param[in,out] ufield the fts doc id field in the update vector.
5937 No new memory is allocated for this in this
5938 function.
5939 @param[in,out] next_doc_id the fts doc id that has been added to the
5940 update vector. If 0, a new fts doc id is
5941 automatically generated. The memory provided
5942 for this argument will be used by the update
5943 vector. Ensure that the life time of this
5944 memory matches that of the update vector.
5945 @return the fts doc id used in the update vector */
5946 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5947 fts_update_doc_id(
5948 dict_table_t* table,
5949 upd_field_t* ufield,
5950 doc_id_t* next_doc_id)
5951 {
5952 doc_id_t doc_id;
5953 dberr_t error = DB_SUCCESS;
5954
5955 if (*next_doc_id) {
5956 doc_id = *next_doc_id;
5957 } else {
5958 /* Get the new document id that will be added. */
5959 error = fts_get_next_doc_id(table, &doc_id);
5960 }
5961
5962 if (error == DB_SUCCESS) {
5963 dict_index_t* clust_index;
5964 dict_col_t* col = dict_table_get_nth_col(
5965 table, table->fts->doc_col);
5966
5967 ufield->exp = NULL;
5968
5969 ufield->new_val.len = sizeof(doc_id);
5970
5971 clust_index = dict_table_get_first_index(table);
5972
5973 ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5974 dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5975
5976 /* It is possible we update record that has
5977 not yet be sync-ed from last crash. */
5978
5979 /* Convert to storage byte order. */
5980 ut_a(doc_id != FTS_NULL_DOC_ID);
5981 fts_write_doc_id((byte*) next_doc_id, doc_id);
5982
5983 ufield->new_val.data = next_doc_id;
5984 ufield->new_val.ext = 0;
5985 }
5986
5987 return(doc_id);
5988 }
5989
5990 /*********************************************************************//**
5991 Check if the table has an FTS index. This is the non-inline version
5992 of dict_table_has_fts_index().
5993 @return TRUE if table has an FTS index */
5994 ibool
fts_dict_table_has_fts_index(dict_table_t * table)5995 fts_dict_table_has_fts_index(
5996 /*=========================*/
5997 dict_table_t* table) /*!< in: table */
5998 {
5999 return(dict_table_has_fts_index(table));
6000 }
6001
6002 /** fts_t constructor.
6003 @param[in] table table with FTS indexes
6004 @param[in,out] heap memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)6005 fts_t::fts_t(
6006 const dict_table_t* table,
6007 mem_heap_t* heap)
6008 :
6009 bg_threads(0),
6010 fts_status(0),
6011 add_wq(NULL),
6012 cache(NULL),
6013 doc_col(ULINT_UNDEFINED),
6014 fts_heap(heap)
6015 {
6016 ut_a(table->fts == NULL);
6017
6018 mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
6019
6020 ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap);
6021
6022 indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
6023
6024 dict_table_get_all_fts_indexes(table, indexes);
6025 }
6026
6027 /** fts_t destructor. */
~fts_t()6028 fts_t::~fts_t()
6029 {
6030 mutex_free(&bg_threads_mutex);
6031
6032 ut_ad(add_wq == NULL);
6033
6034 if (cache != NULL) {
6035 fts_cache_clear(cache);
6036 fts_cache_destroy(cache);
6037 cache = NULL;
6038 }
6039
6040 /* There is no need to call ib_vector_free() on this->indexes
6041 because it is stored in this->fts_heap. */
6042 }
6043
6044 /*********************************************************************//**
6045 Create an instance of fts_t.
6046 @return instance of fts_t */
6047 fts_t*
fts_create(dict_table_t * table)6048 fts_create(
6049 /*=======*/
6050 dict_table_t* table) /*!< in/out: table with FTS indexes */
6051 {
6052 fts_t* fts;
6053 mem_heap_t* heap;
6054
6055 heap = mem_heap_create(512);
6056
6057 fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
6058
6059 new(fts) fts_t(table, heap);
6060
6061 return(fts);
6062 }
6063
6064 /*********************************************************************//**
6065 Free the FTS resources. */
6066 void
fts_free(dict_table_t * table)6067 fts_free(
6068 /*=====*/
6069 dict_table_t* table) /*!< in/out: table with FTS indexes */
6070 {
6071 fts_t* fts = table->fts;
6072
6073 fts->~fts_t();
6074
6075 mem_heap_free(fts->fts_heap);
6076
6077 table->fts = NULL;
6078 }
6079
6080 /*********************************************************************//**
6081 Signal FTS threads to initiate shutdown. */
6082 void
fts_start_shutdown(dict_table_t * table,fts_t * fts)6083 fts_start_shutdown(
6084 /*===============*/
6085 dict_table_t* table, /*!< in: table with FTS indexes */
6086 fts_t* fts) /*!< in: fts instance that needs
6087 to be informed about shutdown */
6088 {
6089 mutex_enter(&fts->bg_threads_mutex);
6090
6091 fts->fts_status |= BG_THREAD_STOP;
6092
6093 mutex_exit(&fts->bg_threads_mutex);
6094
6095 }
6096
6097 /*********************************************************************//**
6098 Wait for FTS threads to shutdown. */
6099 void
fts_shutdown(dict_table_t * table,fts_t * fts)6100 fts_shutdown(
6101 /*=========*/
6102 dict_table_t* table, /*!< in: table with FTS indexes */
6103 fts_t* fts) /*!< in: fts instance to shutdown */
6104 {
6105 mutex_enter(&fts->bg_threads_mutex);
6106
6107 ut_a(fts->fts_status & BG_THREAD_STOP);
6108
6109 dict_table_wait_for_bg_threads_to_exit(table, 20000);
6110
6111 mutex_exit(&fts->bg_threads_mutex);
6112 }
6113
6114 /*********************************************************************//**
6115 Take a FTS savepoint. */
6116 UNIV_INLINE
6117 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)6118 fts_savepoint_copy(
6119 /*===============*/
6120 const fts_savepoint_t* src, /*!< in: source savepoint */
6121 fts_savepoint_t* dst) /*!< out: destination savepoint */
6122 {
6123 const ib_rbt_node_t* node;
6124 const ib_rbt_t* tables;
6125
6126 tables = src->tables;
6127
6128 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
6129
6130 fts_trx_table_t* ftt_dst;
6131 const fts_trx_table_t** ftt_src;
6132
6133 ftt_src = rbt_value(const fts_trx_table_t*, node);
6134
6135 ftt_dst = fts_trx_table_clone(*ftt_src);
6136
6137 rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
6138 }
6139 }
6140
6141 /*********************************************************************//**
6142 Take a FTS savepoint. */
6143 void
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)6144 fts_savepoint_take(
6145 /*===============*/
6146 trx_t* trx, /*!< in: transaction */
6147 fts_trx_t* fts_trx, /*!< in: fts transaction */
6148 const char* name) /*!< in: savepoint name */
6149 {
6150 mem_heap_t* heap;
6151 fts_savepoint_t* savepoint;
6152 fts_savepoint_t* last_savepoint;
6153
6154 ut_a(name != NULL);
6155
6156 heap = fts_trx->heap;
6157
6158 /* The implied savepoint must exist. */
6159 ut_a(ib_vector_size(fts_trx->savepoints) > 0);
6160
6161 last_savepoint = static_cast<fts_savepoint_t*>(
6162 ib_vector_last(fts_trx->savepoints));
6163 savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
6164
6165 if (last_savepoint->tables != NULL) {
6166 fts_savepoint_copy(last_savepoint, savepoint);
6167 }
6168 }
6169
6170 /*********************************************************************//**
6171 Lookup a savepoint instance by name.
6172 @return ULINT_UNDEFINED if not found */
6173 UNIV_INLINE
6174 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)6175 fts_savepoint_lookup(
6176 /*==================*/
6177 ib_vector_t* savepoints, /*!< in: savepoints */
6178 const char* name) /*!< in: savepoint name */
6179 {
6180 ulint i;
6181
6182 ut_a(ib_vector_size(savepoints) > 0);
6183
6184 for (i = 1; i < ib_vector_size(savepoints); ++i) {
6185 fts_savepoint_t* savepoint;
6186
6187 savepoint = static_cast<fts_savepoint_t*>(
6188 ib_vector_get(savepoints, i));
6189
6190 if (strcmp(name, savepoint->name) == 0) {
6191 return(i);
6192 }
6193 }
6194
6195 return(ULINT_UNDEFINED);
6196 }
6197
6198 /*********************************************************************//**
6199 Release the savepoint data identified by name. All savepoints created
6200 after the named savepoint are kept.
6201 @return DB_SUCCESS or error code */
6202 void
fts_savepoint_release(trx_t * trx,const char * name)6203 fts_savepoint_release(
6204 /*==================*/
6205 trx_t* trx, /*!< in: transaction */
6206 const char* name) /*!< in: savepoint name */
6207 {
6208 ut_a(name != NULL);
6209
6210 ib_vector_t* savepoints = trx->fts_trx->savepoints;
6211
6212 ut_a(ib_vector_size(savepoints) > 0);
6213
6214 ulint i = fts_savepoint_lookup(savepoints, name);
6215 if (i != ULINT_UNDEFINED) {
6216 ut_a(i >= 1);
6217
6218 fts_savepoint_t* savepoint;
6219 savepoint = static_cast<fts_savepoint_t*>(
6220 ib_vector_get(savepoints, i));
6221
6222 if (i == ib_vector_size(savepoints) - 1) {
6223 /* If the savepoint is the last, we save its
6224 tables to the previous savepoint. */
6225 fts_savepoint_t* prev_savepoint;
6226 prev_savepoint = static_cast<fts_savepoint_t*>(
6227 ib_vector_get(savepoints, i - 1));
6228
6229 ib_rbt_t* tables = savepoint->tables;
6230 savepoint->tables = prev_savepoint->tables;
6231 prev_savepoint->tables = tables;
6232 }
6233
6234 fts_savepoint_free(savepoint);
6235 ib_vector_remove(savepoints, *(void**)savepoint);
6236
6237 /* Make sure we don't delete the implied savepoint. */
6238 ut_a(ib_vector_size(savepoints) > 0);
6239 }
6240 }
6241
6242 /**********************************************************************//**
6243 Refresh last statement savepoint. */
6244 void
fts_savepoint_laststmt_refresh(trx_t * trx)6245 fts_savepoint_laststmt_refresh(
6246 /*===========================*/
6247 trx_t* trx) /*!< in: transaction */
6248 {
6249
6250 fts_trx_t* fts_trx;
6251 fts_savepoint_t* savepoint;
6252
6253 fts_trx = trx->fts_trx;
6254
6255 savepoint = static_cast<fts_savepoint_t*>(
6256 ib_vector_pop(fts_trx->last_stmt));
6257 fts_savepoint_free(savepoint);
6258
6259 ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
6260 savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
6261 }
6262
6263 /********************************************************************
6264 Undo the Doc ID add/delete operations in last stmt */
6265 static
6266 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)6267 fts_undo_last_stmt(
6268 /*===============*/
6269 fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
6270 fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
6271 {
6272 ib_rbt_t* s_rows;
6273 ib_rbt_t* l_rows;
6274 const ib_rbt_node_t* node;
6275
6276 l_rows = l_ftt->rows;
6277 s_rows = s_ftt->rows;
6278
6279 for (node = rbt_first(l_rows);
6280 node;
6281 node = rbt_next(l_rows, node)) {
6282 fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
6283 ib_rbt_bound_t parent;
6284
6285 rbt_search(s_rows, &parent, &(l_row->doc_id));
6286
6287 if (parent.result == 0) {
6288 fts_trx_row_t* s_row = rbt_value(
6289 fts_trx_row_t, parent.last);
6290
6291 switch (l_row->state) {
6292 case FTS_INSERT:
6293 ut_free(rbt_remove_node(s_rows, parent.last));
6294 break;
6295
6296 case FTS_DELETE:
6297 if (s_row->state == FTS_NOTHING) {
6298 s_row->state = FTS_INSERT;
6299 } else if (s_row->state == FTS_DELETE) {
6300 ut_free(rbt_remove_node(
6301 s_rows, parent.last));
6302 }
6303 break;
6304
6305 /* FIXME: Check if FTS_MODIFY need to be addressed */
6306 case FTS_MODIFY:
6307 case FTS_NOTHING:
6308 break;
6309 default:
6310 ut_error;
6311 }
6312 }
6313 }
6314 }
6315
6316 /**********************************************************************//**
6317 Rollback to savepoint indentified by name.
6318 @return DB_SUCCESS or error code */
6319 void
fts_savepoint_rollback_last_stmt(trx_t * trx)6320 fts_savepoint_rollback_last_stmt(
6321 /*=============================*/
6322 trx_t* trx) /*!< in: transaction */
6323 {
6324 ib_vector_t* savepoints;
6325 fts_savepoint_t* savepoint;
6326 fts_savepoint_t* last_stmt;
6327 fts_trx_t* fts_trx;
6328 ib_rbt_bound_t parent;
6329 const ib_rbt_node_t* node;
6330 ib_rbt_t* l_tables;
6331 ib_rbt_t* s_tables;
6332
6333 fts_trx = trx->fts_trx;
6334 savepoints = fts_trx->savepoints;
6335
6336 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
6337 last_stmt = static_cast<fts_savepoint_t*>(
6338 ib_vector_last(fts_trx->last_stmt));
6339
6340 l_tables = last_stmt->tables;
6341 s_tables = savepoint->tables;
6342
6343 for (node = rbt_first(l_tables);
6344 node;
6345 node = rbt_next(l_tables, node)) {
6346
6347 fts_trx_table_t** l_ftt;
6348
6349 l_ftt = rbt_value(fts_trx_table_t*, node);
6350
6351 rbt_search_cmp(
6352 s_tables, &parent, &(*l_ftt)->table->id,
6353 fts_trx_table_id_cmp, NULL);
6354
6355 if (parent.result == 0) {
6356 fts_trx_table_t** s_ftt;
6357
6358 s_ftt = rbt_value(fts_trx_table_t*, parent.last);
6359
6360 fts_undo_last_stmt(*s_ftt, *l_ftt);
6361 }
6362 }
6363 }
6364
6365 /**********************************************************************//**
6366 Rollback to savepoint indentified by name.
6367 @return DB_SUCCESS or error code */
6368 void
fts_savepoint_rollback(trx_t * trx,const char * name)6369 fts_savepoint_rollback(
6370 /*===================*/
6371 trx_t* trx, /*!< in: transaction */
6372 const char* name) /*!< in: savepoint name */
6373 {
6374 ulint i;
6375 ib_vector_t* savepoints;
6376
6377 ut_a(name != NULL);
6378
6379 savepoints = trx->fts_trx->savepoints;
6380
6381 /* We pop all savepoints from the the top of the stack up to
6382 and including the instance that was found. */
6383 i = fts_savepoint_lookup(savepoints, name);
6384
6385 if (i != ULINT_UNDEFINED) {
6386 fts_savepoint_t* savepoint;
6387
6388 ut_a(i > 0);
6389
6390 while (ib_vector_size(savepoints) > i) {
6391 fts_savepoint_t* savepoint;
6392
6393 savepoint = static_cast<fts_savepoint_t*>(
6394 ib_vector_pop(savepoints));
6395
6396 if (savepoint->name != NULL) {
6397 /* Since name was allocated on the heap, the
6398 memory will be released when the transaction
6399 completes. */
6400 savepoint->name = NULL;
6401
6402 fts_savepoint_free(savepoint);
6403 }
6404 }
6405
6406 /* Pop all a elements from the top of the stack that may
6407 have been released. We have to be careful that we don't
6408 delete the implied savepoint. */
6409
6410 for (savepoint = static_cast<fts_savepoint_t*>(
6411 ib_vector_last(savepoints));
6412 ib_vector_size(savepoints) > 1
6413 && savepoint->name == NULL;
6414 savepoint = static_cast<fts_savepoint_t*>(
6415 ib_vector_last(savepoints))) {
6416
6417 ib_vector_pop(savepoints);
6418 }
6419
6420 /* Make sure we don't delete the implied savepoint. */
6421 ut_a(ib_vector_size(savepoints) > 0);
6422
6423 /* Restore the savepoint. */
6424 fts_savepoint_take(trx, trx->fts_trx, name);
6425 }
6426 }
6427
6428 /** Check if a table is an FTS auxiliary table name.
6429 @param[out] table FTS table info
6430 @param[in] name Table name
6431 @param[in] len Length of table name
6432 @return true if the name matches an auxiliary table name pattern */
6433 static
6434 bool
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)6435 fts_is_aux_table_name(
6436 fts_aux_table_t* table,
6437 const char* name,
6438 ulint len)
6439 {
6440 const char* ptr;
6441 char* end;
6442 char my_name[MAX_FULL_NAME_LEN + 1];
6443
6444 ut_ad(len <= MAX_FULL_NAME_LEN);
6445 ut_memcpy(my_name, name, len);
6446 my_name[len] = 0;
6447 end = my_name + len;
6448
6449 ptr = static_cast<const char*>(memchr(my_name, '/', len));
6450
6451 if (ptr != NULL) {
6452 /* We will start the match after the '/' */
6453 ++ptr;
6454 len = end - ptr;
6455 }
6456
6457 /* All auxiliary tables are prefixed with "FTS_" and the name
6458 length will be at the very least greater than 20 bytes. */
6459 if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
6460 ulint i;
6461
6462 /* Skip the prefix. */
6463 ptr += 4;
6464 len -= 4;
6465
6466 /* Try and read the table id. */
6467 if (!fts_read_object_id(&table->parent_id, ptr)) {
6468 return(false);
6469 }
6470
6471 /* Skip the table id. */
6472 ptr = static_cast<const char*>(memchr(ptr, '_', len));
6473
6474 if (ptr == NULL) {
6475 return(false);
6476 }
6477
6478 /* Skip the underscore. */
6479 ++ptr;
6480 ut_a(end > ptr);
6481 len = end - ptr;
6482
6483 /* First search the common table suffix array. */
6484 for (i = 0; fts_common_tables[i] != NULL; ++i) {
6485
6486 if (strncmp(ptr, fts_common_tables[i], len) == 0) {
6487 return(true);
6488 }
6489 }
6490
6491 /* Could be obsolete common tables. */
6492 if (strncmp(ptr, "ADDED", len) == 0
6493 || strncmp(ptr, "STOPWORDS", len) == 0) {
6494 return(true);
6495 }
6496
6497 /* Try and read the index id. */
6498 if (!fts_read_object_id(&table->index_id, ptr)) {
6499 return(false);
6500 }
6501
6502 /* Skip the table id. */
6503 ptr = static_cast<const char*>(memchr(ptr, '_', len));
6504
6505 if (ptr == NULL) {
6506 return(false);
6507 }
6508
6509 /* Skip the underscore. */
6510 ++ptr;
6511 ut_a(end > ptr);
6512 len = end - ptr;
6513
6514 /* Search the FT index specific array. */
6515 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
6516
6517 if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
6518 return(true);
6519 }
6520 }
6521
6522 /* Other FT index specific table(s). */
6523 if (strncmp(ptr, "DOC_ID", len) == 0) {
6524 return(true);
6525 }
6526 }
6527
6528 return(false);
6529 }
6530
6531 /**********************************************************************//**
6532 Callback function to read a single table ID column.
6533 @return Always return TRUE */
6534 static
6535 ibool
fts_read_tables(void * row,void * user_arg)6536 fts_read_tables(
6537 /*============*/
6538 void* row, /*!< in: sel_node_t* */
6539 void* user_arg) /*!< in: pointer to ib_vector_t */
6540 {
6541 int i;
6542 fts_aux_table_t*table;
6543 mem_heap_t* heap;
6544 ibool done = FALSE;
6545 ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg);
6546 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
6547 que_node_t* exp = sel_node->select_list;
6548
6549 /* Must be a heap allocated vector. */
6550 ut_a(tables->allocator->arg != NULL);
6551
6552 /* We will use this heap for allocating strings. */
6553 heap = static_cast<mem_heap_t*>(tables->allocator->arg);
6554 table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
6555
6556 memset(table, 0x0, sizeof(*table));
6557
6558 /* Iterate over the columns and read the values. */
6559 for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
6560
6561 dfield_t* dfield = que_node_get_val(exp);
6562 void* data = dfield_get_data(dfield);
6563 ulint len = dfield_get_len(dfield);
6564
6565 ut_a(len != UNIV_SQL_NULL);
6566
6567 /* Note: The column numbers below must match the SELECT */
6568 switch (i) {
6569 case 0: /* NAME */
6570
6571 if (!fts_is_aux_table_name(
6572 table, static_cast<const char*>(data), len)) {
6573 ib_vector_pop(tables);
6574 done = TRUE;
6575 break;
6576 }
6577
6578 table->name = static_cast<char*>(
6579 mem_heap_alloc(heap, len + 1));
6580 memcpy(table->name, data, len);
6581 table->name[len] = 0;
6582 break;
6583
6584 case 1: /* ID */
6585 ut_a(len == 8);
6586 table->id = mach_read_from_8(
6587 static_cast<const byte*>(data));
6588 break;
6589
6590 default:
6591 ut_error;
6592 }
6593 }
6594
6595 return(TRUE);
6596 }
6597
6598 /******************************************************************//**
6599 Callback that sets a hex formatted FTS table's flags2 in
6600 SYS_TABLES. The flags is stored in MIX_LEN column.
6601 @return FALSE if all OK */
6602 static
6603 ibool
fts_set_hex_format(void * row,void * user_arg)6604 fts_set_hex_format(
6605 /*===============*/
6606 void* row, /*!< in: sel_node_t* */
6607 void* user_arg) /*!< in: bool set/unset flag */
6608 {
6609 sel_node_t* node = static_cast<sel_node_t*>(row);
6610 dfield_t* dfield = que_node_get_val(node->select_list);
6611
6612 ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
6613 ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t));
6614 /* There should be at most one matching record. So the value
6615 must be the default value. */
6616 ut_ad(mach_read_from_4(static_cast<byte*>(user_arg))
6617 == ULINT32_UNDEFINED);
6618
6619 ulint flags2 = mach_read_from_4(
6620 static_cast<byte*>(dfield_get_data(dfield)));
6621
6622 flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
6623
6624 mach_write_to_4(static_cast<byte*>(user_arg), flags2);
6625
6626 return(FALSE);
6627 }
6628
6629 /*****************************************************************//**
6630 Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
6631 @return DB_SUCCESS or error code. */
6632 dberr_t
fts_update_hex_format_flag(trx_t * trx,table_id_t table_id,bool dict_locked)6633 fts_update_hex_format_flag(
6634 /*=======================*/
6635 trx_t* trx, /*!< in/out: transaction that
6636 covers the update */
6637 table_id_t table_id, /*!< in: Table for which we want
6638 to set the root table->flags2 */
6639 bool dict_locked) /*!< in: set to true if the
6640 caller already owns the
6641 dict_sys_t::mutex. */
6642 {
6643 pars_info_t* info;
6644 ib_uint32_t flags2;
6645
6646 static const char sql[] =
6647 "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
6648 "DECLARE FUNCTION my_func;\n"
6649 "DECLARE CURSOR c IS\n"
6650 " SELECT MIX_LEN"
6651 " FROM SYS_TABLES"
6652 " WHERE ID = :table_id FOR UPDATE;"
6653 "\n"
6654 "BEGIN\n"
6655 "OPEN c;\n"
6656 "WHILE 1 = 1 LOOP\n"
6657 " FETCH c INTO my_func();\n"
6658 " IF c % NOTFOUND THEN\n"
6659 " EXIT;\n"
6660 " END IF;\n"
6661 "END LOOP;\n"
6662 "UPDATE SYS_TABLES"
6663 " SET MIX_LEN = :flags2"
6664 " WHERE ID = :table_id;\n"
6665 "CLOSE c;\n"
6666 "END;\n";
6667
6668 flags2 = ULINT32_UNDEFINED;
6669
6670 info = pars_info_create();
6671
6672 pars_info_add_ull_literal(info, "table_id", table_id);
6673 pars_info_bind_int4_literal(info, "flags2", &flags2);
6674
6675 pars_info_bind_function(
6676 info, "my_func", fts_set_hex_format, &flags2);
6677
6678 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6679 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6680 }
6681
6682 dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
6683
6684 ut_a(flags2 != ULINT32_UNDEFINED);
6685
6686 return(err);
6687 }
6688
6689 /*********************************************************************//**
6690 Rename an aux table to HEX format. It's called when "%016llu" is used
6691 to format an object id in table name, which only happens in Windows. */
6692 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6693 dberr_t
fts_rename_one_aux_table_to_hex_format(trx_t * trx,const fts_aux_table_t * aux_table,const dict_table_t * parent_table)6694 fts_rename_one_aux_table_to_hex_format(
6695 /*===================================*/
6696 trx_t* trx, /*!< in: transaction */
6697 const fts_aux_table_t* aux_table, /*!< in: table info */
6698 const dict_table_t* parent_table) /*!< in: parent table name */
6699 {
6700 const char* ptr;
6701 fts_table_t fts_table;
6702 char new_name[MAX_FULL_NAME_LEN];
6703 dberr_t error;
6704
6705 ptr = strchr(aux_table->name, '/');
6706 ut_a(ptr != NULL);
6707 ++ptr;
6708 /* Skip "FTS_", table id and underscore */
6709 for (ulint i = 0; i < 2; ++i) {
6710 ptr = strchr(ptr, '_');
6711 ut_a(ptr != NULL);
6712 ++ptr;
6713 }
6714
6715 fts_table.suffix = NULL;
6716 if (aux_table->index_id == 0) {
6717 fts_table.type = FTS_COMMON_TABLE;
6718
6719 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
6720 if (strcmp(ptr, fts_common_tables[i]) == 0) {
6721 fts_table.suffix = fts_common_tables[i];
6722 break;
6723 }
6724 }
6725 } else {
6726 fts_table.type = FTS_INDEX_TABLE;
6727
6728 /* Skip index id and underscore */
6729 ptr = strchr(ptr, '_');
6730 ut_a(ptr != NULL);
6731 ++ptr;
6732
6733 for (ulint i = 0; fts_index_selector[i].value; ++i) {
6734 if (strcmp(ptr, fts_get_suffix(i)) == 0) {
6735 fts_table.suffix = fts_get_suffix(i);
6736 break;
6737 }
6738 }
6739 }
6740
6741 ut_a(fts_table.suffix != NULL);
6742
6743 fts_table.parent = parent_table->name.m_name;
6744 fts_table.table_id = aux_table->parent_id;
6745 fts_table.index_id = aux_table->index_id;
6746 fts_table.table = parent_table;
6747
6748 fts_get_table_name(&fts_table, new_name);
6749 ut_ad(strcmp(new_name, aux_table->name) != 0);
6750
6751 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6752 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6753 }
6754
6755 error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
6756 FALSE);
6757
6758 if (error != DB_SUCCESS) {
6759 ib::warn() << "Failed to rename aux table '"
6760 << aux_table->name << "' to new format '"
6761 << new_name << "'.";
6762 } else {
6763 ib::info() << "Renamed aux table '" << aux_table->name
6764 << "' to '" << new_name << "'.";
6765 }
6766
6767 return(error);
6768 }
6769
6770 /**********************************************************************//**
6771 Rename all aux tables of a parent table to HEX format. Also set aux tables'
6772 flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
6773 It's called when "%016llu" is used to format an object id in table name,
6774 which only happens in Windows.
6775 Note the ids in tables are correct but the names are old ambiguous ones.
6776
6777 This function should make sure that either all the parent table and aux tables
6778 are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */
6779 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6780 dberr_t
fts_rename_aux_tables_to_hex_format_low(trx_t * trx,dict_table_t * parent_table,ib_vector_t * tables)6781 fts_rename_aux_tables_to_hex_format_low(
6782 /*====================================*/
6783 trx_t* trx, /*!< in: transaction */
6784 dict_table_t* parent_table, /*!< in: parent table */
6785 ib_vector_t* tables) /*!< in: aux tables to rename. */
6786 {
6787 dberr_t error;
6788 ulint count;
6789
6790 ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
6791 ut_ad(!ib_vector_is_empty(tables));
6792
6793 error = fts_update_hex_format_flag(trx, parent_table->id, true);
6794
6795 if (error != DB_SUCCESS) {
6796 ib::warn() << "Setting parent table " << parent_table->name
6797 << " to hex format failed.";
6798 fts_sql_rollback(trx);
6799 return(error);
6800 }
6801
6802 DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6803
6804 for (count = 0; count < ib_vector_size(tables); ++count) {
6805 dict_table_t* table;
6806 fts_aux_table_t* aux_table;
6807
6808 aux_table = static_cast<fts_aux_table_t*>(
6809 ib_vector_get(tables, count));
6810
6811 table = dict_table_open_on_id(aux_table->id, TRUE,
6812 DICT_TABLE_OP_NORMAL);
6813
6814 ut_ad(table != NULL);
6815 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));
6816
6817 /* Set HEX_NAME flag here to make sure we can get correct
6818 new table name in following function */
6819 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6820 error = fts_rename_one_aux_table_to_hex_format(trx,
6821 aux_table, parent_table);
6822 /* We will rollback the trx if the error != DB_SUCCESS,
6823 so setting the flag here is the same with setting it in
6824 row_rename_table_for_mysql */
6825 DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);
6826
6827 if (error != DB_SUCCESS) {
6828 dict_table_close(table, TRUE, FALSE);
6829
6830 ib::warn() << "Failed to rename one aux table "
6831 << aux_table->name << ". Will revert"
6832 " all successful rename operations.";
6833
6834 fts_sql_rollback(trx);
6835 break;
6836 }
6837
6838 error = fts_update_hex_format_flag(trx, aux_table->id, true);
6839 dict_table_close(table, TRUE, FALSE);
6840
6841 if (error != DB_SUCCESS) {
6842 ib::warn() << "Setting aux table " << aux_table->name
6843 << " to hex format failed.";
6844
6845 fts_sql_rollback(trx);
6846 break;
6847 }
6848 }
6849
6850 if (error != DB_SUCCESS) {
6851 ut_ad(count != ib_vector_size(tables));
6852
6853 /* If rename fails, thr trx would be rolled back, we can't
6854 use it any more, we'll start a new background trx to do
6855 the reverting. */
6856
6857 ut_ad(!trx_is_started(trx));
6858
6859 bool not_rename = false;
6860
6861 /* Try to revert those succesful rename operations
6862 in order to revert the ibd file rename. */
6863 for (ulint i = 0; i <= count; ++i) {
6864 dict_table_t* table;
6865 fts_aux_table_t* aux_table;
6866 trx_t* trx_bg;
6867 dberr_t err;
6868
6869 aux_table = static_cast<fts_aux_table_t*>(
6870 ib_vector_get(tables, i));
6871
6872 table = dict_table_open_on_id(aux_table->id, TRUE,
6873 DICT_TABLE_OP_NORMAL);
6874 ut_ad(table != NULL);
6875
6876 if (not_rename) {
6877 DICT_TF2_FLAG_UNSET(table,
6878 DICT_TF2_FTS_AUX_HEX_NAME);
6879 }
6880
6881 if (!DICT_TF2_FLAG_IS_SET(table,
6882 DICT_TF2_FTS_AUX_HEX_NAME)) {
6883 dict_table_close(table, TRUE, FALSE);
6884 continue;
6885 }
6886
6887 trx_bg = trx_allocate_for_background();
6888 trx_bg->op_info = "Revert half done rename";
6889 trx_bg->dict_operation_lock_mode = RW_X_LATCH;
6890 trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
6891
6892 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6893 err = row_rename_table_for_mysql(table->name.m_name,
6894 aux_table->name,
6895 trx_bg, FALSE);
6896
6897 trx_bg->dict_operation_lock_mode = 0;
6898 dict_table_close(table, TRUE, FALSE);
6899
6900 if (err != DB_SUCCESS) {
6901 ib::warn() << "Failed to revert table "
6902 << table->name << ". Please revert"
6903 " manually.";
6904 fts_sql_rollback(trx_bg);
6905 trx_free_for_background(trx_bg);
6906 /* Continue to clear aux tables' flags2 */
6907 not_rename = true;
6908 continue;
6909 }
6910
6911 fts_sql_commit(trx_bg);
6912 trx_free_for_background(trx_bg);
6913 }
6914
6915 DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6916 }
6917
6918 return(error);
6919 }
6920
6921 /**********************************************************************//**
6922 Convert an id, which is actually a decimal number but was regard as a HEX
6923 from a string, to its real value. */
6924 static
6925 ib_id_t
fts_fake_hex_to_dec(ib_id_t id)6926 fts_fake_hex_to_dec(
6927 /*================*/
6928 ib_id_t id) /*!< in: number to convert */
6929 {
6930 ib_id_t dec_id = 0;
6931 char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
6932
6933 #ifdef UNIV_DEBUG
6934 int ret =
6935 #endif /* UNIV_DEBUG */
6936 sprintf(tmp_id, UINT64PFx, id);
6937 ut_ad(ret == 16);
6938 #ifdef UNIV_DEBUG
6939 ret =
6940 #endif /* UNIV_DEBUG */
6941 #ifdef _WIN32
6942 sscanf(tmp_id, "%016llu", &dec_id);
6943 #else
6944 sscanf(tmp_id, "%016" PRIu64, &dec_id);
6945 #endif /* _WIN32 */
6946 ut_ad(ret == 1);
6947
6948 return dec_id;
6949 }
6950
6951 /*********************************************************************//**
6952 Compare two fts_aux_table_t parent_ids.
6953 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
6954 UNIV_INLINE
6955 int
fts_check_aux_table_parent_id_cmp(const void * p1,const void * p2)6956 fts_check_aux_table_parent_id_cmp(
6957 /*==============================*/
6958 const void* p1, /*!< in: id1 */
6959 const void* p2) /*!< in: id2 */
6960 {
6961 const fts_aux_table_t* fa1 = static_cast<const fts_aux_table_t*>(p1);
6962 const fts_aux_table_t* fa2 = static_cast<const fts_aux_table_t*>(p2);
6963
6964 return static_cast<int>(fa1->parent_id - fa2->parent_id);
6965 }
6966
6967 /** Mark all the fts index associated with the parent table as corrupted.
6968 @param[in] trx transaction
6969 @param[in, out] parent_table fts index associated with this parent table
6970 will be marked as corrupted. */
6971 static
6972 void
fts_parent_all_index_set_corrupt(trx_t * trx,dict_table_t * parent_table)6973 fts_parent_all_index_set_corrupt(
6974 trx_t* trx,
6975 dict_table_t* parent_table)
6976 {
6977 fts_t* fts = parent_table->fts;
6978
6979 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6980 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6981 }
6982
6983 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6984 dict_index_t* index = static_cast<dict_index_t*>(
6985 ib_vector_getp_const(fts->indexes, j));
6986 dict_set_corrupted(index,
6987 trx, "DROP ORPHANED TABLE");
6988 }
6989 }
6990
6991 /** Mark the fts index which index id matches the id as corrupted.
6992 @param[in] trx transaction
6993 @param[in] id index id to search
6994 @param[in, out] parent_table parent table to check with all
6995 the index. */
6996 static
6997 void
fts_set_index_corrupt(trx_t * trx,index_id_t id,dict_table_t * table)6998 fts_set_index_corrupt(
6999 trx_t* trx,
7000 index_id_t id,
7001 dict_table_t* table)
7002 {
7003 fts_t* fts = table->fts;
7004
7005 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
7006 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
7007 }
7008
7009 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
7010 dict_index_t* index = static_cast<dict_index_t*>(
7011 ib_vector_getp_const(fts->indexes, j));
7012 if (index->id == id) {
7013 dict_set_corrupted(index, trx,
7014 "DROP ORPHANED TABLE");
7015 break;
7016 }
7017 }
7018 }
7019
7020 /** Check the index for the aux table is corrupted.
7021 @param[in] aux_table auxiliary table
7022 @retval nonzero if index is corrupted, zero for valid index */
7023 static
7024 ulint
fts_check_corrupt_index(fts_aux_table_t * aux_table)7025 fts_check_corrupt_index(
7026 fts_aux_table_t* aux_table)
7027 {
7028 dict_table_t* table;
7029 dict_index_t* index;
7030 table = dict_table_open_on_id(
7031 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7032
7033 if (table == NULL) {
7034 return(0);
7035 }
7036
7037 for (index = UT_LIST_GET_FIRST(table->indexes);
7038 index;
7039 index = UT_LIST_GET_NEXT(indexes, index)) {
7040 if (index->id == aux_table->index_id) {
7041 ut_ad(index->type & DICT_FTS);
7042 dict_table_close(table, true, false);
7043 return(dict_index_is_corrupted(index));
7044 }
7045 }
7046
7047 dict_table_close(table, true, false);
7048 return(0);
7049 }
7050
7051 /* Get parent table name if it's a fts aux table
7052 @param[in] aux_table_name aux table name
7053 @param[in] aux_table_len aux table length
7054 @return parent table name, or NULL */
7055 char*
fts_get_parent_table_name(const char * aux_table_name,ulint aux_table_len)7056 fts_get_parent_table_name(
7057 const char* aux_table_name,
7058 ulint aux_table_len)
7059 {
7060 fts_aux_table_t aux_table;
7061 char* parent_table_name = NULL;
7062
7063 if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
7064 dict_table_t* parent_table;
7065
7066 parent_table = dict_table_open_on_id(
7067 aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7068
7069 if (parent_table != NULL) {
7070 parent_table_name = mem_strdupl(
7071 parent_table->name.m_name,
7072 strlen(parent_table->name.m_name));
7073
7074 dict_table_close(parent_table, TRUE, FALSE);
7075 }
7076 }
7077
7078 return(parent_table_name);
7079 }
7080
7081 /** Check the validity of the parent table.
7082 @param[in] aux_table auxiliary table
7083 @return true if it is a valid table or false if it is not */
7084 static
7085 bool
fts_valid_parent_table(const fts_aux_table_t * aux_table)7086 fts_valid_parent_table(
7087 const fts_aux_table_t* aux_table)
7088 {
7089 dict_table_t* parent_table;
7090 bool valid = false;
7091
7092 parent_table = dict_table_open_on_id(
7093 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7094
7095 if (parent_table != NULL && parent_table->fts != NULL) {
7096 if (aux_table->index_id == 0) {
7097 valid = true;
7098 } else {
7099 index_id_t id = aux_table->index_id;
7100 dict_index_t* index;
7101
7102 /* Search for the FT index in the table's list. */
7103 for (index = UT_LIST_GET_FIRST(parent_table->indexes);
7104 index;
7105 index = UT_LIST_GET_NEXT(indexes, index)) {
7106 if (index->id == id) {
7107 valid = true;
7108 break;
7109 }
7110
7111 }
7112 }
7113 }
7114
7115 if (parent_table) {
7116 dict_table_close(parent_table, TRUE, FALSE);
7117 }
7118
7119 return(valid);
7120 }
7121
7122 /** Try to rename all aux tables of the specified parent table.
7123 @param[in] aux_tables aux_tables to be renamed
7124 @param[in] parent_table parent table of all aux
7125 tables stored in tables. */
7126 static
7127 void
fts_rename_aux_tables_to_hex_format(ib_vector_t * aux_tables,dict_table_t * parent_table)7128 fts_rename_aux_tables_to_hex_format(
7129 ib_vector_t* aux_tables,
7130 dict_table_t* parent_table)
7131 {
7132 dberr_t err;
7133 trx_t* trx_rename = trx_allocate_for_background();
7134 trx_rename->op_info = "Rename aux tables to hex format";
7135 trx_rename->dict_operation_lock_mode = RW_X_LATCH;
7136 trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
7137
7138 err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
7139 parent_table, aux_tables);
7140
7141 trx_rename->dict_operation_lock_mode = 0;
7142
7143 if (err != DB_SUCCESS) {
7144
7145 ib::warn() << "Rollback operations on all aux tables of "
7146 "table "<< parent_table->name << ". All the fts index "
7147 "associated with the table are marked as corrupted. "
7148 "Please rebuild the index again.";
7149
7150 /* Corrupting the fts index related to parent table. */
7151 trx_t* trx_corrupt;
7152 trx_corrupt = trx_allocate_for_background();
7153 trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
7154 trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
7155 fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
7156 trx_corrupt->dict_operation_lock_mode = 0;
7157 fts_sql_commit(trx_corrupt);
7158 trx_free_for_background(trx_corrupt);
7159 } else {
7160 fts_sql_commit(trx_rename);
7161 }
7162
7163 trx_free_for_background(trx_rename);
7164 ib_vector_reset(aux_tables);
7165 }
7166
7167 /** Set the hex format flag for the parent table.
7168 @param[in, out] parent_table parent table
7169 @param[in] trx transaction */
7170 static
7171 void
fts_set_parent_hex_format_flag(dict_table_t * parent_table,trx_t * trx)7172 fts_set_parent_hex_format_flag(
7173 dict_table_t* parent_table,
7174 trx_t* trx)
7175 {
7176 if (!DICT_TF2_FLAG_IS_SET(parent_table,
7177 DICT_TF2_FTS_AUX_HEX_NAME)) {
7178 DBUG_EXECUTE_IF("parent_table_flag_fail", DBUG_SUICIDE(););
7179
7180 dberr_t err = fts_update_hex_format_flag(
7181 trx, parent_table->id, true);
7182
7183 if (err != DB_SUCCESS) {
7184 ib::fatal() << "Setting parent table "
7185 << parent_table->name
7186 << "to hex format failed. Please try "
7187 << "to restart the server again, if it "
7188 << "doesn't work, the system tables "
7189 << "might be corrupted.";
7190 } else {
7191 DICT_TF2_FLAG_SET(
7192 parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
7193 }
7194 }
7195 }
7196
7197 /** Drop the obsolete auxilary table.
7198 @param[in] tables tables to be dropped. */
7199 static
7200 void
fts_drop_obsolete_aux_table_from_vector(ib_vector_t * tables)7201 fts_drop_obsolete_aux_table_from_vector(
7202 ib_vector_t* tables)
7203 {
7204 dberr_t err;
7205
7206 for (ulint count = 0; count < ib_vector_size(tables);
7207 ++count) {
7208
7209 fts_aux_table_t* aux_drop_table;
7210 aux_drop_table = static_cast<fts_aux_table_t*>(
7211 ib_vector_get(tables, count));
7212 trx_t* trx_drop = trx_allocate_for_background();
7213 trx_drop->op_info = "Drop obsolete aux tables";
7214 trx_drop->dict_operation_lock_mode = RW_X_LATCH;
7215 trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
7216
7217 err = row_drop_table_for_mysql(
7218 aux_drop_table->name, trx_drop, false, true);
7219
7220 trx_drop->dict_operation_lock_mode = 0;
7221
7222 if (err != DB_SUCCESS) {
7223 /* We don't need to worry about the
7224 failure, since server would try to
7225 drop it on next restart, even if
7226 the table was broken. */
7227 ib::warn() << "Failed to drop obsolete aux table "
7228 << aux_drop_table->name << ", which is "
7229 << "harmless. will try to drop it on next "
7230 << "restart.";
7231
7232 fts_sql_rollback(trx_drop);
7233 } else {
7234 ib::info() << "Dropped obsolete aux"
7235 " table '" << aux_drop_table->name
7236 << "'.";
7237
7238 fts_sql_commit(trx_drop);
7239 }
7240
7241 trx_free_for_background(trx_drop);
7242 }
7243 }
7244
7245 /** Drop all the auxiliary table present in the vector.
7246 @param[in] trx transaction
7247 @param[in] tables tables to be dropped */
7248 static
7249 void
fts_drop_aux_table_from_vector(trx_t * trx,ib_vector_t * tables)7250 fts_drop_aux_table_from_vector(
7251 trx_t* trx,
7252 ib_vector_t* tables)
7253 {
7254 for (ulint count = 0; count < ib_vector_size(tables);
7255 ++count) {
7256 fts_aux_table_t* aux_drop_table;
7257 aux_drop_table = static_cast<fts_aux_table_t*>(
7258 ib_vector_get(tables, count));
7259
7260 /* Check for the validity of the parent table */
7261 if (!fts_valid_parent_table(aux_drop_table)) {
7262
7263 ib::warn() << "Parent table of FTS auxiliary table "
7264 << aux_drop_table->name << " not found.";
7265
7266 dberr_t err = fts_drop_table(trx, aux_drop_table->name);
7267 if (err == DB_FAIL) {
7268
7269 char* path = fil_make_filepath(
7270 NULL, aux_drop_table->name, IBD, false);
7271
7272 if (path != NULL) {
7273 os_file_delete_if_exists(
7274 innodb_data_file_key,
7275 path , NULL);
7276 ut_free(path);
7277 }
7278 }
7279 }
7280 }
7281 }
7282
7283 /**********************************************************************//**
7284 Check and drop all orphaned FTS auxiliary tables, those that don't have
7285 a parent table or FTS index defined on them.
7286 @return DB_SUCCESS or error code */
7287 static MY_ATTRIBUTE((nonnull))
7288 void
fts_check_and_drop_orphaned_tables(trx_t * trx,ib_vector_t * tables)7289 fts_check_and_drop_orphaned_tables(
7290 /*===============================*/
7291 trx_t* trx, /*!< in: transaction */
7292 ib_vector_t* tables) /*!< in: tables to check */
7293 {
7294 mem_heap_t* heap;
7295 ib_vector_t* aux_tables_to_rename;
7296 ib_vector_t* invalid_aux_tables;
7297 ib_vector_t* valid_aux_tables;
7298 ib_vector_t* drop_aux_tables;
7299 ib_vector_t* obsolete_aux_tables;
7300 ib_alloc_t* heap_alloc;
7301
7302 heap = mem_heap_create(1024);
7303 heap_alloc = ib_heap_allocator_create(heap);
7304
7305 /* We store all aux tables belonging to the same parent table here,
7306 and rename all these tables in a batch mode. */
7307 aux_tables_to_rename = ib_vector_create(heap_alloc,
7308 sizeof(fts_aux_table_t), 128);
7309
7310 /* We store all fake auxiliary table and orphaned table here. */
7311 invalid_aux_tables = ib_vector_create(heap_alloc,
7312 sizeof(fts_aux_table_t), 128);
7313
7314 /* We store all valid aux tables. We use this to filter the
7315 fake auxiliary table from invalid auxiliary tables. */
7316 valid_aux_tables = ib_vector_create(heap_alloc,
7317 sizeof(fts_aux_table_t), 128);
7318
7319 /* We store all auxiliary tables to be dropped. */
7320 drop_aux_tables = ib_vector_create(heap_alloc,
7321 sizeof(fts_aux_table_t), 128);
7322
7323 /* We store all obsolete auxiliary tables to be dropped. */
7324 obsolete_aux_tables = ib_vector_create(heap_alloc,
7325 sizeof(fts_aux_table_t), 128);
7326
7327 /* Sort by parent_id first, in case rename will fail */
7328 ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
7329
7330 for (ulint i = 0; i < ib_vector_size(tables); ++i) {
7331 dict_table_t* parent_table;
7332 fts_aux_table_t* aux_table;
7333 bool drop = false;
7334 dict_table_t* table;
7335 fts_aux_table_t* next_aux_table = NULL;
7336 ib_id_t orig_parent_id = 0;
7337 ib_id_t orig_index_id = 0;
7338 bool rename = false;
7339
7340 aux_table = static_cast<fts_aux_table_t*>(
7341 ib_vector_get(tables, i));
7342
7343 table = dict_table_open_on_id(
7344 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7345 orig_parent_id = aux_table->parent_id;
7346 orig_index_id = aux_table->index_id;
7347
7348 if (table == NULL
7349 || strcmp(table->name.m_name, aux_table->name)) {
7350
7351 bool fake_aux = false;
7352
7353 if (table != NULL) {
7354 dict_table_close(table, TRUE, FALSE);
7355 }
7356
7357 if (i + 1 < ib_vector_size(tables)) {
7358 next_aux_table = static_cast<fts_aux_table_t*>(
7359 ib_vector_get(tables, i + 1));
7360 }
7361
7362 /* To know whether aux table is fake fts or
7363 orphan fts table. */
7364 for (ulint count = 0;
7365 count < ib_vector_size(valid_aux_tables);
7366 count++) {
7367 fts_aux_table_t* valid_aux;
7368 valid_aux = static_cast<fts_aux_table_t*>(
7369 ib_vector_get(valid_aux_tables, count));
7370 if (strcmp(valid_aux->name,
7371 aux_table->name) == 0) {
7372 fake_aux = true;
7373 break;
7374 }
7375 }
7376
7377 /* All aux tables of parent table, whose id is
7378 last_parent_id, have been checked, try to rename
7379 them if necessary. */
7380 if ((next_aux_table == NULL
7381 || orig_parent_id != next_aux_table->parent_id)
7382 && (!ib_vector_is_empty(aux_tables_to_rename))) {
7383
7384 ib_id_t parent_id = fts_fake_hex_to_dec(
7385 aux_table->parent_id);
7386
7387 parent_table = dict_table_open_on_id(
7388 parent_id, TRUE,
7389 DICT_TABLE_OP_NORMAL);
7390
7391 fts_rename_aux_tables_to_hex_format(
7392 aux_tables_to_rename, parent_table);
7393
7394 dict_table_close(parent_table, TRUE,
7395 FALSE);
7396 }
7397
7398 /* If the aux table is fake aux table. Skip it. */
7399 if (!fake_aux) {
7400 ib_vector_push(invalid_aux_tables, aux_table);
7401 }
7402
7403 continue;
7404 } else if (!DICT_TF2_FLAG_IS_SET(table,
7405 DICT_TF2_FTS_AUX_HEX_NAME)) {
7406
7407 aux_table->parent_id = fts_fake_hex_to_dec(
7408 aux_table->parent_id);
7409
7410 if (aux_table->index_id != 0) {
7411 aux_table->index_id = fts_fake_hex_to_dec(
7412 aux_table->index_id);
7413 }
7414
7415 ut_ad(aux_table->id > aux_table->parent_id);
7416
7417 /* Check whether parent table id and index id
7418 are stored as decimal format. */
7419 if (fts_valid_parent_table(aux_table)) {
7420
7421 parent_table = dict_table_open_on_id(
7422 aux_table->parent_id, true,
7423 DICT_TABLE_OP_NORMAL);
7424
7425 ut_ad(parent_table != NULL);
7426 ut_ad(parent_table->fts != NULL);
7427
7428 if (!DICT_TF2_FLAG_IS_SET(
7429 parent_table,
7430 DICT_TF2_FTS_AUX_HEX_NAME)) {
7431 rename = true;
7432 }
7433
7434 dict_table_close(parent_table, TRUE, FALSE);
7435 }
7436
7437 if (!rename) {
7438 /* Reassign the original value of
7439 aux table if it is not in decimal format */
7440 aux_table->parent_id = orig_parent_id;
7441 aux_table->index_id = orig_index_id;
7442 }
7443 }
7444
7445 if (table != NULL) {
7446 dict_table_close(table, TRUE, FALSE);
7447 }
7448
7449 if (!rename) {
7450 /* Check the validity of the parent table. */
7451 if (!fts_valid_parent_table(aux_table)) {
7452 drop = true;
7453 }
7454 }
7455
7456 /* Filter out the fake aux table by comparing with the
7457 current valid auxiliary table name. */
7458 for (ulint count = 0;
7459 count < ib_vector_size(invalid_aux_tables); count++) {
7460 fts_aux_table_t* invalid_aux;
7461 invalid_aux = static_cast<fts_aux_table_t*>(
7462 ib_vector_get(invalid_aux_tables, count));
7463 if (strcmp(invalid_aux->name, aux_table->name) == 0) {
7464 ib_vector_remove(
7465 invalid_aux_tables,
7466 *reinterpret_cast<void**>(invalid_aux));
7467 break;
7468 }
7469 }
7470
7471 ib_vector_push(valid_aux_tables, aux_table);
7472
7473 /* If the index associated with aux table is corrupted,
7474 skip it. */
7475 if (fts_check_corrupt_index(aux_table) > 0) {
7476
7477 if (i + 1 < ib_vector_size(tables)) {
7478 next_aux_table = static_cast<fts_aux_table_t*>(
7479 ib_vector_get(tables, i + 1));
7480 }
7481
7482 if (next_aux_table == NULL
7483 || orig_parent_id != next_aux_table->parent_id) {
7484
7485 parent_table = dict_table_open_on_id(
7486 aux_table->parent_id, TRUE,
7487 DICT_TABLE_OP_NORMAL);
7488
7489 if (!ib_vector_is_empty(aux_tables_to_rename)) {
7490 fts_rename_aux_tables_to_hex_format(
7491 aux_tables_to_rename, parent_table);
7492 } else {
7493 fts_set_parent_hex_format_flag(
7494 parent_table, trx);
7495 }
7496
7497 dict_table_close(parent_table, TRUE, FALSE);
7498 }
7499
7500 continue;
7501 }
7502
7503 parent_table = dict_table_open_on_id(
7504 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7505
7506 if (drop) {
7507 ib_vector_push(drop_aux_tables, aux_table);
7508 } else {
7509 if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
7510 ib_vector_push(obsolete_aux_tables, aux_table);
7511 continue;
7512 }
7513 }
7514
7515 /* If the aux table is in decimal format, we should
7516 rename it, so push it to aux_tables_to_rename */
7517 if (!drop && rename) {
7518 bool rename_table = true;
7519 for (ulint count = 0;
7520 count < ib_vector_size(aux_tables_to_rename);
7521 count++) {
7522 fts_aux_table_t* rename_aux =
7523 static_cast<fts_aux_table_t*>(
7524 ib_vector_get(aux_tables_to_rename,
7525 count));
7526 if (strcmp(rename_aux->name,
7527 aux_table->name) == 0) {
7528 rename_table = false;
7529 break;
7530 }
7531 }
7532
7533 if (rename_table) {
7534 ib_vector_push(aux_tables_to_rename,
7535 aux_table);
7536 }
7537 }
7538
7539 if (i + 1 < ib_vector_size(tables)) {
7540 next_aux_table = static_cast<fts_aux_table_t*>(
7541 ib_vector_get(tables, i + 1));
7542 }
7543
7544 if ((next_aux_table == NULL
7545 || orig_parent_id != next_aux_table->parent_id)
7546 && !ib_vector_is_empty(aux_tables_to_rename)) {
7547
7548 ut_ad(rename);
7549 ut_ad(!DICT_TF2_FLAG_IS_SET(
7550 parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
7551
7552 fts_rename_aux_tables_to_hex_format(
7553 aux_tables_to_rename,parent_table);
7554 }
7555
7556 /* The IDs are already in correct hex format. */
7557 if (!drop && !rename) {
7558 dict_table_t* table;
7559
7560 table = dict_table_open_on_id(
7561 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7562
7563 if (table != NULL
7564 && strcmp(table->name.m_name, aux_table->name)) {
7565 dict_table_close(table, TRUE, FALSE);
7566 table = NULL;
7567 }
7568
7569 if (table != NULL
7570 && !DICT_TF2_FLAG_IS_SET(
7571 table,
7572 DICT_TF2_FTS_AUX_HEX_NAME)) {
7573
7574 DBUG_EXECUTE_IF("aux_table_flag_fail",
7575 ib::warn() << "Setting aux table "
7576 << table->name << " to hex "
7577 "format failed.";
7578 fts_set_index_corrupt(
7579 trx, aux_table->index_id,
7580 parent_table);
7581 goto table_exit;);
7582
7583 dberr_t err = fts_update_hex_format_flag(
7584 trx, table->id, true);
7585
7586 if (err != DB_SUCCESS) {
7587 ib::warn() << "Setting aux table "
7588 << table->name << " to hex "
7589 "format failed.";
7590
7591 fts_set_index_corrupt(
7592 trx, aux_table->index_id,
7593 parent_table);
7594 } else {
7595 DICT_TF2_FLAG_SET(table,
7596 DICT_TF2_FTS_AUX_HEX_NAME);
7597 }
7598 }
7599 #ifndef DBUG_OFF
7600 table_exit:
7601 #endif /* !DBUG_OFF */
7602
7603 if (table != NULL) {
7604 dict_table_close(table, TRUE, FALSE);
7605 }
7606
7607 ut_ad(parent_table != NULL);
7608
7609 fts_set_parent_hex_format_flag(
7610 parent_table, trx);
7611 }
7612
7613 if (parent_table != NULL) {
7614 dict_table_close(parent_table, TRUE, FALSE);
7615 }
7616 }
7617
7618 fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
7619 fts_drop_aux_table_from_vector(trx, drop_aux_tables);
7620 fts_sql_commit(trx);
7621
7622 fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
7623
7624 /* Free the memory allocated at the beginning */
7625 if (heap != NULL) {
7626 mem_heap_free(heap);
7627 }
7628 }
7629
7630 /**********************************************************************//**
7631 Drop all orphaned FTS auxiliary tables, those that don't have a parent
7632 table or FTS index defined on them. */
7633 void
fts_drop_orphaned_tables(void)7634 fts_drop_orphaned_tables(void)
7635 /*==========================*/
7636 {
7637 trx_t* trx;
7638 pars_info_t* info;
7639 mem_heap_t* heap;
7640 que_t* graph;
7641 ib_vector_t* tables;
7642 ib_alloc_t* heap_alloc;
7643 space_name_list_t space_name_list;
7644 dberr_t error = DB_SUCCESS;
7645
7646 /* Note: We have to free the memory after we are done with the list. */
7647 error = fil_get_space_names(space_name_list);
7648
7649 if (error == DB_OUT_OF_MEMORY) {
7650 ib::fatal() << "Out of memory";
7651 }
7652
7653 heap = mem_heap_create(1024);
7654 heap_alloc = ib_heap_allocator_create(heap);
7655
7656 /* We store the table ids of all the FTS indexes that were found. */
7657 tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
7658
7659 /* Get the list of all known .ibd files and check for orphaned
7660 FTS auxiliary files in that list. We need to remove them because
7661 users can't map them back to table names and this will create
7662 unnecessary clutter. */
7663
7664 for (space_name_list_t::iterator it = space_name_list.begin();
7665 it != space_name_list.end();
7666 ++it) {
7667
7668 fts_aux_table_t* fts_aux_table;
7669
7670 fts_aux_table = static_cast<fts_aux_table_t*>(
7671 ib_vector_push(tables, NULL));
7672
7673 memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
7674
7675 if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
7676 ib_vector_pop(tables);
7677 } else {
7678 ulint len = strlen(*it);
7679
7680 fts_aux_table->id = fil_space_get_id_by_name(*it);
7681
7682 /* We got this list from fil0fil.cc. The tablespace
7683 with this name must exist. */
7684 ut_a(fts_aux_table->id != ULINT_UNDEFINED);
7685
7686 fts_aux_table->name = static_cast<char*>(
7687 mem_heap_dup(heap, *it, len + 1));
7688
7689 fts_aux_table->name[len] = 0;
7690 }
7691 }
7692
7693 trx = trx_allocate_for_background();
7694 trx->op_info = "dropping orphaned FTS tables";
7695 row_mysql_lock_data_dictionary(trx);
7696
7697 info = pars_info_create();
7698
7699 pars_info_bind_function(info, "my_func", fts_read_tables, tables);
7700
7701 graph = fts_parse_sql_no_dict_lock(
7702 NULL,
7703 info,
7704 "DECLARE FUNCTION my_func;\n"
7705 "DECLARE CURSOR c IS"
7706 " SELECT NAME, ID"
7707 " FROM SYS_TABLES;\n"
7708 "BEGIN\n"
7709 "\n"
7710 "OPEN c;\n"
7711 "WHILE 1 = 1 LOOP\n"
7712 " FETCH c INTO my_func();\n"
7713 " IF c % NOTFOUND THEN\n"
7714 " EXIT;\n"
7715 " END IF;\n"
7716 "END LOOP;\n"
7717 "CLOSE c;");
7718
7719 for (;;) {
7720 error = fts_eval_sql(trx, graph);
7721
7722 if (error == DB_SUCCESS) {
7723 fts_check_and_drop_orphaned_tables(trx, tables);
7724 break; /* Exit the loop. */
7725 } else {
7726 ib_vector_reset(tables);
7727
7728 fts_sql_rollback(trx);
7729
7730 if (error == DB_LOCK_WAIT_TIMEOUT) {
7731 ib::warn() << "lock wait timeout reading"
7732 " SYS_TABLES. Retrying!";
7733
7734 trx->error_state = DB_SUCCESS;
7735 } else {
7736 ib::error() << "(" << ut_strerr(error)
7737 << ") while reading SYS_TABLES.";
7738
7739 break; /* Exit the loop. */
7740 }
7741 }
7742 }
7743
7744 que_graph_free(graph);
7745
7746 row_mysql_unlock_data_dictionary(trx);
7747
7748 trx_free_for_background(trx);
7749
7750 if (heap != NULL) {
7751 mem_heap_free(heap);
7752 }
7753
7754 /** Free the memory allocated to store the .ibd names. */
7755 for (space_name_list_t::iterator it = space_name_list.begin();
7756 it != space_name_list.end();
7757 ++it) {
7758
7759 UT_DELETE_ARRAY(*it);
7760 }
7761 }
7762
7763 /**********************************************************************//**
7764 Check whether user supplied stopword table is of the right format.
7765 Caller is responsible to hold dictionary locks.
7766 @return the stopword column charset if qualifies */
7767 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)7768 fts_valid_stopword_table(
7769 /*=====================*/
7770 const char* stopword_table_name) /*!< in: Stopword table
7771 name */
7772 {
7773 dict_table_t* table;
7774 dict_col_t* col = NULL;
7775
7776 if (!stopword_table_name) {
7777 return(NULL);
7778 }
7779
7780 table = dict_table_get_low(stopword_table_name);
7781
7782 if (!table) {
7783 ib::error() << "User stopword table " << stopword_table_name
7784 << " does not exist.";
7785
7786 return(NULL);
7787 } else {
7788 const char* col_name;
7789
7790 col_name = dict_table_get_col_name(table, 0);
7791
7792 if (ut_strcmp(col_name, "value")) {
7793 ib::error() << "Invalid column name for stopword"
7794 " table " << stopword_table_name << ". Its"
7795 " first column must be named as 'value'.";
7796
7797 return(NULL);
7798 }
7799
7800 col = dict_table_get_nth_col(table, 0);
7801
7802 if (col->mtype != DATA_VARCHAR
7803 && col->mtype != DATA_VARMYSQL) {
7804 ib::error() << "Invalid column type for stopword"
7805 " table " << stopword_table_name << ". Its"
7806 " first column must be of varchar type";
7807
7808 return(NULL);
7809 }
7810 }
7811
7812 ut_ad(col);
7813
7814 return(fts_get_charset(col->prtype));
7815 }
7816
7817 /**********************************************************************//**
7818 This function loads the stopword into the FTS cache. It also
7819 records/fetches stopword configuration to/from FTS configure
7820 table, depending on whether we are creating or reloading the
7821 FTS.
7822 @return TRUE if load operation is successful */
7823 ibool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)7824 fts_load_stopword(
7825 /*==============*/
7826 const dict_table_t*
7827 table, /*!< in: Table with FTS */
7828 trx_t* trx, /*!< in: Transactions */
7829 const char* global_stopword_table, /*!< in: Global stopword table
7830 name */
7831 const char* session_stopword_table, /*!< in: Session stopword table
7832 name */
7833 ibool stopword_is_on, /*!< in: Whether stopword
7834 option is turned on/off */
7835 ibool reload) /*!< in: Whether it is
7836 for reloading FTS table */
7837 {
7838 fts_table_t fts_table;
7839 fts_string_t str;
7840 dberr_t error = DB_SUCCESS;
7841 ulint use_stopword;
7842 fts_cache_t* cache;
7843 const char* stopword_to_use = NULL;
7844 ibool new_trx = FALSE;
7845 byte str_buffer[MAX_FULL_NAME_LEN + 1];
7846
7847 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
7848
7849 cache = table->fts->cache;
7850
7851 if (!reload && !(cache->stopword_info.status
7852 & STOPWORD_NOT_INIT)) {
7853 return(TRUE);
7854 }
7855
7856 if (!trx) {
7857 trx = trx_allocate_for_background();
7858 trx->op_info = "upload FTS stopword";
7859 new_trx = TRUE;
7860 }
7861
7862 /* First check whether stopword filtering is turned off */
7863 if (reload) {
7864 error = fts_config_get_ulint(
7865 trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
7866 } else {
7867 use_stopword = (ulint) stopword_is_on;
7868
7869 error = fts_config_set_ulint(
7870 trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
7871 }
7872
7873 if (error != DB_SUCCESS) {
7874 goto cleanup;
7875 }
7876
7877 /* If stopword is turned off, no need to continue to load the
7878 stopword into cache, but still need to do initialization */
7879 if (!use_stopword) {
7880 cache->stopword_info.status = STOPWORD_OFF;
7881 goto cleanup;
7882 }
7883
7884 if (reload) {
7885 /* Fetch the stopword table name from FTS config
7886 table */
7887 str.f_n_char = 0;
7888 str.f_str = str_buffer;
7889 str.f_len = sizeof(str_buffer) - 1;
7890
7891 error = fts_config_get_value(
7892 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7893
7894 if (error != DB_SUCCESS) {
7895 goto cleanup;
7896 }
7897
7898 if (strlen((char*) str.f_str) > 0) {
7899 stopword_to_use = (const char*) str.f_str;
7900 }
7901 } else {
7902 stopword_to_use = (session_stopword_table)
7903 ? session_stopword_table : global_stopword_table;
7904 }
7905
7906 if (stopword_to_use
7907 && fts_load_user_stopword(table->fts, stopword_to_use,
7908 &cache->stopword_info)) {
7909 /* Save the stopword table name to the configure
7910 table */
7911 if (!reload) {
7912 str.f_n_char = 0;
7913 str.f_str = (byte*) stopword_to_use;
7914 str.f_len = ut_strlen(stopword_to_use);
7915
7916 error = fts_config_set_value(
7917 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7918 }
7919 } else {
7920 /* Load system default stopword list */
7921 fts_load_default_stopword(&cache->stopword_info);
7922 }
7923
7924 cleanup:
7925 if (new_trx) {
7926 if (error == DB_SUCCESS) {
7927 fts_sql_commit(trx);
7928 } else {
7929 fts_sql_rollback(trx);
7930 }
7931
7932 trx_free_for_background(trx);
7933 }
7934
7935 if (!cache->stopword_info.cached_stopword) {
7936 cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
7937 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
7938 &my_charset_latin1);
7939 }
7940
7941 return(error == DB_SUCCESS);
7942 }
7943
7944 /**********************************************************************//**
7945 Callback function when we initialize the FTS at the start up
7946 time. It recovers the maximum Doc IDs presented in the current table.
7947 @return: always returns TRUE */
7948 static
7949 ibool
fts_init_get_doc_id(void * row,void * user_arg)7950 fts_init_get_doc_id(
7951 /*================*/
7952 void* row, /*!< in: sel_node_t* */
7953 void* user_arg) /*!< in: fts cache */
7954 {
7955 doc_id_t doc_id = FTS_NULL_DOC_ID;
7956 sel_node_t* node = static_cast<sel_node_t*>(row);
7957 que_node_t* exp = node->select_list;
7958 fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
7959
7960 ut_ad(ib_vector_is_empty(cache->get_docs));
7961
7962 /* Copy each indexed column content into doc->text.f_str */
7963 if (exp) {
7964 dfield_t* dfield = que_node_get_val(exp);
7965 dtype_t* type = dfield_get_type(dfield);
7966 void* data = dfield_get_data(dfield);
7967
7968 ut_a(dtype_get_mtype(type) == DATA_INT);
7969
7970 doc_id = static_cast<doc_id_t>(mach_read_from_8(
7971 static_cast<const byte*>(data)));
7972
7973 if (doc_id >= cache->next_doc_id) {
7974 cache->next_doc_id = doc_id + 1;
7975 }
7976 }
7977
7978 return(TRUE);
7979 }
7980
7981 /**********************************************************************//**
7982 Callback function when we initialize the FTS at the start up
7983 time. It recovers Doc IDs that have not sync-ed to the auxiliary
7984 table, and require to bring them back into FTS index.
7985 @return: always returns TRUE */
7986 static
7987 ibool
fts_init_recover_doc(void * row,void * user_arg)7988 fts_init_recover_doc(
7989 /*=================*/
7990 void* row, /*!< in: sel_node_t* */
7991 void* user_arg) /*!< in: fts cache */
7992 {
7993
7994 fts_doc_t doc;
7995 ulint doc_len = 0;
7996 ulint field_no = 0;
7997 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
7998 doc_id_t doc_id = FTS_NULL_DOC_ID;
7999 sel_node_t* node = static_cast<sel_node_t*>(row);
8000 que_node_t* exp = node->select_list;
8001 fts_cache_t* cache = get_doc->cache;
8002 st_mysql_ftparser* parser = get_doc->index_cache->index->parser;
8003
8004 fts_doc_init(&doc);
8005 doc.found = TRUE;
8006
8007 ut_ad(cache);
8008
8009 /* Copy each indexed column content into doc->text.f_str */
8010 while (exp) {
8011 dfield_t* dfield = que_node_get_val(exp);
8012 ulint len = dfield_get_len(dfield);
8013
8014 if (field_no == 0) {
8015 dtype_t* type = dfield_get_type(dfield);
8016 void* data = dfield_get_data(dfield);
8017
8018 ut_a(dtype_get_mtype(type) == DATA_INT);
8019
8020 doc_id = static_cast<doc_id_t>(mach_read_from_8(
8021 static_cast<const byte*>(data)));
8022
8023 field_no++;
8024 exp = que_node_get_next(exp);
8025 continue;
8026 }
8027
8028 if (len == UNIV_SQL_NULL) {
8029 exp = que_node_get_next(exp);
8030 continue;
8031 }
8032
8033 ut_ad(get_doc);
8034
8035 if (!get_doc->index_cache->charset) {
8036 get_doc->index_cache->charset = fts_get_charset(
8037 dfield->type.prtype);
8038 }
8039
8040 doc.charset = get_doc->index_cache->charset;
8041 doc.is_ngram = get_doc->index_cache->index->is_ngram;
8042
8043 if (dfield_is_ext(dfield)) {
8044 dict_table_t* table = cache->sync->table;
8045
8046 doc.text.f_str = btr_copy_externally_stored_field(
8047 &doc.text.f_len,
8048 static_cast<byte*>(dfield_get_data(dfield)),
8049 dict_table_page_size(table), len,
8050 static_cast<mem_heap_t*>(doc.self_heap->arg));
8051 } else {
8052 doc.text.f_str = static_cast<byte*>(
8053 dfield_get_data(dfield));
8054
8055 doc.text.f_len = len;
8056 }
8057
8058 if (field_no == 1) {
8059 fts_tokenize_document(&doc, NULL, parser);
8060 } else {
8061 fts_tokenize_document_next(&doc, doc_len, NULL, parser);
8062 }
8063
8064 exp = que_node_get_next(exp);
8065
8066 doc_len += (exp) ? len + 1 : len;
8067
8068 field_no++;
8069 }
8070
8071 fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
8072
8073 fts_doc_free(&doc);
8074
8075 cache->added++;
8076
8077 if (doc_id >= cache->next_doc_id) {
8078 cache->next_doc_id = doc_id + 1;
8079 }
8080
8081 return(TRUE);
8082 }
8083
8084 /**********************************************************************//**
8085 This function brings FTS index in sync when FTS index is first
8086 used. There are documents that have not yet sync-ed to auxiliary
8087 tables from last server abnormally shutdown, we will need to bring
8088 such document into FTS cache before any further operations
8089 @return TRUE if all OK */
8090 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)8091 fts_init_index(
8092 /*===========*/
8093 dict_table_t* table, /*!< in: Table with FTS */
8094 ibool has_cache_lock) /*!< in: Whether we already have
8095 cache lock */
8096 {
8097 dict_index_t* index;
8098 doc_id_t start_doc;
8099 fts_get_doc_t* get_doc = NULL;
8100 fts_cache_t* cache = table->fts->cache;
8101 bool need_init = false;
8102
8103 ut_ad(!mutex_own(&dict_sys->mutex));
8104
8105 /* First check cache->get_docs is initialized */
8106 if (!has_cache_lock) {
8107 rw_lock_x_lock(&cache->lock);
8108 }
8109
8110 rw_lock_x_lock(&cache->init_lock);
8111 if (cache->get_docs == NULL) {
8112 cache->get_docs = fts_get_docs_create(cache);
8113 }
8114 rw_lock_x_unlock(&cache->init_lock);
8115
8116 if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
8117 goto func_exit;
8118 }
8119
8120 need_init = true;
8121
8122 start_doc = cache->synced_doc_id;
8123
8124 if (!start_doc) {
8125 fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
8126 cache->synced_doc_id = start_doc;
8127 }
8128
8129 /* No FTS index, this is the case when previous FTS index
8130 dropped, and we re-initialize the Doc ID system for subsequent
8131 insertion */
8132 if (ib_vector_is_empty(cache->get_docs)) {
8133 index = table->fts_doc_id_index;
8134
8135 ut_a(index);
8136
8137 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8138 FTS_FETCH_DOC_BY_ID_LARGE,
8139 fts_init_get_doc_id, cache);
8140 } else {
8141 if (table->fts->cache->stopword_info.status
8142 & STOPWORD_NOT_INIT) {
8143 fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
8144 }
8145
8146 for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
8147 get_doc = static_cast<fts_get_doc_t*>(
8148 ib_vector_get(cache->get_docs, i));
8149
8150 index = get_doc->index_cache->index;
8151
8152 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8153 FTS_FETCH_DOC_BY_ID_LARGE,
8154 fts_init_recover_doc, get_doc);
8155 }
8156 }
8157
8158 table->fts->fts_status |= ADDED_TABLE_SYNCED;
8159
8160 fts_get_docs_clear(cache->get_docs);
8161
8162 func_exit:
8163 if (!has_cache_lock) {
8164 rw_lock_x_unlock(&cache->lock);
8165 }
8166
8167 if (need_init) {
8168 mutex_enter(&dict_sys->mutex);
8169 /* Register the table with the optimize thread. */
8170 fts_optimize_add_table(table);
8171 mutex_exit(&dict_sys->mutex);
8172 }
8173
8174 return(TRUE);
8175 }
8176
8177 /** Check if the all the auxillary tables associated with FTS index are in
8178 consistent state. For now consistency is check only by ensuring
8179 index->page_no != FIL_NULL
8180 @param[out] base_table table has host fts index
8181 @param[in,out] trx trx handler */
8182 void
fts_check_corrupt(dict_table_t * base_table,trx_t * trx)8183 fts_check_corrupt(
8184 dict_table_t* base_table,
8185 trx_t* trx)
8186 {
8187 bool sane = true;
8188 fts_table_t fts_table;
8189
8190 /* Iterate over the common table and check for their sanity. */
8191 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
8192
8193 for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
8194
8195 char table_name[MAX_FULL_NAME_LEN];
8196
8197 fts_table.suffix = fts_common_tables[i];
8198 fts_get_table_name(&fts_table, table_name);
8199
8200 dict_table_t* aux_table = dict_table_open_on_name(
8201 table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
8202
8203 if (aux_table == NULL) {
8204 dict_set_corrupted(
8205 dict_table_get_first_index(base_table),
8206 trx, "FTS_SANITY_CHECK");
8207 ut_ad(base_table->corrupted == TRUE);
8208 sane = false;
8209 continue;
8210 }
8211
8212 for (dict_index_t* aux_table_index =
8213 UT_LIST_GET_FIRST(aux_table->indexes);
8214 aux_table_index != NULL;
8215 aux_table_index =
8216 UT_LIST_GET_NEXT(indexes, aux_table_index)) {
8217
8218 /* Check if auxillary table needed for FTS is sane. */
8219 if (aux_table_index->page == FIL_NULL) {
8220 dict_set_corrupted(
8221 dict_table_get_first_index(base_table),
8222 trx, "FTS_SANITY_CHECK");
8223 ut_ad(base_table->corrupted == TRUE);
8224 sane = false;
8225 }
8226 }
8227
8228 dict_table_close(aux_table, FALSE, FALSE);
8229 }
8230 }
8231