1 /*****************************************************************************
2
3 Copyright (c) 2011, 2021, Oracle and/or its affiliates.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file fts/fts0fts.cc
29 Full Text Search interface
30 ***********************************************************************/
31
32 #include "ha_prototypes.h"
33
34 #include "trx0roll.h"
35 #include "row0mysql.h"
36 #include "row0upd.h"
37 #include "dict0types.h"
38 #include "dict0stats_bg.h"
39 #include "row0sel.h"
40 #include "fts0fts.h"
41 #include "fts0priv.h"
42 #include "fts0types.h"
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "fts0plugin.h"
46 #include "dict0priv.h"
47 #include "dict0stats.h"
48 #include "btr0pcur.h"
49 #include "sync0sync.h"
50 #include "ut0new.h"
51
/** NOTE(review): presumably the maximum length of an FTS id rendered as a
string; it is not referenced in the visible part of this file - confirm. */
static const ulint FTS_MAX_ID_LEN = 32;

/** Column name from the FTS config table */
#define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"

/** Verify whether an auxiliary table name is an obsolete table name, by
looking for one of the obsolete key words ("DOC_ID", "ADDED", "STOPWORDS")
as a substring of the name. Evaluates table_name up to three times. */
#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)			\
	(strstr((table_name), "DOC_ID") != NULL			\
	 || strstr((table_name), "ADDED") != NULL		\
	 || strstr((table_name), "STOPWORDS") != NULL)
63
/** This is the maximum FTS cache size for each table; intended to be
a configurable variable */
ulong	fts_max_cache_size;

/** Whether the total memory used for FTS cache is exhausted, and we will
need a sync to free some memory */
bool	fts_need_sync = false;

/** Variable specifying the total memory allocated for FTS cache */
ulong	fts_max_total_cache_size;

/** This is the FTS result cache limit for each query; intended to be
a configurable variable */
ulong	fts_result_cache_limit;

/** Variable specifying the maximum FTS token size */
ulong	fts_max_token_size;

/** Variable specifying the minimum FTS token size */
ulong	fts_min_token_size;


// FIXME: testing
/* NOTE(review): these two globals look like ad-hoc instrumentation counters
(accumulated time and number of nodes); they have external linkage and
generic names - confirm whether they are still needed. */
ib_time_monotonic_t elapsed_time = 0;
ulint n_nodes = 0;
89
#ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit, expressed in MB (value 1,
i.e. 1 MB going by the _IN_MB suffix; an earlier comment said "1K") */
static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;

/** The cache size permissible upper limit, expressed in MB (1024 MB = 1G) */
static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
#endif
97
/** Time to sleep after DEADLOCK error before retrying operation.
NOTE(review): the unit is not stated here; presumably microseconds
(i.e. 0.1 s) - confirm against the call sites. */
static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;

/** Variable to record innodb_fts_internal_tbl_name for the information
schema tables INNODB_FTS_INSERTED etc. Starts out as NULL. */
char* fts_internal_tbl_name		= NULL;
104
/** InnoDB default stopword list:
There are different versions of stopword lists; the stop words listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision.

The array is terminated by a NULL entry, which is what consumers iterate
up to.
NOTE(review): "the" appears twice in the list (after "that" and again
after "und"); the list is kept as is because the loader in this file
inserts every entry verbatim. */
const char *fts_default_stopword[] =
{
	"a",
	"about",
	"an",
	"are",
	"as",
	"at",
	"be",
	"by",
	"com",
	"de",
	"en",
	"for",
	"from",
	"how",
	"i",
	"in",
	"is",
	"it",
	"la",
	"of",
	"on",
	"or",
	"that",
	"the",
	"this",
	"to",
	"was",
	"what",
	"when",
	"where",
	"who",
	"will",
	"with",
	"und",
	"the",
	"www",
	NULL
};
151
/** For storing table info when checking for orphaned tables. */
struct fts_aux_table_t {
	table_id_t	id;		/*!< Table id */
	table_id_t	parent_id;	/*!< Parent table id */
	table_id_t	index_id;	/*!< Table FT index id */
	char*		name;		/*!< Name of the table; ownership of
					the string is not visible here */
};
159
#ifdef FTS_DOC_STATS_DEBUG
/** Template for creating the FTS auxiliary index specific tables. This is
mainly designed for the statistics work in the future. The statement text
creates $doc_id_table (doc_id, word_count) and a unique clustered index
on doc_id; $doc_id_table is a placeholder bound by the caller. Only
compiled when FTS_DOC_STATS_DEBUG is defined. */
static const char* fts_create_index_tables_sql = {
	"BEGIN\n"
	""
	"CREATE TABLE $doc_id_table (\n"
	"   doc_id BIGINT UNSIGNED,\n"
	"   word_count INTEGER UNSIGNED NOT NULL\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON $doc_id_table(doc_id);\n"
};
#endif
173
/** FTS auxiliary table suffixes that are common to all FT indexes.
The array is NULL terminated. */
const char* fts_common_tables[] = {
	"BEING_DELETED",
	"BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED",
	"DELETED_CACHE",
	NULL
};

/** FTS auxiliary INDEX split intervals. Each entry pairs a numeric split
value with the suffix of an auxiliary index table; the list ends with a
{ 0, NULL } sentinel.
NOTE(review): the exact meaning of the numeric value is defined by
fts_index_selector_t and its users, which are not visible here. */
const  fts_index_selector_t fts_index_selector[] = {
	{ 9, "INDEX_1" },
	{ 65, "INDEX_2" },
	{ 70, "INDEX_3" },
	{ 75, "INDEX_4" },
	{ 80, "INDEX_5" },
	{ 85, "INDEX_6" },
	{  0 , NULL	 }
};
194
/** Default config values for FTS indexes on a table. The statement text
inserts one row per configuration key into $config_table (a placeholder
bound by the caller): cache size '256', optimize limit '180', synced
doc id '0', total deleted count '0' and table state '0'. */
static const char* fts_config_table_insert_values_sql =
	"BEGIN\n"
	"\n"
	"INSERT INTO $config_table VALUES('"
		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
	""
	"INSERT INTO $config_table VALUES('"
		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_SYNCED_DOC_ID "', '0');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_TOTAL_DELETED_COUNT "', '0');\n"
	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
	"INSERT INTO $config_table VALUES ('"
		FTS_TABLE_STATE "', '0');\n";
213
/** FTS tokenize parameter for the plugin parser */
struct fts_tokenize_param_t {
	fts_doc_t*	result_doc;	/*!< Result doc for tokens */
	ulint		add_pos;	/*!< Added position for tokens */
};
219
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@param[in]	has_dict_lock	whether has dict operation lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict_lock);

/****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist. */
static
void
fts_words_free(
/*===========*/
	ib_rbt_t*	words)		/*!< in: rb tree of words */
	MY_ATTRIBUTE((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table.
Only declared when FTS_CACHE_SIZE_DEBUG is defined. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync);		/*!< in: sync state */
#endif

/*********************************************************************//**
This function fetches the document just inserted right before
we commit the transaction, and tokenize the inserted text data
and insert into FTS auxiliary table and its cache.
@return TRUE if successful (note that the declared return type is ulint) */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)));
					/*!< in: affected fts indexes */
#ifdef FTS_DOC_STATS_DEBUG
/****************************************************************//**
Check whether a particular word (term) exists in the FTS index.
Only declared when FTS_DOC_STATS_DEBUG is defined.
@return DB_SUCCESS if all went fine */
static
dberr_t
fts_is_word_in_index(
/*=================*/
	trx_t*		trx,		/*!< in: FTS query state */
	que_t**		graph,		/*!< out: Query graph */
	fts_table_t*	fts_table,	/*!< in: table instance */
	const fts_string_t* word,	/*!< in: the word to check */
	ibool*		found)		/*!< out: TRUE if exists */
	MY_ATTRIBUTE((nonnull, warn_unused_result));
#endif /* FTS_DOC_STATS_DEBUG */

/******************************************************************//**
Update the last document id. This function could create a new
transaction to update the last document id.
Only the first parameter (table) is declared nonnull.
@return DB_SUCCESS if OK */
static
dberr_t
fts_update_sync_doc_id(
/*===================*/
	const dict_table_t*	table,		/*!< in: table */
	const char*		table_name,	/*!< in: table name, or NULL */
	doc_id_t		doc_id,		/*!< in: last document id */
	trx_t*			trx)		/*!< in: update trx, or NULL */
	MY_ATTRIBUTE((nonnull(1)));
295
296 /** Get a character set based on precise type.
297 @param prtype precise type
298 @return the corresponding character set */
299 UNIV_INLINE
300 CHARSET_INFO*
fts_get_charset(ulint prtype)301 fts_get_charset(ulint prtype)
302 {
303 #ifdef UNIV_DEBUG
304 switch (prtype & DATA_MYSQL_TYPE_MASK) {
305 case MYSQL_TYPE_BIT:
306 case MYSQL_TYPE_STRING:
307 case MYSQL_TYPE_VAR_STRING:
308 case MYSQL_TYPE_TINY_BLOB:
309 case MYSQL_TYPE_MEDIUM_BLOB:
310 case MYSQL_TYPE_BLOB:
311 case MYSQL_TYPE_LONG_BLOB:
312 case MYSQL_TYPE_VARCHAR:
313 break;
314 default:
315 ut_error;
316 }
317 #endif /* UNIV_DEBUG */
318
319 uint cs_num = (uint) dtype_get_charset_coll(prtype);
320
321 if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
322 return(cs);
323 }
324
325 ib::fatal() << "Unable to find charset-collation " << cs_num;
326 return(NULL);
327 }
328
329 /****************************************************************//**
330 This function loads the default InnoDB stopword list */
331 static
332 void
fts_load_default_stopword(fts_stopword_t * stopword_info)333 fts_load_default_stopword(
334 /*======================*/
335 fts_stopword_t* stopword_info) /*!< in: stopword info */
336 {
337 fts_string_t str;
338 mem_heap_t* heap;
339 ib_alloc_t* allocator;
340 ib_rbt_t* stop_words;
341
342 allocator = stopword_info->heap;
343 heap = static_cast<mem_heap_t*>(allocator->arg);
344
345 if (!stopword_info->cached_stopword) {
346 stopword_info->cached_stopword = rbt_create_arg_cmp(
347 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
348 &my_charset_latin1);
349 }
350
351 stop_words = stopword_info->cached_stopword;
352
353 str.f_n_char = 0;
354
355 for (ulint i = 0; fts_default_stopword[i]; ++i) {
356 char* word;
357 fts_tokenizer_word_t new_word;
358
359 /* We are going to duplicate the value below. */
360 word = const_cast<char*>(fts_default_stopword[i]);
361
362 new_word.nodes = ib_vector_create(
363 allocator, sizeof(fts_node_t), 4);
364
365 str.f_len = ut_strlen(word);
366 str.f_str = reinterpret_cast<byte*>(word);
367
368 fts_string_dup(&new_word.text, &str, heap);
369
370 rbt_insert(stop_words, &new_word, &new_word);
371 }
372
373 stopword_info->status = STOPWORD_FROM_DEFAULT;
374 }
375
376 /****************************************************************//**
377 Callback function to read a single stopword value.
378 @return Always return TRUE */
379 static
380 ibool
fts_read_stopword(void * row,void * user_arg)381 fts_read_stopword(
382 /*==============*/
383 void* row, /*!< in: sel_node_t* */
384 void* user_arg) /*!< in: pointer to ib_vector_t */
385 {
386 ib_alloc_t* allocator;
387 fts_stopword_t* stopword_info;
388 sel_node_t* sel_node;
389 que_node_t* exp;
390 ib_rbt_t* stop_words;
391 dfield_t* dfield;
392 fts_string_t str;
393 mem_heap_t* heap;
394 ib_rbt_bound_t parent;
395
396 sel_node = static_cast<sel_node_t*>(row);
397 stopword_info = static_cast<fts_stopword_t*>(user_arg);
398
399 stop_words = stopword_info->cached_stopword;
400 allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
401 heap = static_cast<mem_heap_t*>(allocator->arg);
402
403 exp = sel_node->select_list;
404
405 /* We only need to read the first column */
406 dfield = que_node_get_val(exp);
407
408 str.f_n_char = 0;
409 str.f_str = static_cast<byte*>(dfield_get_data(dfield));
410 str.f_len = dfield_get_len(dfield);
411
412 /* Only create new node if it is a value not already existed */
413 if (str.f_len != UNIV_SQL_NULL
414 && rbt_search(stop_words, &parent, &str) != 0) {
415
416 fts_tokenizer_word_t new_word;
417
418 new_word.nodes = ib_vector_create(
419 allocator, sizeof(fts_node_t), 4);
420
421 new_word.text.f_str = static_cast<byte*>(
422 mem_heap_alloc(heap, str.f_len + 1));
423
424 memcpy(new_word.text.f_str, str.f_str, str.f_len);
425
426 new_word.text.f_n_char = 0;
427 new_word.text.f_len = str.f_len;
428 new_word.text.f_str[str.f_len] = 0;
429
430 rbt_insert(stop_words, &new_word, &new_word);
431 }
432
433 return(TRUE);
434 }
435
/******************************************************************//**
Load user defined stopwords from the designated user table into
stopword_info->cached_stopword.

A background transaction is allocated for the read and freed before
returning. Unless fts->fts_status has TABLE_DICT_LOCKED set, the
dict_sys->mutex is acquired here and held until the cleanup label.
The table is validated with fts_valid_stopword_table(), whose result
becomes the comparison charset of the stopword tree. A lock wait timeout
while reading causes the read to be retried without any retry limit;
any other error aborts the load.
@return TRUE if load operation is successful */
static
ibool
fts_load_user_stopword(
/*===================*/
	fts_t*		fts,			/*!< in: FTS struct */
	const char*	stopword_table_name,	/*!< in: Stopword table
						name */
	fts_stopword_t*	stopword_info)		/*!< in: Stopword info */
{
	pars_info_t*	info;
	que_t*		graph;
	dberr_t		error = DB_SUCCESS;
	ibool		ret = TRUE;
	trx_t*		trx;
	/* Non-zero when the caller's FTS status says the dictionary is
	already locked; in that case the mutex is not taken again here. */
	ibool		has_lock = fts->fts_status & TABLE_DICT_LOCKED;

	trx = trx_allocate_for_background();
	trx->op_info = "Load user stopword table into FTS cache";

	if (!has_lock) {
		mutex_enter(&dict_sys->mutex);
	}

	/* Validate the user table existence and in the right
	format */
	stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
	if (!stopword_info->charset) {
		/* Invalid table: nothing has been parsed yet, so only the
		mutex and the transaction need to be released. */
		ret = FALSE;
		goto cleanup;
	} else if (!stopword_info->cached_stopword) {
		/* Create the stopword RB tree with the stopword column
		charset. All comparison will use this charset */
		stopword_info->cached_stopword = rbt_create_arg_cmp(
			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
			stopword_info->charset);

	}

	info = pars_info_create();

	pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);

	/* fts_read_stopword() is invoked for every fetched row and adds
	the value to stopword_info. */
	pars_info_bind_function(info, "my_func", fts_read_stopword,
				stopword_info);

	graph = fts_parse_sql_no_dict_lock(
		NULL,
		info,
		"DECLARE FUNCTION my_func;\n"
		"DECLARE CURSOR c IS"
		" SELECT value"
		" FROM $table_stopword;\n"
		"BEGIN\n"
		"\n"
		"OPEN c;\n"
		"WHILE 1 = 1 LOOP\n"
		"  FETCH c INTO my_func();\n"
		"  IF c % NOTFOUND THEN\n"
		"    EXIT;\n"
		"  END IF;\n"
		"END LOOP;\n"
		"CLOSE c;");

	/* Loop until the read either succeeds or fails with something
	other than a lock wait timeout. */
	for (;;) {
		error = fts_eval_sql(trx, graph);

		if (error == DB_SUCCESS) {
			fts_sql_commit(trx);
			stopword_info->status = STOPWORD_USER_TABLE;
			break;
		} else {

			fts_sql_rollback(trx);

			if (error == DB_LOCK_WAIT_TIMEOUT) {
				ib::warn() << "Lock wait timeout reading user"
					" stopword table. Retrying!";

				/* Clear the error so the transaction can be
				reused for the next attempt. */
				trx->error_state = DB_SUCCESS;
			} else {
				ib::error() << "Error '" << ut_strerr(error)
					<< "' while reading user stopword"
					" table.";
				ret = FALSE;
				break;
			}
		}
	}

	que_graph_free(graph);

cleanup:
	if (!has_lock) {
		mutex_exit(&dict_sys->mutex);
	}

	trx_free_for_background(trx);
	return(ret);
}
538
539 /******************************************************************//**
540 Initialize the index cache. */
541 static
542 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)543 fts_index_cache_init(
544 /*=================*/
545 ib_alloc_t* allocator, /*!< in: the allocator to use */
546 fts_index_cache_t* index_cache) /*!< in: index cache */
547 {
548 ulint i;
549
550 ut_a(index_cache->words == NULL);
551
552 index_cache->words = rbt_create_arg_cmp(
553 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
554 index_cache->charset);
555
556 ut_a(index_cache->doc_stats == NULL);
557
558 index_cache->doc_stats = ib_vector_create(
559 allocator, sizeof(fts_doc_stats_t), 4);
560
561 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
562 ut_a(index_cache->ins_graph[i] == NULL);
563 ut_a(index_cache->sel_graph[i] == NULL);
564 }
565 }
566
567 /*********************************************************************//**
568 Initialize FTS cache. */
569 void
fts_cache_init(fts_cache_t * cache)570 fts_cache_init(
571 /*===========*/
572 fts_cache_t* cache) /*!< in: cache to initialize */
573 {
574 ulint i;
575
576 /* Just to make sure */
577 ut_a(cache->sync_heap->arg == NULL);
578
579 cache->sync_heap->arg = mem_heap_create(1024);
580
581 cache->total_size = 0;
582 cache->total_size_before_sync = 0;
583
584 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
585 cache->deleted_doc_ids = ib_vector_create(
586 cache->sync_heap, sizeof(fts_update_t), 4);
587 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
588
589 /* Reset the cache data for all the FTS indexes. */
590 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
591 fts_index_cache_t* index_cache;
592
593 index_cache = static_cast<fts_index_cache_t*>(
594 ib_vector_get(cache->indexes, i));
595
596 fts_index_cache_init(cache->sync_heap, index_cache);
597 }
598 }
599
600 /****************************************************************//**
601 Create a FTS cache. */
602 fts_cache_t*
fts_cache_create(dict_table_t * table)603 fts_cache_create(
604 /*=============*/
605 dict_table_t* table) /*!< in: table owns the FTS cache */
606 {
607 mem_heap_t* heap;
608 fts_cache_t* cache;
609
610 heap = static_cast<mem_heap_t*>(mem_heap_create(512));
611
612 cache = static_cast<fts_cache_t*>(
613 mem_heap_zalloc(heap, sizeof(*cache)));
614
615 cache->cache_heap = heap;
616
617 rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
618
619 rw_lock_create(
620 fts_cache_init_rw_lock_key, &cache->init_lock,
621 SYNC_FTS_CACHE_INIT);
622
623 mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
624
625 mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
626
627 mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
628
629 /* This is the heap used to create the cache itself. */
630 cache->self_heap = ib_heap_allocator_create(heap);
631
632 /* This is a transient heap, used for storing sync data. */
633 cache->sync_heap = ib_heap_allocator_create(heap);
634 cache->sync_heap->arg = NULL;
635
636 cache->sync = static_cast<fts_sync_t*>(
637 mem_heap_zalloc(heap, sizeof(fts_sync_t)));
638
639 cache->sync->table = table;
640 cache->sync->event = os_event_create(0);
641
642 /* Create the index cache vector that will hold the inverted indexes. */
643 cache->indexes = ib_vector_create(
644 cache->self_heap, sizeof(fts_index_cache_t), 2);
645
646 fts_cache_init(cache);
647
648 cache->stopword_info.cached_stopword = NULL;
649 cache->stopword_info.charset = NULL;
650
651 cache->stopword_info.heap = cache->self_heap;
652
653 cache->stopword_info.status = STOPWORD_NOT_INIT;
654
655 return(cache);
656 }
657
658 /*******************************************************************//**
659 Add a newly create index into FTS cache */
660 void
fts_add_index(dict_index_t * index,dict_table_t * table)661 fts_add_index(
662 /*==========*/
663 dict_index_t* index, /*!< FTS index to be added */
664 dict_table_t* table) /*!< table */
665 {
666 fts_t* fts = table->fts;
667 fts_cache_t* cache;
668 fts_index_cache_t* index_cache;
669
670 ut_ad(fts);
671 cache = table->fts->cache;
672
673 rw_lock_x_lock(&cache->init_lock);
674
675 ib_vector_push(fts->indexes, &index);
676
677 index_cache = fts_find_index_cache(cache, index);
678
679 if (!index_cache) {
680 /* Add new index cache structure */
681 index_cache = fts_cache_index_cache_create(table, index);
682 }
683
684 rw_lock_x_unlock(&cache->init_lock);
685 }
686
687 /*******************************************************************//**
688 recalibrate get_doc structure after index_cache in cache->indexes changed */
689 static
690 void
fts_reset_get_doc(fts_cache_t * cache)691 fts_reset_get_doc(
692 /*==============*/
693 fts_cache_t* cache) /*!< in: FTS index cache */
694 {
695 fts_get_doc_t* get_doc;
696 ulint i;
697
698 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
699
700 ib_vector_reset(cache->get_docs);
701
702 for (i = 0; i < ib_vector_size(cache->indexes); i++) {
703 fts_index_cache_t* ind_cache;
704
705 ind_cache = static_cast<fts_index_cache_t*>(
706 ib_vector_get(cache->indexes, i));
707
708 get_doc = static_cast<fts_get_doc_t*>(
709 ib_vector_push(cache->get_docs, NULL));
710
711 memset(get_doc, 0x0, sizeof(*get_doc));
712
713 get_doc->index_cache = ind_cache;
714 }
715
716 ut_ad(ib_vector_size(cache->get_docs)
717 == ib_vector_size(cache->indexes));
718 }
719
720 /*******************************************************************//**
721 Check an index is in the table->indexes list
722 @return TRUE if it exists */
723 static
724 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)725 fts_in_dict_index(
726 /*==============*/
727 dict_table_t* table, /*!< in: Table */
728 dict_index_t* index_check) /*!< in: index to be checked */
729 {
730 dict_index_t* index;
731
732 for (index = dict_table_get_first_index(table);
733 index != NULL;
734 index = dict_table_get_next_index(index)) {
735
736 if (index == index_check) {
737 return(TRUE);
738 }
739 }
740
741 return(FALSE);
742 }
743
744 /*******************************************************************//**
745 Check an index is in the fts->cache->indexes list
746 @return TRUE if it exists */
747 static
748 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)749 fts_in_index_cache(
750 /*===============*/
751 dict_table_t* table, /*!< in: Table */
752 dict_index_t* index) /*!< in: index to be checked */
753 {
754 ulint i;
755
756 for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
757 fts_index_cache_t* index_cache;
758
759 index_cache = static_cast<fts_index_cache_t*>(
760 ib_vector_get(table->fts->cache->indexes, i));
761
762 if (index_cache->index == index) {
763 return(TRUE);
764 }
765 }
766
767 return(FALSE);
768 }
769
770 /*******************************************************************//**
771 Check indexes in the fts->indexes is also present in index cache and
772 table->indexes list
773 @return TRUE if all indexes match */
774 ibool
fts_check_cached_index(dict_table_t * table)775 fts_check_cached_index(
776 /*===================*/
777 dict_table_t* table) /*!< in: Table where indexes are dropped */
778 {
779 ulint i;
780
781 if (!table->fts || !table->fts->cache) {
782 return(TRUE);
783 }
784
785 ut_a(ib_vector_size(table->fts->indexes)
786 == ib_vector_size(table->fts->cache->indexes));
787
788 for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
789 dict_index_t* index;
790
791 index = static_cast<dict_index_t*>(
792 ib_vector_getp(table->fts->indexes, i));
793
794 if (!fts_in_index_cache(table, index)) {
795 return(FALSE);
796 }
797
798 if (!fts_in_dict_index(table, index)) {
799 return(FALSE);
800 }
801 }
802
803 return(TRUE);
804 }
805
806 /*******************************************************************//**
807 Drop auxiliary tables related to an FTS index
808 @return DB_SUCCESS or error number */
809 dberr_t
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx)810 fts_drop_index(
811 /*===========*/
812 dict_table_t* table, /*!< in: Table where indexes are dropped */
813 dict_index_t* index, /*!< in: Index to be dropped */
814 trx_t* trx) /*!< in: Transaction for the drop */
815 {
816 ib_vector_t* indexes = table->fts->indexes;
817 dberr_t err = DB_SUCCESS;
818
819 ut_a(indexes);
820
821 if ((ib_vector_size(indexes) == 1
822 && (index == static_cast<dict_index_t*>(
823 ib_vector_getp(table->fts->indexes, 0))))
824 || ib_vector_is_empty(indexes)) {
825 doc_id_t current_doc_id;
826 doc_id_t first_doc_id;
827
828 /* If we are dropping the only FTS index of the table,
829 remove it from optimize thread */
830 fts_optimize_remove_table(table);
831
832 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
833
834 /* If Doc ID column is not added internally by FTS index,
835 we can drop all FTS auxiliary tables. Otherwise, we will
836 need to keep some common table such as CONFIG table, so
837 as to keep track of incrementing Doc IDs */
838 if (!DICT_TF2_FLAG_IS_SET(
839 table, DICT_TF2_FTS_HAS_DOC_ID)) {
840
841 err = fts_drop_tables(trx, table);
842
843 err = fts_drop_index_tables(trx, index);
844
845 while (index->index_fts_syncing
846 && !trx_is_interrupted(trx)) {
847 DICT_BG_YIELD(trx);
848 }
849
850 fts_free(table);
851
852 return(err);
853 }
854
855 while (index->index_fts_syncing
856 && !trx_is_interrupted(trx)) {
857 DICT_BG_YIELD(trx);
858 }
859
860 current_doc_id = table->fts->cache->next_doc_id;
861 first_doc_id = table->fts->cache->first_doc_id;
862 fts_cache_clear(table->fts->cache);
863 fts_cache_destroy(table->fts->cache);
864 table->fts->cache = fts_cache_create(table);
865 table->fts->cache->next_doc_id = current_doc_id;
866 table->fts->cache->first_doc_id = first_doc_id;
867
868 } else {
869 fts_cache_t* cache = table->fts->cache;
870 fts_index_cache_t* index_cache;
871
872 rw_lock_x_lock(&cache->init_lock);
873
874 index_cache = fts_find_index_cache(cache, index);
875
876 if (index_cache != NULL) {
877 while (index->index_fts_syncing
878 && !trx_is_interrupted(trx)) {
879 DICT_BG_YIELD(trx);
880 }
881
882 if (index_cache->words) {
883 fts_words_free(index_cache->words);
884 rbt_free(index_cache->words);
885 }
886
887 ib_vector_remove(cache->indexes, *(void**) index_cache);
888 }
889
890 if (cache->get_docs) {
891 fts_reset_get_doc(cache);
892 }
893
894 rw_lock_x_unlock(&cache->init_lock);
895 }
896
897 err = fts_drop_index_tables(trx, index);
898
899 ib_vector_remove(indexes, (const void*) index);
900
901 return(err);
902 }
903
904 /****************************************************************//**
905 Free the query graph but check whether dict_sys->mutex is already
906 held */
907 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)908 fts_que_graph_free_check_lock(
909 /*==========================*/
910 fts_table_t* fts_table, /*!< in: FTS table */
911 const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
912 que_t* graph) /*!< in: query graph */
913 {
914 ibool has_dict = FALSE;
915
916 if (fts_table && fts_table->table) {
917 ut_ad(fts_table->table->fts);
918
919 has_dict = fts_table->table->fts->fts_status
920 & TABLE_DICT_LOCKED;
921 } else if (index_cache) {
922 ut_ad(index_cache->index->table->fts);
923
924 has_dict = index_cache->index->table->fts->fts_status
925 & TABLE_DICT_LOCKED;
926 }
927
928 if (!has_dict) {
929 mutex_enter(&dict_sys->mutex);
930 }
931
932 ut_ad(mutex_own(&dict_sys->mutex));
933
934 que_graph_free(graph);
935
936 if (!has_dict) {
937 mutex_exit(&dict_sys->mutex);
938 }
939 }
940
941 /****************************************************************//**
942 Create an FTS index cache. */
943 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)944 fts_index_get_charset(
945 /*==================*/
946 dict_index_t* index) /*!< in: FTS index */
947 {
948 CHARSET_INFO* charset = NULL;
949 dict_field_t* field;
950 ulint prtype;
951
952 field = dict_index_get_nth_field(index, 0);
953 prtype = field->col->prtype;
954
955 charset = fts_get_charset(prtype);
956
957 #ifdef FTS_DEBUG
958 /* Set up charset info for this index. Please note all
959 field of the FTS index should have the same charset */
960 for (i = 1; i < index->n_fields; i++) {
961 CHARSET_INFO* fld_charset;
962
963 field = dict_index_get_nth_field(index, i);
964 prtype = field->col->prtype;
965
966 fld_charset = fts_get_charset(prtype);
967
968 /* All FTS columns should have the same charset */
969 if (charset) {
970 ut_a(charset == fld_charset);
971 } else {
972 charset = fld_charset;
973 }
974 }
975 #endif
976
977 return(charset);
978
979 }
980 /****************************************************************//**
981 Create an FTS index cache.
982 @return Index Cache */
983 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)984 fts_cache_index_cache_create(
985 /*=========================*/
986 dict_table_t* table, /*!< in: table with FTS index */
987 dict_index_t* index) /*!< in: FTS index */
988 {
989 ulint n_bytes;
990 fts_index_cache_t* index_cache;
991 fts_cache_t* cache = table->fts->cache;
992
993 ut_a(cache != NULL);
994
995 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
996
997 /* Must not already exist in the cache vector. */
998 ut_a(fts_find_index_cache(cache, index) == NULL);
999
1000 index_cache = static_cast<fts_index_cache_t*>(
1001 ib_vector_push(cache->indexes, NULL));
1002
1003 memset(index_cache, 0x0, sizeof(*index_cache));
1004
1005 index_cache->index = index;
1006
1007 index_cache->charset = fts_index_get_charset(index);
1008
1009 n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1010
1011 index_cache->ins_graph = static_cast<que_t**>(
1012 mem_heap_zalloc(static_cast<mem_heap_t*>(
1013 cache->self_heap->arg), n_bytes));
1014
1015 index_cache->sel_graph = static_cast<que_t**>(
1016 mem_heap_zalloc(static_cast<mem_heap_t*>(
1017 cache->self_heap->arg), n_bytes));
1018
1019 fts_index_cache_init(cache->sync_heap, index_cache);
1020
1021 if (cache->get_docs) {
1022 fts_reset_get_doc(cache);
1023 }
1024
1025 return(index_cache);
1026 }
1027
1028 /****************************************************************//**
1029 Release all resources help by the words rb tree e.g., the node ilist. */
1030 static
1031 void
fts_words_free(ib_rbt_t * words)1032 fts_words_free(
1033 /*===========*/
1034 ib_rbt_t* words) /*!< in: rb tree of words */
1035 {
1036 const ib_rbt_node_t* rbt_node;
1037
1038 /* Free the resources held by a word. */
1039 for (rbt_node = rbt_first(words);
1040 rbt_node != NULL;
1041 rbt_node = rbt_first(words)) {
1042
1043 ulint i;
1044 fts_tokenizer_word_t* word;
1045
1046 word = rbt_value(fts_tokenizer_word_t, rbt_node);
1047
1048 /* Free the ilists of this word. */
1049 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1050
1051 fts_node_t* fts_node = static_cast<fts_node_t*>(
1052 ib_vector_get(word->nodes, i));
1053
1054 ut_free(fts_node->ilist);
1055 fts_node->ilist = NULL;
1056 }
1057
1058 /* NOTE: We are responsible for free'ing the node */
1059 ut_free(rbt_remove_node(words, rbt_node));
1060 }
1061 }
1062
1063 /** Clear cache.
1064 @param[in,out] cache fts cache */
1065 void
fts_cache_clear(fts_cache_t * cache)1066 fts_cache_clear(
1067 fts_cache_t* cache)
1068 {
1069 ulint i;
1070
1071 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1072 ulint j;
1073 fts_index_cache_t* index_cache;
1074
1075 index_cache = static_cast<fts_index_cache_t*>(
1076 ib_vector_get(cache->indexes, i));
1077
1078 fts_words_free(index_cache->words);
1079
1080 rbt_free(index_cache->words);
1081
1082 index_cache->words = NULL;
1083
1084 for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1085
1086 if (index_cache->ins_graph[j] != NULL) {
1087
1088 fts_que_graph_free_check_lock(
1089 NULL, index_cache,
1090 index_cache->ins_graph[j]);
1091
1092 index_cache->ins_graph[j] = NULL;
1093 }
1094
1095 if (index_cache->sel_graph[j] != NULL) {
1096
1097 fts_que_graph_free_check_lock(
1098 NULL, index_cache,
1099 index_cache->sel_graph[j]);
1100
1101 index_cache->sel_graph[j] = NULL;
1102 }
1103 }
1104
1105 index_cache->doc_stats = NULL;
1106 }
1107
1108 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1109 cache->sync_heap->arg = NULL;
1110
1111 fts_need_sync = false;
1112
1113 cache->total_size = 0;
1114
1115 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1116 cache->deleted_doc_ids = NULL;
1117 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1118 }
1119
1120 /*********************************************************************//**
1121 Search the index specific cache for a particular FTS index.
1122 @return the index cache else NULL */
1123 UNIV_INLINE
1124 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1125 fts_get_index_cache(
1126 /*================*/
1127 fts_cache_t* cache, /*!< in: cache to search */
1128 const dict_index_t* index) /*!< in: index to search for */
1129 {
1130 ulint i;
1131
1132 ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1133 || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1134
1135 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1136 fts_index_cache_t* index_cache;
1137
1138 index_cache = static_cast<fts_index_cache_t*>(
1139 ib_vector_get(cache->indexes, i));
1140
1141 if (index_cache->index == index) {
1142
1143 return(index_cache);
1144 }
1145 }
1146
1147 return(NULL);
1148 }
1149
#ifdef FTS_DEBUG
/*********************************************************************//**
Search the cache for the get_doc structure that belongs to an index.
The caller must hold cache->init_lock in X mode (checked in debug builds).
@return the fts_get_doc_t item else NULL */
static
fts_get_doc_t*
fts_get_index_get_doc(
/*==================*/
	fts_cache_t*		cache,	/*!< in: cache to search */
	const dict_index_t*	index)	/*!< in: index to search for */
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	fts_get_doc_t*	found = NULL;
	const ulint	n_docs = ib_vector_size(cache->get_docs);

	/* Linear scan; stop at the first entry whose index cache refers
	to the requested index. */
	for (ulint pos = 0; found == NULL && pos < n_docs; ++pos) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			found = candidate;
		}
	}

	return(found);
}
#endif
1180
1181 /**********************************************************************//**
1182 Free the FTS cache. */
1183 void
fts_cache_destroy(fts_cache_t * cache)1184 fts_cache_destroy(
1185 /*==============*/
1186 fts_cache_t* cache) /*!< in: cache*/
1187 {
1188 rw_lock_free(&cache->lock);
1189 rw_lock_free(&cache->init_lock);
1190 mutex_free(&cache->optimize_lock);
1191 mutex_free(&cache->deleted_lock);
1192 mutex_free(&cache->doc_id_lock);
1193 os_event_destroy(cache->sync->event);
1194
1195 if (cache->stopword_info.cached_stopword) {
1196 rbt_free(cache->stopword_info.cached_stopword);
1197 }
1198
1199 if (cache->sync_heap->arg) {
1200 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1201 }
1202
1203 mem_heap_free(cache->cache_heap);
1204 }
1205
1206 /**********************************************************************//**
1207 Find an existing word, or if not found, create one and return it.
1208 @return specified word token */
1209 static
1210 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1211 fts_tokenizer_word_get(
1212 /*===================*/
1213 fts_cache_t* cache, /*!< in: cache */
1214 fts_index_cache_t*
1215 index_cache, /*!< in: index cache */
1216 fts_string_t* text) /*!< in: node text */
1217 {
1218 fts_tokenizer_word_t* word;
1219 ib_rbt_bound_t parent;
1220
1221 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1222
1223 /* If it is a stopword, do not index it */
1224 if (!fts_check_token(text,
1225 cache->stopword_info.cached_stopword,
1226 index_cache->index->is_ngram,
1227 index_cache->charset)) {
1228
1229 return(NULL);
1230 }
1231
1232 /* Check if we found a match, if not then add word to tree. */
1233 if (rbt_search(index_cache->words, &parent, text) != 0) {
1234 mem_heap_t* heap;
1235 fts_tokenizer_word_t new_word;
1236
1237 heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1238
1239 new_word.nodes = ib_vector_create(
1240 cache->sync_heap, sizeof(fts_node_t), 4);
1241
1242 fts_string_dup(&new_word.text, text, heap);
1243
1244 parent.last = rbt_add_node(
1245 index_cache->words, &parent, &new_word);
1246
1247 /* Take into account the RB tree memory use and the vector. */
1248 cache->total_size += sizeof(new_word)
1249 + sizeof(ib_rbt_node_t)
1250 + text->f_len
1251 + (sizeof(fts_node_t) * 4)
1252 + sizeof(*new_word.nodes);
1253
1254 ut_ad(rbt_validate(index_cache->words));
1255 }
1256
1257 word = rbt_value(fts_tokenizer_word_t, parent.last);
1258
1259 return(word);
1260 }
1261
1262 /**********************************************************************//**
1263 Add the given doc_id/word positions to the given node's ilist. */
1264 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1265 fts_cache_node_add_positions(
1266 /*=========================*/
1267 fts_cache_t* cache, /*!< in: cache */
1268 fts_node_t* node, /*!< in: word node */
1269 doc_id_t doc_id, /*!< in: doc id */
1270 ib_vector_t* positions) /*!< in: fts_token_t::positions */
1271 {
1272 ulint i;
1273 byte* ptr;
1274 byte* ilist;
1275 ulint enc_len;
1276 ulint last_pos;
1277 byte* ptr_start;
1278 ulint doc_id_delta;
1279
1280 #ifdef UNIV_DEBUG
1281 if (cache) {
1282 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1283 }
1284 #endif /* UNIV_DEBUG */
1285
1286 ut_ad(doc_id >= node->last_doc_id);
1287
1288 /* Calculate the space required to store the ilist. */
1289 doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1290 enc_len = fts_get_encoded_len(doc_id_delta);
1291
1292 last_pos = 0;
1293 for (i = 0; i < ib_vector_size(positions); i++) {
1294 ulint pos = *(static_cast<ulint*>(
1295 ib_vector_get(positions, i)));
1296
1297 ut_ad(last_pos == 0 || pos > last_pos);
1298
1299 enc_len += fts_get_encoded_len(pos - last_pos);
1300 last_pos = pos;
1301 }
1302
1303 /* The 0x00 byte at the end of the token positions list. */
1304 enc_len++;
1305
1306 if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1307 /* No need to allocate more space, we can fit in the new
1308 data at the end of the old one. */
1309 ilist = NULL;
1310 ptr = node->ilist + node->ilist_size;
1311 } else {
1312 ulint new_size = node->ilist_size + enc_len;
1313
1314 /* Over-reserve space by a fixed size for small lengths and
1315 by 20% for lengths >= 48 bytes. */
1316 if (new_size < 16) {
1317 new_size = 16;
1318 } else if (new_size < 32) {
1319 new_size = 32;
1320 } else if (new_size < 48) {
1321 new_size = 48;
1322 } else {
1323 new_size = (ulint)(1.2 * new_size);
1324 }
1325
1326 ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1327 ptr = ilist + node->ilist_size;
1328
1329 node->ilist_size_alloc = new_size;
1330 if (cache) {
1331 cache->total_size += new_size;
1332 }
1333 }
1334
1335 ptr_start = ptr;
1336
1337 /* Encode the new fragment. */
1338 ptr += fts_encode_int(doc_id_delta, ptr);
1339
1340 last_pos = 0;
1341 for (i = 0; i < ib_vector_size(positions); i++) {
1342 ulint pos = *(static_cast<ulint*>(
1343 ib_vector_get(positions, i)));
1344
1345 ptr += fts_encode_int(pos - last_pos, ptr);
1346 last_pos = pos;
1347 }
1348
1349 *ptr++ = 0;
1350
1351 ut_a(enc_len == (ulint)(ptr - ptr_start));
1352
1353 if (ilist) {
1354 /* Copy old ilist to the start of the new one and switch the
1355 new one into place in the node. */
1356 if (node->ilist_size > 0) {
1357 memcpy(ilist, node->ilist, node->ilist_size);
1358 ut_free(node->ilist);
1359 if (cache) {
1360 cache->total_size -= node->ilist_size;
1361 }
1362 }
1363
1364 node->ilist = ilist;
1365 }
1366
1367 node->ilist_size += enc_len;
1368
1369 if (node->first_doc_id == FTS_NULL_DOC_ID) {
1370 node->first_doc_id = doc_id;
1371 }
1372
1373 node->last_doc_id = doc_id;
1374 ++node->doc_count;
1375 }
1376
1377 /**********************************************************************//**
1378 Add document to the cache. */
1379 static
1380 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1381 fts_cache_add_doc(
1382 /*==============*/
1383 fts_cache_t* cache, /*!< in: cache */
1384 fts_index_cache_t*
1385 index_cache, /*!< in: index cache */
1386 doc_id_t doc_id, /*!< in: doc id to add */
1387 ib_rbt_t* tokens) /*!< in: document tokens */
1388 {
1389 const ib_rbt_node_t* node;
1390 ulint n_words;
1391 fts_doc_stats_t* doc_stats;
1392
1393 if (!tokens) {
1394 return;
1395 }
1396
1397 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1398
1399 n_words = rbt_size(tokens);
1400
1401 for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1402
1403 fts_tokenizer_word_t* word;
1404 fts_node_t* fts_node = NULL;
1405 fts_token_t* token = rbt_value(fts_token_t, node);
1406
1407 /* Find and/or add token to the cache. */
1408 word = fts_tokenizer_word_get(
1409 cache, index_cache, &token->text);
1410
1411 if (!word) {
1412 ut_free(rbt_remove_node(tokens, node));
1413 continue;
1414 }
1415
1416 if (ib_vector_size(word->nodes) > 0) {
1417 fts_node = static_cast<fts_node_t*>(
1418 ib_vector_last(word->nodes));
1419 }
1420
1421 if (fts_node == NULL || fts_node->synced
1422 || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1423 || doc_id < fts_node->last_doc_id) {
1424
1425 fts_node = static_cast<fts_node_t*>(
1426 ib_vector_push(word->nodes, NULL));
1427
1428 memset(fts_node, 0x0, sizeof(*fts_node));
1429
1430 cache->total_size += sizeof(*fts_node);
1431 }
1432
1433 fts_cache_node_add_positions(
1434 cache, fts_node, doc_id, token->positions);
1435
1436 ut_free(rbt_remove_node(tokens, node));
1437 }
1438
1439 ut_a(rbt_empty(tokens));
1440
1441 /* Add to doc ids processed so far. */
1442 doc_stats = static_cast<fts_doc_stats_t*>(
1443 ib_vector_push(index_cache->doc_stats, NULL));
1444
1445 doc_stats->doc_id = doc_id;
1446 doc_stats->word_count = n_words;
1447
1448 /* Add the doc stats memory usage too. */
1449 cache->total_size += sizeof(*doc_stats);
1450
1451 if (doc_id > cache->sync->max_doc_id) {
1452 cache->sync->max_doc_id = doc_id;
1453 }
1454 }
1455
1456 /****************************************************************//**
1457 Drops a table. If the table can't be found we return a SUCCESS code.
1458 @return DB_SUCCESS or error code */
1459 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1460 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1461 fts_drop_table(
1462 /*===========*/
1463 trx_t* trx, /*!< in: transaction */
1464 const char* table_name) /*!< in: table to drop */
1465 {
1466 dict_table_t* table;
1467 dberr_t error = DB_SUCCESS;
1468
1469 /* Check that the table exists in our data dictionary.
1470 Similar to regular drop table case, we will open table with
1471 DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1472 table = dict_table_open_on_name(
1473 table_name, TRUE, FALSE,
1474 static_cast<dict_err_ignore_t>(
1475 DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1476
1477 if (table != 0) {
1478
1479 dict_table_close(table, TRUE, FALSE);
1480
1481 /* Pass nonatomic=false (dont allow data dict unlock),
1482 because the transaction may hold locks on SYS_* tables from
1483 previous calls to fts_drop_table(). */
1484 error = row_drop_table_for_mysql(table_name, trx, true, false);
1485
1486 if (error != DB_SUCCESS) {
1487 ib::error() << "Unable to drop FTS index aux table "
1488 << table_name << ": " << ut_strerr(error);
1489 }
1490 } else {
1491 error = DB_FAIL;
1492 }
1493
1494 return(error);
1495 }
1496
1497 /****************************************************************//**
1498 Rename a single auxiliary table due to database name change.
1499 @return DB_SUCCESS or error code */
1500 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1501 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1502 fts_rename_one_aux_table(
1503 /*=====================*/
1504 const char* new_name, /*!< in: new parent tbl name */
1505 const char* fts_table_old_name, /*!< in: old aux tbl name */
1506 trx_t* trx) /*!< in: transaction */
1507 {
1508 char fts_table_new_name[MAX_TABLE_NAME_LEN];
1509 ulint new_db_name_len = dict_get_db_name_len(new_name);
1510 ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1511 ulint table_new_name_len = strlen(fts_table_old_name)
1512 + new_db_name_len - old_db_name_len;
1513
1514 /* Check if the new and old database names are the same, if so,
1515 nothing to do */
1516 ut_ad((new_db_name_len != old_db_name_len)
1517 || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1518
1519 /* Get the database name from "new_name", and table name
1520 from the fts_table_old_name */
1521 strncpy(fts_table_new_name, new_name, new_db_name_len);
1522 strncpy(fts_table_new_name + new_db_name_len,
1523 strchr(fts_table_old_name, '/'),
1524 table_new_name_len - new_db_name_len);
1525 fts_table_new_name[table_new_name_len] = 0;
1526
1527 return(row_rename_table_for_mysql(
1528 fts_table_old_name, fts_table_new_name, trx, false));
1529 }
1530
1531 /****************************************************************//**
1532 Rename auxiliary tables for all fts index for a table. This(rename)
1533 is due to database name change
1534 @return DB_SUCCESS or error code */
1535 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1536 fts_rename_aux_tables(
1537 /*==================*/
1538 dict_table_t* table, /*!< in: user Table */
1539 const char* new_name, /*!< in: new table name */
1540 trx_t* trx) /*!< in: transaction */
1541 {
1542 ulint i;
1543 fts_table_t fts_table;
1544
1545 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1546
1547 /* Rename common auxiliary tables */
1548 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1549 char old_table_name[MAX_FULL_NAME_LEN];
1550 dberr_t err = DB_SUCCESS;
1551
1552 fts_table.suffix = fts_common_tables[i];
1553
1554 fts_get_table_name(&fts_table, old_table_name);
1555
1556 err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1557
1558 if (err != DB_SUCCESS) {
1559 return(err);
1560 }
1561 }
1562
1563 fts_t* fts = table->fts;
1564
1565 /* Rename index specific auxiliary tables */
1566 for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1567 ++i) {
1568 dict_index_t* index;
1569
1570 index = static_cast<dict_index_t*>(
1571 ib_vector_getp(fts->indexes, i));
1572
1573 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1574
1575 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1576 dberr_t err;
1577 char old_table_name[MAX_FULL_NAME_LEN];
1578
1579 fts_table.suffix = fts_get_suffix(j);
1580
1581 fts_get_table_name(&fts_table, old_table_name);
1582
1583 err = fts_rename_one_aux_table(
1584 new_name, old_table_name, trx);
1585
1586 DBUG_EXECUTE_IF("fts_rename_failure",
1587 err = DB_DEADLOCK;
1588 fts_sql_rollback(trx););
1589
1590 if (err != DB_SUCCESS) {
1591 return(err);
1592 }
1593 }
1594 }
1595
1596 return(DB_SUCCESS);
1597 }
1598
1599 /****************************************************************//**
1600 Drops the common ancillary tables needed for supporting an FTS index
1601 on the given table. row_mysql_lock_data_dictionary must have been called
1602 before this.
1603 @return DB_SUCCESS or error code */
1604 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1605 dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table)1606 fts_drop_common_tables(
1607 /*===================*/
1608 trx_t* trx, /*!< in: transaction */
1609 fts_table_t* fts_table) /*!< in: table with an FTS
1610 index */
1611 {
1612 ulint i;
1613 dberr_t error = DB_SUCCESS;
1614
1615 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1616 dberr_t err;
1617 char table_name[MAX_FULL_NAME_LEN];
1618
1619 fts_table->suffix = fts_common_tables[i];
1620
1621 fts_get_table_name(fts_table, table_name);
1622
1623 err = fts_drop_table(trx, table_name);
1624
1625 /* We only return the status of the last error. */
1626 if (err != DB_SUCCESS && err != DB_FAIL) {
1627 error = err;
1628 }
1629 }
1630
1631 return(error);
1632 }
1633
1634 /****************************************************************//**
1635 Since we do a horizontal split on the index table, we need to drop
1636 all the split tables.
1637 @return DB_SUCCESS or error code */
1638 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1639 fts_drop_index_split_tables(
1640 /*========================*/
1641 trx_t* trx, /*!< in: transaction */
1642 dict_index_t* index) /*!< in: fts instance */
1643
1644 {
1645 ulint i;
1646 fts_table_t fts_table;
1647 dberr_t error = DB_SUCCESS;
1648
1649 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1650
1651 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1652 dberr_t err;
1653 char table_name[MAX_FULL_NAME_LEN];
1654
1655 fts_table.suffix = fts_get_suffix(i);
1656
1657 fts_get_table_name(&fts_table, table_name);
1658
1659 err = fts_drop_table(trx, table_name);
1660
1661 /* We only return the status of the last error. */
1662 if (err != DB_SUCCESS && err != DB_FAIL) {
1663 error = err;
1664 }
1665 }
1666
1667 return(error);
1668 }
1669
1670 /****************************************************************//**
1671 Drops FTS auxiliary tables for an FTS index
1672 @return DB_SUCCESS or error code */
1673 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1674 fts_drop_index_tables(
1675 /*==================*/
1676 trx_t* trx, /*!< in: transaction */
1677 dict_index_t* index) /*!< in: Index to drop */
1678 {
1679 dberr_t error = DB_SUCCESS;
1680
1681 #ifdef FTS_DOC_STATS_DEBUG
1682 fts_table_t fts_table;
1683 static const char* index_tables[] = {
1684 "DOC_ID",
1685 NULL
1686 };
1687 #endif /* FTS_DOC_STATS_DEBUG */
1688
1689 dberr_t err = fts_drop_index_split_tables(trx, index);
1690
1691 /* We only return the status of the last error. */
1692 if (err != DB_SUCCESS) {
1693 error = err;
1694 }
1695
1696 #ifdef FTS_DOC_STATS_DEBUG
1697 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1698
1699 for (ulint i = 0; index_tables[i] != NULL; ++i) {
1700 char table_name[MAX_FULL_NAME_LEN];
1701
1702 fts_table.suffix = index_tables[i];
1703
1704 fts_get_table_name(&fts_table, table_name);
1705
1706 err = fts_drop_table(trx, table_name);
1707
1708 /* We only return the status of the last error. */
1709 if (err != DB_SUCCESS && err != DB_FAIL) {
1710 error = err;
1711 }
1712 }
1713 #endif /* FTS_DOC_STATS_DEBUG */
1714
1715 return(error);
1716 }
1717
1718 /****************************************************************//**
1719 Drops FTS ancillary tables needed for supporting an FTS index
1720 on the given table. row_mysql_lock_data_dictionary must have been called
1721 before this.
1722 @return DB_SUCCESS or error code */
1723 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1724 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1725 fts_drop_all_index_tables(
1726 /*======================*/
1727 trx_t* trx, /*!< in: transaction */
1728 fts_t* fts) /*!< in: fts instance */
1729 {
1730 dberr_t error = DB_SUCCESS;
1731
1732 for (ulint i = 0;
1733 fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1734 ++i) {
1735
1736 dberr_t err;
1737 dict_index_t* index;
1738
1739 index = static_cast<dict_index_t*>(
1740 ib_vector_getp(fts->indexes, i));
1741
1742 err = fts_drop_index_tables(trx, index);
1743
1744 if (err != DB_SUCCESS) {
1745 error = err;
1746 }
1747 }
1748
1749 return(error);
1750 }
1751
1752 /*********************************************************************//**
1753 Drops the ancillary tables needed for supporting an FTS index on a
1754 given table. row_mysql_lock_data_dictionary must have been called before
1755 this.
1756 @return DB_SUCCESS or error code */
1757 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1758 fts_drop_tables(
1759 /*============*/
1760 trx_t* trx, /*!< in: transaction */
1761 dict_table_t* table) /*!< in: table has the FTS index */
1762 {
1763 dberr_t error;
1764 fts_table_t fts_table;
1765
1766 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1767
1768 /* TODO: This is not atomic and can cause problems during recovery. */
1769
1770 error = fts_drop_common_tables(trx, &fts_table);
1771
1772 if (error == DB_SUCCESS) {
1773 error = fts_drop_all_index_tables(trx, table->fts);
1774 }
1775
1776 return(error);
1777 }
1778
1779 /** Extract only the required flags from table->flags2 for FTS Aux
1780 tables.
1781 @param[in] in_flags2 Table flags2
1782 @return extracted flags2 for FTS aux tables */
1783 static inline
1784 ulint
fts_get_table_flags2_for_aux_tables(ulint flags2)1785 fts_get_table_flags2_for_aux_tables(
1786 ulint flags2)
1787 {
1788 /* Extract the file_per_table flag, temporary file flag and
1789 encryption flag from the main FTS table flags2 */
1790 return((flags2 & DICT_TF2_USE_FILE_PER_TABLE) |
1791 (flags2 & DICT_TF2_ENCRYPTION) |
1792 (flags2 & DICT_TF2_TEMPORARY));
1793 }
1794
1795 /** Create dict_table_t object for FTS Aux tables.
1796 @param[in] aux_table_name FTS Aux table name
1797 @param[in] table table object of FTS Index
1798 @param[in] n_cols number of columns for FTS Aux table
1799 @return table object for FTS Aux table */
1800 static
1801 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1802 fts_create_in_mem_aux_table(
1803 const char* aux_table_name,
1804 const dict_table_t* table,
1805 ulint n_cols)
1806 {
1807 dict_table_t* new_table = dict_mem_table_create(
1808 aux_table_name, table->space, n_cols, 0, table->flags,
1809 fts_get_table_flags2_for_aux_tables(table->flags2));
1810
1811 if (DICT_TF_HAS_SHARED_SPACE(table->flags)) {
1812 ut_ad(table->space == fil_space_get_id_by_name(
1813 table->tablespace()));
1814 new_table->tablespace = mem_heap_strdup(
1815 new_table->heap, table->tablespace);
1816 }
1817
1818 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1819 ut_ad(table->data_dir_path != NULL);
1820 new_table->data_dir_path = mem_heap_strdup(
1821 new_table->heap, table->data_dir_path);
1822 }
1823
1824 return(new_table);
1825 }
1826
1827 /** Function to create on FTS common table.
1828 @param[in,out] trx InnoDB transaction
1829 @param[in] table Table that has FTS Index
1830 @param[in] fts_table_name FTS AUX table name
1831 @param[in] fts_suffix FTS AUX table suffix
1832 @param[in] heap heap
1833 @return table object if created, else NULL */
1834 static
1835 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1836 fts_create_one_common_table(
1837 trx_t* trx,
1838 const dict_table_t* table,
1839 const char* fts_table_name,
1840 const char* fts_suffix,
1841 mem_heap_t* heap)
1842 {
1843 dict_table_t* new_table = NULL;
1844 dberr_t error;
1845 bool is_config = strcmp(fts_suffix, "CONFIG") == 0;
1846
1847 if (!is_config) {
1848
1849 new_table = fts_create_in_mem_aux_table(
1850 fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1851
1852 dict_mem_table_add_col(
1853 new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1854 FTS_DELETED_TABLE_COL_LEN);
1855 } else {
1856 /* Config table has different schema. */
1857 new_table = fts_create_in_mem_aux_table(
1858 fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1859
1860 dict_mem_table_add_col(
1861 new_table, heap, "key", DATA_VARCHAR, 0,
1862 FTS_CONFIG_TABLE_KEY_COL_LEN);
1863
1864 dict_mem_table_add_col(
1865 new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1866 FTS_CONFIG_TABLE_VALUE_COL_LEN);
1867 }
1868
1869 error = row_create_table_for_mysql(new_table, NULL, trx, false);
1870
1871 if (error == DB_SUCCESS) {
1872
1873 dict_index_t* index = dict_mem_index_create(
1874 fts_table_name, "FTS_COMMON_TABLE_IND",
1875 new_table->space, DICT_UNIQUE|DICT_CLUSTERED, 1);
1876
1877 if (!is_config) {
1878 dict_mem_index_add_field(index, "doc_id", 0);
1879 } else {
1880 dict_mem_index_add_field(index, "key", 0);
1881 }
1882
1883 /* We save and restore trx->dict_operation because
1884 row_create_index_for_mysql() changes the operation to
1885 TRX_DICT_OP_TABLE. */
1886 trx_dict_op_t op = trx_get_dict_operation(trx);
1887
1888 error = row_create_index_for_mysql(index, trx, NULL, NULL);
1889
1890 trx->dict_operation = op;
1891 }
1892
1893 if (error != DB_SUCCESS) {
1894 trx->error_state = error;
1895 dict_mem_table_free(new_table);
1896 new_table = NULL;
1897 ib::warn() << "Failed to create FTS common table "
1898 << fts_table_name;
1899 }
1900 return(new_table);
1901 }
1902
1903 /** Creates the common auxiliary tables needed for supporting an FTS index
1904 on the given table. row_mysql_lock_data_dictionary must have been called
1905 before this.
1906 The following tables are created.
1907 CREATE TABLE $FTS_PREFIX_DELETED
1908 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1909 CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1910 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1911 CREATE TABLE $FTS_PREFIX_BEING_DELETED
1912 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1913 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1914 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1915 CREATE TABLE $FTS_PREFIX_CONFIG
1916 (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1917 @param[in,out] trx transaction
1918 @param[in] table table with FTS index
1919 @param[in] name table name normalized
1920 @param[in] skip_doc_id_index Skip index on doc id
1921 @return DB_SUCCESS if succeed */
1922 dberr_t
fts_create_common_tables(trx_t * trx,const dict_table_t * table,const char * name,bool skip_doc_id_index)1923 fts_create_common_tables(
1924 trx_t* trx,
1925 const dict_table_t* table,
1926 const char* name,
1927 bool skip_doc_id_index)
1928 {
1929 dberr_t error;
1930 que_t* graph;
1931 fts_table_t fts_table;
1932 mem_heap_t* heap = mem_heap_create(1024);
1933 pars_info_t* info;
1934 char fts_name[MAX_FULL_NAME_LEN];
1935 char full_name[sizeof(fts_common_tables) / sizeof(char*)]
1936 [MAX_FULL_NAME_LEN];
1937
1938 dict_index_t* index = NULL;
1939 trx_dict_op_t op;
1940 /* common_tables vector is used for dropping FTS common tables
1941 on error condition. */
1942 std::vector<dict_table_t*> common_tables;
1943 std::vector<dict_table_t*>::const_iterator it;
1944
1945 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1946
1947 error = fts_drop_common_tables(trx, &fts_table);
1948
1949 if (error != DB_SUCCESS) {
1950
1951 goto func_exit;
1952 }
1953
1954 /* Create the FTS tables that are common to an FTS index. */
1955 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
1956
1957 fts_table.suffix = fts_common_tables[i];
1958 fts_get_table_name(&fts_table, full_name[i]);
1959 dict_table_t* common_table = fts_create_one_common_table(
1960 trx, table, full_name[i], fts_table.suffix, heap);
1961
1962 if (common_table == NULL) {
1963 error = DB_ERROR;
1964 goto func_exit;
1965 } else {
1966 common_tables.push_back(common_table);
1967 }
1968
1969 DBUG_EXECUTE_IF("ib_fts_aux_table_error",
1970 /* Return error after creating FTS_AUX_CONFIG table. */
1971 if (i == 4) {
1972 error = DB_ERROR;
1973 goto func_exit;
1974 }
1975 );
1976
1977 }
1978
1979 /* Write the default settings to the config table. */
1980 info = pars_info_create();
1981
1982 fts_table.suffix = "CONFIG";
1983 fts_get_table_name(&fts_table, fts_name);
1984 pars_info_bind_id(info, true, "config_table", fts_name);
1985
1986 graph = fts_parse_sql_no_dict_lock(
1987 &fts_table, info, fts_config_table_insert_values_sql);
1988
1989 error = fts_eval_sql(trx, graph);
1990
1991 que_graph_free(graph);
1992
1993 if (error != DB_SUCCESS || skip_doc_id_index) {
1994
1995 goto func_exit;
1996 }
1997
1998 index = dict_mem_index_create(
1999 name, FTS_DOC_ID_INDEX_NAME, table->space,
2000 DICT_UNIQUE, 1);
2001 dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
2002
2003 op = trx_get_dict_operation(trx);
2004
2005 error = row_create_index_for_mysql(index, trx, NULL, NULL);
2006
2007 trx->dict_operation = op;
2008
2009 func_exit:
2010 if (error != DB_SUCCESS) {
2011
2012 for (it = common_tables.begin(); it != common_tables.end();
2013 ++it) {
2014 row_drop_table_for_mysql(
2015 (*it)->name.m_name, trx, FALSE);
2016 }
2017 }
2018
2019 common_tables.clear();
2020 mem_heap_free(heap);
2021
2022 return(error);
2023 }
2024 /** Creates one FTS auxiliary index table for an FTS index.
2025 @param[in,out] trx transaction
2026 @param[in] index the index instance
2027 @param[in] fts_table fts_table structure
2028 @param[in] heap memory heap
2029 @return DB_SUCCESS or error code */
2030 static
2031 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)2032 fts_create_one_index_table(
2033 trx_t* trx,
2034 const dict_index_t* index,
2035 fts_table_t* fts_table,
2036 mem_heap_t* heap)
2037 {
2038 dict_field_t* field;
2039 dict_table_t* new_table = NULL;
2040 char table_name[MAX_FULL_NAME_LEN];
2041 dberr_t error;
2042 CHARSET_INFO* charset;
2043
2044 ut_ad(index->type & DICT_FTS);
2045
2046 fts_get_table_name(fts_table, table_name);
2047
2048 new_table = fts_create_in_mem_aux_table(
2049 table_name, fts_table->table,
2050 FTS_AUX_INDEX_TABLE_NUM_COLS);
2051
2052 field = dict_index_get_nth_field(index, 0);
2053 charset = fts_get_charset(field->col->prtype);
2054
2055 dict_mem_table_add_col(new_table, heap, "word",
2056 charset == &my_charset_latin1
2057 ? DATA_VARCHAR : DATA_VARMYSQL,
2058 field->col->prtype,
2059 FTS_INDEX_WORD_LEN);
2060
2061 dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
2062 DATA_NOT_NULL | DATA_UNSIGNED,
2063 FTS_INDEX_FIRST_DOC_ID_LEN);
2064
2065 dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2066 DATA_NOT_NULL | DATA_UNSIGNED,
2067 FTS_INDEX_LAST_DOC_ID_LEN);
2068
2069 dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2070 DATA_NOT_NULL | DATA_UNSIGNED,
2071 FTS_INDEX_DOC_COUNT_LEN);
2072
2073 /* The precise type calculation is as follows:
2074 least signficiant byte: MySQL type code (not applicable for sys cols)
2075 second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2076 third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2077
2078 dict_mem_table_add_col(
2079 new_table, heap, "ilist", DATA_BLOB,
2080 (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2081 FTS_INDEX_ILIST_LEN);
2082
2083 error = row_create_table_for_mysql(new_table, NULL, trx, false);
2084
2085 if (error == DB_SUCCESS) {
2086 dict_index_t* index = dict_mem_index_create(
2087 table_name, "FTS_INDEX_TABLE_IND", new_table->space,
2088 DICT_UNIQUE|DICT_CLUSTERED, 2);
2089 dict_mem_index_add_field(index, "word", 0);
2090 dict_mem_index_add_field(index, "first_doc_id", 0);
2091
2092 trx_dict_op_t op = trx_get_dict_operation(trx);
2093
2094 error = row_create_index_for_mysql(index, trx, NULL, NULL);
2095
2096 trx->dict_operation = op;
2097 }
2098
2099 if (error != DB_SUCCESS) {
2100 trx->error_state = error;
2101 dict_mem_table_free(new_table);
2102 new_table = NULL;
2103 ib::warn() << "Failed to create FTS index table "
2104 << table_name;
2105 }
2106
2107 return(new_table);
2108 }
2109
2110 /** Create auxiliary index tables for an FTS index.
2111 @param[in,out] trx transaction
2112 @param[in] index the index instance
2113 @param[in] table_name table name
2114 @param[in] table_id the table id
2115 @return DB_SUCCESS or error code */
2116 dberr_t
fts_create_index_tables_low(trx_t * trx,const dict_index_t * index,const char * table_name,table_id_t table_id)2117 fts_create_index_tables_low(
2118 trx_t* trx,
2119 const dict_index_t* index,
2120 const char* table_name,
2121 table_id_t table_id)
2122 {
2123 ulint i;
2124 fts_table_t fts_table;
2125 dberr_t error = DB_SUCCESS;
2126 mem_heap_t* heap = mem_heap_create(1024);
2127
2128 fts_table.type = FTS_INDEX_TABLE;
2129 fts_table.index_id = index->id;
2130 fts_table.table_id = table_id;
2131 fts_table.parent = table_name;
2132 fts_table.table = index->table;
2133
2134 #ifdef FTS_DOC_STATS_DEBUG
2135 /* Create the FTS auxiliary tables that are specific
2136 to an FTS index. */
2137 info = pars_info_create();
2138
2139 fts_table.suffix = "DOC_ID";
2140 fts_get_table_name(&fts_table, fts_name);
2141
2142 pars_info_bind_id(info, true, "doc_id_table", fts_name);
2143
2144 graph = fts_parse_sql_no_dict_lock(NULL, info,
2145 fts_create_index_tables_sql);
2146
2147 error = fts_eval_sql(trx, graph);
2148 que_graph_free(graph);
2149 #endif /* FTS_DOC_STATS_DEBUG */
2150
2151 /* aux_idx_tables vector is used for dropping FTS AUX INDEX
2152 tables on error condition. */
2153 std::vector<dict_table_t*> aux_idx_tables;
2154 std::vector<dict_table_t*>::const_iterator it;
2155
2156 for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2157 dict_table_t* new_table;
2158
2159 /* Create the FTS auxiliary tables that are specific
2160 to an FTS index. We need to preserve the table_id %s
2161 which fts_parse_sql_no_dict_lock() will fill in for us. */
2162 fts_table.suffix = fts_get_suffix(i);
2163
2164 new_table = fts_create_one_index_table(
2165 trx, index, &fts_table, heap);
2166
2167 if (new_table == NULL) {
2168 error = DB_FAIL;
2169 break;
2170 } else {
2171 aux_idx_tables.push_back(new_table);
2172 }
2173
2174 DBUG_EXECUTE_IF("ib_fts_index_table_error",
2175 /* Return error after creating FTS_INDEX_5
2176 aux table. */
2177 if (i == 4) {
2178 error = DB_FAIL;
2179 break;
2180 }
2181 );
2182 }
2183
2184 if (error != DB_SUCCESS) {
2185
2186 for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2187 ++it) {
2188 row_drop_table_for_mysql(
2189 (*it)->name.m_name, trx, FALSE);
2190 }
2191 }
2192
2193 aux_idx_tables.clear();
2194 mem_heap_free(heap);
2195
2196 return(error);
2197 }
2198
2199 /** Creates the column specific ancillary tables needed for supporting an
2200 FTS index on the given table. row_mysql_lock_data_dictionary must have
2201 been called before this.
2202
2203 All FTS AUX Index tables have the following schema.
2204 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2205 word VARCHAR(FTS_MAX_WORD_LEN),
2206 first_doc_id INT NOT NULL,
2207 last_doc_id UNSIGNED NOT NULL,
2208 doc_count UNSIGNED INT NOT NULL,
2209 ilist VARBINARY NOT NULL,
2210 UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2211 @param[in,out] trx transaction
2212 @param[in] index index instance
2213 @return DB_SUCCESS or error code */
2214 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index)2215 fts_create_index_tables(
2216 trx_t* trx,
2217 const dict_index_t* index)
2218 {
2219 dberr_t err;
2220 dict_table_t* table;
2221
2222 table = dict_table_get_low(index->table_name);
2223 ut_a(table != NULL);
2224
2225 err = fts_create_index_tables_low(
2226 trx, index, table->name.m_name, table->id);
2227
2228 if (err == DB_SUCCESS) {
2229 trx_commit(trx);
2230 }
2231
2232 return(err);
2233 }
#if 0
/******************************************************************//**
Return string representation of state. Currently compiled out.
@return string representation of state; "UNKNOWN" for any value that
is not one of the listed states */
static
const char*
fts_get_state_str(
/*==============*/
	fts_row_state	state)	/*!< in: state */
{
	const char*	str;

	switch (state) {
	case FTS_INSERT:
		str = "INSERT";
		break;

	case FTS_MODIFY:
		str = "MODIFY";
		break;

	case FTS_DELETE:
		str = "DELETE";
		break;

	case FTS_NOTHING:
		str = "NOTHING";
		break;

	case FTS_INVALID:
		str = "INVALID";
		break;

	default:
		str = "UNKNOWN";
		break;
	}

	return(str);
}
#endif
2265
2266 /******************************************************************//**
2267 Calculate the new state of a row given the existing state and a new event.
2268 @return new state of row */
2269 static
2270 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2271 fts_trx_row_get_new_state(
2272 /*======================*/
2273 fts_row_state old_state, /*!< in: existing state of row */
2274 fts_row_state event) /*!< in: new event */
2275 {
2276 /* The rules for transforming states:
2277
2278 I = inserted
2279 M = modified
2280 D = deleted
2281 N = nothing
2282
2283 M+D -> D:
2284
2285 If the row existed before the transaction started and it is modified
2286 during the transaction, followed by a deletion of the row, only the
2287 deletion will be signaled.
2288
2289 M+ -> M:
2290
2291 If the row existed before the transaction started and it is modified
2292 more than once during the transaction, only the last modification
2293 will be signaled.
2294
2295 IM*D -> N:
2296
2297 If a new row is added during the transaction (and possibly modified
2298 after its initial insertion) but it is deleted before the end of the
2299 transaction, nothing will be signaled.
2300
2301 IM* -> I:
2302
2303 If a new row is added during the transaction and modified after its
2304 initial insertion, only the addition will be signaled.
2305
2306 M*DI -> M:
2307
2308 If the row existed before the transaction started and it is deleted,
2309 then re-inserted, only a modification will be signaled. Note that
2310 this case is only possible if the table is using the row's primary
2311 key for FTS row ids, since those can be re-inserted by the user,
2312 which is not true for InnoDB generated row ids.
2313
2314 It is easily seen that the above rules decompose such that we do not
2315 need to store the row's entire history of events. Instead, we can
2316 store just one state for the row and update that when new events
2317 arrive. Then we can implement the above rules as a two-dimensional
2318 look-up table, and get checking of invalid combinations "for free"
2319 in the process. */
2320
2321 /* The lookup table for transforming states. old_state is the
2322 Y-axis, event is the X-axis. */
2323 static const fts_row_state table[4][4] = {
2324 /* I M D N */
2325 /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
2326 /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
2327 /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
2328 /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2329 };
2330
2331 fts_row_state result;
2332
2333 ut_a(old_state < FTS_INVALID);
2334 ut_a(event < FTS_INVALID);
2335
2336 result = table[(int) old_state][(int) event];
2337 ut_a(result != FTS_INVALID);
2338
2339 return(result);
2340 }
2341
2342 /******************************************************************//**
2343 Create a savepoint instance.
2344 @return savepoint instance */
2345 static
2346 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2347 fts_savepoint_create(
2348 /*=================*/
2349 ib_vector_t* savepoints, /*!< out: InnoDB transaction */
2350 const char* name, /*!< in: savepoint name */
2351 mem_heap_t* heap) /*!< in: heap */
2352 {
2353 fts_savepoint_t* savepoint;
2354
2355 savepoint = static_cast<fts_savepoint_t*>(
2356 ib_vector_push(savepoints, NULL));
2357
2358 memset(savepoint, 0x0, sizeof(*savepoint));
2359
2360 if (name) {
2361 savepoint->name = mem_heap_strdup(heap, name);
2362 }
2363
2364 savepoint->tables = rbt_create(
2365 sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2366
2367 return(savepoint);
2368 }
2369
2370 /******************************************************************//**
2371 Create an FTS trx.
2372 @return FTS trx */
2373 static
2374 fts_trx_t*
fts_trx_create(trx_t * trx)2375 fts_trx_create(
2376 /*===========*/
2377 trx_t* trx) /*!< in/out: InnoDB
2378 transaction */
2379 {
2380 fts_trx_t* ftt;
2381 ib_alloc_t* heap_alloc;
2382 mem_heap_t* heap = mem_heap_create(1024);
2383 trx_named_savept_t* savep;
2384
2385 ut_a(trx->fts_trx == NULL);
2386
2387 ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2388 ftt->trx = trx;
2389 ftt->heap = heap;
2390
2391 heap_alloc = ib_heap_allocator_create(heap);
2392
2393 ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2394 heap_alloc, sizeof(fts_savepoint_t), 4));
2395
2396 ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2397 heap_alloc, sizeof(fts_savepoint_t), 4));
2398
2399 /* Default instance has no name and no heap. */
2400 fts_savepoint_create(ftt->savepoints, NULL, NULL);
2401 fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2402
2403 /* Copy savepoints that already set before. */
2404 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2405 savep != NULL;
2406 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2407
2408 fts_savepoint_take(trx, ftt, savep->name);
2409 }
2410
2411 return(ftt);
2412 }
2413
2414 /******************************************************************//**
2415 Create an FTS trx table.
2416 @return FTS trx table */
2417 static
2418 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2419 fts_trx_table_create(
2420 /*=================*/
2421 fts_trx_t* fts_trx, /*!< in: FTS trx */
2422 dict_table_t* table) /*!< in: table */
2423 {
2424 fts_trx_table_t* ftt;
2425
2426 ftt = static_cast<fts_trx_table_t*>(
2427 mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2428
2429 if (ftt != NULL) {
2430 memset(ftt, 0x0, sizeof(*ftt));
2431 }
2432
2433 ftt->table = table;
2434 ftt->fts_trx = fts_trx;
2435
2436 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2437
2438 return(ftt);
2439 }
2440
2441 /******************************************************************//**
2442 Clone an FTS trx table.
2443 @return FTS trx table */
2444 static
2445 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2446 fts_trx_table_clone(
2447 /*=================*/
2448 const fts_trx_table_t* ftt_src) /*!< in: FTS trx */
2449 {
2450 fts_trx_table_t* ftt;
2451
2452 ftt = static_cast<fts_trx_table_t*>(
2453 mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2454
2455 memset(ftt, 0x0, sizeof(*ftt));
2456
2457 ftt->table = ftt_src->table;
2458 ftt->fts_trx = ftt_src->fts_trx;
2459
2460 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2461
2462 /* Copy the rb tree values to the new savepoint. */
2463 rbt_merge_uniq(ftt->rows, ftt_src->rows);
2464
2465 /* These are only added on commit. At this stage we only have
2466 the updated row state. */
2467 ut_a(ftt_src->added_doc_ids == NULL);
2468
2469 return(ftt);
2470 }
2471
2472 /******************************************************************//**
2473 Initialize the FTS trx instance.
2474 @return FTS trx instance */
2475 static
2476 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2477 fts_trx_init(
2478 /*=========*/
2479 trx_t* trx, /*!< in: transaction */
2480 dict_table_t* table, /*!< in: FTS table instance */
2481 ib_vector_t* savepoints) /*!< in: Savepoints */
2482 {
2483 fts_trx_table_t* ftt;
2484 ib_rbt_bound_t parent;
2485 ib_rbt_t* tables;
2486 fts_savepoint_t* savepoint;
2487
2488 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2489
2490 tables = savepoint->tables;
2491 rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2492
2493 if (parent.result == 0) {
2494 fts_trx_table_t** fttp;
2495
2496 fttp = rbt_value(fts_trx_table_t*, parent.last);
2497 ftt = *fttp;
2498 } else {
2499 ftt = fts_trx_table_create(trx->fts_trx, table);
2500 rbt_add_node(tables, &parent, &ftt);
2501 }
2502
2503 ut_a(ftt->table == table);
2504
2505 return(ftt);
2506 }
2507
2508 /******************************************************************//**
2509 Notify the FTS system about an operation on an FTS-indexed table. */
2510 static
2511 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2512 fts_trx_table_add_op(
2513 /*=================*/
2514 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2515 doc_id_t doc_id, /*!< in: doc id */
2516 fts_row_state state, /*!< in: state of the row */
2517 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
2518 {
2519 ib_rbt_t* rows;
2520 ib_rbt_bound_t parent;
2521
2522 rows = ftt->rows;
2523 rbt_search(rows, &parent, &doc_id);
2524
2525 /* Row id found, update state, and if new state is FTS_NOTHING,
2526 we delete the row from our tree. */
2527 if (parent.result == 0) {
2528 fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
2529
2530 row->state = fts_trx_row_get_new_state(row->state, state);
2531
2532 if (row->state == FTS_NOTHING) {
2533 if (row->fts_indexes) {
2534 ib_vector_free(row->fts_indexes);
2535 }
2536
2537 ut_free(rbt_remove_node(rows, parent.last));
2538 row = NULL;
2539 } else if (row->fts_indexes != NULL) {
2540 ib_vector_free(row->fts_indexes);
2541 row->fts_indexes = fts_indexes;
2542 }
2543
2544 } else { /* Row-id not found, create a new one. */
2545 fts_trx_row_t row;
2546
2547 row.doc_id = doc_id;
2548 row.state = state;
2549 row.fts_indexes = fts_indexes;
2550
2551 rbt_add_node(rows, &parent, &row);
2552 }
2553 }
2554
2555 /******************************************************************//**
2556 Notify the FTS system about an operation on an FTS-indexed table. */
2557 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2558 fts_trx_add_op(
2559 /*===========*/
2560 trx_t* trx, /*!< in: InnoDB transaction */
2561 dict_table_t* table, /*!< in: table */
2562 doc_id_t doc_id, /*!< in: new doc id */
2563 fts_row_state state, /*!< in: state of the row */
2564 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
2565 (NULL=all) */
2566 {
2567 fts_trx_table_t* tran_ftt;
2568 fts_trx_table_t* stmt_ftt;
2569
2570 if (!trx->fts_trx) {
2571 trx->fts_trx = fts_trx_create(trx);
2572 }
2573
2574 tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2575 stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2576
2577 fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2578 fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2579 }
2580
2581 /******************************************************************//**
2582 Fetch callback that converts a textual document id to a binary value and
2583 stores it in the given place.
2584 @return always returns NULL */
2585 static
2586 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2587 fts_fetch_store_doc_id(
2588 /*===================*/
2589 void* row, /*!< in: sel_node_t* */
2590 void* user_arg) /*!< in: doc_id_t* to store
2591 doc_id in */
2592 {
2593 int n_parsed;
2594 sel_node_t* node = static_cast<sel_node_t*>(row);
2595 doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
2596 dfield_t* dfield = que_node_get_val(node->select_list);
2597 dtype_t* type = dfield_get_type(dfield);
2598 ulint len = dfield_get_len(dfield);
2599
2600 char buf[32];
2601
2602 ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2603 ut_a(len > 0 && len < sizeof(buf));
2604
2605 memcpy(buf, dfield_get_data(dfield), len);
2606 buf[len] = '\0';
2607
2608 n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2609 ut_a(n_parsed == 1);
2610
2611 return(FALSE);
2612 }
2613
#ifdef FTS_CACHE_SIZE_DEBUG
/******************************************************************//**
Get the max cache size in bytes. If there is an error reading the
value we simply print an error message here and return the default
value to the caller. A value outside the range
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB]
is clamped to the nearest limit and a warning is logged.
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
/*===================*/
	trx_t*		trx,		/*!< in: transaction */
	fts_table_t*	fts_table)	/*!< in: table instance */
{
	dberr_t		error;
	fts_string_t	value;
	ulint		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	/* The allocation result needs an explicit cast to the buffer
	type, as done in fts_get_total_word_count(). One extra byte is
	reserved for the terminator written below. */
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (error == DB_SUCCESS) {

		/* Terminate the buffer before parsing it as a number. */
		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if (cache_size_in_mb
			   < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			/* Same wording as the upper-limit warning,
			including the space before the maximum value. */
			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << ut_strerr(error) << ") reading max"
			" cache config value from config table";
	}

	ut_free(value.f_str);

	/* Convert megabytes to bytes. */
	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2683
#ifdef FTS_DOC_STATS_DEBUG
/*********************************************************************//**
Get the total number of words in the FTS for a particular FTS index.
The output is reset to 0 first and is only overwritten when the config
value was read successfully; a read failure is logged.
@return DB_SUCCESS if all OK else error code */
dberr_t
fts_get_total_word_count(
/*=====================*/
	trx_t*		trx,		/*!< in: transaction */
	dict_index_t*	index,		/*!< in: for this index */
	ulint*		total)		/* out: total words */
{
	fts_string_t	buf;

	*total = 0;

	/* The reading callback uses f_len as the capacity of the
	buffer; one more byte is allocated for the terminator. */
	buf.f_n_char = 0;
	buf.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	buf.f_str = static_cast<byte*>(ut_malloc_nokey(buf.f_len + 1));

	const dberr_t	err = fts_config_get_index_value(
		trx, index, FTS_TOTAL_WORD_COUNT, &buf);

	if (err != DB_SUCCESS) {
		ib::error() << "(" << ut_strerr(err) << ") reading total"
			" words value from config table";
	} else {
		/* Terminate the buffer, then parse it as a decimal. */
		buf.f_str[buf.f_len] = 0;
		*total = strtoul((char*) buf.f_str, NULL, 10);
	}

	ut_free(buf.f_str);

	return(err);
}
#endif /* FTS_DOC_STATS_DEBUG */
2723
2724 /*********************************************************************//**
2725 Update the next and last Doc ID in the CONFIG table to be the input
2726 "doc_id" value (+ 1). We would do so after each FTS index build or
2727 table truncate */
2728 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2729 fts_update_next_doc_id(
2730 /*===================*/
2731 trx_t* trx, /*!< in/out: transaction */
2732 const dict_table_t* table, /*!< in: table */
2733 const char* table_name, /*!< in: table name, or NULL */
2734 doc_id_t doc_id) /*!< in: DOC ID to set */
2735 {
2736 table->fts->cache->synced_doc_id = doc_id;
2737 table->fts->cache->next_doc_id = doc_id + 1;
2738
2739 table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2740
2741 fts_update_sync_doc_id(
2742 table, table_name, table->fts->cache->synced_doc_id, trx);
2743
2744 }
2745
2746 /*********************************************************************//**
2747 Get the next available document id.
2748 @return DB_SUCCESS if OK */
2749 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2750 fts_get_next_doc_id(
2751 /*================*/
2752 const dict_table_t* table, /*!< in: table */
2753 doc_id_t* doc_id) /*!< out: new document id */
2754 {
2755 fts_cache_t* cache = table->fts->cache;
2756
2757 /* If the Doc ID system has not yet been initialized, we
2758 will consult the CONFIG table and user table to re-establish
2759 the initial value of the Doc ID */
2760 if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2761 fts_init_doc_id(table);
2762 }
2763
2764 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2765 *doc_id = FTS_NULL_DOC_ID;
2766 return(DB_SUCCESS);
2767 }
2768
2769 mutex_enter(&cache->doc_id_lock);
2770 *doc_id = ++cache->next_doc_id;
2771 mutex_exit(&cache->doc_id_lock);
2772
2773 return(DB_SUCCESS);
2774 }
2775
2776 /*********************************************************************//**
2777 This function fetch the Doc ID from CONFIG table, and compare with
2778 the Doc ID supplied. And store the larger one to the CONFIG table.
2779 @return DB_SUCCESS if OK */
2780 static MY_ATTRIBUTE((nonnull))
2781 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2782 fts_cmp_set_sync_doc_id(
2783 /*====================*/
2784 const dict_table_t* table, /*!< in: table */
2785 doc_id_t doc_id_cmp, /*!< in: Doc ID to compare */
2786 ibool read_only, /*!< in: TRUE if read the
2787 synced_doc_id only */
2788 doc_id_t* doc_id) /*!< out: larger document id
2789 after comparing "doc_id_cmp"
2790 to the one stored in CONFIG
2791 table */
2792 {
2793 trx_t* trx;
2794 pars_info_t* info;
2795 dberr_t error;
2796 fts_table_t fts_table;
2797 que_t* graph = NULL;
2798 fts_cache_t* cache = table->fts->cache;
2799 char table_name[MAX_FULL_NAME_LEN];
2800 retry:
2801 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2802
2803 fts_table.suffix = "CONFIG";
2804 fts_table.table_id = table->id;
2805 fts_table.type = FTS_COMMON_TABLE;
2806 fts_table.table = table;
2807
2808 fts_table.parent = table->name.m_name;
2809
2810 trx = trx_allocate_for_background();
2811
2812 trx->op_info = "update the next FTS document id";
2813
2814 info = pars_info_create();
2815
2816 pars_info_bind_function(
2817 info, "my_func", fts_fetch_store_doc_id, doc_id);
2818
2819 fts_get_table_name(&fts_table, table_name);
2820 pars_info_bind_id(info, true, "config_table", table_name);
2821
2822 graph = fts_parse_sql(
2823 &fts_table, info,
2824 "DECLARE FUNCTION my_func;\n"
2825 "DECLARE CURSOR c IS SELECT value FROM $config_table"
2826 " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2827 "BEGIN\n"
2828 ""
2829 "OPEN c;\n"
2830 "WHILE 1 = 1 LOOP\n"
2831 " FETCH c INTO my_func();\n"
2832 " IF c % NOTFOUND THEN\n"
2833 " EXIT;\n"
2834 " END IF;\n"
2835 "END LOOP;\n"
2836 "CLOSE c;");
2837
2838 *doc_id = 0;
2839
2840 error = fts_eval_sql(trx, graph);
2841
2842 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2843
2844 // FIXME: We need to retry deadlock errors
2845 if (error != DB_SUCCESS) {
2846 goto func_exit;
2847 }
2848
2849 if (read_only) {
2850 goto func_exit;
2851 }
2852
2853 if (doc_id_cmp == 0 && *doc_id) {
2854 cache->synced_doc_id = *doc_id - 1;
2855 } else {
2856 cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2857 }
2858
2859 mutex_enter(&cache->doc_id_lock);
2860 /* For each sync operation, we will add next_doc_id by 1,
2861 so to mark a sync operation */
2862 if (cache->next_doc_id < cache->synced_doc_id + 1) {
2863 cache->next_doc_id = cache->synced_doc_id + 1;
2864 }
2865 mutex_exit(&cache->doc_id_lock);
2866
2867 if (doc_id_cmp > *doc_id) {
2868 error = fts_update_sync_doc_id(
2869 table, table->name.m_name, cache->synced_doc_id, trx);
2870 }
2871
2872 *doc_id = cache->next_doc_id;
2873
2874 func_exit:
2875
2876 if (error == DB_SUCCESS) {
2877 fts_sql_commit(trx);
2878 } else {
2879 *doc_id = 0;
2880
2881 ib::error() << "(" << ut_strerr(error) << ") while getting"
2882 " next doc id.";
2883 fts_sql_rollback(trx);
2884
2885 if (error == DB_DEADLOCK) {
2886 os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2887 goto retry;
2888 }
2889 }
2890
2891 trx_free_for_background(trx);
2892
2893 return(error);
2894 }
2895
2896 /*********************************************************************//**
2897 Update the last document id. This function could create a new
2898 transaction to update the last document id.
2899 @return DB_SUCCESS if OK */
2900 static
2901 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2902 fts_update_sync_doc_id(
2903 /*===================*/
2904 const dict_table_t* table, /*!< in: table */
2905 const char* table_name, /*!< in: table name, or NULL */
2906 doc_id_t doc_id, /*!< in: last document id */
2907 trx_t* trx) /*!< in: update trx, or NULL */
2908 {
2909 byte id[FTS_MAX_ID_LEN];
2910 pars_info_t* info;
2911 fts_table_t fts_table;
2912 ulint id_len;
2913 que_t* graph = NULL;
2914 dberr_t error;
2915 ibool local_trx = FALSE;
2916 fts_cache_t* cache = table->fts->cache;
2917 char fts_name[MAX_FULL_NAME_LEN];
2918
2919 fts_table.suffix = "CONFIG";
2920 fts_table.table_id = table->id;
2921 fts_table.type = FTS_COMMON_TABLE;
2922 fts_table.table = table;
2923 if (table_name) {
2924 fts_table.parent = table_name;
2925 } else {
2926 fts_table.parent = table->name.m_name;
2927 }
2928
2929 if (!trx) {
2930 trx = trx_allocate_for_background();
2931
2932 trx->op_info = "setting last FTS document id";
2933 local_trx = TRUE;
2934 }
2935
2936 info = pars_info_create();
2937
2938 id_len = ut_snprintf(
2939 (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2940
2941 pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2942
2943 fts_get_table_name(&fts_table, fts_name);
2944 pars_info_bind_id(info, true, "table_name", fts_name);
2945
2946 graph = fts_parse_sql(
2947 &fts_table, info,
2948 "BEGIN"
2949 " UPDATE $table_name SET value = :doc_id"
2950 " WHERE key = 'synced_doc_id';");
2951
2952 error = fts_eval_sql(trx, graph);
2953
2954 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2955
2956 if (local_trx) {
2957 if (error == DB_SUCCESS) {
2958 fts_sql_commit(trx);
2959 cache->synced_doc_id = doc_id;
2960 } else {
2961
2962 ib::error() << "(" << ut_strerr(error) << ") while"
2963 " updating last doc id.";
2964
2965 fts_sql_rollback(trx);
2966 }
2967 trx_free_for_background(trx);
2968 }
2969
2970 return(error);
2971 }
2972
2973 /*********************************************************************//**
2974 Create a new fts_doc_ids_t.
2975 @return new fts_doc_ids_t */
2976 fts_doc_ids_t*
fts_doc_ids_create(void)2977 fts_doc_ids_create(void)
2978 /*====================*/
2979 {
2980 fts_doc_ids_t* fts_doc_ids;
2981 mem_heap_t* heap = mem_heap_create(512);
2982
2983 fts_doc_ids = static_cast<fts_doc_ids_t*>(
2984 mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2985
2986 fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2987
2988 fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2989 fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2990
2991 return(fts_doc_ids);
2992 }
2993
2994 /*********************************************************************//**
2995 Free a fts_doc_ids_t. */
2996 void
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)2997 fts_doc_ids_free(
2998 /*=============*/
2999 fts_doc_ids_t* fts_doc_ids)
3000 {
3001 mem_heap_t* heap = static_cast<mem_heap_t*>(
3002 fts_doc_ids->self_heap->arg);
3003
3004 memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
3005
3006 mem_heap_free(heap);
3007 }
3008
3009 /*********************************************************************//**
3010 Do commit-phase steps necessary for the insertion of a new row.
3011 @return DB_SUCCESS or error code */
3012 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)3013 fts_add(
3014 /*====*/
3015 fts_trx_table_t*ftt, /*!< in: FTS trx table */
3016 fts_trx_row_t* row) /*!< in: row */
3017 {
3018 dict_table_t* table = ftt->table;
3019 doc_id_t doc_id = row->doc_id;
3020
3021 ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
3022
3023 fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
3024
3025 mutex_enter(&table->fts->cache->deleted_lock);
3026 ++table->fts->cache->added;
3027 mutex_exit(&table->fts->cache->deleted_lock);
3028
3029 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
3030 && doc_id >= table->fts->cache->next_doc_id) {
3031 table->fts->cache->next_doc_id = doc_id + 1;
3032 }
3033 }
3034
3035 /*********************************************************************//**
3036 Do commit-phase steps necessary for the deletion of a row.
3037 @return DB_SUCCESS or error code */
3038 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3039 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)3040 fts_delete(
3041 /*=======*/
3042 fts_trx_table_t*ftt, /*!< in: FTS trx table */
3043 fts_trx_row_t* row) /*!< in: row */
3044 {
3045 que_t* graph;
3046 fts_table_t fts_table;
3047 dberr_t error = DB_SUCCESS;
3048 doc_id_t write_doc_id;
3049 dict_table_t* table = ftt->table;
3050 doc_id_t doc_id = row->doc_id;
3051 trx_t* trx = ftt->fts_trx->trx;
3052 pars_info_t* info = pars_info_create();
3053 fts_cache_t* cache = table->fts->cache;
3054
3055 /* we do not index Documents whose Doc ID value is 0 */
3056 if (doc_id == FTS_NULL_DOC_ID) {
3057 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
3058 return(error);
3059 }
3060
3061 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3062
3063 FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
3064
3065 /* Convert to "storage" byte order. */
3066 fts_write_doc_id((byte*) &write_doc_id, doc_id);
3067 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3068
3069 /* It is possible we update a record that has not yet been sync-ed
3070 into cache from last crash (delete Doc will not initialize the
3071 sync). Avoid any added counter accounting until the FTS cache
3072 is re-established and sync-ed */
3073 if (table->fts->fts_status & ADDED_TABLE_SYNCED
3074 && doc_id > cache->synced_doc_id) {
3075 mutex_enter(&table->fts->cache->deleted_lock);
3076
3077 /* The Doc ID could belong to those left in
3078 ADDED table from last crash. So need to check
3079 if it is less than first_doc_id when we initialize
3080 the Doc ID system after reboot */
3081 if (doc_id >= table->fts->cache->first_doc_id
3082 && table->fts->cache->added > 0) {
3083 --table->fts->cache->added;
3084 }
3085
3086 mutex_exit(&table->fts->cache->deleted_lock);
3087
3088 /* Only if the row was really deleted. */
3089 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3090 }
3091
3092 /* Note the deleted document for OPTIMIZE to purge. */
3093 if (error == DB_SUCCESS) {
3094 char table_name[MAX_FULL_NAME_LEN];
3095
3096 trx->op_info = "adding doc id to FTS DELETED";
3097
3098 info->graph_owns_us = TRUE;
3099
3100 fts_table.suffix = "DELETED";
3101
3102 fts_get_table_name(&fts_table, table_name);
3103 pars_info_bind_id(info, true, "deleted", table_name);
3104
3105 graph = fts_parse_sql(
3106 &fts_table,
3107 info,
3108 "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
3109
3110 error = fts_eval_sql(trx, graph);
3111
3112 fts_que_graph_free(graph);
3113 } else {
3114 pars_info_free(info);
3115 }
3116
3117 /* Increment the total deleted count, this is used to calculate the
3118 number of documents indexed. */
3119 if (error == DB_SUCCESS) {
3120 mutex_enter(&table->fts->cache->deleted_lock);
3121
3122 ++table->fts->cache->deleted;
3123
3124 mutex_exit(&table->fts->cache->deleted_lock);
3125 }
3126
3127 return(error);
3128 }
3129
3130 /*********************************************************************//**
3131 Do commit-phase steps necessary for the modification of a row.
3132 @return DB_SUCCESS or error code */
3133 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3134 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3135 fts_modify(
3136 /*=======*/
3137 fts_trx_table_t* ftt, /*!< in: FTS trx table */
3138 fts_trx_row_t* row) /*!< in: row */
3139 {
3140 dberr_t error;
3141
3142 ut_a(row->state == FTS_MODIFY);
3143
3144 error = fts_delete(ftt, row);
3145
3146 if (error == DB_SUCCESS) {
3147 fts_add(ftt, row);
3148 }
3149
3150 return(error);
3151 }
3152
3153 /*********************************************************************//**
3154 Create a new document id.
3155 @return DB_SUCCESS if all went well else error */
3156 dberr_t
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3157 fts_create_doc_id(
3158 /*==============*/
3159 dict_table_t* table, /*!< in: row is of this table. */
3160 dtuple_t* row, /* in/out: add doc id value to this
3161 row. This is the current row that is
3162 being inserted. */
3163 mem_heap_t* heap) /*!< in: heap */
3164 {
3165 doc_id_t doc_id;
3166 dberr_t error = DB_SUCCESS;
3167
3168 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3169
3170 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3171 if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3172 error = fts_get_next_doc_id(table, &doc_id);
3173 }
3174 return(error);
3175 }
3176
3177 error = fts_get_next_doc_id(table, &doc_id);
3178
3179 if (error == DB_SUCCESS) {
3180 dfield_t* dfield;
3181 doc_id_t* write_doc_id;
3182
3183 ut_a(doc_id > 0);
3184
3185 dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3186 write_doc_id = static_cast<doc_id_t*>(
3187 mem_heap_alloc(heap, sizeof(*write_doc_id)));
3188
3189 ut_a(doc_id != FTS_NULL_DOC_ID);
3190 ut_a(sizeof(doc_id) == dfield->type.len);
3191 fts_write_doc_id((byte*) write_doc_id, doc_id);
3192
3193 dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3194 }
3195
3196 return(error);
3197 }
3198
3199 /*********************************************************************//**
3200 The given transaction is about to be committed; do whatever is necessary
3201 from the FTS system's POV.
3202 @return DB_SUCCESS or error code */
3203 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3204 dberr_t
fts_commit_table(fts_trx_table_t * ftt)3205 fts_commit_table(
3206 /*=============*/
3207 fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
3208 {
3209 const ib_rbt_node_t* node;
3210 ib_rbt_t* rows;
3211 dberr_t error = DB_SUCCESS;
3212 fts_cache_t* cache = ftt->table->fts->cache;
3213 trx_t* trx = trx_allocate_for_background();
3214
3215 rows = ftt->rows;
3216
3217 ftt->fts_trx->trx = trx;
3218
3219 if (cache->get_docs == NULL) {
3220 rw_lock_x_lock(&cache->init_lock);
3221 if (cache->get_docs == NULL) {
3222 cache->get_docs = fts_get_docs_create(cache);
3223 }
3224 rw_lock_x_unlock(&cache->init_lock);
3225 }
3226
3227 for (node = rbt_first(rows);
3228 node != NULL && error == DB_SUCCESS;
3229 node = rbt_next(rows, node)) {
3230
3231 fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
3232
3233 switch (row->state) {
3234 case FTS_INSERT:
3235 fts_add(ftt, row);
3236 break;
3237
3238 case FTS_MODIFY:
3239 error = fts_modify(ftt, row);
3240 break;
3241
3242 case FTS_DELETE:
3243 error = fts_delete(ftt, row);
3244 break;
3245
3246 default:
3247 ut_error;
3248 }
3249 }
3250
3251 fts_sql_commit(trx);
3252
3253 trx_free_for_background(trx);
3254
3255 return(error);
3256 }
3257
3258 /*********************************************************************//**
3259 The given transaction is about to be committed; do whatever is necessary
3260 from the FTS system's POV.
3261 @return DB_SUCCESS or error code */
3262 dberr_t
fts_commit(trx_t * trx)3263 fts_commit(
3264 /*=======*/
3265 trx_t* trx) /*!< in: transaction */
3266 {
3267 const ib_rbt_node_t* node;
3268 dberr_t error;
3269 ib_rbt_t* tables;
3270 fts_savepoint_t* savepoint;
3271
3272 savepoint = static_cast<fts_savepoint_t*>(
3273 ib_vector_last(trx->fts_trx->savepoints));
3274 tables = savepoint->tables;
3275
3276 for (node = rbt_first(tables), error = DB_SUCCESS;
3277 node != NULL && error == DB_SUCCESS;
3278 node = rbt_next(tables, node)) {
3279
3280 fts_trx_table_t** ftt;
3281
3282 ftt = rbt_value(fts_trx_table_t*, node);
3283
3284 error = fts_commit_table(*ftt);
3285 }
3286
3287 return(error);
3288 }
3289
3290 /*********************************************************************//**
3291 Initialize a document. */
3292 void
fts_doc_init(fts_doc_t * doc)3293 fts_doc_init(
3294 /*=========*/
3295 fts_doc_t* doc) /*!< in: doc to initialize */
3296 {
3297 mem_heap_t* heap = mem_heap_create(32);
3298
3299 memset(doc, 0, sizeof(*doc));
3300
3301 doc->self_heap = ib_heap_allocator_create(heap);
3302 }
3303
3304 /*********************************************************************//**
3305 Free document. */
3306 void
fts_doc_free(fts_doc_t * doc)3307 fts_doc_free(
3308 /*=========*/
3309 fts_doc_t* doc) /*!< in: document */
3310 {
3311 mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3312
3313 if (doc->tokens) {
3314 rbt_free(doc->tokens);
3315 }
3316
3317 ut_d(memset(doc, 0, sizeof(*doc)));
3318
3319 mem_heap_free(heap);
3320 }
3321
3322 /*********************************************************************//**
3323 Callback function for fetch that stores a row id to the location pointed.
3324 The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8.
3325 @return always returns NULL */
3326 void*
fts_fetch_row_id(void * row,void * user_arg)3327 fts_fetch_row_id(
3328 /*=============*/
3329 void* row, /*!< in: sel_node_t* */
3330 void* user_arg) /*!< in: data pointer */
3331 {
3332 sel_node_t* node = static_cast<sel_node_t*>(row);
3333
3334 dfield_t* dfield = que_node_get_val(node->select_list);
3335 dtype_t* type = dfield_get_type(dfield);
3336 ulint len = dfield_get_len(dfield);
3337
3338 ut_a(dtype_get_mtype(type) == DATA_FIXBINARY);
3339 ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE);
3340 ut_a(len == 8);
3341
3342 memcpy(user_arg, dfield_get_data(dfield), 8);
3343
3344 return(NULL);
3345 }
3346
3347 /*********************************************************************//**
3348 Callback function for fetch that stores the text of an FTS document,
3349 converting each column to UTF-16.
3350 @return always FALSE */
3351 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3352 fts_query_expansion_fetch_doc(
3353 /*==========================*/
3354 void* row, /*!< in: sel_node_t* */
3355 void* user_arg) /*!< in: fts_doc_t* */
3356 {
3357 que_node_t* exp;
3358 sel_node_t* node = static_cast<sel_node_t*>(row);
3359 fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
3360 dfield_t* dfield;
3361 ulint len;
3362 ulint doc_len;
3363 fts_doc_t doc;
3364 CHARSET_INFO* doc_charset = NULL;
3365 ulint field_no = 0;
3366
3367 len = 0;
3368
3369 fts_doc_init(&doc);
3370 doc.found = TRUE;
3371
3372 exp = node->select_list;
3373 doc_len = 0;
3374
3375 doc_charset = result_doc->charset;
3376
3377 /* Copy each indexed column content into doc->text.f_str */
3378 while (exp) {
3379 dfield = que_node_get_val(exp);
3380 len = dfield_get_len(dfield);
3381
3382 /* NULL column */
3383 if (len == UNIV_SQL_NULL) {
3384 exp = que_node_get_next(exp);
3385 continue;
3386 }
3387
3388 if (!doc_charset) {
3389 doc_charset = fts_get_charset(dfield->type.prtype);
3390 }
3391
3392 doc.charset = doc_charset;
3393 doc.is_ngram = result_doc->is_ngram;
3394
3395 if (dfield_is_ext(dfield)) {
3396 /* We ignore columns that are stored externally, this
3397 could result in too many words to search */
3398 exp = que_node_get_next(exp);
3399 continue;
3400 } else {
3401 doc.text.f_n_char = 0;
3402
3403 doc.text.f_str = static_cast<byte*>(
3404 dfield_get_data(dfield));
3405
3406 doc.text.f_len = len;
3407 }
3408
3409 if (field_no == 0) {
3410 fts_tokenize_document(&doc, result_doc,
3411 result_doc->parser);
3412 } else {
3413 fts_tokenize_document_next(&doc, doc_len, result_doc,
3414 result_doc->parser);
3415 }
3416
3417 exp = que_node_get_next(exp);
3418
3419 doc_len += (exp) ? len + 1 : len;
3420
3421 field_no++;
3422 }
3423
3424 ut_ad(doc_charset);
3425
3426 if (!result_doc->charset) {
3427 result_doc->charset = doc_charset;
3428 }
3429
3430 fts_doc_free(&doc);
3431
3432 return(FALSE);
3433 }
3434
3435 /*********************************************************************//**
3436 fetch and tokenize the document. */
3437 static
3438 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3439 fts_fetch_doc_from_rec(
3440 /*===================*/
3441 fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
3442 dict_index_t* clust_index, /*!< in: cluster index */
3443 btr_pcur_t* pcur, /*!< in: cursor whose position
3444 has been stored */
3445 ulint* offsets, /*!< in: offsets */
3446 fts_doc_t* doc) /*!< out: fts doc to hold parsed
3447 documents */
3448 {
3449 dict_index_t* index;
3450 dict_table_t* table;
3451 const rec_t* clust_rec;
3452 ulint num_field;
3453 const dict_field_t* ifield;
3454 const dict_col_t* col;
3455 ulint clust_pos;
3456 ulint i;
3457 ulint doc_len = 0;
3458 ulint processed_doc = 0;
3459 st_mysql_ftparser* parser;
3460
3461 if (!get_doc) {
3462 return;
3463 }
3464
3465 index = get_doc->index_cache->index;
3466 table = get_doc->index_cache->index->table;
3467 parser = get_doc->index_cache->index->parser;
3468
3469 clust_rec = btr_pcur_get_rec(pcur);
3470
3471 num_field = dict_index_get_n_fields(index);
3472
3473 for (i = 0; i < num_field; i++) {
3474 ifield = dict_index_get_nth_field(index, i);
3475 col = dict_field_get_col(ifield);
3476 clust_pos = dict_col_get_clust_pos(col, clust_index);
3477
3478 if (!get_doc->index_cache->charset) {
3479 get_doc->index_cache->charset = fts_get_charset(
3480 ifield->col->prtype);
3481 }
3482
3483 if (rec_offs_nth_extern(offsets, clust_pos)) {
3484 doc->text.f_str =
3485 btr_rec_copy_externally_stored_field(
3486 clust_rec, offsets,
3487 dict_table_page_size(table),
3488 clust_pos, &doc->text.f_len,
3489 static_cast<mem_heap_t*>(
3490 doc->self_heap->arg));
3491 } else {
3492 doc->text.f_str = (byte*) rec_get_nth_field(
3493 clust_rec, offsets, clust_pos,
3494 &doc->text.f_len);
3495 }
3496
3497 doc->found = TRUE;
3498 doc->charset = get_doc->index_cache->charset;
3499 doc->is_ngram = index->is_ngram;
3500
3501 /* Null Field */
3502 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3503 continue;
3504 }
3505
3506 if (processed_doc == 0) {
3507 fts_tokenize_document(doc, NULL, parser);
3508 } else {
3509 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3510 }
3511
3512 processed_doc++;
3513 doc_len += doc->text.f_len + 1;
3514 }
3515 }
3516
/*********************************************************************//**
This function fetches the document inserted during the committing
transaction, and tokenize the inserted text data and insert into
FTS auxiliary table and its cache.
The row is looked up by Doc ID in table->fts_doc_id_index; when that index
is not the clustered index, the clustered record is looked up through a row
reference built from the secondary index record. The record is then
tokenized once per entry of cache->get_docs and the tokens are added to the
cache with fts_cache_add_doc().
@return TRUE in all cases (every path ends in return(TRUE)) */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)))
					/*!< in: affected fts indexes */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t	temp_doc_id;
	dict_index_t*	clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* This heap holds the search tuple, the clustered index row
	reference and, if needed, the record offsets; it is freed at
	the end of the function. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = table->fts_doc_id_index;

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key is the Doc ID written by mach_write_to_8() */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		ulint*		offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The Doc ID index record is the clustered record */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			dtuple_t*	clust_ref;
			ulint		n_fields;

			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			/* Build the clustered index reference from the
			Doc ID index record and look the row up with it. */
			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL, NULL);

			/* NOTE(review): the result of this PAGE_CUR_LE
			search is used without a low-match check; presumably
			the referenced row is guaranteed to exist here -
			confirm. */
			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index,
					  NULL, ULINT_UNDEFINED, &heap);

		/* Tokenize the record once for every get_doc entry. Note
		that the loop-local "table" and "get_doc" shadow the
		function-level variables of the same name. */
		for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t       doc;
			dict_table_t*   table;
			fts_get_doc_t*  get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Remember the cursor position and commit
				the mini-transaction before the cache lock
				is taken; the mini-transaction is restarted
				further below. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
				rw_lock_x_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL, NULL,
							  NULL, TRUE, TRUE);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Decide under the cache lock whether a sync
				should be requested: either the cache grew by
				more than a tenth of fts_max_cache_size since
				the value recorded in total_size_before_sync,
				or fts_need_sync is set - and no sync is in
				progress. The request itself is made after
				the lock has been released. */
				bool	need_sync = false;
				if ((cache->total_size -
				     cache->total_size_before_sync >
				     fts_max_cache_size / 10 || fts_need_sync)
				    && !cache->sync->in_progress) {
					need_sync = true;
					cache->total_size_before_sync =
						cache->total_size;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				/* The DBUG_EXECUTE_IF/DEBUG_SYNC_C hooks
				below are test instrumentation. */
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_cache_wait",
					srv_fatal_semaphore_wait_threshold = 25;
					fts_max_cache_size = 100;
					fts_sync(cache->sync, true, true, false);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true, false);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				mtr_start(&mtr);

				/* The cursor position is only needed again
				if another index remains to be processed. */
				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		/* In the non-cluster case doc_pcur is clust_pcur, which is
		closed here; pcur itself is closed after func_exit. */
		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}
3733
3734
3735 /*********************************************************************//**
3736 Callback function to read a single ulint column.
3737 return always returns TRUE */
3738 static
3739 ibool
fts_read_ulint(void * row,void * user_arg)3740 fts_read_ulint(
3741 /*===========*/
3742 void* row, /*!< in: sel_node_t* */
3743 void* user_arg) /*!< in: pointer to ulint */
3744 {
3745 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
3746 ulint* value = static_cast<ulint*>(user_arg);
3747 que_node_t* exp = sel_node->select_list;
3748 dfield_t* dfield = que_node_get_val(exp);
3749 void* data = dfield_get_data(dfield);
3750
3751 *value = static_cast<ulint>(mach_read_from_4(
3752 static_cast<const byte*>(data)));
3753
3754 return(TRUE);
3755 }
3756
3757 /*********************************************************************//**
3758 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3759 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3760 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3761 fts_get_max_doc_id(
3762 /*===============*/
3763 dict_table_t* table) /*!< in: user table */
3764 {
3765 dict_index_t* index;
3766 dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL;
3767 doc_id_t doc_id = 0;
3768 mtr_t mtr;
3769 btr_pcur_t pcur;
3770
3771 index = table->fts_doc_id_index;
3772
3773 if (!index) {
3774 return(0);
3775 }
3776
3777 dfield = dict_index_get_nth_field(index, 0);
3778
3779 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3780 ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3781 #endif
3782
3783 mtr_start(&mtr);
3784
3785 /* fetch the largest indexes value */
3786 btr_pcur_open_at_index_side(
3787 false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3788
3789 if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3790 const rec_t* rec = NULL;
3791 ulint offsets_[REC_OFFS_NORMAL_SIZE];
3792 ulint* offsets = offsets_;
3793 mem_heap_t* heap = NULL;
3794 ulint len;
3795 const void* data;
3796
3797 rec_offs_init(offsets_);
3798
3799 do {
3800 rec = btr_pcur_get_rec(&pcur);
3801
3802 if (page_rec_is_user_rec(rec)) {
3803 break;
3804 }
3805 } while (btr_pcur_move_to_prev(&pcur, &mtr));
3806
3807 if (!rec) {
3808 goto func_exit;
3809 }
3810
3811 offsets = rec_get_offsets(
3812 rec, index, offsets, ULINT_UNDEFINED, &heap);
3813
3814 data = rec_get_nth_field(rec, offsets, 0, &len);
3815
3816 doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3817 static_cast<const byte*>(data)));
3818 }
3819
3820 func_exit:
3821 btr_pcur_close(&pcur);
3822 mtr_commit(&mtr);
3823 return(doc_id);
3824 }
3825
3826 /*********************************************************************//**
3827 Fetch document with the given document id.
3828 @return DB_SUCCESS if OK else error */
3829 dberr_t
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3830 fts_doc_fetch_by_doc_id(
3831 /*====================*/
3832 fts_get_doc_t* get_doc, /*!< in: state */
3833 doc_id_t doc_id, /*!< in: id of document to
3834 fetch */
3835 dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
3836 or NULL */
3837 ulint option, /*!< in: search option, if it is
3838 greater than doc_id or equal */
3839 fts_sql_callback
3840 callback, /*!< in: callback to read */
3841 void* arg) /*!< in: callback arg */
3842 {
3843 pars_info_t* info;
3844 dberr_t error;
3845 const char* select_str;
3846 doc_id_t write_doc_id;
3847 dict_index_t* index;
3848 trx_t* trx = trx_allocate_for_background();
3849 que_t* graph;
3850
3851 trx->op_info = "fetching indexed FTS document";
3852
3853 /* The FTS index can be supplied by caller directly with
3854 "index_to_use", otherwise, get it from "get_doc" */
3855 index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3856
3857 if (get_doc && get_doc->get_document_graph) {
3858 info = get_doc->get_document_graph->info;
3859 } else {
3860 info = pars_info_create();
3861 }
3862
3863 /* Convert to "storage" byte order. */
3864 fts_write_doc_id((byte*) &write_doc_id, doc_id);
3865 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3866 pars_info_bind_function(info, "my_func", callback, arg);
3867
3868 select_str = fts_get_select_columns_str(index, info, info->heap);
3869 pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3870
3871 if (!get_doc || !get_doc->get_document_graph) {
3872 if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3873 graph = fts_parse_sql(
3874 NULL,
3875 info,
3876 mem_heap_printf(info->heap,
3877 "DECLARE FUNCTION my_func;\n"
3878 "DECLARE CURSOR c IS"
3879 " SELECT %s FROM $table_name"
3880 " WHERE %s = :doc_id;\n"
3881 "BEGIN\n"
3882 ""
3883 "OPEN c;\n"
3884 "WHILE 1 = 1 LOOP\n"
3885 " FETCH c INTO my_func();\n"
3886 " IF c %% NOTFOUND THEN\n"
3887 " EXIT;\n"
3888 " END IF;\n"
3889 "END LOOP;\n"
3890 "CLOSE c;",
3891 select_str, FTS_DOC_ID_COL_NAME));
3892 } else {
3893 ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3894
3895 /* This is used for crash recovery of table with
3896 hidden DOC ID or FTS indexes. We will scan the table
3897 to re-processing user table rows whose DOC ID or
3898 FTS indexed documents have not been sync-ed to disc
3899 during recent crash.
3900 In the case that all fulltext indexes are dropped
3901 for a table, we will keep the "hidden" FTS_DOC_ID
3902 column, and this scan is to retreive the largest
3903 DOC ID being used in the table to determine the
3904 appropriate next DOC ID.
3905 In the case of there exists fulltext index(es), this
3906 operation will re-tokenize any docs that have not
3907 been sync-ed to the disk, and re-prime the FTS
3908 cached */
3909 graph = fts_parse_sql(
3910 NULL,
3911 info,
3912 mem_heap_printf(info->heap,
3913 "DECLARE FUNCTION my_func;\n"
3914 "DECLARE CURSOR c IS"
3915 " SELECT %s, %s FROM $table_name"
3916 " WHERE %s > :doc_id;\n"
3917 "BEGIN\n"
3918 ""
3919 "OPEN c;\n"
3920 "WHILE 1 = 1 LOOP\n"
3921 " FETCH c INTO my_func();\n"
3922 " IF c %% NOTFOUND THEN\n"
3923 " EXIT;\n"
3924 " END IF;\n"
3925 "END LOOP;\n"
3926 "CLOSE c;",
3927 FTS_DOC_ID_COL_NAME,
3928 select_str, FTS_DOC_ID_COL_NAME));
3929 }
3930 if (get_doc) {
3931 get_doc->get_document_graph = graph;
3932 }
3933 } else {
3934 graph = get_doc->get_document_graph;
3935 }
3936
3937 error = fts_eval_sql(trx, graph);
3938
3939 if (error == DB_SUCCESS) {
3940 fts_sql_commit(trx);
3941 } else {
3942 fts_sql_rollback(trx);
3943 }
3944
3945 trx_free_for_background(trx);
3946
3947 if (!get_doc) {
3948 fts_que_graph_free(graph);
3949 }
3950
3951 return(error);
3952 }
3953
3954 /*********************************************************************//**
3955 Write out a single word's data as new entry/entries in the INDEX table.
3956 @return DB_SUCCESS if all OK. */
3957 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3958 fts_write_node(
3959 /*===========*/
3960 trx_t* trx, /*!< in: transaction */
3961 que_t** graph, /*!< in: query graph */
3962 fts_table_t* fts_table, /*!< in: aux table */
3963 fts_string_t* word, /*!< in: word in UTF-8 */
3964 fts_node_t* node) /*!< in: node columns */
3965 {
3966 pars_info_t* info;
3967 dberr_t error;
3968 ib_uint32_t doc_count;
3969 ib_time_monotonic_t start_time;
3970 doc_id_t last_doc_id;
3971 doc_id_t first_doc_id;
3972 char table_name[MAX_FULL_NAME_LEN];
3973
3974 ut_a(node->ilist != NULL);
3975
3976 if (*graph) {
3977 info = (*graph)->info;
3978 } else {
3979 info = pars_info_create();
3980
3981 fts_get_table_name(fts_table, table_name);
3982 pars_info_bind_id(info, true, "index_table_name", table_name);
3983 }
3984
3985 pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3986
3987 /* Convert to "storage" byte order. */
3988 fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3989 fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3990
3991 /* Convert to "storage" byte order. */
3992 fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3993 fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3994
3995 ut_a(node->last_doc_id >= node->first_doc_id);
3996
3997 /* Convert to "storage" byte order. */
3998 mach_write_to_4((byte*) &doc_count, node->doc_count);
3999 pars_info_bind_int4_literal(
4000 info, "doc_count", (const ib_uint32_t*) &doc_count);
4001
4002 /* Set copy_name to FALSE since it's a static. */
4003 pars_info_bind_literal(
4004 info, "ilist", node->ilist, node->ilist_size,
4005 DATA_BLOB, DATA_BINARY_TYPE);
4006
4007 if (!*graph) {
4008
4009 *graph = fts_parse_sql(
4010 fts_table,
4011 info,
4012 "BEGIN\n"
4013 "INSERT INTO $index_table_name VALUES"
4014 " (:token, :first_doc_id,"
4015 " :last_doc_id, :doc_count, :ilist);");
4016 }
4017
4018 start_time = ut_time_monotonic();
4019 error = fts_eval_sql(trx, *graph);
4020 elapsed_time += ut_time_monotonic() - start_time;
4021 ++n_nodes;
4022
4023 return(error);
4024 }
4025
4026 /*********************************************************************//**
4027 Add rows to the DELETED_CACHE table.
4028 @return DB_SUCCESS if all went well else error code*/
4029 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4030 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)4031 fts_sync_add_deleted_cache(
4032 /*=======================*/
4033 fts_sync_t* sync, /*!< in: sync state */
4034 ib_vector_t* doc_ids) /*!< in: doc ids to add */
4035 {
4036 ulint i;
4037 pars_info_t* info;
4038 que_t* graph;
4039 fts_table_t fts_table;
4040 char table_name[MAX_FULL_NAME_LEN];
4041 doc_id_t dummy = 0;
4042 dberr_t error = DB_SUCCESS;
4043 ulint n_elems = ib_vector_size(doc_ids);
4044
4045 ut_a(ib_vector_size(doc_ids) > 0);
4046
4047 ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
4048
4049 info = pars_info_create();
4050
4051 fts_bind_doc_id(info, "doc_id", &dummy);
4052
4053 FTS_INIT_FTS_TABLE(
4054 &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
4055
4056 fts_get_table_name(&fts_table, table_name);
4057 pars_info_bind_id(info, true, "table_name", table_name);
4058
4059 graph = fts_parse_sql(
4060 &fts_table,
4061 info,
4062 "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
4063
4064 for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
4065 fts_update_t* update;
4066 doc_id_t write_doc_id;
4067
4068 update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i));
4069
4070 /* Convert to "storage" byte order. */
4071 fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
4072 fts_bind_doc_id(info, "doc_id", &write_doc_id);
4073
4074 error = fts_eval_sql(sync->trx, graph);
4075 }
4076
4077 fts_que_graph_free(graph);
4078
4079 return(error);
4080 }
4081
/** Write the words and ilist to disk.
Walks every word of index_cache->words and writes each of its nodes that is
not yet marked as synced with fts_write_node(). The whole tree is walked
even after an error, but nothing more is written once an error occurred.
@param[in,out]	trx		transaction
@param[in]	index_cache	index cache
@param[in]	unlock_cache	whether unlock cache when write node
				Also set this to true if sync takes
				very long
@param[in]	sync_start_time	Holds the timestamp of start of sync
				for deducing the length of sync time
@return DB_SUCCESS if all went well else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_write_words(
	trx_t*			trx,
	fts_index_cache_t*	index_cache,
	bool			unlock_cache,
	ib_time_t		sync_start_time)
{
	fts_table_t	fts_table;
	/* This local counter is separate from the n_nodes that
	fts_write_node() increments; it only feeds the diagnostic
	printout at the end of this function. */
	ulint		n_nodes = 0;
	ulint		n_words = 0;
	const ib_rbt_node_t* rbt_node;
	dberr_t		error = DB_SUCCESS;
	ibool		print_error = FALSE;
	dict_table_t*	table = index_cache->index->table;
	/* We use this to deduce threshold value of time
	that we can let sync to go on holding cache lock */
	const float cutoff = 0.98;
	/* The modulo takes out whole multiples of
	SRV_SEMAPHORE_WAIT_EXTENSION from the current threshold -
	presumably extensions that are in effect at this moment. */
	ulint		lock_threshold =
		(srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION)
		* cutoff;
	/* Whether this call has added SRV_SEMAPHORE_WAIT_EXTENSION to
	srv_fatal_semaphore_wait_threshold and not yet taken it back. */
	bool		timeout_extended = false;
#ifdef FTS_DOC_STATS_DEBUG
	ulint		n_new_words = 0;
#endif /* FTS_DOC_STATS_DEBUG */

	FTS_INIT_INDEX_TABLE(
		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);

	n_words = rbt_size(index_cache->words);

	/* We iterate over the entire tree, even if there is an error,
	since we want to free the memory used during caching. */
	for (rbt_node = rbt_first(index_cache->words);
	     rbt_node;
	     rbt_node = rbt_next(index_cache->words, rbt_node)) {

		ulint			i;
		ulint			selected;
		fts_tokenizer_word_t*	word;

		word = rbt_value(fts_tokenizer_word_t, rbt_node);

		DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
				os_thread_sleep(300000););

		/* Pick the auxiliary index table for this word and set
		the matching table name suffix. */
		selected = fts_select_index(
			index_cache->charset, word->text.f_str,
			word->text.f_len);

		fts_table.suffix = fts_get_suffix(selected);

#ifdef FTS_DOC_STATS_DEBUG
		/* Check if the word exists in the FTS index and if not
		then we need to increment the total word count stats. */
		if (error == DB_SUCCESS && fts_enable_diag_print) {
			ibool	found = FALSE;

			error = fts_is_word_in_index(
				trx,
				&index_cache->sel_graph[selected],
				&fts_table,
				&word->text, &found);

			if (error == DB_SUCCESS && !found) {

				++n_new_words;
			}
		}
#endif /* FTS_DOC_STATS_DEBUG */

		/* We iterate over all the nodes even if there was an error */
		for (i = 0; i < ib_vector_size(word->nodes); ++i) {

			fts_node_t* fts_node = static_cast<fts_node_t*>(
				ib_vector_get(word->nodes, i));

			/* NOTE(review): the node is marked as synced before
			it is written, and also when the write below is
			skipped because of an earlier error - confirm that
			this is intended. */
			if (fts_node->synced) {
				continue;
			} else {
				fts_node->synced = true;
			}

			/*FIXME: we need to handle the error properly. */
			if (error == DB_SUCCESS) {
				DEBUG_SYNC_C("fts_instrument_sync");
				DBUG_EXECUTE_IF("fts_instrument_sync",
					os_thread_sleep(10000000););
				/* While the cache lock is kept for the whole
				sync, watch how long the sync has been
				running. On first exceeding the threshold the
				global semaphore wait threshold is extended
				once; on exceeding the extended threshold as
				well, the extension is taken back and the
				function switches to releasing the cache lock
				around every node write. */
				if (!unlock_cache) {
					ulint cache_lock_time = ut_time_monotonic() - sync_start_time;
					if (cache_lock_time > lock_threshold) {
						if (!timeout_extended) {
							os_atomic_increment_ulint(
							&srv_fatal_semaphore_wait_threshold,
							SRV_SEMAPHORE_WAIT_EXTENSION);
							timeout_extended = true;
							lock_threshold +=
							SRV_SEMAPHORE_WAIT_EXTENSION;
						} else {
							unlock_cache = true;
							os_atomic_decrement_ulint(
							&srv_fatal_semaphore_wait_threshold,
							SRV_SEMAPHORE_WAIT_EXTENSION);
							timeout_extended = false;

						}
					}
				}

				/* The unlock here presumes that the caller
				holds the cache lock in X mode - verify
				against the caller. */
				if (unlock_cache) {
					rw_lock_x_unlock(
						&table->fts->cache->lock);
				}

				error = fts_write_node(
					trx,
					&index_cache->ins_graph[selected],
					&fts_table, &word->text, fts_node);
				DBUG_EXECUTE_IF("fts_instrument_sync",
					os_thread_sleep(15000000););

				DEBUG_SYNC_C("fts_write_node");
				DBUG_EXECUTE_IF("fts_write_node_crash",
					DBUG_SUICIDE(););

				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
					os_thread_sleep(1000000);
				);

				/* Take the cache lock again before going on
				with the cached words. */
				if (unlock_cache) {
					rw_lock_x_lock(
						&table->fts->cache->lock);
				}
			}
		}

		/* Counts all nodes of the word, including those that were
		skipped above. */
		n_nodes += ib_vector_size(word->nodes);

		/* Report a write error only once per call. */
		if (error != DB_SUCCESS && !print_error) {
			ib::error() << "(" << ut_strerr(error) << ") writing"
				" word node to FTS auxiliary index table.";
			print_error = TRUE;
		}
	}

	/* NOTE(review): if the loop ends while timeout_extended is still
	true, nothing in this function takes the extension of
	srv_fatal_semaphore_wait_threshold back - confirm whether that is
	compensated elsewhere. */

#ifdef FTS_DOC_STATS_DEBUG
	if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
		/* This inner fts_table shadows the function-level one; it
		is initialized but not passed to the call below. */
		fts_table_t	fts_table;

		FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);

		/* Increment the total number of words in the FTS index */
		error = fts_config_increment_index_value(
			trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
			n_new_words);
	}
#endif /* FTS_DOC_STATS_DEBUG */

	if (fts_enable_diag_print) {
		/* The divisor is never less than 1, so an empty word tree
		does not cause a division by zero. */
		printf("Avg number of nodes: %lf\n",
		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
	}

	return(error);
}
4256
4257 #ifdef FTS_DOC_STATS_DEBUG
4258 /*********************************************************************//**
4259 Write a single documents statistics to disk.
4260 @return DB_SUCCESS if all went well else error code */
4261 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4262 dberr_t
fts_sync_write_doc_stat(trx_t * trx,dict_index_t * index,que_t ** graph,const fts_doc_stats_t * doc_stat)4263 fts_sync_write_doc_stat(
4264 /*====================*/
4265 trx_t* trx, /*!< in: transaction */
4266 dict_index_t* index, /*!< in: index */
4267 que_t** graph, /* out: query graph */
4268 const fts_doc_stats_t* doc_stat) /*!< in: doc stats to write */
4269 {
4270 pars_info_t* info;
4271 doc_id_t doc_id;
4272 dberr_t error = DB_SUCCESS;
4273 ib_uint32_t word_count;
4274 char table_name[MAX_FULL_NAME_LEN];
4275
4276 if (*graph) {
4277 info = (*graph)->info;
4278 } else {
4279 info = pars_info_create();
4280 }
4281
4282 /* Convert to "storage" byte order. */
4283 mach_write_to_4((byte*) &word_count, doc_stat->word_count);
4284 pars_info_bind_int4_literal(
4285 info, "count", (const ib_uint32_t*) &word_count);
4286
4287 /* Convert to "storage" byte order. */
4288 fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id);
4289 fts_bind_doc_id(info, "doc_id", &doc_id);
4290
4291 if (!*graph) {
4292 fts_table_t fts_table;
4293
4294 FTS_INIT_INDEX_TABLE(
4295 &fts_table, "DOC_ID", FTS_INDEX_TABLE, index);
4296
4297 fts_get_table_name(&fts_table, table_name);
4298
4299 pars_info_bind_id(info, true, "doc_id_table", table_name);
4300
4301 *graph = fts_parse_sql(
4302 &fts_table,
4303 info,
4304 "BEGIN"
4305 " INSERT INTO $doc_id_table VALUES (:doc_id, :count);");
4306 }
4307
4308 for (;;) {
4309 error = fts_eval_sql(trx, *graph);
4310
4311 if (error == DB_SUCCESS) {
4312
4313 break; /* Exit the loop. */
4314 } else {
4315
4316 if (error == DB_LOCK_WAIT_TIMEOUT) {
4317 ib::warn() << "Lock wait timeout writing to"
4318 " FTS doc_id. Retrying!";
4319
4320 trx->error_state = DB_SUCCESS;
4321 } else {
4322 ib::error() << "(" << ut_strerr(error)
4323 << ") while writing to FTS doc_id.";
4324
4325 break; /* Exit the loop. */
4326 }
4327 }
4328 }
4329
4330 return(error);
4331 }
4332
4333 /*********************************************************************//**
4334 Write document statistics to disk.
4335 @return DB_SUCCESS if all OK */
4336 static
4337 ulint
fts_sync_write_doc_stats(trx_t * trx,const fts_index_cache_t * index_cache)4338 fts_sync_write_doc_stats(
4339 /*=====================*/
4340 trx_t* trx, /*!< in: transaction */
4341 const fts_index_cache_t*index_cache) /*!< in: index cache */
4342 {
4343 dberr_t error = DB_SUCCESS;
4344 que_t* graph = NULL;
4345 fts_doc_stats_t* doc_stat;
4346
4347 if (ib_vector_is_empty(index_cache->doc_stats)) {
4348 return(DB_SUCCESS);
4349 }
4350
4351 doc_stat = static_cast<ts_doc_stats_t*>(
4352 ib_vector_pop(index_cache->doc_stats));
4353
4354 while (doc_stat) {
4355 error = fts_sync_write_doc_stat(
4356 trx, index_cache->index, &graph, doc_stat);
4357
4358 if (error != DB_SUCCESS) {
4359 break;
4360 }
4361
4362 if (ib_vector_is_empty(index_cache->doc_stats)) {
4363 break;
4364 }
4365
4366 doc_stat = static_cast<ts_doc_stats_t*>(
4367 ib_vector_pop(index_cache->doc_stats));
4368 }
4369
4370 if (graph != NULL) {
4371 fts_que_graph_free_check_lock(NULL, index_cache, graph);
4372 }
4373
4374 return(error);
4375 }
4376
4377 /*********************************************************************//**
4378 Callback to check the existince of a word.
4379 @return always return NULL */
4380 static
4381 ibool
fts_lookup_word(void * row,void * user_arg)4382 fts_lookup_word(
4383 /*============*/
4384 void* row, /*!< in: sel_node_t* */
4385 void* user_arg) /*!< in: fts_doc_t* */
4386 {
4387
4388 que_node_t* exp;
4389 sel_node_t* node = static_cast<sel_node_t*>(row);
4390 ibool* found = static_cast<ibool*>(user_arg);
4391
4392 exp = node->select_list;
4393
4394 while (exp) {
4395 dfield_t* dfield = que_node_get_val(exp);
4396 ulint len = dfield_get_len(dfield);
4397
4398 if (len != UNIV_SQL_NULL && len != 0) {
4399 *found = TRUE;
4400 }
4401
4402 exp = que_node_get_next(exp);
4403 }
4404
4405 return(FALSE);
4406 }
4407
4408 /*********************************************************************//**
4409 Check whether a particular word (term) exists in the FTS index.
4410 @return DB_SUCCESS if all went well else error code */
4411 static
4412 dberr_t
fts_is_word_in_index(trx_t * trx,que_t ** graph,fts_table_t * fts_table,const fts_string_t * word,ibool * found)4413 fts_is_word_in_index(
4414 /*=================*/
4415 trx_t* trx, /*!< in: FTS query state */
4416 que_t** graph, /* out: Query graph */
4417 fts_table_t* fts_table, /*!< in: table instance */
4418 const fts_string_t*
4419 word, /*!< in: the word to check */
4420 ibool* found) /* out: TRUE if exists */
4421 {
4422 pars_info_t* info;
4423 dberr_t error;
4424 char table_name[MAX_FULL_NAME_LEN];
4425
4426 trx->op_info = "looking up word in FTS index";
4427
4428 if (*graph) {
4429 info = (*graph)->info;
4430 } else {
4431 info = pars_info_create();
4432 }
4433
4434 fts_get_table_name(fts_table, table_name);
4435 pars_info_bind_id(info, true, "table_name", table_name);
4436 pars_info_bind_function(info, "my_func", fts_lookup_word, found);
4437 pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
4438
4439 if (*graph == NULL) {
4440 *graph = fts_parse_sql(
4441 fts_table,
4442 info,
4443 "DECLARE FUNCTION my_func;\n"
4444 "DECLARE CURSOR c IS"
4445 " SELECT doc_count\n"
4446 " FROM $table_name\n"
4447 " WHERE word = :word"
4448 " ORDER BY first_doc_id;\n"
4449 "BEGIN\n"
4450 "\n"
4451 "OPEN c;\n"
4452 "WHILE 1 = 1 LOOP\n"
4453 " FETCH c INTO my_func();\n"
4454 " IF c % NOTFOUND THEN\n"
4455 " EXIT;\n"
4456 " END IF;\n"
4457 "END LOOP;\n"
4458 "CLOSE c;");
4459 }
4460
4461 for (;;) {
4462 error = fts_eval_sql(trx, *graph);
4463
4464 if (error == DB_SUCCESS) {
4465
4466 break; /* Exit the loop. */
4467 } else {
4468
4469 if (error == DB_LOCK_WAIT_TIMEOUT) {
4470 ib::warn() << "Lock wait timeout reading"
4471 " FTS index. Retrying!";
4472
4473 trx->error_state = DB_SUCCESS;
4474 } else {
4475 ib::error() << "(" << ut_strerr(error)
4476 << ") while reading FTS index.";
4477
4478 break; /* Exit the loop. */
4479 }
4480 }
4481 }
4482
4483 return(error);
4484 }
4485 #endif /* FTS_DOC_STATS_DEBUG */
4486
4487 /*********************************************************************//**
4488 Begin Sync, create transaction, acquire locks, etc. */
4489 static
4490 void
fts_sync_begin(fts_sync_t * sync)4491 fts_sync_begin(
4492 /*===========*/
4493 fts_sync_t* sync) /*!< in: sync state */
4494 {
4495 fts_cache_t* cache = sync->table->fts->cache;
4496
4497 n_nodes = 0;
4498 elapsed_time = 0;
4499
4500 sync->start_time = ut_time_monotonic();
4501
4502 sync->trx = trx_allocate_for_background();
4503
4504 if (fts_enable_diag_print) {
4505 ib::info() << "FTS SYNC for table " << sync->table->name
4506 << ", deleted count: "
4507 << ib_vector_size(cache->deleted_doc_ids)
4508 << " size: " << cache->total_size << " bytes";
4509 }
4510 }
4511
4512 /*********************************************************************//**
4513 Run SYNC on the table, i.e., write out data from the index specific
4514 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4515 @return DB_SUCCESS if all OK */
4516 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4517 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4518 fts_sync_index(
4519 /*===========*/
4520 fts_sync_t* sync, /*!< in: sync state */
4521 fts_index_cache_t* index_cache) /*!< in: index cache */
4522 {
4523 trx_t* trx = sync->trx;
4524 dberr_t error = DB_SUCCESS;
4525
4526 trx->op_info = "doing SYNC index";
4527
4528 if (fts_enable_diag_print) {
4529 ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4530 }
4531
4532 ut_ad(rbt_validate(index_cache->words));
4533
4534 error = fts_sync_write_words(trx, index_cache, sync->unlock_cache,
4535 sync->start_time);
4536
4537 #ifdef FTS_DOC_STATS_DEBUG
4538 /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
4539 is not used currently for ranking. We disable fts_sync_write_doc_stats()
4540 for now */
4541 /* Write the per doc statistics that will be used for ranking. */
4542 if (error == DB_SUCCESS) {
4543
4544 error = fts_sync_write_doc_stats(trx, index_cache);
4545 }
4546 #endif /* FTS_DOC_STATS_DEBUG */
4547
4548 return(error);
4549 }
4550
4551 /** Check if index cache has been synced completely
4552 @param[in,out] index_cache index cache
4553 @return true if index is synced, otherwise false. */
4554 static
4555 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4556 fts_sync_index_check(
4557 fts_index_cache_t* index_cache)
4558 {
4559 const ib_rbt_node_t* rbt_node;
4560
4561 for (rbt_node = rbt_first(index_cache->words);
4562 rbt_node != NULL;
4563 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4564
4565 fts_tokenizer_word_t* word;
4566 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4567
4568 fts_node_t* fts_node;
4569 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4570
4571 if (!fts_node->synced) {
4572 return(false);
4573 }
4574 }
4575
4576 return(true);
4577 }
4578
4579 /** Reset synced flag in index cache when rollback
4580 @param[in,out] index_cache index cache */
4581 static
4582 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4583 fts_sync_index_reset(
4584 fts_index_cache_t* index_cache)
4585 {
4586 const ib_rbt_node_t* rbt_node;
4587
4588 for (rbt_node = rbt_first(index_cache->words);
4589 rbt_node != NULL;
4590 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4591
4592 fts_tokenizer_word_t* word;
4593 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4594
4595 fts_node_t* fts_node;
4596 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4597
4598 fts_node->synced = false;
4599 }
4600 }
4601
4602 /** Commit the SYNC, change state of processed doc ids etc.
4603 @param[in,out] sync sync state
4604 @return DB_SUCCESS if all OK */
4605 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4606 dberr_t
fts_sync_commit(fts_sync_t * sync)4607 fts_sync_commit(
4608 fts_sync_t* sync)
4609 {
4610 dberr_t error;
4611 trx_t* trx = sync->trx;
4612 fts_cache_t* cache = sync->table->fts->cache;
4613 doc_id_t last_doc_id;
4614
4615 trx->op_info = "doing SYNC commit";
4616
4617 /* After each Sync, update the CONFIG table about the max doc id
4618 we just sync-ed to index table */
4619 error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4620 &last_doc_id);
4621
4622 /* Get the list of deleted documents that are either in the
4623 cache or were headed there but were deleted before the add
4624 thread got to them. */
4625
4626 if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4627
4628 error = fts_sync_add_deleted_cache(
4629 sync, cache->deleted_doc_ids);
4630 }
4631
4632 /* We need to do this within the deleted lock since fts_delete() can
4633 attempt to add a deleted doc id to the cache deleted id array. */
4634 fts_cache_clear(cache);
4635 DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4636 fts_cache_init(cache);
4637 rw_lock_x_unlock(&cache->lock);
4638
4639 if (error == DB_SUCCESS) {
4640
4641 fts_sql_commit(trx);
4642
4643 } else if (error != DB_SUCCESS) {
4644
4645 fts_sql_rollback(trx);
4646
4647 ib::error() << "(" << ut_strerr(error) << ") during SYNC.";
4648 }
4649
4650 if (fts_enable_diag_print && elapsed_time) {
4651 ib::info() << "SYNC for table " << sync->table->name
4652 << ": SYNC time: "
4653 << (ut_time_monotonic() - sync->start_time)
4654 << " secs: elapsed "
4655 << (double) n_nodes / elapsed_time
4656 << " ins/sec";
4657 }
4658
4659 /* Avoid assertion in trx_free(). */
4660 trx->dict_operation_lock_mode = 0;
4661 trx_free_for_background(trx);
4662
4663 return(error);
4664 }
4665
4666 /*********************************************************************//**
4667 Rollback a sync operation */
4668 static
4669 void
fts_sync_rollback(fts_sync_t * sync)4670 fts_sync_rollback(
4671 /*==============*/
4672 fts_sync_t* sync) /*!< in: sync state */
4673 {
4674 trx_t* trx = sync->trx;
4675 fts_cache_t* cache = sync->table->fts->cache;
4676
4677 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4678 ulint j;
4679 fts_index_cache_t* index_cache;
4680
4681 index_cache = static_cast<fts_index_cache_t*>(
4682 ib_vector_get(cache->indexes, i));
4683
4684 /* Reset synced flag so nodes will not be skipped
4685 in the next sync, see fts_sync_write_words(). */
4686 fts_sync_index_reset(index_cache);
4687
4688 for (j = 0; fts_index_selector[j].value; ++j) {
4689
4690 if (index_cache->ins_graph[j] != NULL) {
4691
4692 fts_que_graph_free_check_lock(
4693 NULL, index_cache,
4694 index_cache->ins_graph[j]);
4695
4696 index_cache->ins_graph[j] = NULL;
4697 }
4698
4699 if (index_cache->sel_graph[j] != NULL) {
4700
4701 fts_que_graph_free_check_lock(
4702 NULL, index_cache,
4703 index_cache->sel_graph[j]);
4704
4705 index_cache->sel_graph[j] = NULL;
4706 }
4707 }
4708 }
4709
4710 rw_lock_x_unlock(&cache->lock);
4711
4712 fts_sql_rollback(trx);
4713
4714 /* Avoid assertion in trx_free(). */
4715 trx->dict_operation_lock_mode = 0;
4716 trx_free_for_background(trx);
4717 }
4718
4719 /** Check that all indexes are synced.
4720 @param[in,out] sync sync state
4721 @return true if all indexes are synced, false otherwise. */
4722 static
4723 bool
fts_check_all_indexes_synced(fts_sync_t * sync)4724 fts_check_all_indexes_synced(
4725 fts_sync_t* sync)
4726 {
4727 ulint i;
4728 fts_cache_t* cache = sync->table->fts->cache;
4729
4730 /* Make sure all the caches are synced. */
4731 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4732 fts_index_cache_t* index_cache;
4733
4734 index_cache = static_cast<fts_index_cache_t*>(
4735 ib_vector_get(cache->indexes, i));
4736
4737 if (index_cache->index->to_be_dropped
4738 || index_cache->index->table->to_be_dropped
4739 || fts_sync_index_check(index_cache)) {
4740 continue;
4741 }
4742
4743 return false;
4744 }
4745
4746 return true;
4747 }
4748
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
Only one sync runs at a time per cache: sync->in_progress is set under
cache->lock for the duration, and sync->event is signalled when the sync
finishes. If a sync is already running, the call either waits for it
(wait == true) or returns DB_SUCCESS immediately without syncing.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@param[in]	has_dict_lock	whether has dict operation lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict_lock)
{
	ulint		i;
	dberr_t		error = DB_SUCCESS;
	fts_cache_t*	cache = sync->table->fts->cache;

	rw_lock_x_lock(&cache->lock);

	/* Check if cache is being synced.
	Note: we release cache lock in fts_sync_write_words() to
	avoid long wait for the lock by other threads. */
	while (sync->in_progress) {
		/* Do not hold the cache lock while waiting or when
		returning early. */
		rw_lock_x_unlock(&cache->lock);

		if (wait) {
			os_event_wait(sync->event);
		} else {
			return(DB_SUCCESS);
		}

		/* Re-take the lock and re-check in_progress. */
		rw_lock_x_lock(&cache->lock);
	}

	sync->unlock_cache = unlock_cache;
	sync->in_progress = true;

	DEBUG_SYNC_C("fts_sync_begin");
	fts_sync_begin(sync);

	if (has_dict_lock) {
		/* If lock is already taken mark that in transaction
		so rollback will not try to take it again. */
		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
	}

	/* Keep writing out the index caches until every one of them is
	reported as fully synced, or until an error occurs. */
	do {
		if (cache->total_size > fts_max_cache_size) {
			/* Avoid the case: sync never finishes when
			insert/update keeps coming. From here on the
			cache lock is no longer released while writing
			nodes. */
			ut_ad(sync->unlock_cache);
			sync->unlock_cache = false;
		}

		for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
			fts_index_cache_t*	index_cache;

			index_cache = static_cast<fts_index_cache_t*>(
				ib_vector_get(cache->indexes, i));

			/* Skip indexes (or tables) that are marked to
			be dropped. */
			if (index_cache->index->to_be_dropped
			    || index_cache->index->table->to_be_dropped) {
				continue;
			}

			DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
					os_thread_sleep(300000););

			/* Flag the index as being synced; the flag is
			cleared again for all indexes further below. */
			index_cache->index->index_fts_syncing = true;

			error = fts_sync_index(sync, index_cache);

			if (error != DB_SUCCESS) {
				break;
			}
		}

		/* Debug-only injection of an interrupted sync. */
		DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
				sync->interrupted = true;
				error = DB_INTERRUPTED;
		);

		if (error != DB_SUCCESS) {
			break;
		}
	} while (!fts_check_all_indexes_synced(sync));

	/* Both fts_sync_commit() and fts_sync_rollback() release
	cache->lock and free sync->trx. */
	if (error == DB_SUCCESS && !sync->interrupted) {
		error = fts_sync_commit(sync);
	} else {
		fts_sync_rollback(sync);
	}

	/* Re-take the cache lock to publish the end of the sync. */
	rw_lock_x_lock(&cache->lock);
	/* Clear fts syncing flags of any indexes in case sync is
	interrupted */
	DEBUG_SYNC_C("fts_instrument_sync");
	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
		fts_index_cache_t*	index_cache;
		index_cache = static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i));
		if (index_cache->index->index_fts_syncing == true) {
			index_cache->index->index_fts_syncing = false;
		}
	}
	sync->interrupted = false;
	sync->in_progress = false;
	/* Wake up any thread waiting in the in_progress loop above. */
	os_event_set(sync->event);
	rw_lock_x_unlock(&cache->lock);

	/* Reset the added/deleted counters under deleted_lock.
	NOTE(review): the previous comment here spoke of copying the two
	variables that control the optimize trigger; the code only zeroes
	them, and does so on the error path as well - confirm that this
	is intended. */
	mutex_enter(&cache->deleted_lock);

	cache->added = 0;
	cache->deleted = 0;

	mutex_exit(&cache->deleted_lock);

	return(error);
}
4875
4876 /** Run SYNC on the table, i.e., write out data from the cache to the
4877 FTS auxiliary INDEX table and clear the cache at the end.
4878 @param[in,out] table fts table
4879 @param[in] unlock_cache whether unlock cache when write node
4880 @param[in] wait whether wait for existing sync to finish
4881 @param[in] has_dict whether has dict operation lock
4882 @return DB_SUCCESS on success, error code on failure. */
4883 dberr_t
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4884 fts_sync_table(
4885 dict_table_t* table,
4886 bool unlock_cache,
4887 bool wait,
4888 bool has_dict)
4889 {
4890 dberr_t err = DB_SUCCESS;
4891
4892 ut_ad(table->fts);
4893
4894 if (!dict_table_is_discarded(table) && table->fts->cache
4895 && !dict_table_is_corrupted(table)) {
4896 err = fts_sync(table->fts->cache->sync,
4897 unlock_cache, wait, has_dict);
4898 }
4899
4900 return(err);
4901 }
4902
4903 /** Check fts token
4904 1. for ngram token, check whether the token contains any words in stopwords
4905 2. for non-ngram token, check if it's stopword or less than fts_min_token_size
4906 or greater than fts_max_token_size.
4907 @param[in] token token string
4908 @param[in] stopwords stopwords rb tree
4909 @param[in] is_ngram is ngram parser
4910 @param[in] cs token charset
4911 @retval true if it is not stopword and length in range
4912 @retval false if it is stopword or lenght not in range */
4913 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,bool is_ngram,const CHARSET_INFO * cs)4914 fts_check_token(
4915 const fts_string_t* token,
4916 const ib_rbt_t* stopwords,
4917 bool is_ngram,
4918 const CHARSET_INFO* cs)
4919 {
4920 ut_ad(cs != NULL || stopwords == NULL);
4921
4922 if (!is_ngram) {
4923 ib_rbt_bound_t parent;
4924
4925 if (token->f_n_char < fts_min_token_size
4926 || token->f_n_char > fts_max_token_size
4927 || (stopwords != NULL
4928 && rbt_search(stopwords, &parent, token) == 0)) {
4929 return(false);
4930 } else {
4931 return(true);
4932 }
4933 }
4934
4935 /* Check token for ngram. */
4936 DBUG_EXECUTE_IF(
4937 "fts_instrument_ignore_ngram_check",
4938 return(true);
4939 );
4940
4941 /* We ignore fts_min_token_size when ngram */
4942 ut_ad(token->f_n_char > 0
4943 && token->f_n_char <= fts_max_token_size);
4944
4945 if (stopwords == NULL) {
4946 return(true);
4947 }
4948
4949 /*Ngram checks whether the token contains any words in stopwords.
4950 We can't simply use CONTAIN to search in stopwords, because it's
4951 built on COMPARE. So we need to tokenize the token into words
4952 from unigram to f_n_char, and check them separately. */
4953 for (ulint ngram_token_size = 1; ngram_token_size <= token->f_n_char;
4954 ngram_token_size ++) {
4955 const char* start;
4956 const char* next;
4957 const char* end;
4958 ulint char_len;
4959 ulint n_chars;
4960
4961 start = reinterpret_cast<char*>(token->f_str);
4962 next = start;
4963 end = start + token->f_len;
4964 n_chars = 0;
4965
4966 while (next < end) {
4967 char_len = my_mbcharlen_ptr(cs, next, end);
4968
4969 if (next + char_len > end || char_len == 0) {
4970 break;
4971 } else {
4972 /* Skip SPACE */
4973 if (char_len == 1 && *next == ' ') {
4974 start = next + 1;
4975 next = start;
4976 n_chars = 0;
4977
4978 continue;
4979 }
4980
4981 next += char_len;
4982 n_chars++;
4983 }
4984
4985 if (n_chars == ngram_token_size) {
4986 fts_string_t ngram_token;
4987 ngram_token.f_str =
4988 reinterpret_cast<byte*>(
4989 const_cast<char*>(start));
4990 ngram_token.f_len = next - start;
4991 ngram_token.f_n_char = ngram_token_size;
4992
4993 ib_rbt_bound_t parent;
4994 if (rbt_search(stopwords, &parent,
4995 &ngram_token) == 0) {
4996 return(false);
4997 }
4998
4999 /* Move a char forward */
5000 start += my_mbcharlen_ptr(cs, start, end);
5001 n_chars = ngram_token_size - 1;
5002 }
5003 }
5004 }
5005
5006 return(true);
5007 }
5008
5009 /** Add the token and its start position to the token's list of positions.
5010 @param[in,out] result_doc result doc rb tree
5011 @param[in] str token string
5012 @param[in] position token position */
5013 static
5014 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)5015 fts_add_token(
5016 fts_doc_t* result_doc,
5017 fts_string_t str,
5018 ulint position)
5019 {
5020 /* Ignore string whose character number is less than
5021 "fts_min_token_size" or more than "fts_max_token_size" */
5022
5023 if (fts_check_token(&str, NULL, result_doc->is_ngram,
5024 result_doc->charset)) {
5025
5026 mem_heap_t* heap;
5027 fts_string_t t_str;
5028 fts_token_t* token;
5029 ib_rbt_bound_t parent;
5030 ulint newlen;
5031
5032 heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
5033
5034 t_str.f_n_char = str.f_n_char;
5035
5036 t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
5037
5038 t_str.f_str = static_cast<byte*>(
5039 mem_heap_alloc(heap, t_str.f_len));
5040
5041 /* For binary collations, a case sensitive search is
5042 performed. Hence don't convert to lower case. */
5043 if (my_binary_compare(result_doc->charset)) {
5044 memcpy(t_str.f_str, str.f_str, str.f_len);
5045 t_str.f_str[str.f_len]= 0;
5046 newlen= str.f_len;
5047 } else {
5048 newlen = innobase_fts_casedn_str(
5049 result_doc->charset, (char*) str.f_str, str.f_len,
5050 (char*) t_str.f_str, t_str.f_len);
5051 }
5052
5053 t_str.f_len = newlen;
5054 t_str.f_str[newlen] = 0;
5055
5056 /* Add the word to the document statistics. If the word
5057 hasn't been seen before we create a new entry for it. */
5058 if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
5059 fts_token_t new_token;
5060
5061 new_token.text.f_len = newlen;
5062 new_token.text.f_str = t_str.f_str;
5063 new_token.text.f_n_char = t_str.f_n_char;
5064
5065 new_token.positions = ib_vector_create(
5066 result_doc->self_heap, sizeof(ulint), 32);
5067
5068 parent.last = rbt_add_node(
5069 result_doc->tokens, &parent, &new_token);
5070
5071 ut_ad(rbt_validate(result_doc->tokens));
5072 }
5073
5074 token = rbt_value(fts_token_t, parent.last);
5075 ib_vector_push(token->positions, &position);
5076 }
5077 }
5078
5079 /********************************************************************
5080 Process next token from document starting at the given position, i.e., add
5081 the token's start position to the token's list of positions.
5082 @return number of characters handled in this call */
5083 static
5084 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)5085 fts_process_token(
5086 /*==============*/
5087 fts_doc_t* doc, /* in/out: document to
5088 tokenize */
5089 fts_doc_t* result, /* out: if provided, save
5090 result here */
5091 ulint start_pos, /*!< in: start position in text */
5092 ulint add_pos) /*!< in: add this position to all
5093 tokens from this tokenization */
5094 {
5095 ulint ret;
5096 fts_string_t str;
5097 ulint position;
5098 fts_doc_t* result_doc;
5099 byte buf[FTS_MAX_WORD_LEN + 1];
5100
5101 str.f_str = buf;
5102
5103 /* Determine where to save the result. */
5104 result_doc = (result != NULL) ? result : doc;
5105
5106 /* The length of a string in characters is set here only. */
5107
5108 ret = innobase_mysql_fts_get_token(
5109 doc->charset, doc->text.f_str + start_pos,
5110 doc->text.f_str + doc->text.f_len, &str);
5111
5112 position = start_pos + ret - str.f_len + add_pos;
5113
5114 fts_add_token(result_doc, str, position);
5115
5116 return(ret);
5117 }
5118
5119 /*************************************************************//**
5120 Get token char size by charset
5121 @return token size */
5122 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)5123 fts_get_token_size(
5124 /*===============*/
5125 const CHARSET_INFO* cs, /*!< in: Character set */
5126 const char* token, /*!< in: token */
5127 ulint len) /*!< in: token length */
5128 {
5129 char* start;
5130 char* end;
5131 ulint size = 0;
5132
5133 /* const_cast is for reinterpret_cast below, or it will fail. */
5134 start = const_cast<char*>(token);
5135 end = start + len;
5136 while (start < end) {
5137 int ctype;
5138 int mbl;
5139
5140 mbl = cs->cset->ctype(
5141 cs, &ctype,
5142 reinterpret_cast<uchar*>(start),
5143 reinterpret_cast<uchar*>(end));
5144
5145 size++;
5146
5147 start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
5148 }
5149
5150 return(size);
5151 }
5152
5153 /*************************************************************//**
5154 FTS plugin parser 'myql_parser' callback function for document tokenize.
5155 Refer to 'st_mysql_ftparser_param' for more detail.
5156 @return always returns 0 */
5157 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,char * doc,int len)5158 fts_tokenize_document_internal(
5159 /*===========================*/
5160 MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
5161 char* doc, /*!< in/out: document */
5162 int len) /*!< in: document length */
5163 {
5164 fts_string_t str;
5165 byte buf[FTS_MAX_WORD_LEN + 1];
5166 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
5167 { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
5168
5169 ut_ad(len >= 0);
5170
5171 str.f_str = buf;
5172
5173 for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
5174 inc = innobase_mysql_fts_get_token(
5175 const_cast<CHARSET_INFO*>(param->cs),
5176 reinterpret_cast<byte*>(doc) + i,
5177 reinterpret_cast<byte*>(doc) + len,
5178 &str);
5179
5180 if (str.f_len > 0) {
5181 bool_info.position =
5182 static_cast<int>(i + inc - str.f_len);
5183 ut_ad(bool_info.position >= 0);
5184
5185 /* Stop when add word fails */
5186 if (param->mysql_add_word(
5187 param,
5188 reinterpret_cast<char*>(str.f_str),
5189 static_cast<int>(str.f_len),
5190 &bool_info)) {
5191 break;
5192 }
5193 }
5194 }
5195
5196 return(0);
5197 }
5198
5199 /******************************************************************//**
5200 FTS plugin parser 'myql_add_word' callback function for document tokenize.
5201 Refer to 'st_mysql_ftparser_param' for more detail.
5202 @return always returns 0 */
5203 static
5204 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)5205 fts_tokenize_add_word_for_parser(
5206 /*=============================*/
5207 MYSQL_FTPARSER_PARAM* param, /* in: parser paramter */
5208 char* word, /* in: token word */
5209 int word_len, /* in: word len */
5210 MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: word boolean info */
5211 {
5212 fts_string_t str;
5213 fts_tokenize_param_t* fts_param;
5214 fts_doc_t* result_doc;
5215 ulint position;
5216
5217 fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
5218 result_doc = fts_param->result_doc;
5219 ut_ad(result_doc != NULL);
5220
5221 str.f_str = reinterpret_cast<byte*>(word);
5222 str.f_len = word_len;
5223 str.f_n_char = fts_get_token_size(
5224 const_cast<CHARSET_INFO*>(param->cs), word, word_len);
5225
5226 ut_ad(boolean_info->position >= 0);
5227 position = boolean_info->position + fts_param->add_pos;
5228
5229 fts_add_token(result_doc, str, position);
5230
5231 return(0);
5232 }
5233
5234 /******************************************************************//**
5235 Parse a document using an external / user supplied parser */
5236 static
5237 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)5238 fts_tokenize_by_parser(
5239 /*===================*/
5240 fts_doc_t* doc, /* in/out: document to tokenize */
5241 st_mysql_ftparser* parser, /* in: plugin fts parser */
5242 fts_tokenize_param_t* fts_param) /* in: fts tokenize param */
5243 {
5244 MYSQL_FTPARSER_PARAM param;
5245
5246 ut_a(parser);
5247
5248 /* Set paramters for param */
5249 param.mysql_parse = fts_tokenize_document_internal;
5250 param.mysql_add_word = fts_tokenize_add_word_for_parser;
5251 param.mysql_ftparam = fts_param;
5252 param.cs = doc->charset;
5253 param.doc = reinterpret_cast<char*>(doc->text.f_str);
5254 param.length = static_cast<int>(doc->text.f_len);
5255 param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
5256
5257 PARSER_INIT(parser, ¶m);
5258 parser->parse(¶m);
5259 PARSER_DEINIT(parser, ¶m);
5260 }
5261
5262 /******************************************************************//**
5263 Tokenize a document. */
5264 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)5265 fts_tokenize_document(
5266 /*==================*/
5267 fts_doc_t* doc, /* in/out: document to
5268 tokenize */
5269 fts_doc_t* result, /* out: if provided, save
5270 the result token here */
5271 st_mysql_ftparser* parser) /* in: plugin fts parser */
5272 {
5273 ut_a(!doc->tokens);
5274 ut_a(doc->charset);
5275
5276 doc->tokens = rbt_create_arg_cmp(
5277 sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
5278
5279 if (parser != NULL) {
5280 fts_tokenize_param_t fts_param;
5281
5282 fts_param.result_doc = (result != NULL) ? result : doc;
5283 fts_param.add_pos = 0;
5284
5285 fts_tokenize_by_parser(doc, parser, &fts_param);
5286 } else {
5287 ulint inc;
5288
5289 for (ulint i = 0; i < doc->text.f_len; i += inc) {
5290 inc = fts_process_token(doc, result, i, 0);
5291 ut_a(inc > 0);
5292 }
5293 }
5294 }
5295
5296 /******************************************************************//**
5297 Continue to tokenize a document. */
5298 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)5299 fts_tokenize_document_next(
5300 /*=======================*/
5301 fts_doc_t* doc, /*!< in/out: document to
5302 tokenize */
5303 ulint add_pos, /*!< in: add this position to all
5304 tokens from this tokenization */
5305 fts_doc_t* result, /*!< out: if provided, save
5306 the result token here */
5307 st_mysql_ftparser* parser) /* in: plugin fts parser */
5308 {
5309 ut_a(doc->tokens);
5310
5311 if (parser) {
5312 fts_tokenize_param_t fts_param;
5313
5314 fts_param.result_doc = (result != NULL) ? result : doc;
5315 fts_param.add_pos = add_pos;
5316
5317 fts_tokenize_by_parser(doc, parser, &fts_param);
5318 } else {
5319 ulint inc;
5320
5321 for (ulint i = 0; i < doc->text.f_len; i += inc) {
5322 inc = fts_process_token(doc, result, i, add_pos);
5323 ut_a(inc > 0);
5324 }
5325 }
5326 }
5327
5328 /********************************************************************
5329 Create the vector of fts_get_doc_t instances. */
5330 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)5331 fts_get_docs_create(
5332 /*================*/
5333 /* out: vector of
5334 fts_get_doc_t instances */
5335 fts_cache_t* cache) /*!< in: fts cache */
5336 {
5337 ib_vector_t* get_docs;
5338
5339 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
5340
5341 /* We need one instance of fts_get_doc_t per index. */
5342 get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
5343
5344 /* Create the get_doc instance, we need one of these
5345 per FTS index. */
5346 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
5347
5348 dict_index_t** index;
5349 fts_get_doc_t* get_doc;
5350
5351 index = static_cast<dict_index_t**>(
5352 ib_vector_get(cache->indexes, i));
5353
5354 get_doc = static_cast<fts_get_doc_t*>(
5355 ib_vector_push(get_docs, NULL));
5356
5357 memset(get_doc, 0x0, sizeof(*get_doc));
5358
5359 get_doc->index_cache = fts_get_index_cache(cache, *index);
5360 get_doc->cache = cache;
5361
5362 /* Must find the index cache. */
5363 ut_a(get_doc->index_cache != NULL);
5364 }
5365
5366 return(get_docs);
5367 }
5368
5369 /********************************************************************
5370 Release any resources held by the fts_get_doc_t instances. */
5371 static
5372 void
fts_get_docs_clear(ib_vector_t * get_docs)5373 fts_get_docs_clear(
5374 /*===============*/
5375 ib_vector_t* get_docs) /*!< in: Doc retrieval vector */
5376 {
5377 ulint i;
5378
5379 /* Release the get doc graphs if any. */
5380 for (i = 0; i < ib_vector_size(get_docs); ++i) {
5381
5382 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(
5383 ib_vector_get(get_docs, i));
5384
5385 if (get_doc->get_document_graph != NULL) {
5386
5387 ut_a(get_doc->index_cache);
5388
5389 fts_que_graph_free(get_doc->get_document_graph);
5390 get_doc->get_document_graph = NULL;
5391 }
5392 }
5393 }
5394
5395 /*********************************************************************//**
5396 Get the initial Doc ID by consulting the CONFIG table
5397 @return initial Doc ID */
5398 doc_id_t
fts_init_doc_id(const dict_table_t * table)5399 fts_init_doc_id(
5400 /*============*/
5401 const dict_table_t* table) /*!< in: table */
5402 {
5403 doc_id_t max_doc_id = 0;
5404
5405 rw_lock_x_lock(&table->fts->cache->lock);
5406
5407 /* Return if the table is already initialized for DOC ID */
5408 if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
5409 rw_lock_x_unlock(&table->fts->cache->lock);
5410 return(0);
5411 }
5412
5413 DEBUG_SYNC_C("fts_initialize_doc_id");
5414
5415 /* Then compare this value with the ID value stored in the CONFIG
5416 table. The larger one will be our new initial Doc ID */
5417 fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
5418
5419 /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
5420 creating index (and add doc id column. No need to recovery
5421 documents */
5422 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
5423 fts_init_index((dict_table_t*) table, TRUE);
5424 }
5425
5426 table->fts->fts_status |= ADDED_TABLE_SYNCED;
5427
5428 table->fts->cache->first_doc_id = max_doc_id;
5429
5430 rw_lock_x_unlock(&table->fts->cache->lock);
5431
5432 ut_ad(max_doc_id > 0);
5433
5434 return(max_doc_id);
5435 }
5436
5437 #ifdef FTS_MULT_INDEX
5438 /*********************************************************************//**
5439 Check if the index is in the affected set.
5440 @return TRUE if index is updated */
5441 static
5442 ibool
fts_is_index_updated(const ib_vector_t * fts_indexes,const fts_get_doc_t * get_doc)5443 fts_is_index_updated(
5444 /*=================*/
5445 const ib_vector_t* fts_indexes, /*!< in: affected FTS indexes */
5446 const fts_get_doc_t* get_doc) /*!< in: info for reading
5447 document */
5448 {
5449 ulint i;
5450 dict_index_t* index = get_doc->index_cache->index;
5451
5452 for (i = 0; i < ib_vector_size(fts_indexes); ++i) {
5453 const dict_index_t* updated_fts_index;
5454
5455 updated_fts_index = static_cast<const dict_index_t*>(
5456 ib_vector_getp_const(fts_indexes, i));
5457
5458 ut_a(updated_fts_index != NULL);
5459
5460 if (updated_fts_index == index) {
5461 return(TRUE);
5462 }
5463 }
5464
5465 return(FALSE);
5466 }
5467 #endif
5468
5469 /*********************************************************************//**
5470 Fetch COUNT(*) from specified table.
5471 @return the number of rows in the table */
5472 ulint
fts_get_rows_count(fts_table_t * fts_table)5473 fts_get_rows_count(
5474 /*===============*/
5475 fts_table_t* fts_table) /*!< in: fts table to read */
5476 {
5477 trx_t* trx;
5478 pars_info_t* info;
5479 que_t* graph;
5480 dberr_t error;
5481 ulint count = 0;
5482 char table_name[MAX_FULL_NAME_LEN];
5483
5484 trx = trx_allocate_for_background();
5485
5486 trx->op_info = "fetching FT table rows count";
5487
5488 info = pars_info_create();
5489
5490 pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
5491
5492 fts_get_table_name(fts_table, table_name);
5493 pars_info_bind_id(info, true, "table_name", table_name);
5494
5495 graph = fts_parse_sql(
5496 fts_table,
5497 info,
5498 "DECLARE FUNCTION my_func;\n"
5499 "DECLARE CURSOR c IS"
5500 " SELECT COUNT(*)"
5501 " FROM $table_name;\n"
5502 "BEGIN\n"
5503 "\n"
5504 "OPEN c;\n"
5505 "WHILE 1 = 1 LOOP\n"
5506 " FETCH c INTO my_func();\n"
5507 " IF c % NOTFOUND THEN\n"
5508 " EXIT;\n"
5509 " END IF;\n"
5510 "END LOOP;\n"
5511 "CLOSE c;");
5512
5513 for (;;) {
5514 error = fts_eval_sql(trx, graph);
5515
5516 if (error == DB_SUCCESS) {
5517 fts_sql_commit(trx);
5518
5519 break; /* Exit the loop. */
5520 } else {
5521 fts_sql_rollback(trx);
5522
5523 if (error == DB_LOCK_WAIT_TIMEOUT) {
5524 ib::warn() << "lock wait timeout reading"
5525 " FTS table. Retrying!";
5526
5527 trx->error_state = DB_SUCCESS;
5528 } else {
5529 ib::error() << "(" << ut_strerr(error)
5530 << ") while reading FTS table.";
5531
5532 break; /* Exit the loop. */
5533 }
5534 }
5535 }
5536
5537 fts_que_graph_free(graph);
5538
5539 trx_free_for_background(trx);
5540
5541 return(count);
5542 }
5543
5544 #ifdef FTS_CACHE_SIZE_DEBUG
5545 /*********************************************************************//**
5546 Read the max cache size parameter from the config table. */
5547 static
5548 void
fts_update_max_cache_size(fts_sync_t * sync)5549 fts_update_max_cache_size(
5550 /*======================*/
5551 fts_sync_t* sync) /*!< in: sync state */
5552 {
5553 trx_t* trx;
5554 fts_table_t fts_table;
5555
5556 trx = trx_allocate_for_background();
5557
5558 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);
5559
5560 /* The size returned is in bytes. */
5561 sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table);
5562
5563 fts_sql_commit(trx);
5564
5565 trx_free_for_background(trx);
5566 }
5567 #endif /* FTS_CACHE_SIZE_DEBUG */
5568
5569 /*********************************************************************//**
5570 Free the modified rows of a table. */
5571 UNIV_INLINE
5572 void
fts_trx_table_rows_free(ib_rbt_t * rows)5573 fts_trx_table_rows_free(
5574 /*====================*/
5575 ib_rbt_t* rows) /*!< in: rbt of rows to free */
5576 {
5577 const ib_rbt_node_t* node;
5578
5579 for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5580 fts_trx_row_t* row;
5581
5582 row = rbt_value(fts_trx_row_t, node);
5583
5584 if (row->fts_indexes != NULL) {
5585 /* This vector shouldn't be using the
5586 heap allocator. */
5587 ut_a(row->fts_indexes->allocator->arg == NULL);
5588
5589 ib_vector_free(row->fts_indexes);
5590 row->fts_indexes = NULL;
5591 }
5592
5593 ut_free(rbt_remove_node(rows, node));
5594 }
5595
5596 ut_a(rbt_empty(rows));
5597 rbt_free(rows);
5598 }
5599
5600 /*********************************************************************//**
5601 Free an FTS savepoint instance. */
5602 UNIV_INLINE
5603 void
fts_savepoint_free(fts_savepoint_t * savepoint)5604 fts_savepoint_free(
5605 /*===============*/
5606 fts_savepoint_t* savepoint) /*!< in: savepoint instance */
5607 {
5608 const ib_rbt_node_t* node;
5609 ib_rbt_t* tables = savepoint->tables;
5610
5611 /* Nothing to free! */
5612 if (tables == NULL) {
5613 return;
5614 }
5615
5616 for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5617 fts_trx_table_t* ftt;
5618 fts_trx_table_t** fttp;
5619
5620 fttp = rbt_value(fts_trx_table_t*, node);
5621 ftt = *fttp;
5622
5623 /* This can be NULL if a savepoint was released. */
5624 if (ftt->rows != NULL) {
5625 fts_trx_table_rows_free(ftt->rows);
5626 ftt->rows = NULL;
5627 }
5628
5629 /* This can be NULL if a savepoint was released. */
5630 if (ftt->added_doc_ids != NULL) {
5631 fts_doc_ids_free(ftt->added_doc_ids);
5632 ftt->added_doc_ids = NULL;
5633 }
5634
5635 /* The default savepoint name must be NULL. */
5636 if (ftt->docs_added_graph) {
5637 fts_que_graph_free(ftt->docs_added_graph);
5638 }
5639
5640 /* NOTE: We are responsible for free'ing the node */
5641 ut_free(rbt_remove_node(tables, node));
5642 }
5643
5644 ut_a(rbt_empty(tables));
5645 rbt_free(tables);
5646 savepoint->tables = NULL;
5647 }
5648
5649 /*********************************************************************//**
5650 Free an FTS trx. */
5651 void
fts_trx_free(fts_trx_t * fts_trx)5652 fts_trx_free(
5653 /*=========*/
5654 fts_trx_t* fts_trx) /* in, own: FTS trx */
5655 {
5656 ulint i;
5657
5658 for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5659 fts_savepoint_t* savepoint;
5660
5661 savepoint = static_cast<fts_savepoint_t*>(
5662 ib_vector_get(fts_trx->savepoints, i));
5663
5664 /* The default savepoint name must be NULL. */
5665 if (i == 0) {
5666 ut_a(savepoint->name == NULL);
5667 }
5668
5669 fts_savepoint_free(savepoint);
5670 }
5671
5672 for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5673 fts_savepoint_t* savepoint;
5674
5675 savepoint = static_cast<fts_savepoint_t*>(
5676 ib_vector_get(fts_trx->last_stmt, i));
5677
5678 /* The default savepoint name must be NULL. */
5679 if (i == 0) {
5680 ut_a(savepoint->name == NULL);
5681 }
5682
5683 fts_savepoint_free(savepoint);
5684 }
5685
5686 if (fts_trx->heap) {
5687 mem_heap_free(fts_trx->heap);
5688 }
5689 }
5690
5691 /*********************************************************************//**
5692 Extract the doc id from the FTS hidden column.
5693 @return doc id that was extracted from rec */
5694 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5695 fts_get_doc_id_from_row(
5696 /*====================*/
5697 dict_table_t* table, /*!< in: table */
5698 dtuple_t* row) /*!< in: row whose FTS doc id we
5699 want to extract.*/
5700 {
5701 dfield_t* field;
5702 doc_id_t doc_id = 0;
5703
5704 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5705
5706 field = dtuple_get_nth_field(row, table->fts->doc_col);
5707
5708 ut_a(dfield_get_len(field) == sizeof(doc_id));
5709 ut_a(dfield_get_type(field)->mtype == DATA_INT);
5710
5711 doc_id = fts_read_doc_id(
5712 static_cast<const byte*>(dfield_get_data(field)));
5713
5714 return(doc_id);
5715 }
5716
5717 /** Extract the doc id from the record that belongs to index.
5718 @param[in] table table
5719 @param[in] rec record contains FTS_DOC_ID
5720 @param[in] index index of rec
5721 @param[in] heap heap memory
5722 @return doc id that was extracted from rec */
5723 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5724 fts_get_doc_id_from_rec(
5725 dict_table_t* table,
5726 const rec_t* rec,
5727 const dict_index_t* index,
5728 mem_heap_t* heap)
5729 {
5730 ulint len;
5731 const byte* data;
5732 ulint col_no;
5733 doc_id_t doc_id = 0;
5734 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5735 ulint* offsets = offsets_;
5736 mem_heap_t* my_heap = heap;
5737
5738 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5739
5740 rec_offs_init(offsets_);
5741
5742 offsets = rec_get_offsets(
5743 rec, index, offsets, ULINT_UNDEFINED, &my_heap);
5744
5745 col_no = dict_col_get_index_pos(
5746 &table->cols[table->fts->doc_col], index);
5747
5748 ut_ad(col_no != ULINT_UNDEFINED);
5749
5750 data = rec_get_nth_field(rec, offsets, col_no, &len);
5751
5752 ut_a(len == 8);
5753 ut_ad(8 == sizeof(doc_id));
5754 doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5755
5756 if (my_heap && !heap) {
5757 mem_heap_free(my_heap);
5758 }
5759
5760 return(doc_id);
5761 }
5762
5763 /*********************************************************************//**
5764 Search the index specific cache for a particular FTS index.
5765 @return the index specific cache else NULL */
5766 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5767 fts_find_index_cache(
5768 /*=================*/
5769 const fts_cache_t* cache, /*!< in: cache to search */
5770 const dict_index_t* index) /*!< in: index to search for */
5771 {
5772 /* We cast away the const because our internal function, takes
5773 non-const cache arg and returns a non-const pointer. */
5774 return(static_cast<fts_index_cache_t*>(
5775 fts_get_index_cache((fts_cache_t*) cache, index)));
5776 }
5777
5778 /*********************************************************************//**
5779 Search cache for word.
5780 @return the word node vector if found else NULL */
5781 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5782 fts_cache_find_word(
5783 /*================*/
5784 const fts_index_cache_t*index_cache, /*!< in: cache to search */
5785 const fts_string_t* text) /*!< in: word to search for */
5786 {
5787 ib_rbt_bound_t parent;
5788 const ib_vector_t* nodes = NULL;
5789 #ifdef UNIV_DEBUG
5790 dict_table_t* table = index_cache->index->table;
5791 fts_cache_t* cache = table->fts->cache;
5792
5793 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5794 #endif /* UNIV_DEBUG */
5795
5796 /* Lookup the word in the rb tree */
5797 if (rbt_search(index_cache->words, &parent, text) == 0) {
5798 const fts_tokenizer_word_t* word;
5799
5800 word = rbt_value(fts_tokenizer_word_t, parent.last);
5801
5802 nodes = word->nodes;
5803 }
5804
5805 return(nodes);
5806 }
5807
5808 /*********************************************************************//**
5809 Check cache for deleted doc id.
5810 @return TRUE if deleted */
5811 ibool
fts_cache_is_deleted_doc_id(const fts_cache_t * cache,doc_id_t doc_id)5812 fts_cache_is_deleted_doc_id(
5813 /*========================*/
5814 const fts_cache_t* cache, /*!< in: cache ito search */
5815 doc_id_t doc_id) /*!< in: doc id to search for */
5816 {
5817 ut_ad(mutex_own(&cache->deleted_lock));
5818
5819 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5820 const fts_update_t* update;
5821
5822 update = static_cast<const fts_update_t*>(
5823 ib_vector_get_const(cache->deleted_doc_ids, i));
5824
5825 if (doc_id == update->doc_id) {
5826
5827 return(TRUE);
5828 }
5829 }
5830
5831 return(FALSE);
5832 }
5833
5834 /*********************************************************************//**
5835 Append deleted doc ids to vector. */
5836 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5837 fts_cache_append_deleted_doc_ids(
5838 /*=============================*/
5839 const fts_cache_t* cache, /*!< in: cache to use */
5840 ib_vector_t* vector) /*!< in: append to this vector */
5841 {
5842 mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5843
5844 if (cache->deleted_doc_ids == NULL) {
5845 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5846 return;
5847 }
5848
5849
5850 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5851 fts_update_t* update;
5852
5853 update = static_cast<fts_update_t*>(
5854 ib_vector_get(cache->deleted_doc_ids, i));
5855
5856 ib_vector_push(vector, &update->doc_id);
5857 }
5858
5859 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5860 }
5861
5862 /*********************************************************************//**
5863 Wait for the background thread to start. We poll to detect change
5864 of state, which is acceptable, since the wait should happen only
5865 once during startup.
5866 @return true if the thread started else FALSE (i.e timed out) */
5867 ibool
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5868 fts_wait_for_background_thread_to_start(
5869 /*====================================*/
5870 dict_table_t* table, /*!< in: table to which the thread
5871 is attached */
5872 ulint max_wait) /*!< in: time in microseconds, if
5873 set to 0 then it disables
5874 timeout checking */
5875 {
5876 ulint count = 0;
5877 ibool done = FALSE;
5878
5879 ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5880
5881 for (;;) {
5882 fts_t* fts = table->fts;
5883
5884 mutex_enter(&fts->bg_threads_mutex);
5885
5886 if (fts->fts_status & BG_THREAD_READY) {
5887
5888 done = TRUE;
5889 }
5890
5891 mutex_exit(&fts->bg_threads_mutex);
5892
5893 if (!done) {
5894 os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5895
5896 if (max_wait > 0) {
5897
5898 max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5899
5900 /* We ignore the residual value. */
5901 if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5902 break;
5903 }
5904 }
5905
5906 ++count;
5907 } else {
5908 break;
5909 }
5910
5911 if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5912 ib::error() << "The background thread for the FTS"
5913 " table " << table->name
5914 << " refuses to start";
5915
5916 count = 0;
5917 }
5918 }
5919
5920 return(done);
5921 }
5922
5923 /*********************************************************************//**
5924 Add the FTS document id hidden column. */
5925 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5926 fts_add_doc_id_column(
5927 /*==================*/
5928 dict_table_t* table, /*!< in/out: Table with FTS index */
5929 mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
5930 {
5931 dict_mem_table_add_col(
5932 table, heap,
5933 FTS_DOC_ID_COL_NAME,
5934 DATA_INT,
5935 dtype_form_prtype(
5936 DATA_NOT_NULL | DATA_UNSIGNED
5937 | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5938 sizeof(doc_id_t));
5939 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5940 }
5941
5942 /** Add new fts doc id to the update vector.
5943 @param[in] table the table that contains the FTS index.
5944 @param[in,out] ufield the fts doc id field in the update vector.
5945 No new memory is allocated for this in this
5946 function.
5947 @param[in,out] next_doc_id the fts doc id that has been added to the
5948 update vector. If 0, a new fts doc id is
5949 automatically generated. The memory provided
5950 for this argument will be used by the update
5951 vector. Ensure that the life time of this
5952 memory matches that of the update vector.
5953 @return the fts doc id used in the update vector */
5954 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5955 fts_update_doc_id(
5956 dict_table_t* table,
5957 upd_field_t* ufield,
5958 doc_id_t* next_doc_id)
5959 {
5960 doc_id_t doc_id;
5961 dberr_t error = DB_SUCCESS;
5962
5963 if (*next_doc_id) {
5964 doc_id = *next_doc_id;
5965 } else {
5966 /* Get the new document id that will be added. */
5967 error = fts_get_next_doc_id(table, &doc_id);
5968 }
5969
5970 if (error == DB_SUCCESS) {
5971 dict_index_t* clust_index;
5972 dict_col_t* col = dict_table_get_nth_col(
5973 table, table->fts->doc_col);
5974
5975 ufield->exp = NULL;
5976
5977 ufield->new_val.len = sizeof(doc_id);
5978
5979 clust_index = dict_table_get_first_index(table);
5980
5981 ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5982 dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5983
5984 /* It is possible we update record that has
5985 not yet be sync-ed from last crash. */
5986
5987 /* Convert to storage byte order. */
5988 ut_a(doc_id != FTS_NULL_DOC_ID);
5989 fts_write_doc_id((byte*) next_doc_id, doc_id);
5990
5991 ufield->new_val.data = next_doc_id;
5992 ufield->new_val.ext = 0;
5993 }
5994
5995 return(doc_id);
5996 }
5997
5998 /*********************************************************************//**
5999 Check if the table has an FTS index. This is the non-inline version
6000 of dict_table_has_fts_index().
6001 @return TRUE if table has an FTS index */
6002 ibool
fts_dict_table_has_fts_index(dict_table_t * table)6003 fts_dict_table_has_fts_index(
6004 /*=========================*/
6005 dict_table_t* table) /*!< in: table */
6006 {
6007 return(dict_table_has_fts_index(table));
6008 }
6009
6010 /** fts_t constructor.
6011 @param[in] table table with FTS indexes
6012 @param[in,out] heap memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)6013 fts_t::fts_t(
6014 const dict_table_t* table,
6015 mem_heap_t* heap)
6016 :
6017 bg_threads(0),
6018 fts_status(0),
6019 add_wq(NULL),
6020 cache(NULL),
6021 doc_col(ULINT_UNDEFINED),
6022 fts_heap(heap)
6023 {
6024 ut_a(table->fts == NULL);
6025
6026 mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
6027
6028 ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap);
6029
6030 indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
6031
6032 dict_table_get_all_fts_indexes(table, indexes);
6033 }
6034
6035 /** fts_t destructor. */
~fts_t()6036 fts_t::~fts_t()
6037 {
6038 mutex_free(&bg_threads_mutex);
6039
6040 ut_ad(add_wq == NULL);
6041
6042 if (cache != NULL) {
6043 fts_cache_clear(cache);
6044 fts_cache_destroy(cache);
6045 cache = NULL;
6046 }
6047
6048 /* There is no need to call ib_vector_free() on this->indexes
6049 because it is stored in this->fts_heap. */
6050 }
6051
6052 /*********************************************************************//**
6053 Create an instance of fts_t.
6054 @return instance of fts_t */
6055 fts_t*
fts_create(dict_table_t * table)6056 fts_create(
6057 /*=======*/
6058 dict_table_t* table) /*!< in/out: table with FTS indexes */
6059 {
6060 fts_t* fts;
6061 mem_heap_t* heap;
6062
6063 heap = mem_heap_create(512);
6064
6065 fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
6066
6067 new(fts) fts_t(table, heap);
6068
6069 return(fts);
6070 }
6071
6072 /*********************************************************************//**
6073 Free the FTS resources. */
6074 void
fts_free(dict_table_t * table)6075 fts_free(
6076 /*=====*/
6077 dict_table_t* table) /*!< in/out: table with FTS indexes */
6078 {
6079 fts_t* fts = table->fts;
6080
6081 fts->~fts_t();
6082
6083 mem_heap_free(fts->fts_heap);
6084
6085 table->fts = NULL;
6086 }
6087
6088 /*********************************************************************//**
6089 Signal FTS threads to initiate shutdown. */
6090 void
fts_start_shutdown(dict_table_t * table,fts_t * fts)6091 fts_start_shutdown(
6092 /*===============*/
6093 dict_table_t* table, /*!< in: table with FTS indexes */
6094 fts_t* fts) /*!< in: fts instance that needs
6095 to be informed about shutdown */
6096 {
6097 mutex_enter(&fts->bg_threads_mutex);
6098
6099 fts->fts_status |= BG_THREAD_STOP;
6100
6101 mutex_exit(&fts->bg_threads_mutex);
6102
6103 }
6104
6105 /*********************************************************************//**
6106 Wait for FTS threads to shutdown. */
6107 void
fts_shutdown(dict_table_t * table,fts_t * fts)6108 fts_shutdown(
6109 /*=========*/
6110 dict_table_t* table, /*!< in: table with FTS indexes */
6111 fts_t* fts) /*!< in: fts instance to shutdown */
6112 {
6113 mutex_enter(&fts->bg_threads_mutex);
6114
6115 ut_a(fts->fts_status & BG_THREAD_STOP);
6116
6117 dict_table_wait_for_bg_threads_to_exit(table, 20000);
6118
6119 mutex_exit(&fts->bg_threads_mutex);
6120 }
6121
6122 /*********************************************************************//**
6123 Take a FTS savepoint. */
6124 UNIV_INLINE
6125 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)6126 fts_savepoint_copy(
6127 /*===============*/
6128 const fts_savepoint_t* src, /*!< in: source savepoint */
6129 fts_savepoint_t* dst) /*!< out: destination savepoint */
6130 {
6131 const ib_rbt_node_t* node;
6132 const ib_rbt_t* tables;
6133
6134 tables = src->tables;
6135
6136 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
6137
6138 fts_trx_table_t* ftt_dst;
6139 const fts_trx_table_t** ftt_src;
6140
6141 ftt_src = rbt_value(const fts_trx_table_t*, node);
6142
6143 ftt_dst = fts_trx_table_clone(*ftt_src);
6144
6145 rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
6146 }
6147 }
6148
6149 /*********************************************************************//**
6150 Take a FTS savepoint. */
6151 void
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)6152 fts_savepoint_take(
6153 /*===============*/
6154 trx_t* trx, /*!< in: transaction */
6155 fts_trx_t* fts_trx, /*!< in: fts transaction */
6156 const char* name) /*!< in: savepoint name */
6157 {
6158 mem_heap_t* heap;
6159 fts_savepoint_t* savepoint;
6160 fts_savepoint_t* last_savepoint;
6161
6162 ut_a(name != NULL);
6163
6164 heap = fts_trx->heap;
6165
6166 /* The implied savepoint must exist. */
6167 ut_a(ib_vector_size(fts_trx->savepoints) > 0);
6168
6169 last_savepoint = static_cast<fts_savepoint_t*>(
6170 ib_vector_last(fts_trx->savepoints));
6171 savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
6172
6173 if (last_savepoint->tables != NULL) {
6174 fts_savepoint_copy(last_savepoint, savepoint);
6175 }
6176 }
6177
6178 /*********************************************************************//**
6179 Lookup a savepoint instance by name.
6180 @return ULINT_UNDEFINED if not found */
6181 UNIV_INLINE
6182 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)6183 fts_savepoint_lookup(
6184 /*==================*/
6185 ib_vector_t* savepoints, /*!< in: savepoints */
6186 const char* name) /*!< in: savepoint name */
6187 {
6188 ulint i;
6189
6190 ut_a(ib_vector_size(savepoints) > 0);
6191
6192 for (i = 1; i < ib_vector_size(savepoints); ++i) {
6193 fts_savepoint_t* savepoint;
6194
6195 savepoint = static_cast<fts_savepoint_t*>(
6196 ib_vector_get(savepoints, i));
6197
6198 if (strcmp(name, savepoint->name) == 0) {
6199 return(i);
6200 }
6201 }
6202
6203 return(ULINT_UNDEFINED);
6204 }
6205
6206 /*********************************************************************//**
6207 Release the savepoint data identified by name. All savepoints created
6208 after the named savepoint are kept.
6209 @return DB_SUCCESS or error code */
6210 void
fts_savepoint_release(trx_t * trx,const char * name)6211 fts_savepoint_release(
6212 /*==================*/
6213 trx_t* trx, /*!< in: transaction */
6214 const char* name) /*!< in: savepoint name */
6215 {
6216 ut_a(name != NULL);
6217
6218 ib_vector_t* savepoints = trx->fts_trx->savepoints;
6219
6220 ut_a(ib_vector_size(savepoints) > 0);
6221
6222 ulint i = fts_savepoint_lookup(savepoints, name);
6223 if (i != ULINT_UNDEFINED) {
6224 ut_a(i >= 1);
6225
6226 fts_savepoint_t* savepoint;
6227 savepoint = static_cast<fts_savepoint_t*>(
6228 ib_vector_get(savepoints, i));
6229
6230 if (i == ib_vector_size(savepoints) - 1) {
6231 /* If the savepoint is the last, we save its
6232 tables to the previous savepoint. */
6233 fts_savepoint_t* prev_savepoint;
6234 prev_savepoint = static_cast<fts_savepoint_t*>(
6235 ib_vector_get(savepoints, i - 1));
6236
6237 ib_rbt_t* tables = savepoint->tables;
6238 savepoint->tables = prev_savepoint->tables;
6239 prev_savepoint->tables = tables;
6240 }
6241
6242 fts_savepoint_free(savepoint);
6243 ib_vector_remove(savepoints, *(void**)savepoint);
6244
6245 /* Make sure we don't delete the implied savepoint. */
6246 ut_a(ib_vector_size(savepoints) > 0);
6247 }
6248 }
6249
6250 /**********************************************************************//**
6251 Refresh last statement savepoint. */
6252 void
fts_savepoint_laststmt_refresh(trx_t * trx)6253 fts_savepoint_laststmt_refresh(
6254 /*===========================*/
6255 trx_t* trx) /*!< in: transaction */
6256 {
6257
6258 fts_trx_t* fts_trx;
6259 fts_savepoint_t* savepoint;
6260
6261 fts_trx = trx->fts_trx;
6262
6263 savepoint = static_cast<fts_savepoint_t*>(
6264 ib_vector_pop(fts_trx->last_stmt));
6265 fts_savepoint_free(savepoint);
6266
6267 ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
6268 savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
6269 }
6270
6271 /********************************************************************
6272 Undo the Doc ID add/delete operations in last stmt */
6273 static
6274 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)6275 fts_undo_last_stmt(
6276 /*===============*/
6277 fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
6278 fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
6279 {
6280 ib_rbt_t* s_rows;
6281 ib_rbt_t* l_rows;
6282 const ib_rbt_node_t* node;
6283
6284 l_rows = l_ftt->rows;
6285 s_rows = s_ftt->rows;
6286
6287 for (node = rbt_first(l_rows);
6288 node;
6289 node = rbt_next(l_rows, node)) {
6290 fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
6291 ib_rbt_bound_t parent;
6292
6293 rbt_search(s_rows, &parent, &(l_row->doc_id));
6294
6295 if (parent.result == 0) {
6296 fts_trx_row_t* s_row = rbt_value(
6297 fts_trx_row_t, parent.last);
6298
6299 switch (l_row->state) {
6300 case FTS_INSERT:
6301 ut_free(rbt_remove_node(s_rows, parent.last));
6302 break;
6303
6304 case FTS_DELETE:
6305 if (s_row->state == FTS_NOTHING) {
6306 s_row->state = FTS_INSERT;
6307 } else if (s_row->state == FTS_DELETE) {
6308 ut_free(rbt_remove_node(
6309 s_rows, parent.last));
6310 }
6311 break;
6312
6313 /* FIXME: Check if FTS_MODIFY need to be addressed */
6314 case FTS_MODIFY:
6315 case FTS_NOTHING:
6316 break;
6317 default:
6318 ut_error;
6319 }
6320 }
6321 }
6322 }
6323
6324 /**********************************************************************//**
6325 Rollback to savepoint indentified by name.
6326 @return DB_SUCCESS or error code */
6327 void
fts_savepoint_rollback_last_stmt(trx_t * trx)6328 fts_savepoint_rollback_last_stmt(
6329 /*=============================*/
6330 trx_t* trx) /*!< in: transaction */
6331 {
6332 ib_vector_t* savepoints;
6333 fts_savepoint_t* savepoint;
6334 fts_savepoint_t* last_stmt;
6335 fts_trx_t* fts_trx;
6336 ib_rbt_bound_t parent;
6337 const ib_rbt_node_t* node;
6338 ib_rbt_t* l_tables;
6339 ib_rbt_t* s_tables;
6340
6341 fts_trx = trx->fts_trx;
6342 savepoints = fts_trx->savepoints;
6343
6344 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
6345 last_stmt = static_cast<fts_savepoint_t*>(
6346 ib_vector_last(fts_trx->last_stmt));
6347
6348 l_tables = last_stmt->tables;
6349 s_tables = savepoint->tables;
6350
6351 for (node = rbt_first(l_tables);
6352 node;
6353 node = rbt_next(l_tables, node)) {
6354
6355 fts_trx_table_t** l_ftt;
6356
6357 l_ftt = rbt_value(fts_trx_table_t*, node);
6358
6359 rbt_search_cmp(
6360 s_tables, &parent, &(*l_ftt)->table->id,
6361 fts_trx_table_id_cmp, NULL);
6362
6363 if (parent.result == 0) {
6364 fts_trx_table_t** s_ftt;
6365
6366 s_ftt = rbt_value(fts_trx_table_t*, parent.last);
6367
6368 fts_undo_last_stmt(*s_ftt, *l_ftt);
6369 }
6370 }
6371 }
6372
6373 /**********************************************************************//**
6374 Rollback to savepoint indentified by name.
6375 @return DB_SUCCESS or error code */
6376 void
fts_savepoint_rollback(trx_t * trx,const char * name)6377 fts_savepoint_rollback(
6378 /*===================*/
6379 trx_t* trx, /*!< in: transaction */
6380 const char* name) /*!< in: savepoint name */
6381 {
6382 ulint i;
6383 ib_vector_t* savepoints;
6384
6385 ut_a(name != NULL);
6386
6387 savepoints = trx->fts_trx->savepoints;
6388
6389 /* We pop all savepoints from the the top of the stack up to
6390 and including the instance that was found. */
6391 i = fts_savepoint_lookup(savepoints, name);
6392
6393 if (i != ULINT_UNDEFINED) {
6394 fts_savepoint_t* savepoint;
6395
6396 ut_a(i > 0);
6397
6398 while (ib_vector_size(savepoints) > i) {
6399 fts_savepoint_t* savepoint;
6400
6401 savepoint = static_cast<fts_savepoint_t*>(
6402 ib_vector_pop(savepoints));
6403
6404 if (savepoint->name != NULL) {
6405 /* Since name was allocated on the heap, the
6406 memory will be released when the transaction
6407 completes. */
6408 savepoint->name = NULL;
6409
6410 fts_savepoint_free(savepoint);
6411 }
6412 }
6413
6414 /* Pop all a elements from the top of the stack that may
6415 have been released. We have to be careful that we don't
6416 delete the implied savepoint. */
6417
6418 for (savepoint = static_cast<fts_savepoint_t*>(
6419 ib_vector_last(savepoints));
6420 ib_vector_size(savepoints) > 1
6421 && savepoint->name == NULL;
6422 savepoint = static_cast<fts_savepoint_t*>(
6423 ib_vector_last(savepoints))) {
6424
6425 ib_vector_pop(savepoints);
6426 }
6427
6428 /* Make sure we don't delete the implied savepoint. */
6429 ut_a(ib_vector_size(savepoints) > 0);
6430
6431 /* Restore the savepoint. */
6432 fts_savepoint_take(trx, trx->fts_trx, name);
6433 }
6434 }
6435
6436 /** Check if a table is an FTS auxiliary table name.
6437 @param[out] table FTS table info
6438 @param[in] name Table name
6439 @param[in] len Length of table name
6440 @return true if the name matches an auxiliary table name pattern */
6441 static
6442 bool
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)6443 fts_is_aux_table_name(
6444 fts_aux_table_t* table,
6445 const char* name,
6446 ulint len)
6447 {
6448 const char* ptr;
6449 char* end;
6450 char my_name[MAX_FULL_NAME_LEN + 1];
6451
6452 ut_ad(len <= MAX_FULL_NAME_LEN);
6453 ut_memcpy(my_name, name, len);
6454 my_name[len] = 0;
6455 end = my_name + len;
6456
6457 ptr = static_cast<const char*>(memchr(my_name, '/', len));
6458
6459 if (ptr != NULL) {
6460 /* We will start the match after the '/' */
6461 ++ptr;
6462 len = end - ptr;
6463 }
6464
6465 /* All auxiliary tables are prefixed with "FTS_" and the name
6466 length will be at the very least greater than 20 bytes. */
6467 if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
6468 ulint i;
6469
6470 /* Skip the prefix. */
6471 ptr += 4;
6472 len -= 4;
6473
6474 /* Try and read the table id. */
6475 if (!fts_read_object_id(&table->parent_id, ptr)) {
6476 return(false);
6477 }
6478
6479 /* Skip the table id. */
6480 ptr = static_cast<const char*>(memchr(ptr, '_', len));
6481
6482 if (ptr == NULL) {
6483 return(false);
6484 }
6485
6486 /* Skip the underscore. */
6487 ++ptr;
6488 ut_a(end > ptr);
6489 len = end - ptr;
6490
6491 /* First search the common table suffix array. */
6492 for (i = 0; fts_common_tables[i] != NULL; ++i) {
6493
6494 if (strncmp(ptr, fts_common_tables[i], len) == 0) {
6495 return(true);
6496 }
6497 }
6498
6499 /* Could be obsolete common tables. */
6500 if (strncmp(ptr, "ADDED", len) == 0
6501 || strncmp(ptr, "STOPWORDS", len) == 0) {
6502 return(true);
6503 }
6504
6505 /* Try and read the index id. */
6506 if (!fts_read_object_id(&table->index_id, ptr)) {
6507 return(false);
6508 }
6509
6510 /* Skip the table id. */
6511 ptr = static_cast<const char*>(memchr(ptr, '_', len));
6512
6513 if (ptr == NULL) {
6514 return(false);
6515 }
6516
6517 /* Skip the underscore. */
6518 ++ptr;
6519 ut_a(end > ptr);
6520 len = end - ptr;
6521
6522 /* Search the FT index specific array. */
6523 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
6524
6525 if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
6526 return(true);
6527 }
6528 }
6529
6530 /* Other FT index specific table(s). */
6531 if (strncmp(ptr, "DOC_ID", len) == 0) {
6532 return(true);
6533 }
6534 }
6535
6536 return(false);
6537 }
6538
6539 /**********************************************************************//**
6540 Callback function to read a single table ID column.
6541 @return Always return TRUE */
6542 static
6543 ibool
fts_read_tables(void * row,void * user_arg)6544 fts_read_tables(
6545 /*============*/
6546 void* row, /*!< in: sel_node_t* */
6547 void* user_arg) /*!< in: pointer to ib_vector_t */
6548 {
6549 int i;
6550 fts_aux_table_t*table;
6551 mem_heap_t* heap;
6552 ibool done = FALSE;
6553 ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg);
6554 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
6555 que_node_t* exp = sel_node->select_list;
6556
6557 /* Must be a heap allocated vector. */
6558 ut_a(tables->allocator->arg != NULL);
6559
6560 /* We will use this heap for allocating strings. */
6561 heap = static_cast<mem_heap_t*>(tables->allocator->arg);
6562 table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
6563
6564 memset(table, 0x0, sizeof(*table));
6565
6566 /* Iterate over the columns and read the values. */
6567 for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
6568
6569 dfield_t* dfield = que_node_get_val(exp);
6570 void* data = dfield_get_data(dfield);
6571 ulint len = dfield_get_len(dfield);
6572
6573 ut_a(len != UNIV_SQL_NULL);
6574
6575 /* Note: The column numbers below must match the SELECT */
6576 switch (i) {
6577 case 0: /* NAME */
6578
6579 if (!fts_is_aux_table_name(
6580 table, static_cast<const char*>(data), len)) {
6581 ib_vector_pop(tables);
6582 done = TRUE;
6583 break;
6584 }
6585
6586 table->name = static_cast<char*>(
6587 mem_heap_alloc(heap, len + 1));
6588 memcpy(table->name, data, len);
6589 table->name[len] = 0;
6590 break;
6591
6592 case 1: /* ID */
6593 ut_a(len == 8);
6594 table->id = mach_read_from_8(
6595 static_cast<const byte*>(data));
6596 break;
6597
6598 default:
6599 ut_error;
6600 }
6601 }
6602
6603 return(TRUE);
6604 }
6605
6606 /******************************************************************//**
6607 Callback that sets a hex formatted FTS table's flags2 in
6608 SYS_TABLES. The flags is stored in MIX_LEN column.
6609 @return FALSE if all OK */
6610 static
6611 ibool
fts_set_hex_format(void * row,void * user_arg)6612 fts_set_hex_format(
6613 /*===============*/
6614 void* row, /*!< in: sel_node_t* */
6615 void* user_arg) /*!< in: bool set/unset flag */
6616 {
6617 sel_node_t* node = static_cast<sel_node_t*>(row);
6618 dfield_t* dfield = que_node_get_val(node->select_list);
6619
6620 ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
6621 ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t));
6622 /* There should be at most one matching record. So the value
6623 must be the default value. */
6624 ut_ad(mach_read_from_4(static_cast<byte*>(user_arg))
6625 == ULINT32_UNDEFINED);
6626
6627 ulint flags2 = mach_read_from_4(
6628 static_cast<byte*>(dfield_get_data(dfield)));
6629
6630 flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
6631
6632 mach_write_to_4(static_cast<byte*>(user_arg), flags2);
6633
6634 return(FALSE);
6635 }
6636
6637 /*****************************************************************//**
6638 Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
6639 @return DB_SUCCESS or error code. */
6640 dberr_t
fts_update_hex_format_flag(trx_t * trx,table_id_t table_id,bool dict_locked)6641 fts_update_hex_format_flag(
6642 /*=======================*/
6643 trx_t* trx, /*!< in/out: transaction that
6644 covers the update */
6645 table_id_t table_id, /*!< in: Table for which we want
6646 to set the root table->flags2 */
6647 bool dict_locked) /*!< in: set to true if the
6648 caller already owns the
6649 dict_sys_t::mutex. */
6650 {
6651 pars_info_t* info;
6652 ib_uint32_t flags2;
6653
6654 static const char sql[] =
6655 "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
6656 "DECLARE FUNCTION my_func;\n"
6657 "DECLARE CURSOR c IS\n"
6658 " SELECT MIX_LEN"
6659 " FROM SYS_TABLES"
6660 " WHERE ID = :table_id FOR UPDATE;"
6661 "\n"
6662 "BEGIN\n"
6663 "OPEN c;\n"
6664 "WHILE 1 = 1 LOOP\n"
6665 " FETCH c INTO my_func();\n"
6666 " IF c % NOTFOUND THEN\n"
6667 " EXIT;\n"
6668 " END IF;\n"
6669 "END LOOP;\n"
6670 "UPDATE SYS_TABLES"
6671 " SET MIX_LEN = :flags2"
6672 " WHERE ID = :table_id;\n"
6673 "CLOSE c;\n"
6674 "END;\n";
6675
6676 flags2 = ULINT32_UNDEFINED;
6677
6678 info = pars_info_create();
6679
6680 pars_info_add_ull_literal(info, "table_id", table_id);
6681 pars_info_bind_int4_literal(info, "flags2", &flags2);
6682
6683 pars_info_bind_function(
6684 info, "my_func", fts_set_hex_format, &flags2);
6685
6686 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6687 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6688 }
6689
6690 dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
6691
6692 ut_a(flags2 != ULINT32_UNDEFINED);
6693
6694 return(err);
6695 }
6696
6697 /*********************************************************************//**
6698 Rename an aux table to HEX format. It's called when "%016llu" is used
6699 to format an object id in table name, which only happens in Windows. */
6700 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6701 dberr_t
fts_rename_one_aux_table_to_hex_format(trx_t * trx,const fts_aux_table_t * aux_table,const dict_table_t * parent_table)6702 fts_rename_one_aux_table_to_hex_format(
6703 /*===================================*/
6704 trx_t* trx, /*!< in: transaction */
6705 const fts_aux_table_t* aux_table, /*!< in: table info */
6706 const dict_table_t* parent_table) /*!< in: parent table name */
6707 {
6708 const char* ptr;
6709 fts_table_t fts_table;
6710 char new_name[MAX_FULL_NAME_LEN];
6711 dberr_t error;
6712
6713 ptr = strchr(aux_table->name, '/');
6714 ut_a(ptr != NULL);
6715 ++ptr;
6716 /* Skip "FTS_", table id and underscore */
6717 for (ulint i = 0; i < 2; ++i) {
6718 ptr = strchr(ptr, '_');
6719 ut_a(ptr != NULL);
6720 ++ptr;
6721 }
6722
6723 fts_table.suffix = NULL;
6724 if (aux_table->index_id == 0) {
6725 fts_table.type = FTS_COMMON_TABLE;
6726
6727 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
6728 if (strcmp(ptr, fts_common_tables[i]) == 0) {
6729 fts_table.suffix = fts_common_tables[i];
6730 break;
6731 }
6732 }
6733 } else {
6734 fts_table.type = FTS_INDEX_TABLE;
6735
6736 /* Skip index id and underscore */
6737 ptr = strchr(ptr, '_');
6738 ut_a(ptr != NULL);
6739 ++ptr;
6740
6741 for (ulint i = 0; fts_index_selector[i].value; ++i) {
6742 if (strcmp(ptr, fts_get_suffix(i)) == 0) {
6743 fts_table.suffix = fts_get_suffix(i);
6744 break;
6745 }
6746 }
6747 }
6748
6749 ut_a(fts_table.suffix != NULL);
6750
6751 fts_table.parent = parent_table->name.m_name;
6752 fts_table.table_id = aux_table->parent_id;
6753 fts_table.index_id = aux_table->index_id;
6754 fts_table.table = parent_table;
6755
6756 fts_get_table_name(&fts_table, new_name);
6757 ut_ad(strcmp(new_name, aux_table->name) != 0);
6758
6759 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6760 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6761 }
6762
6763 error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
6764 FALSE);
6765
6766 if (error != DB_SUCCESS) {
6767 ib::warn() << "Failed to rename aux table '"
6768 << aux_table->name << "' to new format '"
6769 << new_name << "'.";
6770 } else {
6771 ib::info() << "Renamed aux table '" << aux_table->name
6772 << "' to '" << new_name << "'.";
6773 }
6774
6775 return(error);
6776 }
6777
6778 /**********************************************************************//**
6779 Rename all aux tables of a parent table to HEX format. Also set aux tables'
6780 flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
6781 It's called when "%016llu" is used to format an object id in table name,
6782 which only happens in Windows.
6783 Note the ids in tables are correct but the names are old ambiguous ones.
6784
6785 This function should make sure that either all the parent table and aux tables
6786 are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */
6787 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6788 dberr_t
fts_rename_aux_tables_to_hex_format_low(trx_t * trx,dict_table_t * parent_table,ib_vector_t * tables)6789 fts_rename_aux_tables_to_hex_format_low(
6790 /*====================================*/
6791 trx_t* trx, /*!< in: transaction */
6792 dict_table_t* parent_table, /*!< in: parent table */
6793 ib_vector_t* tables) /*!< in: aux tables to rename. */
6794 {
6795 dberr_t error;
6796 ulint count;
6797
6798 ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
6799 ut_ad(!ib_vector_is_empty(tables));
6800
6801 error = fts_update_hex_format_flag(trx, parent_table->id, true);
6802
6803 if (error != DB_SUCCESS) {
6804 ib::warn() << "Setting parent table " << parent_table->name
6805 << " to hex format failed.";
6806 fts_sql_rollback(trx);
6807 return(error);
6808 }
6809
6810 DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6811
6812 for (count = 0; count < ib_vector_size(tables); ++count) {
6813 dict_table_t* table;
6814 fts_aux_table_t* aux_table;
6815
6816 aux_table = static_cast<fts_aux_table_t*>(
6817 ib_vector_get(tables, count));
6818
6819 table = dict_table_open_on_id(aux_table->id, TRUE,
6820 DICT_TABLE_OP_NORMAL);
6821
6822 ut_ad(table != NULL);
6823 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));
6824
6825 /* Set HEX_NAME flag here to make sure we can get correct
6826 new table name in following function */
6827 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6828 error = fts_rename_one_aux_table_to_hex_format(trx,
6829 aux_table, parent_table);
6830 /* We will rollback the trx if the error != DB_SUCCESS,
6831 so setting the flag here is the same with setting it in
6832 row_rename_table_for_mysql */
6833 DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);
6834
6835 if (error != DB_SUCCESS) {
6836 dict_table_close(table, TRUE, FALSE);
6837
6838 ib::warn() << "Failed to rename one aux table "
6839 << aux_table->name << ". Will revert"
6840 " all successful rename operations.";
6841
6842 fts_sql_rollback(trx);
6843 break;
6844 }
6845
6846 error = fts_update_hex_format_flag(trx, aux_table->id, true);
6847 dict_table_close(table, TRUE, FALSE);
6848
6849 if (error != DB_SUCCESS) {
6850 ib::warn() << "Setting aux table " << aux_table->name
6851 << " to hex format failed.";
6852
6853 fts_sql_rollback(trx);
6854 break;
6855 }
6856 }
6857
6858 if (error != DB_SUCCESS) {
6859 ut_ad(count != ib_vector_size(tables));
6860
6861 /* If rename fails, thr trx would be rolled back, we can't
6862 use it any more, we'll start a new background trx to do
6863 the reverting. */
6864
6865 ut_ad(!trx_is_started(trx));
6866
6867 bool not_rename = false;
6868
6869 /* Try to revert those succesful rename operations
6870 in order to revert the ibd file rename. */
6871 for (ulint i = 0; i <= count; ++i) {
6872 dict_table_t* table;
6873 fts_aux_table_t* aux_table;
6874 trx_t* trx_bg;
6875 dberr_t err;
6876
6877 aux_table = static_cast<fts_aux_table_t*>(
6878 ib_vector_get(tables, i));
6879
6880 table = dict_table_open_on_id(aux_table->id, TRUE,
6881 DICT_TABLE_OP_NORMAL);
6882 ut_ad(table != NULL);
6883
6884 if (not_rename) {
6885 DICT_TF2_FLAG_UNSET(table,
6886 DICT_TF2_FTS_AUX_HEX_NAME);
6887 }
6888
6889 if (!DICT_TF2_FLAG_IS_SET(table,
6890 DICT_TF2_FTS_AUX_HEX_NAME)) {
6891 dict_table_close(table, TRUE, FALSE);
6892 continue;
6893 }
6894
6895 trx_bg = trx_allocate_for_background();
6896 trx_bg->op_info = "Revert half done rename";
6897 trx_bg->dict_operation_lock_mode = RW_X_LATCH;
6898 trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
6899
6900 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6901 err = row_rename_table_for_mysql(table->name.m_name,
6902 aux_table->name,
6903 trx_bg, FALSE);
6904
6905 trx_bg->dict_operation_lock_mode = 0;
6906 dict_table_close(table, TRUE, FALSE);
6907
6908 if (err != DB_SUCCESS) {
6909 ib::warn() << "Failed to revert table "
6910 << table->name << ". Please revert"
6911 " manually.";
6912 fts_sql_rollback(trx_bg);
6913 trx_free_for_background(trx_bg);
6914 /* Continue to clear aux tables' flags2 */
6915 not_rename = true;
6916 continue;
6917 }
6918
6919 fts_sql_commit(trx_bg);
6920 trx_free_for_background(trx_bg);
6921 }
6922
6923 DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6924 }
6925
6926 return(error);
6927 }
6928
6929 /**********************************************************************//**
6930 Convert an id, which is actually a decimal number but was regard as a HEX
6931 from a string, to its real value. */
6932 static
6933 ib_id_t
fts_fake_hex_to_dec(ib_id_t id)6934 fts_fake_hex_to_dec(
6935 /*================*/
6936 ib_id_t id) /*!< in: number to convert */
6937 {
6938 ib_id_t dec_id = 0;
6939 char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
6940
6941 #ifdef UNIV_DEBUG
6942 int ret =
6943 #endif /* UNIV_DEBUG */
6944 sprintf(tmp_id, UINT64PFx, id);
6945 ut_ad(ret == 16);
6946 #ifdef UNIV_DEBUG
6947 ret =
6948 #endif /* UNIV_DEBUG */
6949 #ifdef _WIN32
6950 sscanf(tmp_id, "%016llu", &dec_id);
6951 #else
6952 sscanf(tmp_id, "%016" PRIu64, &dec_id);
6953 #endif /* _WIN32 */
6954 ut_ad(ret == 1);
6955
6956 return dec_id;
6957 }
6958
6959 /*********************************************************************//**
6960 Compare two fts_aux_table_t parent_ids.
6961 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
6962 UNIV_INLINE
6963 int
fts_check_aux_table_parent_id_cmp(const void * p1,const void * p2)6964 fts_check_aux_table_parent_id_cmp(
6965 /*==============================*/
6966 const void* p1, /*!< in: id1 */
6967 const void* p2) /*!< in: id2 */
6968 {
6969 const fts_aux_table_t* fa1 = static_cast<const fts_aux_table_t*>(p1);
6970 const fts_aux_table_t* fa2 = static_cast<const fts_aux_table_t*>(p2);
6971
6972 return static_cast<int>(fa1->parent_id - fa2->parent_id);
6973 }
6974
6975 /** Mark all the fts index associated with the parent table as corrupted.
6976 @param[in] trx transaction
6977 @param[in, out] parent_table fts index associated with this parent table
6978 will be marked as corrupted. */
6979 static
6980 void
fts_parent_all_index_set_corrupt(trx_t * trx,dict_table_t * parent_table)6981 fts_parent_all_index_set_corrupt(
6982 trx_t* trx,
6983 dict_table_t* parent_table)
6984 {
6985 fts_t* fts = parent_table->fts;
6986
6987 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6988 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6989 }
6990
6991 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6992 dict_index_t* index = static_cast<dict_index_t*>(
6993 ib_vector_getp_const(fts->indexes, j));
6994 dict_set_corrupted(index,
6995 trx, "DROP ORPHANED TABLE");
6996 }
6997 }
6998
6999 /** Mark the fts index which index id matches the id as corrupted.
7000 @param[in] trx transaction
7001 @param[in] id index id to search
7002 @param[in, out] parent_table parent table to check with all
7003 the index. */
7004 static
7005 void
fts_set_index_corrupt(trx_t * trx,index_id_t id,dict_table_t * table)7006 fts_set_index_corrupt(
7007 trx_t* trx,
7008 index_id_t id,
7009 dict_table_t* table)
7010 {
7011 fts_t* fts = table->fts;
7012
7013 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
7014 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
7015 }
7016
7017 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
7018 dict_index_t* index = static_cast<dict_index_t*>(
7019 ib_vector_getp_const(fts->indexes, j));
7020 if (index->id == id) {
7021 dict_set_corrupted(index, trx,
7022 "DROP ORPHANED TABLE");
7023 break;
7024 }
7025 }
7026 }
7027
7028 /** Check the index for the aux table is corrupted.
7029 @param[in] aux_table auxiliary table
7030 @retval nonzero if index is corrupted, zero for valid index */
7031 static
7032 ulint
fts_check_corrupt_index(fts_aux_table_t * aux_table)7033 fts_check_corrupt_index(
7034 fts_aux_table_t* aux_table)
7035 {
7036 dict_table_t* table;
7037 dict_index_t* index;
7038 table = dict_table_open_on_id(
7039 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7040
7041 if (table == NULL) {
7042 return(0);
7043 }
7044
7045 for (index = UT_LIST_GET_FIRST(table->indexes);
7046 index;
7047 index = UT_LIST_GET_NEXT(indexes, index)) {
7048 if (index->id == aux_table->index_id) {
7049 ut_ad(index->type & DICT_FTS);
7050 dict_table_close(table, true, false);
7051 return(dict_index_is_corrupted(index));
7052 }
7053 }
7054
7055 dict_table_close(table, true, false);
7056 return(0);
7057 }
7058
7059 /* Get parent table name if it's a fts aux table
7060 @param[in] aux_table_name aux table name
7061 @param[in] aux_table_len aux table length
7062 @return parent table name, or NULL */
7063 char*
fts_get_parent_table_name(const char * aux_table_name,ulint aux_table_len)7064 fts_get_parent_table_name(
7065 const char* aux_table_name,
7066 ulint aux_table_len)
7067 {
7068 fts_aux_table_t aux_table;
7069 char* parent_table_name = NULL;
7070
7071 if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
7072 dict_table_t* parent_table;
7073
7074 parent_table = dict_table_open_on_id(
7075 aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7076
7077 if (parent_table != NULL) {
7078 parent_table_name = mem_strdupl(
7079 parent_table->name.m_name,
7080 strlen(parent_table->name.m_name));
7081
7082 dict_table_close(parent_table, TRUE, FALSE);
7083 }
7084 }
7085
7086 return(parent_table_name);
7087 }
7088
7089 /** Check the validity of the parent table.
7090 @param[in] aux_table auxiliary table
7091 @return true if it is a valid table or false if it is not */
7092 static
7093 bool
fts_valid_parent_table(const fts_aux_table_t * aux_table)7094 fts_valid_parent_table(
7095 const fts_aux_table_t* aux_table)
7096 {
7097 dict_table_t* parent_table;
7098 bool valid = false;
7099
7100 parent_table = dict_table_open_on_id(
7101 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7102
7103 if (parent_table != NULL && parent_table->fts != NULL) {
7104 if (aux_table->index_id == 0) {
7105 valid = true;
7106 } else {
7107 index_id_t id = aux_table->index_id;
7108 dict_index_t* index;
7109
7110 /* Search for the FT index in the table's list. */
7111 for (index = UT_LIST_GET_FIRST(parent_table->indexes);
7112 index;
7113 index = UT_LIST_GET_NEXT(indexes, index)) {
7114 if (index->id == id) {
7115 valid = true;
7116 break;
7117 }
7118
7119 }
7120 }
7121 }
7122
7123 if (parent_table) {
7124 dict_table_close(parent_table, TRUE, FALSE);
7125 }
7126
7127 return(valid);
7128 }
7129
7130 /** Try to rename all aux tables of the specified parent table.
7131 @param[in] aux_tables aux_tables to be renamed
7132 @param[in] parent_table parent table of all aux
7133 tables stored in tables. */
7134 static
7135 void
fts_rename_aux_tables_to_hex_format(ib_vector_t * aux_tables,dict_table_t * parent_table)7136 fts_rename_aux_tables_to_hex_format(
7137 ib_vector_t* aux_tables,
7138 dict_table_t* parent_table)
7139 {
7140 dberr_t err;
7141 trx_t* trx_rename = trx_allocate_for_background();
7142 trx_rename->op_info = "Rename aux tables to hex format";
7143 trx_rename->dict_operation_lock_mode = RW_X_LATCH;
7144 trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
7145
7146 err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
7147 parent_table, aux_tables);
7148
7149 trx_rename->dict_operation_lock_mode = 0;
7150
7151 if (err != DB_SUCCESS) {
7152
7153 ib::warn() << "Rollback operations on all aux tables of "
7154 "table "<< parent_table->name << ". All the fts index "
7155 "associated with the table are marked as corrupted. "
7156 "Please rebuild the index again.";
7157
7158 /* Corrupting the fts index related to parent table. */
7159 trx_t* trx_corrupt;
7160 trx_corrupt = trx_allocate_for_background();
7161 trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
7162 trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
7163 fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
7164 trx_corrupt->dict_operation_lock_mode = 0;
7165 fts_sql_commit(trx_corrupt);
7166 trx_free_for_background(trx_corrupt);
7167 } else {
7168 fts_sql_commit(trx_rename);
7169 }
7170
7171 trx_free_for_background(trx_rename);
7172 ib_vector_reset(aux_tables);
7173 }
7174
7175 /** Set the hex format flag for the parent table.
7176 @param[in, out] parent_table parent table
7177 @param[in] trx transaction */
7178 static
7179 void
fts_set_parent_hex_format_flag(dict_table_t * parent_table,trx_t * trx)7180 fts_set_parent_hex_format_flag(
7181 dict_table_t* parent_table,
7182 trx_t* trx)
7183 {
7184 if (!DICT_TF2_FLAG_IS_SET(parent_table,
7185 DICT_TF2_FTS_AUX_HEX_NAME)) {
7186 DBUG_EXECUTE_IF("parent_table_flag_fail", DBUG_SUICIDE(););
7187
7188 dberr_t err = fts_update_hex_format_flag(
7189 trx, parent_table->id, true);
7190
7191 if (err != DB_SUCCESS) {
7192 ib::fatal() << "Setting parent table "
7193 << parent_table->name
7194 << "to hex format failed. Please try "
7195 << "to restart the server again, if it "
7196 << "doesn't work, the system tables "
7197 << "might be corrupted.";
7198 } else {
7199 DICT_TF2_FLAG_SET(
7200 parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
7201 }
7202 }
7203 }
7204
7205 /** Drop the obsolete auxilary table.
7206 @param[in] tables tables to be dropped. */
7207 static
7208 void
fts_drop_obsolete_aux_table_from_vector(ib_vector_t * tables)7209 fts_drop_obsolete_aux_table_from_vector(
7210 ib_vector_t* tables)
7211 {
7212 dberr_t err;
7213
7214 for (ulint count = 0; count < ib_vector_size(tables);
7215 ++count) {
7216
7217 fts_aux_table_t* aux_drop_table;
7218 aux_drop_table = static_cast<fts_aux_table_t*>(
7219 ib_vector_get(tables, count));
7220 trx_t* trx_drop = trx_allocate_for_background();
7221 trx_drop->op_info = "Drop obsolete aux tables";
7222 trx_drop->dict_operation_lock_mode = RW_X_LATCH;
7223 trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
7224
7225 err = row_drop_table_for_mysql(
7226 aux_drop_table->name, trx_drop, false, true);
7227
7228 trx_drop->dict_operation_lock_mode = 0;
7229
7230 if (err != DB_SUCCESS) {
7231 /* We don't need to worry about the
7232 failure, since server would try to
7233 drop it on next restart, even if
7234 the table was broken. */
7235 ib::warn() << "Failed to drop obsolete aux table "
7236 << aux_drop_table->name << ", which is "
7237 << "harmless. will try to drop it on next "
7238 << "restart.";
7239
7240 fts_sql_rollback(trx_drop);
7241 } else {
7242 ib::info() << "Dropped obsolete aux"
7243 " table '" << aux_drop_table->name
7244 << "'.";
7245
7246 fts_sql_commit(trx_drop);
7247 }
7248
7249 trx_free_for_background(trx_drop);
7250 }
7251 }
7252
7253 /** Drop all the auxiliary table present in the vector.
7254 @param[in] trx transaction
7255 @param[in] tables tables to be dropped */
7256 static
7257 void
fts_drop_aux_table_from_vector(trx_t * trx,ib_vector_t * tables)7258 fts_drop_aux_table_from_vector(
7259 trx_t* trx,
7260 ib_vector_t* tables)
7261 {
7262 for (ulint count = 0; count < ib_vector_size(tables);
7263 ++count) {
7264 fts_aux_table_t* aux_drop_table;
7265 aux_drop_table = static_cast<fts_aux_table_t*>(
7266 ib_vector_get(tables, count));
7267
7268 /* Check for the validity of the parent table */
7269 if (!fts_valid_parent_table(aux_drop_table)) {
7270
7271 ib::warn() << "Parent table of FTS auxiliary table "
7272 << aux_drop_table->name << " not found.";
7273
7274 dberr_t err = fts_drop_table(trx, aux_drop_table->name);
7275 if (err == DB_FAIL) {
7276
7277 char* path = fil_make_filepath(
7278 NULL, aux_drop_table->name, IBD, false);
7279
7280 if (path != NULL) {
7281 os_file_delete_if_exists(
7282 innodb_data_file_key,
7283 path , NULL);
7284 ut_free(path);
7285 }
7286 }
7287 }
7288 }
7289 }
7290
7291 /**********************************************************************//**
7292 Check and drop all orphaned FTS auxiliary tables, those that don't have
7293 a parent table or FTS index defined on them.
7294 @return DB_SUCCESS or error code */
7295 static MY_ATTRIBUTE((nonnull))
7296 void
fts_check_and_drop_orphaned_tables(trx_t * trx,ib_vector_t * tables)7297 fts_check_and_drop_orphaned_tables(
7298 /*===============================*/
7299 trx_t* trx, /*!< in: transaction */
7300 ib_vector_t* tables) /*!< in: tables to check */
7301 {
7302 mem_heap_t* heap;
7303 ib_vector_t* aux_tables_to_rename;
7304 ib_vector_t* invalid_aux_tables;
7305 ib_vector_t* valid_aux_tables;
7306 ib_vector_t* drop_aux_tables;
7307 ib_vector_t* obsolete_aux_tables;
7308 ib_alloc_t* heap_alloc;
7309
7310 heap = mem_heap_create(1024);
7311 heap_alloc = ib_heap_allocator_create(heap);
7312
7313 /* We store all aux tables belonging to the same parent table here,
7314 and rename all these tables in a batch mode. */
7315 aux_tables_to_rename = ib_vector_create(heap_alloc,
7316 sizeof(fts_aux_table_t), 128);
7317
7318 /* We store all fake auxiliary table and orphaned table here. */
7319 invalid_aux_tables = ib_vector_create(heap_alloc,
7320 sizeof(fts_aux_table_t), 128);
7321
7322 /* We store all valid aux tables. We use this to filter the
7323 fake auxiliary table from invalid auxiliary tables. */
7324 valid_aux_tables = ib_vector_create(heap_alloc,
7325 sizeof(fts_aux_table_t), 128);
7326
7327 /* We store all auxiliary tables to be dropped. */
7328 drop_aux_tables = ib_vector_create(heap_alloc,
7329 sizeof(fts_aux_table_t), 128);
7330
7331 /* We store all obsolete auxiliary tables to be dropped. */
7332 obsolete_aux_tables = ib_vector_create(heap_alloc,
7333 sizeof(fts_aux_table_t), 128);
7334
7335 /* Sort by parent_id first, in case rename will fail */
7336 ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
7337
7338 for (ulint i = 0; i < ib_vector_size(tables); ++i) {
7339 dict_table_t* parent_table;
7340 fts_aux_table_t* aux_table;
7341 bool drop = false;
7342 dict_table_t* table;
7343 fts_aux_table_t* next_aux_table = NULL;
7344 ib_id_t orig_parent_id = 0;
7345 ib_id_t orig_index_id = 0;
7346 bool rename = false;
7347
7348 aux_table = static_cast<fts_aux_table_t*>(
7349 ib_vector_get(tables, i));
7350
7351 table = dict_table_open_on_id(
7352 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7353 orig_parent_id = aux_table->parent_id;
7354 orig_index_id = aux_table->index_id;
7355
7356 if (table == NULL
7357 || strcmp(table->name.m_name, aux_table->name)) {
7358
7359 bool fake_aux = false;
7360
7361 if (table != NULL) {
7362 dict_table_close(table, TRUE, FALSE);
7363 }
7364
7365 if (i + 1 < ib_vector_size(tables)) {
7366 next_aux_table = static_cast<fts_aux_table_t*>(
7367 ib_vector_get(tables, i + 1));
7368 }
7369
7370 /* To know whether aux table is fake fts or
7371 orphan fts table. */
7372 for (ulint count = 0;
7373 count < ib_vector_size(valid_aux_tables);
7374 count++) {
7375 fts_aux_table_t* valid_aux;
7376 valid_aux = static_cast<fts_aux_table_t*>(
7377 ib_vector_get(valid_aux_tables, count));
7378 if (strcmp(valid_aux->name,
7379 aux_table->name) == 0) {
7380 fake_aux = true;
7381 break;
7382 }
7383 }
7384
7385 /* All aux tables of parent table, whose id is
7386 last_parent_id, have been checked, try to rename
7387 them if necessary. */
7388 if ((next_aux_table == NULL
7389 || orig_parent_id != next_aux_table->parent_id)
7390 && (!ib_vector_is_empty(aux_tables_to_rename))) {
7391
7392 ib_id_t parent_id = fts_fake_hex_to_dec(
7393 aux_table->parent_id);
7394
7395 parent_table = dict_table_open_on_id(
7396 parent_id, TRUE,
7397 DICT_TABLE_OP_NORMAL);
7398
7399 fts_rename_aux_tables_to_hex_format(
7400 aux_tables_to_rename, parent_table);
7401
7402 dict_table_close(parent_table, TRUE,
7403 FALSE);
7404 }
7405
7406 /* If the aux table is fake aux table. Skip it. */
7407 if (!fake_aux) {
7408 ib_vector_push(invalid_aux_tables, aux_table);
7409 }
7410
7411 continue;
7412 } else if (!DICT_TF2_FLAG_IS_SET(table,
7413 DICT_TF2_FTS_AUX_HEX_NAME)) {
7414
7415 aux_table->parent_id = fts_fake_hex_to_dec(
7416 aux_table->parent_id);
7417
7418 if (aux_table->index_id != 0) {
7419 aux_table->index_id = fts_fake_hex_to_dec(
7420 aux_table->index_id);
7421 }
7422
7423 ut_ad(aux_table->id > aux_table->parent_id);
7424
7425 /* Check whether parent table id and index id
7426 are stored as decimal format. */
7427 if (fts_valid_parent_table(aux_table)) {
7428
7429 parent_table = dict_table_open_on_id(
7430 aux_table->parent_id, true,
7431 DICT_TABLE_OP_NORMAL);
7432
7433 ut_ad(parent_table != NULL);
7434 ut_ad(parent_table->fts != NULL);
7435
7436 if (!DICT_TF2_FLAG_IS_SET(
7437 parent_table,
7438 DICT_TF2_FTS_AUX_HEX_NAME)) {
7439 rename = true;
7440 }
7441
7442 dict_table_close(parent_table, TRUE, FALSE);
7443 }
7444
7445 if (!rename) {
7446 /* Reassign the original value of
7447 aux table if it is not in decimal format */
7448 aux_table->parent_id = orig_parent_id;
7449 aux_table->index_id = orig_index_id;
7450 }
7451 }
7452
7453 if (table != NULL) {
7454 dict_table_close(table, TRUE, FALSE);
7455 }
7456
7457 if (!rename) {
7458 /* Check the validity of the parent table. */
7459 if (!fts_valid_parent_table(aux_table)) {
7460 drop = true;
7461 }
7462 }
7463
7464 /* Filter out the fake aux table by comparing with the
7465 current valid auxiliary table name. */
7466 for (ulint count = 0;
7467 count < ib_vector_size(invalid_aux_tables); count++) {
7468 fts_aux_table_t* invalid_aux;
7469 invalid_aux = static_cast<fts_aux_table_t*>(
7470 ib_vector_get(invalid_aux_tables, count));
7471 if (strcmp(invalid_aux->name, aux_table->name) == 0) {
7472 ib_vector_remove(
7473 invalid_aux_tables,
7474 *reinterpret_cast<void**>(invalid_aux));
7475 break;
7476 }
7477 }
7478
7479 ib_vector_push(valid_aux_tables, aux_table);
7480
7481 /* If the index associated with aux table is corrupted,
7482 skip it. */
7483 if (fts_check_corrupt_index(aux_table) > 0) {
7484
7485 if (i + 1 < ib_vector_size(tables)) {
7486 next_aux_table = static_cast<fts_aux_table_t*>(
7487 ib_vector_get(tables, i + 1));
7488 }
7489
7490 if (next_aux_table == NULL
7491 || orig_parent_id != next_aux_table->parent_id) {
7492
7493 parent_table = dict_table_open_on_id(
7494 aux_table->parent_id, TRUE,
7495 DICT_TABLE_OP_NORMAL);
7496
7497 if (!ib_vector_is_empty(aux_tables_to_rename)) {
7498 fts_rename_aux_tables_to_hex_format(
7499 aux_tables_to_rename, parent_table);
7500 } else {
7501 fts_set_parent_hex_format_flag(
7502 parent_table, trx);
7503 }
7504
7505 dict_table_close(parent_table, TRUE, FALSE);
7506 }
7507
7508 continue;
7509 }
7510
7511 parent_table = dict_table_open_on_id(
7512 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7513
7514 if (drop) {
7515 ib_vector_push(drop_aux_tables, aux_table);
7516 } else {
7517 if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
7518 ib_vector_push(obsolete_aux_tables, aux_table);
7519 continue;
7520 }
7521 }
7522
7523 /* If the aux table is in decimal format, we should
7524 rename it, so push it to aux_tables_to_rename */
7525 if (!drop && rename) {
7526 bool rename_table = true;
7527 for (ulint count = 0;
7528 count < ib_vector_size(aux_tables_to_rename);
7529 count++) {
7530 fts_aux_table_t* rename_aux =
7531 static_cast<fts_aux_table_t*>(
7532 ib_vector_get(aux_tables_to_rename,
7533 count));
7534 if (strcmp(rename_aux->name,
7535 aux_table->name) == 0) {
7536 rename_table = false;
7537 break;
7538 }
7539 }
7540
7541 if (rename_table) {
7542 ib_vector_push(aux_tables_to_rename,
7543 aux_table);
7544 }
7545 }
7546
7547 if (i + 1 < ib_vector_size(tables)) {
7548 next_aux_table = static_cast<fts_aux_table_t*>(
7549 ib_vector_get(tables, i + 1));
7550 }
7551
7552 if ((next_aux_table == NULL
7553 || orig_parent_id != next_aux_table->parent_id)
7554 && !ib_vector_is_empty(aux_tables_to_rename)) {
7555
7556 ut_ad(rename);
7557 ut_ad(!DICT_TF2_FLAG_IS_SET(
7558 parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
7559
7560 fts_rename_aux_tables_to_hex_format(
7561 aux_tables_to_rename,parent_table);
7562 }
7563
7564 /* The IDs are already in correct hex format. */
7565 if (!drop && !rename) {
7566 dict_table_t* table;
7567
7568 table = dict_table_open_on_id(
7569 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7570
7571 if (table != NULL
7572 && strcmp(table->name.m_name, aux_table->name)) {
7573 dict_table_close(table, TRUE, FALSE);
7574 table = NULL;
7575 }
7576
7577 if (table != NULL
7578 && !DICT_TF2_FLAG_IS_SET(
7579 table,
7580 DICT_TF2_FTS_AUX_HEX_NAME)) {
7581
7582 DBUG_EXECUTE_IF("aux_table_flag_fail",
7583 ib::warn() << "Setting aux table "
7584 << table->name << " to hex "
7585 "format failed.";
7586 fts_set_index_corrupt(
7587 trx, aux_table->index_id,
7588 parent_table);
7589 goto table_exit;);
7590
7591 dberr_t err = fts_update_hex_format_flag(
7592 trx, table->id, true);
7593
7594 if (err != DB_SUCCESS) {
7595 ib::warn() << "Setting aux table "
7596 << table->name << " to hex "
7597 "format failed.";
7598
7599 fts_set_index_corrupt(
7600 trx, aux_table->index_id,
7601 parent_table);
7602 } else {
7603 DICT_TF2_FLAG_SET(table,
7604 DICT_TF2_FTS_AUX_HEX_NAME);
7605 }
7606 }
7607 #ifndef NDEBUG
7608 table_exit:
7609 #endif /* !NDEBUG */
7610
7611 if (table != NULL) {
7612 dict_table_close(table, TRUE, FALSE);
7613 }
7614
7615 ut_ad(parent_table != NULL);
7616
7617 fts_set_parent_hex_format_flag(
7618 parent_table, trx);
7619 }
7620
7621 if (parent_table != NULL) {
7622 dict_table_close(parent_table, TRUE, FALSE);
7623 }
7624 }
7625
7626 fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
7627 fts_drop_aux_table_from_vector(trx, drop_aux_tables);
7628 fts_sql_commit(trx);
7629
7630 fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
7631
7632 /* Free the memory allocated at the beginning */
7633 if (heap != NULL) {
7634 mem_heap_free(heap);
7635 }
7636 }
7637
7638 /**********************************************************************//**
7639 Drop all orphaned FTS auxiliary tables, those that don't have a parent
7640 table or FTS index defined on them. */
7641 void
fts_drop_orphaned_tables(void)7642 fts_drop_orphaned_tables(void)
7643 /*==========================*/
7644 {
7645 trx_t* trx;
7646 pars_info_t* info;
7647 mem_heap_t* heap;
7648 que_t* graph;
7649 ib_vector_t* tables;
7650 ib_alloc_t* heap_alloc;
7651 space_name_list_t space_name_list;
7652 dberr_t error = DB_SUCCESS;
7653
7654 /* Note: We have to free the memory after we are done with the list. */
7655 error = fil_get_space_names(space_name_list);
7656
7657 if (error == DB_OUT_OF_MEMORY) {
7658 ib::fatal() << "Out of memory";
7659 }
7660
7661 heap = mem_heap_create(1024);
7662 heap_alloc = ib_heap_allocator_create(heap);
7663
7664 /* We store the table ids of all the FTS indexes that were found. */
7665 tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
7666
7667 /* Get the list of all known .ibd files and check for orphaned
7668 FTS auxiliary files in that list. We need to remove them because
7669 users can't map them back to table names and this will create
7670 unnecessary clutter. */
7671
7672 for (space_name_list_t::iterator it = space_name_list.begin();
7673 it != space_name_list.end();
7674 ++it) {
7675
7676 fts_aux_table_t* fts_aux_table;
7677
7678 fts_aux_table = static_cast<fts_aux_table_t*>(
7679 ib_vector_push(tables, NULL));
7680
7681 memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
7682
7683 if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
7684 ib_vector_pop(tables);
7685 } else {
7686 ulint len = strlen(*it);
7687
7688 fts_aux_table->id = fil_space_get_id_by_name(*it);
7689
7690 /* We got this list from fil0fil.cc. The tablespace
7691 with this name must exist. */
7692 ut_a(fts_aux_table->id != ULINT_UNDEFINED);
7693
7694 fts_aux_table->name = static_cast<char*>(
7695 mem_heap_dup(heap, *it, len + 1));
7696
7697 fts_aux_table->name[len] = 0;
7698 }
7699 }
7700
7701 trx = trx_allocate_for_background();
7702 trx->op_info = "dropping orphaned FTS tables";
7703 row_mysql_lock_data_dictionary(trx);
7704
7705 info = pars_info_create();
7706
7707 pars_info_bind_function(info, "my_func", fts_read_tables, tables);
7708
7709 graph = fts_parse_sql_no_dict_lock(
7710 NULL,
7711 info,
7712 "DECLARE FUNCTION my_func;\n"
7713 "DECLARE CURSOR c IS"
7714 " SELECT NAME, ID"
7715 " FROM SYS_TABLES;\n"
7716 "BEGIN\n"
7717 "\n"
7718 "OPEN c;\n"
7719 "WHILE 1 = 1 LOOP\n"
7720 " FETCH c INTO my_func();\n"
7721 " IF c % NOTFOUND THEN\n"
7722 " EXIT;\n"
7723 " END IF;\n"
7724 "END LOOP;\n"
7725 "CLOSE c;");
7726
7727 for (;;) {
7728 error = fts_eval_sql(trx, graph);
7729
7730 if (error == DB_SUCCESS) {
7731 fts_check_and_drop_orphaned_tables(trx, tables);
7732 break; /* Exit the loop. */
7733 } else {
7734 ib_vector_reset(tables);
7735
7736 fts_sql_rollback(trx);
7737
7738 if (error == DB_LOCK_WAIT_TIMEOUT) {
7739 ib::warn() << "lock wait timeout reading"
7740 " SYS_TABLES. Retrying!";
7741
7742 trx->error_state = DB_SUCCESS;
7743 } else {
7744 ib::error() << "(" << ut_strerr(error)
7745 << ") while reading SYS_TABLES.";
7746
7747 break; /* Exit the loop. */
7748 }
7749 }
7750 }
7751
7752 que_graph_free(graph);
7753
7754 row_mysql_unlock_data_dictionary(trx);
7755
7756 trx_free_for_background(trx);
7757
7758 if (heap != NULL) {
7759 mem_heap_free(heap);
7760 }
7761
7762 /** Free the memory allocated to store the .ibd names. */
7763 for (space_name_list_t::iterator it = space_name_list.begin();
7764 it != space_name_list.end();
7765 ++it) {
7766
7767 UT_DELETE_ARRAY(*it);
7768 }
7769 }
7770
7771 /**********************************************************************//**
7772 Check whether user supplied stopword table is of the right format.
7773 Caller is responsible to hold dictionary locks.
7774 @return the stopword column charset if qualifies */
7775 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)7776 fts_valid_stopword_table(
7777 /*=====================*/
7778 const char* stopword_table_name) /*!< in: Stopword table
7779 name */
7780 {
7781 dict_table_t* table;
7782 dict_col_t* col = NULL;
7783
7784 if (!stopword_table_name) {
7785 return(NULL);
7786 }
7787
7788 table = dict_table_get_low(stopword_table_name);
7789
7790 if (!table) {
7791 ib::error() << "User stopword table " << stopword_table_name
7792 << " does not exist.";
7793
7794 return(NULL);
7795 } else {
7796 const char* col_name;
7797
7798 col_name = dict_table_get_col_name(table, 0);
7799
7800 if (ut_strcmp(col_name, "value")) {
7801 ib::error() << "Invalid column name for stopword"
7802 " table " << stopword_table_name << ". Its"
7803 " first column must be named as 'value'.";
7804
7805 return(NULL);
7806 }
7807
7808 col = dict_table_get_nth_col(table, 0);
7809
7810 if (col->mtype != DATA_VARCHAR
7811 && col->mtype != DATA_VARMYSQL) {
7812 ib::error() << "Invalid column type for stopword"
7813 " table " << stopword_table_name << ". Its"
7814 " first column must be of varchar type";
7815
7816 return(NULL);
7817 }
7818 }
7819
7820 ut_ad(col);
7821
7822 return(fts_get_charset(col->prtype));
7823 }
7824
7825 /**********************************************************************//**
7826 This function loads the stopword into the FTS cache. It also
7827 records/fetches stopword configuration to/from FTS configure
7828 table, depending on whether we are creating or reloading the
7829 FTS.
7830 @return TRUE if load operation is successful */
7831 ibool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)7832 fts_load_stopword(
7833 /*==============*/
7834 const dict_table_t*
7835 table, /*!< in: Table with FTS */
7836 trx_t* trx, /*!< in: Transactions */
7837 const char* global_stopword_table, /*!< in: Global stopword table
7838 name */
7839 const char* session_stopword_table, /*!< in: Session stopword table
7840 name */
7841 ibool stopword_is_on, /*!< in: Whether stopword
7842 option is turned on/off */
7843 ibool reload) /*!< in: Whether it is
7844 for reloading FTS table */
7845 {
7846 fts_table_t fts_table;
7847 fts_string_t str;
7848 dberr_t error = DB_SUCCESS;
7849 ulint use_stopword;
7850 fts_cache_t* cache;
7851 const char* stopword_to_use = NULL;
7852 ibool new_trx = FALSE;
7853 byte str_buffer[MAX_FULL_NAME_LEN + 1];
7854
7855 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
7856
7857 cache = table->fts->cache;
7858
7859 if (!reload && !(cache->stopword_info.status
7860 & STOPWORD_NOT_INIT)) {
7861 return(TRUE);
7862 }
7863
7864 if (!trx) {
7865 trx = trx_allocate_for_background();
7866 trx->op_info = "upload FTS stopword";
7867 new_trx = TRUE;
7868 }
7869
7870 /* First check whether stopword filtering is turned off */
7871 if (reload) {
7872 error = fts_config_get_ulint(
7873 trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
7874 } else {
7875 use_stopword = (ulint) stopword_is_on;
7876
7877 error = fts_config_set_ulint(
7878 trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
7879 }
7880
7881 if (error != DB_SUCCESS) {
7882 goto cleanup;
7883 }
7884
7885 /* If stopword is turned off, no need to continue to load the
7886 stopword into cache, but still need to do initialization */
7887 if (!use_stopword) {
7888 cache->stopword_info.status = STOPWORD_OFF;
7889 goto cleanup;
7890 }
7891
7892 if (reload) {
7893 /* Fetch the stopword table name from FTS config
7894 table */
7895 str.f_n_char = 0;
7896 str.f_str = str_buffer;
7897 str.f_len = sizeof(str_buffer) - 1;
7898
7899 error = fts_config_get_value(
7900 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7901
7902 if (error != DB_SUCCESS) {
7903 goto cleanup;
7904 }
7905
7906 if (strlen((char*) str.f_str) > 0) {
7907 stopword_to_use = (const char*) str.f_str;
7908 }
7909 } else {
7910 stopword_to_use = (session_stopword_table)
7911 ? session_stopword_table : global_stopword_table;
7912 }
7913
7914 if (stopword_to_use
7915 && fts_load_user_stopword(table->fts, stopword_to_use,
7916 &cache->stopword_info)) {
7917 /* Save the stopword table name to the configure
7918 table */
7919 if (!reload) {
7920 str.f_n_char = 0;
7921 str.f_str = (byte*) stopword_to_use;
7922 str.f_len = ut_strlen(stopword_to_use);
7923
7924 error = fts_config_set_value(
7925 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7926 }
7927 } else {
7928 /* Load system default stopword list */
7929 fts_load_default_stopword(&cache->stopword_info);
7930 }
7931
7932 cleanup:
7933 if (new_trx) {
7934 if (error == DB_SUCCESS) {
7935 fts_sql_commit(trx);
7936 } else {
7937 fts_sql_rollback(trx);
7938 }
7939
7940 trx_free_for_background(trx);
7941 }
7942
7943 if (!cache->stopword_info.cached_stopword) {
7944 cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
7945 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
7946 &my_charset_latin1);
7947 }
7948
7949 return(error == DB_SUCCESS);
7950 }
7951
7952 /**********************************************************************//**
7953 Callback function when we initialize the FTS at the start up
7954 time. It recovers the maximum Doc IDs presented in the current table.
7955 @return: always returns TRUE */
7956 static
7957 ibool
fts_init_get_doc_id(void * row,void * user_arg)7958 fts_init_get_doc_id(
7959 /*================*/
7960 void* row, /*!< in: sel_node_t* */
7961 void* user_arg) /*!< in: fts cache */
7962 {
7963 doc_id_t doc_id = FTS_NULL_DOC_ID;
7964 sel_node_t* node = static_cast<sel_node_t*>(row);
7965 que_node_t* exp = node->select_list;
7966 fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
7967
7968 ut_ad(ib_vector_is_empty(cache->get_docs));
7969
7970 /* Copy each indexed column content into doc->text.f_str */
7971 if (exp) {
7972 dfield_t* dfield = que_node_get_val(exp);
7973 dtype_t* type = dfield_get_type(dfield);
7974 void* data = dfield_get_data(dfield);
7975
7976 ut_a(dtype_get_mtype(type) == DATA_INT);
7977
7978 doc_id = static_cast<doc_id_t>(mach_read_from_8(
7979 static_cast<const byte*>(data)));
7980
7981 if (doc_id >= cache->next_doc_id) {
7982 cache->next_doc_id = doc_id + 1;
7983 }
7984 }
7985
7986 return(TRUE);
7987 }
7988
7989 /**********************************************************************//**
7990 Callback function when we initialize the FTS at the start up
7991 time. It recovers Doc IDs that have not sync-ed to the auxiliary
7992 table, and require to bring them back into FTS index.
7993 @return: always returns TRUE */
7994 static
7995 ibool
fts_init_recover_doc(void * row,void * user_arg)7996 fts_init_recover_doc(
7997 /*=================*/
7998 void* row, /*!< in: sel_node_t* */
7999 void* user_arg) /*!< in: fts cache */
8000 {
8001
8002 fts_doc_t doc;
8003 ulint doc_len = 0;
8004 ulint field_no = 0;
8005 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
8006 doc_id_t doc_id = FTS_NULL_DOC_ID;
8007 sel_node_t* node = static_cast<sel_node_t*>(row);
8008 que_node_t* exp = node->select_list;
8009 fts_cache_t* cache = get_doc->cache;
8010 st_mysql_ftparser* parser = get_doc->index_cache->index->parser;
8011
8012 fts_doc_init(&doc);
8013 doc.found = TRUE;
8014
8015 ut_ad(cache);
8016
8017 /* Copy each indexed column content into doc->text.f_str */
8018 while (exp) {
8019 dfield_t* dfield = que_node_get_val(exp);
8020 ulint len = dfield_get_len(dfield);
8021
8022 if (field_no == 0) {
8023 dtype_t* type = dfield_get_type(dfield);
8024 void* data = dfield_get_data(dfield);
8025
8026 ut_a(dtype_get_mtype(type) == DATA_INT);
8027
8028 doc_id = static_cast<doc_id_t>(mach_read_from_8(
8029 static_cast<const byte*>(data)));
8030
8031 field_no++;
8032 exp = que_node_get_next(exp);
8033 continue;
8034 }
8035
8036 if (len == UNIV_SQL_NULL) {
8037 exp = que_node_get_next(exp);
8038 continue;
8039 }
8040
8041 ut_ad(get_doc);
8042
8043 if (!get_doc->index_cache->charset) {
8044 get_doc->index_cache->charset = fts_get_charset(
8045 dfield->type.prtype);
8046 }
8047
8048 doc.charset = get_doc->index_cache->charset;
8049 doc.is_ngram = get_doc->index_cache->index->is_ngram;
8050
8051 if (dfield_is_ext(dfield)) {
8052 dict_table_t* table = cache->sync->table;
8053
8054 doc.text.f_str = btr_copy_externally_stored_field(
8055 &doc.text.f_len,
8056 static_cast<byte*>(dfield_get_data(dfield)),
8057 dict_table_page_size(table), len,
8058 static_cast<mem_heap_t*>(doc.self_heap->arg));
8059 } else {
8060 doc.text.f_str = static_cast<byte*>(
8061 dfield_get_data(dfield));
8062
8063 doc.text.f_len = len;
8064 }
8065
8066 if (field_no == 1) {
8067 fts_tokenize_document(&doc, NULL, parser);
8068 } else {
8069 fts_tokenize_document_next(&doc, doc_len, NULL, parser);
8070 }
8071
8072 exp = que_node_get_next(exp);
8073
8074 doc_len += (exp) ? len + 1 : len;
8075
8076 field_no++;
8077 }
8078
8079 fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
8080
8081 fts_doc_free(&doc);
8082
8083 cache->added++;
8084
8085 if (doc_id >= cache->next_doc_id) {
8086 cache->next_doc_id = doc_id + 1;
8087 }
8088
8089 return(TRUE);
8090 }
8091
8092 /**********************************************************************//**
8093 This function brings FTS index in sync when FTS index is first
8094 used. There are documents that have not yet sync-ed to auxiliary
8095 tables from last server abnormally shutdown, we will need to bring
8096 such document into FTS cache before any further operations
8097 @return TRUE if all OK */
8098 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)8099 fts_init_index(
8100 /*===========*/
8101 dict_table_t* table, /*!< in: Table with FTS */
8102 ibool has_cache_lock) /*!< in: Whether we already have
8103 cache lock */
8104 {
8105 dict_index_t* index;
8106 doc_id_t start_doc;
8107 fts_get_doc_t* get_doc = NULL;
8108 fts_cache_t* cache = table->fts->cache;
8109 bool need_init = false;
8110
8111 ut_ad(!mutex_own(&dict_sys->mutex));
8112
8113 /* First check cache->get_docs is initialized */
8114 if (!has_cache_lock) {
8115 rw_lock_x_lock(&cache->lock);
8116 }
8117
8118 rw_lock_x_lock(&cache->init_lock);
8119 if (cache->get_docs == NULL) {
8120 cache->get_docs = fts_get_docs_create(cache);
8121 }
8122 rw_lock_x_unlock(&cache->init_lock);
8123
8124 if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
8125 goto func_exit;
8126 }
8127
8128 need_init = true;
8129
8130 start_doc = cache->synced_doc_id;
8131
8132 if (!start_doc) {
8133 fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
8134 cache->synced_doc_id = start_doc;
8135 }
8136
8137 /* No FTS index, this is the case when previous FTS index
8138 dropped, and we re-initialize the Doc ID system for subsequent
8139 insertion */
8140 if (ib_vector_is_empty(cache->get_docs)) {
8141 index = table->fts_doc_id_index;
8142
8143 ut_a(index);
8144
8145 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8146 FTS_FETCH_DOC_BY_ID_LARGE,
8147 fts_init_get_doc_id, cache);
8148 } else {
8149 if (table->fts->cache->stopword_info.status
8150 & STOPWORD_NOT_INIT) {
8151 fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
8152 }
8153
8154 for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
8155 get_doc = static_cast<fts_get_doc_t*>(
8156 ib_vector_get(cache->get_docs, i));
8157
8158 index = get_doc->index_cache->index;
8159
8160 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8161 FTS_FETCH_DOC_BY_ID_LARGE,
8162 fts_init_recover_doc, get_doc);
8163 }
8164 }
8165
8166 table->fts->fts_status |= ADDED_TABLE_SYNCED;
8167
8168 fts_get_docs_clear(cache->get_docs);
8169
8170 func_exit:
8171 if (!has_cache_lock) {
8172 rw_lock_x_unlock(&cache->lock);
8173 }
8174
8175 if (need_init) {
8176 mutex_enter(&dict_sys->mutex);
8177 /* Register the table with the optimize thread. */
8178 fts_optimize_add_table(table);
8179 mutex_exit(&dict_sys->mutex);
8180 }
8181
8182 return(TRUE);
8183 }
8184
8185 /** Check if the all the auxillary tables associated with FTS index are in
8186 consistent state. For now consistency is check only by ensuring
8187 index->page_no != FIL_NULL
8188 @param[out] base_table table has host fts index
8189 @param[in,out] trx trx handler */
8190 void
fts_check_corrupt(dict_table_t * base_table,trx_t * trx)8191 fts_check_corrupt(
8192 dict_table_t* base_table,
8193 trx_t* trx)
8194 {
8195 bool sane = true;
8196 fts_table_t fts_table;
8197
8198 /* Iterate over the common table and check for their sanity. */
8199 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
8200
8201 for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
8202
8203 char table_name[MAX_FULL_NAME_LEN];
8204
8205 fts_table.suffix = fts_common_tables[i];
8206 fts_get_table_name(&fts_table, table_name);
8207
8208 dict_table_t* aux_table = dict_table_open_on_name(
8209 table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
8210
8211 if (aux_table == NULL) {
8212 dict_set_corrupted(
8213 dict_table_get_first_index(base_table),
8214 trx, "FTS_SANITY_CHECK");
8215 ut_ad(base_table->corrupted == TRUE);
8216 sane = false;
8217 continue;
8218 }
8219
8220 for (dict_index_t* aux_table_index =
8221 UT_LIST_GET_FIRST(aux_table->indexes);
8222 aux_table_index != NULL;
8223 aux_table_index =
8224 UT_LIST_GET_NEXT(indexes, aux_table_index)) {
8225
8226 /* Check if auxillary table needed for FTS is sane. */
8227 if (aux_table_index->page == FIL_NULL) {
8228 dict_set_corrupted(
8229 dict_table_get_first_index(base_table),
8230 trx, "FTS_SANITY_CHECK");
8231 ut_ad(base_table->corrupted == TRUE);
8232 sane = false;
8233 }
8234 }
8235
8236 dict_table_close(aux_table, FALSE, FALSE);
8237 }
8238 }
8239