1 /*****************************************************************************
2
3 Copyright (c) 2011, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2016, 2022, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file fts/fts0fts.cc
22 Full Text Search interface
23 ***********************************************************************/
24
25 #include "trx0roll.h"
26 #include "row0mysql.h"
27 #include "row0upd.h"
28 #include "dict0types.h"
29 #include "dict0stats_bg.h"
30 #include "row0sel.h"
31 #include "fts0fts.h"
32 #include "fts0priv.h"
33 #include "fts0types.h"
34 #include "fts0types.inl"
35 #include "fts0vlc.h"
36 #include "fts0plugin.h"
37 #include "dict0priv.h"
38 #include "dict0stats.h"
39 #include "btr0pcur.h"
40 #include "sync0sync.h"
41
/** NOTE(review): presumably the maximum length of a formatted FTS id;
no use of this constant is visible in this part of the file - confirm. */
static const ulint FTS_MAX_ID_LEN = 32;

/** Key of the FTS config table row that stores the cache size in
megabytes (see fts_config_table_insert_values_sql). */
#define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"

/** Verify whether an aux table name is an obsolete table
by looking for the key words of the obsolete table names.
This is a plain substring match on the whole name. */
#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)		\
	(strstr((table_name), "DOC_ID") != NULL		\
	 || strstr((table_name), "ADDED") != NULL	\
	 || strstr((table_name), "STOPWORDS") != NULL)
53
/** This is the maximum FTS cache size for each table; it is meant to be
a configurable variable */
ulong	fts_max_cache_size;

/** Whether the total memory used for FTS cache is exhausted, and we will
need a sync to free some memory. Reset to false by fts_cache_clear(). */
bool	fts_need_sync = false;

/** Variable specifying the total memory allocated for FTS cache */
ulong	fts_max_total_cache_size;

/** This is the FTS result cache limit for each query; it is meant to be
a configurable variable */
size_t	fts_result_cache_limit;

/** Variable specifying the maximum FTS token size */
ulong	fts_max_token_size;

/** Variable specifying the minimum FTS token size */
ulong	fts_min_token_size;


// FIXME: testing
/* NOTE(review): the two counters below look like leftover instrumentation;
their users are not visible in this part of the file. */
static time_t elapsed_time;
static ulint n_nodes;
79
#ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit, in megabytes (1 MB) */
static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;

/** The cache size permissible upper limit, in megabytes (1 GB) */
static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
#endif

/** Time to sleep after DEADLOCK error before retrying operation.
NOTE(review): the unit is presumably microseconds - confirm at the
point of use. */
static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
90
/** InnoDB default stopword list:
There are different versions of stopwords, the stop words listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of InnoDB default stopword list is still pending
for decision.

The array is terminated by a NULL entry. Note that "the" is listed twice;
fts_load_default_stopword() inserts every entry without checking for
duplicates. */
const char *fts_default_stopword[] =
{
	"a",
	"about",
	"an",
	"are",
	"as",
	"at",
	"be",
	"by",
	"com",
	"de",
	"en",
	"for",
	"from",
	"how",
	"i",
	"in",
	"is",
	"it",
	"la",
	"of",
	"on",
	"or",
	"that",
	"the",
	"this",
	"to",
	"was",
	"what",
	"when",
	"where",
	"who",
	"will",
	"with",
	"und",
	"the",
	"www",
	NULL
};
137
/** For storing table info when checking for orphaned tables.
Describes one FTS auxiliary table. */
struct fts_aux_table_t {
	table_id_t	id;		/*!< Table id */
	table_id_t	parent_id;	/*!< Parent table id */
	table_id_t	index_id;	/*!< Table FT index id */
	char*		name;		/*!< Name of the table */
};
145
/** FTS auxiliary table suffixes that are common to all FT indexes.
The array is terminated by a NULL entry. */
const char* fts_common_tables[] = {
	"BEING_DELETED",
	"BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED",
	"DELETED_CACHE",
	NULL
};
155
/** FTS auxiliary INDEX split intervals.
Each entry pairs an integer with the suffix of one auxiliary INDEX table;
the array is terminated by a { 0, NULL } entry.
NOTE(review): the integer is presumably the split boundary used to pick
the auxiliary table for a word - confirm against fts_index_selector_t. */
const  fts_index_selector_t fts_index_selector[] = {
	{ 9, "INDEX_1" },
	{ 65, "INDEX_2" },
	{ 70, "INDEX_3" },
	{ 75, "INDEX_4" },
	{ 80, "INDEX_5" },
	{ 85, "INDEX_6" },
	{  0 , NULL	 }
};
166
/** Default config values for FTS indexes on a table.
SQL text that inserts one (key, value) row per setting into $config_table:
cache size 256, optimize limit 180, synced doc id 0, total deleted
count 0 and table state 0. The string is built by compile-time
concatenation of the key macros with the literal values. */
static const char* fts_config_table_insert_values_sql =
	"BEGIN\n"
	"\n"
	"INSERT INTO $config_table VALUES('"
		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
	""
	"INSERT INTO $config_table VALUES('"
		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_SYNCED_DOC_ID "', '0');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_TOTAL_DELETED_COUNT "', '0');\n"
	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
	"INSERT INTO $config_table VALUES ('"
		FTS_TABLE_STATE "', '0');\n";
185
/** FTS tokenize parameter for plugin parser */
struct fts_tokenize_param_t {
	fts_doc_t*	result_doc;	/*!< Result doc for tokens */
	ulint		add_pos;	/*!< Added position for tokens */
};
191
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether to unlock the cache lock while
				writing a node
@param[in]	wait		whether to wait when a sync is in progress
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait);

/****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist. */
static
void
fts_words_free(
/*===========*/
	ib_rbt_t*	words)		/*!< in: rb tree of words */
	MY_ATTRIBUTE((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync);		/*!< in: sync state */
#endif

/*********************************************************************//**
This function fetches the document just inserted right before
we commit the transaction, and tokenizes the inserted text data
and inserts it into the FTS auxiliary table and its cache.
@return TRUE if successful */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id);	/*!< in: doc id */
/******************************************************************//**
Update the last document id. This function could create a new
transaction to update the last document id.
@return DB_SUCCESS if OK */
static
dberr_t
fts_update_sync_doc_id(
/*===================*/
	const dict_table_t*	table,		/*!< in: table */
	doc_id_t		doc_id,		/*!< in: last document id */
	trx_t*			trx)		/*!< in: update trx, or NULL */
	MY_ATTRIBUTE((nonnull(1)));

/** Tokenize a document.
@param[in,out]	doc	document to tokenize
@param[out]	result	tokenization result
@param[in]	parser	pluggable parser */
static
void
fts_tokenize_document(
	fts_doc_t*		doc,
	fts_doc_t*		result,
	st_mysql_ftparser*	parser);

/** Continue to tokenize a document.
@param[in,out]	doc	document to tokenize
@param[in]	add_pos	add this position to all tokens from this tokenization
@param[out]	result	tokenization result
@param[in]	parser	pluggable parser */
static
void
fts_tokenize_document_next(
	fts_doc_t*		doc,
	ulint			add_pos,
	fts_doc_t*		result,
	st_mysql_ftparser*	parser);

/** Create the vector of fts_get_doc_t instances.
@param[in,out]	cache	fts cache
@return	vector of fts_get_doc_t instances */
static
ib_vector_t*
fts_get_docs_create(
	fts_cache_t*	cache);
278
279 /** Free the FTS cache.
280 @param[in,out] cache to be freed */
281 static
282 void
fts_cache_destroy(fts_cache_t * cache)283 fts_cache_destroy(fts_cache_t* cache)
284 {
285 rw_lock_free(&cache->lock);
286 rw_lock_free(&cache->init_lock);
287 mutex_free(&cache->deleted_lock);
288 mutex_free(&cache->doc_id_lock);
289 os_event_destroy(cache->sync->event);
290
291 if (cache->stopword_info.cached_stopword) {
292 rbt_free(cache->stopword_info.cached_stopword);
293 }
294
295 if (cache->sync_heap->arg) {
296 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
297 }
298
299 mem_heap_free(cache->cache_heap);
300 }
301
302 /** Get a character set based on precise type.
303 @param prtype precise type
304 @return the corresponding character set */
305 UNIV_INLINE
306 CHARSET_INFO*
fts_get_charset(ulint prtype)307 fts_get_charset(ulint prtype)
308 {
309 #ifdef UNIV_DEBUG
310 switch (prtype & DATA_MYSQL_TYPE_MASK) {
311 case MYSQL_TYPE_BIT:
312 case MYSQL_TYPE_STRING:
313 case MYSQL_TYPE_VAR_STRING:
314 case MYSQL_TYPE_TINY_BLOB:
315 case MYSQL_TYPE_MEDIUM_BLOB:
316 case MYSQL_TYPE_BLOB:
317 case MYSQL_TYPE_LONG_BLOB:
318 case MYSQL_TYPE_VARCHAR:
319 break;
320 default:
321 ut_error;
322 }
323 #endif /* UNIV_DEBUG */
324
325 uint cs_num = (uint) dtype_get_charset_coll(prtype);
326
327 if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
328 return(cs);
329 }
330
331 ib::fatal() << "Unable to find charset-collation " << cs_num;
332 return(NULL);
333 }
334
335 /****************************************************************//**
336 This function loads the default InnoDB stopword list */
337 static
338 void
fts_load_default_stopword(fts_stopword_t * stopword_info)339 fts_load_default_stopword(
340 /*======================*/
341 fts_stopword_t* stopword_info) /*!< in: stopword info */
342 {
343 fts_string_t str;
344 mem_heap_t* heap;
345 ib_alloc_t* allocator;
346 ib_rbt_t* stop_words;
347
348 allocator = stopword_info->heap;
349 heap = static_cast<mem_heap_t*>(allocator->arg);
350
351 if (!stopword_info->cached_stopword) {
352 stopword_info->cached_stopword = rbt_create_arg_cmp(
353 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
354 &my_charset_latin1);
355 }
356
357 stop_words = stopword_info->cached_stopword;
358
359 str.f_n_char = 0;
360
361 for (ulint i = 0; fts_default_stopword[i]; ++i) {
362 char* word;
363 fts_tokenizer_word_t new_word;
364
365 /* We are going to duplicate the value below. */
366 word = const_cast<char*>(fts_default_stopword[i]);
367
368 new_word.nodes = ib_vector_create(
369 allocator, sizeof(fts_node_t), 4);
370
371 str.f_len = ut_strlen(word);
372 str.f_str = reinterpret_cast<byte*>(word);
373
374 fts_string_dup(&new_word.text, &str, heap);
375
376 rbt_insert(stop_words, &new_word, &new_word);
377 }
378
379 stopword_info->status = STOPWORD_FROM_DEFAULT;
380 }
381
382 /****************************************************************//**
383 Callback function to read a single stopword value.
384 @return Always return TRUE */
385 static
386 ibool
fts_read_stopword(void * row,void * user_arg)387 fts_read_stopword(
388 /*==============*/
389 void* row, /*!< in: sel_node_t* */
390 void* user_arg) /*!< in: pointer to ib_vector_t */
391 {
392 ib_alloc_t* allocator;
393 fts_stopword_t* stopword_info;
394 sel_node_t* sel_node;
395 que_node_t* exp;
396 ib_rbt_t* stop_words;
397 dfield_t* dfield;
398 fts_string_t str;
399 mem_heap_t* heap;
400 ib_rbt_bound_t parent;
401
402 sel_node = static_cast<sel_node_t*>(row);
403 stopword_info = static_cast<fts_stopword_t*>(user_arg);
404
405 stop_words = stopword_info->cached_stopword;
406 allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
407 heap = static_cast<mem_heap_t*>(allocator->arg);
408
409 exp = sel_node->select_list;
410
411 /* We only need to read the first column */
412 dfield = que_node_get_val(exp);
413
414 str.f_n_char = 0;
415 str.f_str = static_cast<byte*>(dfield_get_data(dfield));
416 str.f_len = dfield_get_len(dfield);
417
418 /* Only create new node if it is a value not already existed */
419 if (str.f_len != UNIV_SQL_NULL
420 && rbt_search(stop_words, &parent, &str) != 0) {
421
422 fts_tokenizer_word_t new_word;
423
424 new_word.nodes = ib_vector_create(
425 allocator, sizeof(fts_node_t), 4);
426
427 new_word.text.f_str = static_cast<byte*>(
428 mem_heap_alloc(heap, str.f_len + 1));
429
430 memcpy(new_word.text.f_str, str.f_str, str.f_len);
431
432 new_word.text.f_n_char = 0;
433 new_word.text.f_len = str.f_len;
434 new_word.text.f_str[str.f_len] = 0;
435
436 rbt_insert(stop_words, &new_word, &new_word);
437 }
438
439 return(TRUE);
440 }
441
442 /******************************************************************//**
443 Load user defined stopword from designated user table
444 @return whether the operation is successful */
445 static
446 bool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)447 fts_load_user_stopword(
448 /*===================*/
449 fts_t* fts, /*!< in: FTS struct */
450 const char* stopword_table_name, /*!< in: Stopword table
451 name */
452 fts_stopword_t* stopword_info) /*!< in: Stopword info */
453 {
454 if (!fts->dict_locked) {
455 mutex_enter(&dict_sys.mutex);
456 }
457
458 /* Validate the user table existence in the right format */
459 bool ret= false;
460 stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
461 if (!stopword_info->charset) {
462 cleanup:
463 if (!fts->dict_locked) {
464 mutex_exit(&dict_sys.mutex);
465 }
466
467 return ret;
468 }
469
470 trx_t* trx = trx_create();
471 trx->op_info = "Load user stopword table into FTS cache";
472
473 if (!stopword_info->cached_stopword) {
474 /* Create the stopword RB tree with the stopword column
475 charset. All comparison will use this charset */
476 stopword_info->cached_stopword = rbt_create_arg_cmp(
477 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
478 (void*)stopword_info->charset);
479
480 }
481
482 pars_info_t* info = pars_info_create();
483
484 pars_info_bind_id(info, "table_stopword", stopword_table_name);
485
486 pars_info_bind_function(info, "my_func", fts_read_stopword,
487 stopword_info);
488
489 que_t* graph = fts_parse_sql_no_dict_lock(
490 info,
491 "DECLARE FUNCTION my_func;\n"
492 "DECLARE CURSOR c IS"
493 " SELECT value"
494 " FROM $table_stopword;\n"
495 "BEGIN\n"
496 "\n"
497 "OPEN c;\n"
498 "WHILE 1 = 1 LOOP\n"
499 " FETCH c INTO my_func();\n"
500 " IF c % NOTFOUND THEN\n"
501 " EXIT;\n"
502 " END IF;\n"
503 "END LOOP;\n"
504 "CLOSE c;");
505
506 for (;;) {
507 dberr_t error = fts_eval_sql(trx, graph);
508
509 if (UNIV_LIKELY(error == DB_SUCCESS)) {
510 fts_sql_commit(trx);
511 stopword_info->status = STOPWORD_USER_TABLE;
512 break;
513 } else {
514 fts_sql_rollback(trx);
515
516 if (error == DB_LOCK_WAIT_TIMEOUT) {
517 ib::warn() << "Lock wait timeout reading user"
518 " stopword table. Retrying!";
519
520 trx->error_state = DB_SUCCESS;
521 } else {
522 ib::error() << "Error '" << error
523 << "' while reading user stopword"
524 " table.";
525 ret = FALSE;
526 break;
527 }
528 }
529 }
530
531 que_graph_free(graph);
532 trx->free();
533 ret = true;
534 goto cleanup;
535 }
536
537 /******************************************************************//**
538 Initialize the index cache. */
539 static
540 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)541 fts_index_cache_init(
542 /*=================*/
543 ib_alloc_t* allocator, /*!< in: the allocator to use */
544 fts_index_cache_t* index_cache) /*!< in: index cache */
545 {
546 ulint i;
547
548 ut_a(index_cache->words == NULL);
549
550 index_cache->words = rbt_create_arg_cmp(
551 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
552 (void*) index_cache->charset);
553
554 ut_a(index_cache->doc_stats == NULL);
555
556 index_cache->doc_stats = ib_vector_create(
557 allocator, sizeof(fts_doc_stats_t), 4);
558
559 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
560 ut_a(index_cache->ins_graph[i] == NULL);
561 ut_a(index_cache->sel_graph[i] == NULL);
562 }
563 }
564
565 /*********************************************************************//**
566 Initialize FTS cache. */
567 void
fts_cache_init(fts_cache_t * cache)568 fts_cache_init(
569 /*===========*/
570 fts_cache_t* cache) /*!< in: cache to initialize */
571 {
572 ulint i;
573
574 /* Just to make sure */
575 ut_a(cache->sync_heap->arg == NULL);
576
577 cache->sync_heap->arg = mem_heap_create(1024);
578
579 cache->total_size = 0;
580 cache->total_size_at_sync = 0;
581
582 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
583 cache->deleted_doc_ids = ib_vector_create(
584 cache->sync_heap, sizeof(doc_id_t), 4);
585 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
586
587 /* Reset the cache data for all the FTS indexes. */
588 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
589 fts_index_cache_t* index_cache;
590
591 index_cache = static_cast<fts_index_cache_t*>(
592 ib_vector_get(cache->indexes, i));
593
594 fts_index_cache_init(cache->sync_heap, index_cache);
595 }
596 }
597
598 /****************************************************************//**
599 Create a FTS cache. */
600 fts_cache_t*
fts_cache_create(dict_table_t * table)601 fts_cache_create(
602 /*=============*/
603 dict_table_t* table) /*!< in: table owns the FTS cache */
604 {
605 mem_heap_t* heap;
606 fts_cache_t* cache;
607
608 heap = static_cast<mem_heap_t*>(mem_heap_create(512));
609
610 cache = static_cast<fts_cache_t*>(
611 mem_heap_zalloc(heap, sizeof(*cache)));
612
613 cache->cache_heap = heap;
614
615 rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
616
617 rw_lock_create(
618 fts_cache_init_rw_lock_key, &cache->init_lock,
619 SYNC_FTS_CACHE_INIT);
620
621 mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
622
623 mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
624
625 /* This is the heap used to create the cache itself. */
626 cache->self_heap = ib_heap_allocator_create(heap);
627
628 /* This is a transient heap, used for storing sync data. */
629 cache->sync_heap = ib_heap_allocator_create(heap);
630 cache->sync_heap->arg = NULL;
631
632 cache->sync = static_cast<fts_sync_t*>(
633 mem_heap_zalloc(heap, sizeof(fts_sync_t)));
634
635 cache->sync->table = table;
636 cache->sync->event = os_event_create(0);
637
638 /* Create the index cache vector that will hold the inverted indexes. */
639 cache->indexes = ib_vector_create(
640 cache->self_heap, sizeof(fts_index_cache_t), 2);
641
642 fts_cache_init(cache);
643
644 cache->stopword_info.cached_stopword = NULL;
645 cache->stopword_info.charset = NULL;
646
647 cache->stopword_info.heap = cache->self_heap;
648
649 cache->stopword_info.status = STOPWORD_NOT_INIT;
650
651 return(cache);
652 }
653
654 /*******************************************************************//**
655 Add a newly create index into FTS cache */
656 void
fts_add_index(dict_index_t * index,dict_table_t * table)657 fts_add_index(
658 /*==========*/
659 dict_index_t* index, /*!< FTS index to be added */
660 dict_table_t* table) /*!< table */
661 {
662 fts_t* fts = table->fts;
663 fts_cache_t* cache;
664 fts_index_cache_t* index_cache;
665
666 ut_ad(fts);
667 cache = table->fts->cache;
668
669 rw_lock_x_lock(&cache->init_lock);
670
671 ib_vector_push(fts->indexes, &index);
672
673 index_cache = fts_find_index_cache(cache, index);
674
675 if (!index_cache) {
676 /* Add new index cache structure */
677 index_cache = fts_cache_index_cache_create(table, index);
678 }
679
680 rw_lock_x_unlock(&cache->init_lock);
681 }
682
683 /*******************************************************************//**
684 recalibrate get_doc structure after index_cache in cache->indexes changed */
685 static
686 void
fts_reset_get_doc(fts_cache_t * cache)687 fts_reset_get_doc(
688 /*==============*/
689 fts_cache_t* cache) /*!< in: FTS index cache */
690 {
691 fts_get_doc_t* get_doc;
692 ulint i;
693
694 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
695
696 ib_vector_reset(cache->get_docs);
697
698 for (i = 0; i < ib_vector_size(cache->indexes); i++) {
699 fts_index_cache_t* ind_cache;
700
701 ind_cache = static_cast<fts_index_cache_t*>(
702 ib_vector_get(cache->indexes, i));
703
704 get_doc = static_cast<fts_get_doc_t*>(
705 ib_vector_push(cache->get_docs, NULL));
706
707 memset(get_doc, 0x0, sizeof(*get_doc));
708
709 get_doc->index_cache = ind_cache;
710 get_doc->cache = cache;
711 }
712
713 ut_ad(ib_vector_size(cache->get_docs)
714 == ib_vector_size(cache->indexes));
715 }
716
717 /*******************************************************************//**
718 Check an index is in the table->indexes list
719 @return TRUE if it exists */
720 static
721 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)722 fts_in_dict_index(
723 /*==============*/
724 dict_table_t* table, /*!< in: Table */
725 dict_index_t* index_check) /*!< in: index to be checked */
726 {
727 dict_index_t* index;
728
729 for (index = dict_table_get_first_index(table);
730 index != NULL;
731 index = dict_table_get_next_index(index)) {
732
733 if (index == index_check) {
734 return(TRUE);
735 }
736 }
737
738 return(FALSE);
739 }
740
741 /*******************************************************************//**
742 Check an index is in the fts->cache->indexes list
743 @return TRUE if it exists */
744 static
745 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)746 fts_in_index_cache(
747 /*===============*/
748 dict_table_t* table, /*!< in: Table */
749 dict_index_t* index) /*!< in: index to be checked */
750 {
751 ulint i;
752
753 for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
754 fts_index_cache_t* index_cache;
755
756 index_cache = static_cast<fts_index_cache_t*>(
757 ib_vector_get(table->fts->cache->indexes, i));
758
759 if (index_cache->index == index) {
760 return(TRUE);
761 }
762 }
763
764 return(FALSE);
765 }
766
767 /*******************************************************************//**
768 Check indexes in the fts->indexes is also present in index cache and
769 table->indexes list
770 @return TRUE if all indexes match */
771 ibool
fts_check_cached_index(dict_table_t * table)772 fts_check_cached_index(
773 /*===================*/
774 dict_table_t* table) /*!< in: Table where indexes are dropped */
775 {
776 ulint i;
777
778 if (!table->fts || !table->fts->cache) {
779 return(TRUE);
780 }
781
782 ut_a(ib_vector_size(table->fts->indexes)
783 == ib_vector_size(table->fts->cache->indexes));
784
785 for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
786 dict_index_t* index;
787
788 index = static_cast<dict_index_t*>(
789 ib_vector_getp(table->fts->indexes, i));
790
791 if (!fts_in_index_cache(table, index)) {
792 return(FALSE);
793 }
794
795 if (!fts_in_dict_index(table, index)) {
796 return(FALSE);
797 }
798 }
799
800 return(TRUE);
801 }
802
803 /** Clear all fts resources when there is no internal DOC_ID
804 and there are no new fts index to add.
805 @param[in,out] table table where fts is to be freed
806 @param[in] trx transaction to drop all fts tables */
fts_clear_all(dict_table_t * table,trx_t * trx)807 void fts_clear_all(dict_table_t *table, trx_t *trx)
808 {
809 if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) ||
810 !table->fts ||
811 !ib_vector_is_empty(table->fts->indexes))
812 return;
813
814 for (const dict_index_t *index= dict_table_get_first_index(table);
815 index; index= dict_table_get_next_index(index))
816 if (index->type & DICT_FTS)
817 return;
818
819 fts_optimize_remove_table(table);
820
821 fts_drop_tables(trx, table);
822 fts_free(table);
823 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
824 }
825
/*******************************************************************//**
Drop auxiliary tables related to an FTS index.

Two cases are handled before the auxiliary tables are dropped:
- the index is the only entry of fts->indexes and the table has
  DICT_TF2_FTS_HAS_DOC_ID set, or fts->indexes is empty: the table is
  removed from the optimize thread, DICT_TF2_FTS is cleared and the whole
  cache is rebuilt, keeping next_doc_id and first_doc_id;
- otherwise only this index's cache entry is released, under the
  exclusive cache init lock.
In both cases the function first waits until the index is no longer
being synced, unless the transaction is interrupted.
@return DB_SUCCESS or error number */
dberr_t
fts_drop_index(
/*===========*/
	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
	dict_index_t*	index,	/*!< in: Index to be dropped */
	trx_t*		trx)	/*!< in: Transaction for the drop */
{
	ib_vector_t*	indexes = table->fts->indexes;
	dberr_t		err = DB_SUCCESS;

	ut_a(indexes);

	if ((ib_vector_size(indexes) == 1
	     && (index == static_cast<dict_index_t*>(
			ib_vector_getp(table->fts->indexes, 0)))
	     && DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
	    || ib_vector_is_empty(indexes)) {
		doc_id_t	current_doc_id;
		doc_id_t	first_doc_id;

		/* If we are dropping the only FTS index of the table,
		remove it from optimize thread */
		fts_optimize_remove_table(table);

		DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);

		/* Wait for a sync of this index to finish, unless the
		transaction gets interrupted. */
		while (index->index_fts_syncing
		       && !trx_is_interrupted(trx)) {
			DICT_BG_YIELD(trx);
		}

		/* Remember the doc id counters, replace the cache with a
		brand new one and carry the counters over. */
		current_doc_id = table->fts->cache->next_doc_id;
		first_doc_id = table->fts->cache->first_doc_id;
		fts_cache_clear(table->fts->cache);
		fts_cache_destroy(table->fts->cache);
		table->fts->cache = fts_cache_create(table);
		table->fts->cache->next_doc_id = current_doc_id;
		table->fts->cache->first_doc_id = first_doc_id;
	} else {
		fts_cache_t*		cache = table->fts->cache;
		fts_index_cache_t*	index_cache;

		rw_lock_x_lock(&cache->init_lock);

		index_cache = fts_find_index_cache(cache, index);

		if (index_cache != NULL) {
			/* Same wait as above, here while holding the
			init lock. */
			while (index->index_fts_syncing
			       && !trx_is_interrupted(trx)) {
				DICT_BG_YIELD(trx);
			}
			if (index_cache->words) {
				fts_words_free(index_cache->words);
				rbt_free(index_cache->words);
			}

			/* NOTE(review): this passes the first pointer-sized
			word of the element as the value to remove;
			presumably that is the element's index pointer -
			confirm against fts_index_cache_t and
			ib_vector_remove(). */
			ib_vector_remove(cache->indexes, *(void**) index_cache);
		}

		/* cache->indexes changed, so get_docs must be rebuilt. */
		if (cache->get_docs) {
			fts_reset_get_doc(cache);
		}

		rw_lock_x_unlock(&cache->init_lock);
	}

	err = fts_drop_index_tables(trx, index);

	/* The index is removed from fts->indexes regardless of err. */
	ib_vector_remove(indexes, (const void*) index);

	return(err);
}
901
902 /****************************************************************//**
903 Free the query graph but check whether dict_sys.mutex is already
904 held */
905 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)906 fts_que_graph_free_check_lock(
907 /*==========================*/
908 fts_table_t* fts_table, /*!< in: FTS table */
909 const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
910 que_t* graph) /*!< in: query graph */
911 {
912 bool has_dict = FALSE;
913
914 if (fts_table && fts_table->table) {
915 ut_ad(fts_table->table->fts);
916
917 has_dict = fts_table->table->fts->dict_locked;
918 } else if (index_cache) {
919 ut_ad(index_cache->index->table->fts);
920
921 has_dict = index_cache->index->table->fts->dict_locked;
922 }
923
924 if (!has_dict) {
925 mutex_enter(&dict_sys.mutex);
926 }
927
928 ut_ad(mutex_own(&dict_sys.mutex));
929
930 que_graph_free(graph);
931
932 if (!has_dict) {
933 mutex_exit(&dict_sys.mutex);
934 }
935 }
936
937 /****************************************************************//**
938 Create an FTS index cache. */
939 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)940 fts_index_get_charset(
941 /*==================*/
942 dict_index_t* index) /*!< in: FTS index */
943 {
944 CHARSET_INFO* charset = NULL;
945 dict_field_t* field;
946 ulint prtype;
947
948 field = dict_index_get_nth_field(index, 0);
949 prtype = field->col->prtype;
950
951 charset = fts_get_charset(prtype);
952
953 #ifdef FTS_DEBUG
954 /* Set up charset info for this index. Please note all
955 field of the FTS index should have the same charset */
956 for (i = 1; i < index->n_fields; i++) {
957 CHARSET_INFO* fld_charset;
958
959 field = dict_index_get_nth_field(index, i);
960 prtype = field->col->prtype;
961
962 fld_charset = fts_get_charset(prtype);
963
964 /* All FTS columns should have the same charset */
965 if (charset) {
966 ut_a(charset == fld_charset);
967 } else {
968 charset = fld_charset;
969 }
970 }
971 #endif
972
973 return(charset);
974
975 }
976 /****************************************************************//**
977 Create an FTS index cache.
978 @return Index Cache */
979 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)980 fts_cache_index_cache_create(
981 /*=========================*/
982 dict_table_t* table, /*!< in: table with FTS index */
983 dict_index_t* index) /*!< in: FTS index */
984 {
985 ulint n_bytes;
986 fts_index_cache_t* index_cache;
987 fts_cache_t* cache = table->fts->cache;
988
989 ut_a(cache != NULL);
990
991 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
992
993 /* Must not already exist in the cache vector. */
994 ut_a(fts_find_index_cache(cache, index) == NULL);
995
996 index_cache = static_cast<fts_index_cache_t*>(
997 ib_vector_push(cache->indexes, NULL));
998
999 memset(index_cache, 0x0, sizeof(*index_cache));
1000
1001 index_cache->index = index;
1002
1003 index_cache->charset = fts_index_get_charset(index);
1004
1005 n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1006
1007 index_cache->ins_graph = static_cast<que_t**>(
1008 mem_heap_zalloc(static_cast<mem_heap_t*>(
1009 cache->self_heap->arg), n_bytes));
1010
1011 index_cache->sel_graph = static_cast<que_t**>(
1012 mem_heap_zalloc(static_cast<mem_heap_t*>(
1013 cache->self_heap->arg), n_bytes));
1014
1015 fts_index_cache_init(cache->sync_heap, index_cache);
1016
1017 if (cache->get_docs) {
1018 fts_reset_get_doc(cache);
1019 }
1020
1021 return(index_cache);
1022 }
1023
1024 /****************************************************************//**
1025 Release all resources help by the words rb tree e.g., the node ilist. */
1026 static
1027 void
fts_words_free(ib_rbt_t * words)1028 fts_words_free(
1029 /*===========*/
1030 ib_rbt_t* words) /*!< in: rb tree of words */
1031 {
1032 const ib_rbt_node_t* rbt_node;
1033
1034 /* Free the resources held by a word. */
1035 for (rbt_node = rbt_first(words);
1036 rbt_node != NULL;
1037 rbt_node = rbt_first(words)) {
1038
1039 ulint i;
1040 fts_tokenizer_word_t* word;
1041
1042 word = rbt_value(fts_tokenizer_word_t, rbt_node);
1043
1044 /* Free the ilists of this word. */
1045 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1046
1047 fts_node_t* fts_node = static_cast<fts_node_t*>(
1048 ib_vector_get(word->nodes, i));
1049
1050 ut_free(fts_node->ilist);
1051 fts_node->ilist = NULL;
1052 }
1053
1054 /* NOTE: We are responsible for free'ing the node */
1055 ut_free(rbt_remove_node(words, rbt_node));
1056 }
1057 }
1058
1059 /** Clear cache.
1060 @param[in,out] cache fts cache */
1061 void
fts_cache_clear(fts_cache_t * cache)1062 fts_cache_clear(
1063 fts_cache_t* cache)
1064 {
1065 ulint i;
1066
1067 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1068 ulint j;
1069 fts_index_cache_t* index_cache;
1070
1071 index_cache = static_cast<fts_index_cache_t*>(
1072 ib_vector_get(cache->indexes, i));
1073
1074 fts_words_free(index_cache->words);
1075
1076 rbt_free(index_cache->words);
1077
1078 index_cache->words = NULL;
1079
1080 for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1081
1082 if (index_cache->ins_graph[j] != NULL) {
1083
1084 fts_que_graph_free_check_lock(
1085 NULL, index_cache,
1086 index_cache->ins_graph[j]);
1087
1088 index_cache->ins_graph[j] = NULL;
1089 }
1090
1091 if (index_cache->sel_graph[j] != NULL) {
1092
1093 fts_que_graph_free_check_lock(
1094 NULL, index_cache,
1095 index_cache->sel_graph[j]);
1096
1097 index_cache->sel_graph[j] = NULL;
1098 }
1099 }
1100
1101 index_cache->doc_stats = NULL;
1102 }
1103
1104 fts_need_sync = false;
1105
1106 cache->total_size = 0;
1107
1108 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1109 cache->deleted_doc_ids = NULL;
1110 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1111
1112 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1113 cache->sync_heap->arg = NULL;
1114 }
1115
1116 /*********************************************************************//**
1117 Search the index specific cache for a particular FTS index.
1118 @return the index cache else NULL */
1119 UNIV_INLINE
1120 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1121 fts_get_index_cache(
1122 /*================*/
1123 fts_cache_t* cache, /*!< in: cache to search */
1124 const dict_index_t* index) /*!< in: index to search for */
1125 {
1126 ulint i;
1127
1128 ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1129 || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1130
1131 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1132 fts_index_cache_t* index_cache;
1133
1134 index_cache = static_cast<fts_index_cache_t*>(
1135 ib_vector_get(cache->indexes, i));
1136
1137 if (index_cache->index == index) {
1138
1139 return(index_cache);
1140 }
1141 }
1142
1143 return(NULL);
1144 }
1145
#ifdef FTS_DEBUG
/** Search the cache for the get_doc structure whose index cache belongs
to the given index. Only compiled when FTS_DEBUG is defined. In debug
builds it is asserted that the caller holds cache->init_lock in X mode.
@param[in]	cache	cache to search
@param[in]	index	index to search for
@return the fts_get_doc_t item, or NULL if there is none */
static
fts_get_doc_t*
fts_get_index_get_doc(
	fts_cache_t*		cache,
	const dict_index_t*	index)
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	ulint	pos = 0;

	while (pos < ib_vector_size(cache->get_docs)) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			return(candidate);
		}

		++pos;
	}

	return(NULL);
}
#endif
1176
1177 /**********************************************************************//**
1178 Find an existing word, or if not found, create one and return it.
1179 @return specified word token */
1180 static
1181 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1182 fts_tokenizer_word_get(
1183 /*===================*/
1184 fts_cache_t* cache, /*!< in: cache */
1185 fts_index_cache_t*
1186 index_cache, /*!< in: index cache */
1187 fts_string_t* text) /*!< in: node text */
1188 {
1189 fts_tokenizer_word_t* word;
1190 ib_rbt_bound_t parent;
1191
1192 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1193
1194 /* If it is a stopword, do not index it */
1195 if (!fts_check_token(text,
1196 cache->stopword_info.cached_stopword,
1197 index_cache->charset)) {
1198
1199 return(NULL);
1200 }
1201
1202 /* Check if we found a match, if not then add word to tree. */
1203 if (rbt_search(index_cache->words, &parent, text) != 0) {
1204 mem_heap_t* heap;
1205 fts_tokenizer_word_t new_word;
1206
1207 heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1208
1209 new_word.nodes = ib_vector_create(
1210 cache->sync_heap, sizeof(fts_node_t), 4);
1211
1212 fts_string_dup(&new_word.text, text, heap);
1213
1214 parent.last = rbt_add_node(
1215 index_cache->words, &parent, &new_word);
1216
1217 /* Take into account the RB tree memory use and the vector. */
1218 cache->total_size += sizeof(new_word)
1219 + sizeof(ib_rbt_node_t)
1220 + text->f_len
1221 + (sizeof(fts_node_t) * 4)
1222 + sizeof(*new_word.nodes);
1223
1224 ut_ad(rbt_validate(index_cache->words));
1225 }
1226
1227 word = rbt_value(fts_tokenizer_word_t, parent.last);
1228
1229 return(word);
1230 }
1231
1232 /**********************************************************************//**
1233 Add the given doc_id/word positions to the given node's ilist. */
1234 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1235 fts_cache_node_add_positions(
1236 /*=========================*/
1237 fts_cache_t* cache, /*!< in: cache */
1238 fts_node_t* node, /*!< in: word node */
1239 doc_id_t doc_id, /*!< in: doc id */
1240 ib_vector_t* positions) /*!< in: fts_token_t::positions */
1241 {
1242 ulint i;
1243 byte* ptr;
1244 byte* ilist;
1245 ulint enc_len;
1246 ulint last_pos;
1247 byte* ptr_start;
1248 doc_id_t doc_id_delta;
1249
1250 #ifdef UNIV_DEBUG
1251 if (cache) {
1252 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1253 }
1254 #endif /* UNIV_DEBUG */
1255
1256 ut_ad(doc_id >= node->last_doc_id);
1257
1258 /* Calculate the space required to store the ilist. */
1259 doc_id_delta = doc_id - node->last_doc_id;
1260 enc_len = fts_get_encoded_len(doc_id_delta);
1261
1262 last_pos = 0;
1263 for (i = 0; i < ib_vector_size(positions); i++) {
1264 ulint pos = *(static_cast<ulint*>(
1265 ib_vector_get(positions, i)));
1266
1267 ut_ad(last_pos == 0 || pos > last_pos);
1268
1269 enc_len += fts_get_encoded_len(pos - last_pos);
1270 last_pos = pos;
1271 }
1272
1273 /* The 0x00 byte at the end of the token positions list. */
1274 enc_len++;
1275
1276 if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1277 /* No need to allocate more space, we can fit in the new
1278 data at the end of the old one. */
1279 ilist = NULL;
1280 ptr = node->ilist + node->ilist_size;
1281 } else {
1282 ulint new_size = node->ilist_size + enc_len;
1283
1284 /* Over-reserve space by a fixed size for small lengths and
1285 by 20% for lengths >= 48 bytes. */
1286 if (new_size < 16) {
1287 new_size = 16;
1288 } else if (new_size < 32) {
1289 new_size = 32;
1290 } else if (new_size < 48) {
1291 new_size = 48;
1292 } else {
1293 new_size = (ulint)(1.2 * new_size);
1294 }
1295
1296 ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1297 ptr = ilist + node->ilist_size;
1298
1299 node->ilist_size_alloc = new_size;
1300 if (cache) {
1301 cache->total_size += new_size;
1302 }
1303 }
1304
1305 ptr_start = ptr;
1306
1307 /* Encode the new fragment. */
1308 ptr = fts_encode_int(doc_id_delta, ptr);
1309
1310 last_pos = 0;
1311 for (i = 0; i < ib_vector_size(positions); i++) {
1312 ulint pos = *(static_cast<ulint*>(
1313 ib_vector_get(positions, i)));
1314
1315 ptr = fts_encode_int(pos - last_pos, ptr);
1316 last_pos = pos;
1317 }
1318
1319 *ptr++ = 0;
1320
1321 ut_a(enc_len == (ulint)(ptr - ptr_start));
1322
1323 if (ilist) {
1324 /* Copy old ilist to the start of the new one and switch the
1325 new one into place in the node. */
1326 if (node->ilist_size > 0) {
1327 memcpy(ilist, node->ilist, node->ilist_size);
1328 ut_free(node->ilist);
1329 if (cache) {
1330 cache->total_size -= node->ilist_size;
1331 }
1332 }
1333
1334 node->ilist = ilist;
1335 }
1336
1337 node->ilist_size += enc_len;
1338
1339 if (node->first_doc_id == FTS_NULL_DOC_ID) {
1340 node->first_doc_id = doc_id;
1341 }
1342
1343 node->last_doc_id = doc_id;
1344 ++node->doc_count;
1345 }
1346
1347 /**********************************************************************//**
1348 Add document to the cache. */
1349 static
1350 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1351 fts_cache_add_doc(
1352 /*==============*/
1353 fts_cache_t* cache, /*!< in: cache */
1354 fts_index_cache_t*
1355 index_cache, /*!< in: index cache */
1356 doc_id_t doc_id, /*!< in: doc id to add */
1357 ib_rbt_t* tokens) /*!< in: document tokens */
1358 {
1359 const ib_rbt_node_t* node;
1360 ulint n_words;
1361 fts_doc_stats_t* doc_stats;
1362
1363 if (!tokens) {
1364 return;
1365 }
1366
1367 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1368
1369 n_words = rbt_size(tokens);
1370
1371 for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1372
1373 fts_tokenizer_word_t* word;
1374 fts_node_t* fts_node = NULL;
1375 fts_token_t* token = rbt_value(fts_token_t, node);
1376
1377 /* Find and/or add token to the cache. */
1378 word = fts_tokenizer_word_get(
1379 cache, index_cache, &token->text);
1380
1381 if (!word) {
1382 ut_free(rbt_remove_node(tokens, node));
1383 continue;
1384 }
1385
1386 if (ib_vector_size(word->nodes) > 0) {
1387 fts_node = static_cast<fts_node_t*>(
1388 ib_vector_last(word->nodes));
1389 }
1390
1391 if (fts_node == NULL || fts_node->synced
1392 || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1393 || doc_id < fts_node->last_doc_id) {
1394
1395 fts_node = static_cast<fts_node_t*>(
1396 ib_vector_push(word->nodes, NULL));
1397
1398 memset(fts_node, 0x0, sizeof(*fts_node));
1399
1400 cache->total_size += sizeof(*fts_node);
1401 }
1402
1403 fts_cache_node_add_positions(
1404 cache, fts_node, doc_id, token->positions);
1405
1406 ut_free(rbt_remove_node(tokens, node));
1407 }
1408
1409 ut_a(rbt_empty(tokens));
1410
1411 /* Add to doc ids processed so far. */
1412 doc_stats = static_cast<fts_doc_stats_t*>(
1413 ib_vector_push(index_cache->doc_stats, NULL));
1414
1415 doc_stats->doc_id = doc_id;
1416 doc_stats->word_count = n_words;
1417
1418 /* Add the doc stats memory usage too. */
1419 cache->total_size += sizeof(*doc_stats);
1420
1421 if (doc_id > cache->sync->max_doc_id) {
1422 cache->sync->max_doc_id = doc_id;
1423 }
1424 }
1425
1426 /****************************************************************//**
1427 Drops a table. If the table can't be found we return a SUCCESS code.
1428 @return DB_SUCCESS or error code */
1429 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1430 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1431 fts_drop_table(
1432 /*===========*/
1433 trx_t* trx, /*!< in: transaction */
1434 const char* table_name) /*!< in: table to drop */
1435 {
1436 dict_table_t* table;
1437 dberr_t error = DB_SUCCESS;
1438
1439 /* Check that the table exists in our data dictionary.
1440 Similar to regular drop table case, we will open table with
1441 DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1442 table = dict_table_open_on_name(
1443 table_name, TRUE, FALSE,
1444 static_cast<dict_err_ignore_t>(
1445 DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1446
1447 if (table != 0) {
1448
1449 dict_table_close(table, TRUE, FALSE);
1450
1451 /* Pass nonatomic=false (dont allow data dict unlock),
1452 because the transaction may hold locks on SYS_* tables from
1453 previous calls to fts_drop_table(). */
1454 error = row_drop_table_for_mysql(table_name, trx,
1455 SQLCOM_DROP_DB, false, false);
1456
1457 if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
1458 ib::error() << "Unable to drop FTS index aux table "
1459 << table_name << ": " << error;
1460 }
1461 } else {
1462 error = DB_FAIL;
1463 }
1464
1465 return(error);
1466 }
1467
1468 /****************************************************************//**
1469 Rename a single auxiliary table due to database name change.
1470 @return DB_SUCCESS or error code */
1471 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1472 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1473 fts_rename_one_aux_table(
1474 /*=====================*/
1475 const char* new_name, /*!< in: new parent tbl name */
1476 const char* fts_table_old_name, /*!< in: old aux tbl name */
1477 trx_t* trx) /*!< in: transaction */
1478 {
1479 char fts_table_new_name[MAX_TABLE_NAME_LEN];
1480 ulint new_db_name_len = dict_get_db_name_len(new_name);
1481 ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1482 ulint table_new_name_len = strlen(fts_table_old_name)
1483 + new_db_name_len - old_db_name_len;
1484
1485 /* Check if the new and old database names are the same, if so,
1486 nothing to do */
1487 ut_ad((new_db_name_len != old_db_name_len)
1488 || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1489
1490 /* Get the database name from "new_name", and table name
1491 from the fts_table_old_name */
1492 strncpy(fts_table_new_name, new_name, new_db_name_len);
1493 strncpy(fts_table_new_name + new_db_name_len,
1494 strchr(fts_table_old_name, '/'),
1495 table_new_name_len - new_db_name_len);
1496 fts_table_new_name[table_new_name_len] = 0;
1497
1498 return row_rename_table_for_mysql(
1499 fts_table_old_name, fts_table_new_name, trx, false, false);
1500 }
1501
1502 /****************************************************************//**
1503 Rename auxiliary tables for all fts index for a table. This(rename)
1504 is due to database name change
1505 @return DB_SUCCESS or error code */
1506 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1507 fts_rename_aux_tables(
1508 /*==================*/
1509 dict_table_t* table, /*!< in: user Table */
1510 const char* new_name, /*!< in: new table name */
1511 trx_t* trx) /*!< in: transaction */
1512 {
1513 ulint i;
1514 fts_table_t fts_table;
1515
1516 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1517
1518 dberr_t err = DB_SUCCESS;
1519 char old_table_name[MAX_FULL_NAME_LEN];
1520
1521 /* Rename common auxiliary tables */
1522 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1523 fts_table.suffix = fts_common_tables[i];
1524 fts_get_table_name(&fts_table, old_table_name, true);
1525
1526 err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1527
1528 if (err != DB_SUCCESS) {
1529 return(err);
1530 }
1531 }
1532
1533 fts_t* fts = table->fts;
1534
1535 /* Rename index specific auxiliary tables */
1536 for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1537 ++i) {
1538 dict_index_t* index;
1539
1540 index = static_cast<dict_index_t*>(
1541 ib_vector_getp(fts->indexes, i));
1542
1543 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1544
1545 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1546 fts_table.suffix = fts_get_suffix(j);
1547 fts_get_table_name(&fts_table, old_table_name, true);
1548
1549 err = fts_rename_one_aux_table(
1550 new_name, old_table_name, trx);
1551
1552 DBUG_EXECUTE_IF("fts_rename_failure",
1553 err = DB_DEADLOCK;
1554 fts_sql_rollback(trx););
1555
1556 if (err != DB_SUCCESS) {
1557 return(err);
1558 }
1559 }
1560 }
1561
1562 return(DB_SUCCESS);
1563 }
1564
1565 /** Drops the common ancillary tables needed for supporting an FTS index
1566 on the given table. row_mysql_lock_data_dictionary must have been called
1567 before this.
1568 @param[in] trx transaction to drop fts common table
1569 @param[in] fts_table table with an FTS index
1570 @param[in] drop_orphan True if the function is used to drop
1571 orphaned table
1572 @return DB_SUCCESS or error code */
1573 static dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table,bool drop_orphan=false)1574 fts_drop_common_tables(
1575 trx_t* trx,
1576 fts_table_t* fts_table,
1577 bool drop_orphan=false)
1578 {
1579 ulint i;
1580 dberr_t error = DB_SUCCESS;
1581
1582 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1583 dberr_t err;
1584 char table_name[MAX_FULL_NAME_LEN];
1585
1586 fts_table->suffix = fts_common_tables[i];
1587 fts_get_table_name(fts_table, table_name, true);
1588
1589 err = fts_drop_table(trx, table_name);
1590
1591 /* We only return the status of the last error. */
1592 if (err != DB_SUCCESS && err != DB_FAIL) {
1593 error = err;
1594 }
1595
1596 if (drop_orphan && err == DB_FAIL) {
1597 char* path = fil_make_filepath(
1598 NULL, table_name, IBD, false);
1599 if (path != NULL) {
1600 os_file_delete_if_exists(
1601 innodb_data_file_key, path, NULL);
1602 ut_free(path);
1603 }
1604 }
1605 }
1606
1607 return(error);
1608 }
1609
1610 /****************************************************************//**
1611 Since we do a horizontal split on the index table, we need to drop
1612 all the split tables.
1613 @return DB_SUCCESS or error code */
1614 static
1615 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1616 fts_drop_index_split_tables(
1617 /*========================*/
1618 trx_t* trx, /*!< in: transaction */
1619 dict_index_t* index) /*!< in: fts instance */
1620
1621 {
1622 ulint i;
1623 fts_table_t fts_table;
1624 dberr_t error = DB_SUCCESS;
1625
1626 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1627
1628 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1629 dberr_t err;
1630 char table_name[MAX_FULL_NAME_LEN];
1631
1632 fts_table.suffix = fts_get_suffix(i);
1633 fts_get_table_name(&fts_table, table_name, true);
1634
1635 err = fts_drop_table(trx, table_name);
1636
1637 /* We only return the status of the last error. */
1638 if (err != DB_SUCCESS && err != DB_FAIL) {
1639 error = err;
1640 }
1641 }
1642
1643 return(error);
1644 }
1645
1646 /****************************************************************//**
1647 Drops FTS auxiliary tables for an FTS index
1648 @return DB_SUCCESS or error code */
1649 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1650 fts_drop_index_tables(
1651 /*==================*/
1652 trx_t* trx, /*!< in: transaction */
1653 dict_index_t* index) /*!< in: Index to drop */
1654 {
1655 return(fts_drop_index_split_tables(trx, index));
1656 }
1657
1658 /****************************************************************//**
1659 Drops FTS ancillary tables needed for supporting an FTS index
1660 on the given table. row_mysql_lock_data_dictionary must have been called
1661 before this.
1662 @return DB_SUCCESS or error code */
1663 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1664 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1665 fts_drop_all_index_tables(
1666 /*======================*/
1667 trx_t* trx, /*!< in: transaction */
1668 fts_t* fts) /*!< in: fts instance */
1669 {
1670 dberr_t error = DB_SUCCESS;
1671
1672 for (ulint i = 0;
1673 fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1674 ++i) {
1675
1676 dberr_t err;
1677 dict_index_t* index;
1678
1679 index = static_cast<dict_index_t*>(
1680 ib_vector_getp(fts->indexes, i));
1681
1682 err = fts_drop_index_tables(trx, index);
1683
1684 if (err != DB_SUCCESS) {
1685 error = err;
1686 }
1687 }
1688
1689 return(error);
1690 }
1691
1692 /*********************************************************************//**
1693 Drops the ancillary tables needed for supporting an FTS index on a
1694 given table. row_mysql_lock_data_dictionary must have been called before
1695 this.
1696 @return DB_SUCCESS or error code */
1697 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1698 fts_drop_tables(
1699 /*============*/
1700 trx_t* trx, /*!< in: transaction */
1701 dict_table_t* table) /*!< in: table has the FTS index */
1702 {
1703 dberr_t error;
1704 fts_table_t fts_table;
1705
1706 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1707
1708 /* TODO: This is not atomic and can cause problems during recovery. */
1709
1710 error = fts_drop_common_tables(trx, &fts_table);
1711
1712 if (error == DB_SUCCESS && table->fts) {
1713 error = fts_drop_all_index_tables(trx, table->fts);
1714 }
1715
1716 return(error);
1717 }
1718
1719 /** Create dict_table_t object for FTS Aux tables.
1720 @param[in] aux_table_name FTS Aux table name
1721 @param[in] table table object of FTS Index
1722 @param[in] n_cols number of columns for FTS Aux table
1723 @return table object for FTS Aux table */
1724 static
1725 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1726 fts_create_in_mem_aux_table(
1727 const char* aux_table_name,
1728 const dict_table_t* table,
1729 ulint n_cols)
1730 {
1731 dict_table_t* new_table = dict_mem_table_create(
1732 aux_table_name, NULL, n_cols, 0, table->flags,
1733 table->space_id == TRX_SYS_SPACE
1734 ? 0 : table->space_id == SRV_TMP_SPACE_ID
1735 ? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE);
1736
1737 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1738 ut_ad(table->data_dir_path != NULL);
1739 new_table->data_dir_path = mem_heap_strdup(
1740 new_table->heap, table->data_dir_path);
1741 }
1742
1743 return(new_table);
1744 }
1745
1746 /** Function to create on FTS common table.
1747 @param[in,out] trx InnoDB transaction
1748 @param[in] table Table that has FTS Index
1749 @param[in] fts_table_name FTS AUX table name
1750 @param[in] fts_suffix FTS AUX table suffix
1751 @param[in,out] heap temporary memory heap
1752 @return table object if created, else NULL */
1753 static
1754 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1755 fts_create_one_common_table(
1756 trx_t* trx,
1757 const dict_table_t* table,
1758 const char* fts_table_name,
1759 const char* fts_suffix,
1760 mem_heap_t* heap)
1761 {
1762 dict_table_t* new_table;
1763 dberr_t error;
1764 bool is_config = strcmp(fts_suffix, "CONFIG") == 0;
1765
1766 if (!is_config) {
1767
1768 new_table = fts_create_in_mem_aux_table(
1769 fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1770
1771 dict_mem_table_add_col(
1772 new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1773 FTS_DELETED_TABLE_COL_LEN);
1774 } else {
1775 /* Config table has different schema. */
1776 new_table = fts_create_in_mem_aux_table(
1777 fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1778
1779 dict_mem_table_add_col(
1780 new_table, heap, "key", DATA_VARCHAR, 0,
1781 FTS_CONFIG_TABLE_KEY_COL_LEN);
1782
1783 dict_mem_table_add_col(
1784 new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1785 FTS_CONFIG_TABLE_VALUE_COL_LEN);
1786 }
1787
1788 dict_table_add_system_columns(new_table, heap);
1789 error = row_create_table_for_mysql(new_table, trx,
1790 FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
1791 if (error == DB_SUCCESS) {
1792
1793 dict_index_t* index = dict_mem_index_create(
1794 new_table, "FTS_COMMON_TABLE_IND",
1795 DICT_UNIQUE|DICT_CLUSTERED, 1);
1796
1797 if (!is_config) {
1798 dict_mem_index_add_field(index, "doc_id", 0);
1799 } else {
1800 dict_mem_index_add_field(index, "key", 0);
1801 }
1802
1803 /* We save and restore trx->dict_operation because
1804 row_create_index_for_mysql() changes the operation to
1805 TRX_DICT_OP_TABLE. */
1806 trx_dict_op_t op = trx_get_dict_operation(trx);
1807
1808 error = row_create_index_for_mysql(index, trx, NULL);
1809
1810 trx->dict_operation = op;
1811 } else {
1812 err_exit:
1813 new_table = NULL;
1814 ib::warn() << "Failed to create FTS common table "
1815 << fts_table_name;
1816 trx->error_state = error;
1817 return NULL;
1818 }
1819
1820 if (error != DB_SUCCESS) {
1821 dict_mem_table_free(new_table);
1822 trx->error_state = DB_SUCCESS;
1823 row_drop_table_for_mysql(fts_table_name, trx, SQLCOM_DROP_DB);
1824 goto err_exit;
1825 }
1826
1827 return(new_table);
1828 }
1829
1830 /** Creates the common auxiliary tables needed for supporting an FTS index
1831 on the given table. row_mysql_lock_data_dictionary must have been called
1832 before this.
1833 The following tables are created.
1834 CREATE TABLE $FTS_PREFIX_DELETED
1835 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1836 CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1837 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1838 CREATE TABLE $FTS_PREFIX_BEING_DELETED
1839 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1840 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1841 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1842 CREATE TABLE $FTS_PREFIX_CONFIG
1843 (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1844 @param[in,out] trx transaction
1845 @param[in,out] table table with FTS index
1846 @param[in] skip_doc_id_index Skip index on doc id
1847 @return DB_SUCCESS if succeed */
1848 dberr_t
fts_create_common_tables(trx_t * trx,dict_table_t * table,bool skip_doc_id_index)1849 fts_create_common_tables(
1850 trx_t* trx,
1851 dict_table_t* table,
1852 bool skip_doc_id_index)
1853 {
1854 dberr_t error;
1855 que_t* graph;
1856 fts_table_t fts_table;
1857 mem_heap_t* heap = mem_heap_create(1024);
1858 pars_info_t* info;
1859 char fts_name[MAX_FULL_NAME_LEN];
1860 char full_name[sizeof(fts_common_tables) / sizeof(char*)]
1861 [MAX_FULL_NAME_LEN];
1862
1863 dict_index_t* index = NULL;
1864 trx_dict_op_t op;
1865 /* common_tables vector is used for dropping FTS common tables
1866 on error condition. */
1867 std::vector<dict_table_t*> common_tables;
1868 std::vector<dict_table_t*>::const_iterator it;
1869
1870 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1871
1872 op = trx_get_dict_operation(trx);
1873
1874 error = fts_drop_common_tables(trx, &fts_table);
1875
1876 if (error != DB_SUCCESS) {
1877
1878 goto func_exit;
1879 }
1880
1881 /* Create the FTS tables that are common to an FTS index. */
1882 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
1883
1884 fts_table.suffix = fts_common_tables[i];
1885 fts_get_table_name(&fts_table, full_name[i], true);
1886 dict_table_t* common_table = fts_create_one_common_table(
1887 trx, table, full_name[i], fts_table.suffix, heap);
1888
1889 if (!common_table) {
1890 trx->error_state = DB_SUCCESS;
1891 error = DB_ERROR;
1892 goto func_exit;
1893 } else {
1894 common_tables.push_back(common_table);
1895 }
1896
1897 mem_heap_empty(heap);
1898
1899 DBUG_EXECUTE_IF("ib_fts_aux_table_error",
1900 /* Return error after creating FTS_AUX_CONFIG table. */
1901 if (i == 4) {
1902 error = DB_ERROR;
1903 goto func_exit;
1904 }
1905 );
1906
1907 }
1908
1909 /* Write the default settings to the config table. */
1910 info = pars_info_create();
1911
1912 fts_table.suffix = "CONFIG";
1913 fts_get_table_name(&fts_table, fts_name, true);
1914 pars_info_bind_id(info, "config_table", fts_name);
1915
1916 graph = fts_parse_sql_no_dict_lock(
1917 info, fts_config_table_insert_values_sql);
1918
1919 error = fts_eval_sql(trx, graph);
1920
1921 que_graph_free(graph);
1922
1923 if (error != DB_SUCCESS || skip_doc_id_index) {
1924
1925 goto func_exit;
1926 }
1927
1928 index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME,
1929 DICT_UNIQUE, 1);
1930 dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
1931
1932 op = trx_get_dict_operation(trx);
1933
1934 error = row_create_index_for_mysql(index, trx, NULL);
1935
1936 func_exit:
1937 if (error != DB_SUCCESS) {
1938 for (it = common_tables.begin(); it != common_tables.end();
1939 ++it) {
1940 row_drop_table_for_mysql((*it)->name.m_name, trx,
1941 SQLCOM_DROP_DB);
1942 }
1943 }
1944
1945 trx->dict_operation = op;
1946
1947 common_tables.clear();
1948 mem_heap_free(heap);
1949
1950 return(error);
1951 }
1952
1953 /** Create one FTS auxiliary index table for an FTS index.
1954 @param[in,out] trx transaction
1955 @param[in] index the index instance
1956 @param[in] fts_table fts_table structure
1957 @param[in,out] heap temporary memory heap
1958 @see row_merge_create_fts_sort_index()
1959 @return DB_SUCCESS or error code */
1960 static
1961 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,const fts_table_t * fts_table,mem_heap_t * heap)1962 fts_create_one_index_table(
1963 trx_t* trx,
1964 const dict_index_t* index,
1965 const fts_table_t* fts_table,
1966 mem_heap_t* heap)
1967 {
1968 dict_field_t* field;
1969 dict_table_t* new_table;
1970 char table_name[MAX_FULL_NAME_LEN];
1971 dberr_t error;
1972 CHARSET_INFO* charset;
1973
1974 ut_ad(index->type & DICT_FTS);
1975
1976 fts_get_table_name(fts_table, table_name, true);
1977
1978 new_table = fts_create_in_mem_aux_table(
1979 table_name, fts_table->table,
1980 FTS_AUX_INDEX_TABLE_NUM_COLS);
1981
1982 field = dict_index_get_nth_field(index, 0);
1983 charset = fts_get_charset(field->col->prtype);
1984
1985 dict_mem_table_add_col(new_table, heap, "word",
1986 charset == &my_charset_latin1
1987 ? DATA_VARCHAR : DATA_VARMYSQL,
1988 field->col->prtype,
1989 FTS_MAX_WORD_LEN_IN_CHAR
1990 * unsigned(field->col->mbmaxlen));
1991
1992 dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
1993 DATA_NOT_NULL | DATA_UNSIGNED,
1994 FTS_INDEX_FIRST_DOC_ID_LEN);
1995
1996 dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
1997 DATA_NOT_NULL | DATA_UNSIGNED,
1998 FTS_INDEX_LAST_DOC_ID_LEN);
1999
2000 dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2001 DATA_NOT_NULL | DATA_UNSIGNED,
2002 FTS_INDEX_DOC_COUNT_LEN);
2003
2004 /* The precise type calculation is as follows:
2005 least signficiant byte: MySQL type code (not applicable for sys cols)
2006 second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2007 third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2008
2009 dict_mem_table_add_col(
2010 new_table, heap, "ilist", DATA_BLOB,
2011 (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2012 FTS_INDEX_ILIST_LEN);
2013
2014 dict_table_add_system_columns(new_table, heap);
2015 error = row_create_table_for_mysql(new_table, trx,
2016 FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
2017
2018 if (error == DB_SUCCESS) {
2019 dict_index_t* index = dict_mem_index_create(
2020 new_table, "FTS_INDEX_TABLE_IND",
2021 DICT_UNIQUE|DICT_CLUSTERED, 2);
2022 dict_mem_index_add_field(index, "word", 0);
2023 dict_mem_index_add_field(index, "first_doc_id", 0);
2024
2025 trx_dict_op_t op = trx_get_dict_operation(trx);
2026
2027 error = row_create_index_for_mysql(index, trx, NULL);
2028
2029 trx->dict_operation = op;
2030 } else {
2031 err_exit:
2032 new_table = NULL;
2033 ib::warn() << "Failed to create FTS index table "
2034 << table_name;
2035 trx->error_state = error;
2036 return NULL;
2037 }
2038
2039 if (error != DB_SUCCESS) {
2040 dict_mem_table_free(new_table);
2041 trx->error_state = DB_SUCCESS;
2042 row_drop_table_for_mysql(table_name, trx, SQLCOM_DROP_DB);
2043 goto err_exit;
2044 }
2045
2046 return(new_table);
2047 }
2048
2049 /** Creates the column specific ancillary tables needed for supporting an
2050 FTS index on the given table. row_mysql_lock_data_dictionary must have
2051 been called before this.
2052
2053 All FTS AUX Index tables have the following schema.
2054 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2055 word VARCHAR(FTS_MAX_WORD_LEN),
2056 first_doc_id INT NOT NULL,
2057 last_doc_id UNSIGNED NOT NULL,
2058 doc_count UNSIGNED INT NOT NULL,
2059 ilist VARBINARY NOT NULL,
2060 UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2061 @param[in,out] trx dictionary transaction
2062 @param[in] index fulltext index
2063 @param[in] id table id
2064 @return DB_SUCCESS or error code */
2065 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index,table_id_t id)2066 fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
2067 {
2068 ulint i;
2069 fts_table_t fts_table;
2070 dberr_t error = DB_SUCCESS;
2071 mem_heap_t* heap = mem_heap_create(1024);
2072
2073 fts_table.type = FTS_INDEX_TABLE;
2074 fts_table.index_id = index->id;
2075 fts_table.table_id = id;
2076 fts_table.table = index->table;
2077
2078 /* aux_idx_tables vector is used for dropping FTS AUX INDEX
2079 tables on error condition. */
2080 std::vector<dict_table_t*> aux_idx_tables;
2081 std::vector<dict_table_t*>::const_iterator it;
2082
2083 for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2084 dict_table_t* new_table;
2085
2086 /* Create the FTS auxiliary tables that are specific
2087 to an FTS index. We need to preserve the table_id %s
2088 which fts_parse_sql_no_dict_lock() will fill in for us. */
2089 fts_table.suffix = fts_get_suffix(i);
2090
2091 new_table = fts_create_one_index_table(
2092 trx, index, &fts_table, heap);
2093
2094 if (new_table == NULL) {
2095 error = DB_FAIL;
2096 break;
2097 } else {
2098 aux_idx_tables.push_back(new_table);
2099 }
2100
2101 mem_heap_empty(heap);
2102
2103 DBUG_EXECUTE_IF("ib_fts_index_table_error",
2104 /* Return error after creating FTS_INDEX_5
2105 aux table. */
2106 if (i == 4) {
2107 error = DB_FAIL;
2108 break;
2109 }
2110 );
2111 }
2112
2113 if (error != DB_SUCCESS) {
2114
2115 for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2116 ++it) {
2117 row_drop_table_for_mysql((*it)->name.m_name, trx,
2118 SQLCOM_DROP_DB);
2119 }
2120 }
2121
2122 aux_idx_tables.clear();
2123 mem_heap_free(heap);
2124
2125 return(error);
2126 }
2127
2128 /******************************************************************//**
2129 Calculate the new state of a row given the existing state and a new event.
2130 @return new state of row */
2131 static
2132 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2133 fts_trx_row_get_new_state(
2134 /*======================*/
2135 fts_row_state old_state, /*!< in: existing state of row */
2136 fts_row_state event) /*!< in: new event */
2137 {
2138 /* The rules for transforming states:
2139
2140 I = inserted
2141 M = modified
2142 D = deleted
2143 N = nothing
2144
2145 M+D -> D:
2146
2147 If the row existed before the transaction started and it is modified
2148 during the transaction, followed by a deletion of the row, only the
2149 deletion will be signaled.
2150
2151 M+ -> M:
2152
2153 If the row existed before the transaction started and it is modified
2154 more than once during the transaction, only the last modification
2155 will be signaled.
2156
2157 IM*D -> N:
2158
2159 If a new row is added during the transaction (and possibly modified
2160 after its initial insertion) but it is deleted before the end of the
2161 transaction, nothing will be signaled.
2162
2163 IM* -> I:
2164
2165 If a new row is added during the transaction and modified after its
2166 initial insertion, only the addition will be signaled.
2167
2168 M*DI -> M:
2169
2170 If the row existed before the transaction started and it is deleted,
2171 then re-inserted, only a modification will be signaled. Note that
2172 this case is only possible if the table is using the row's primary
2173 key for FTS row ids, since those can be re-inserted by the user,
2174 which is not true for InnoDB generated row ids.
2175
2176 It is easily seen that the above rules decompose such that we do not
2177 need to store the row's entire history of events. Instead, we can
2178 store just one state for the row and update that when new events
2179 arrive. Then we can implement the above rules as a two-dimensional
2180 look-up table, and get checking of invalid combinations "for free"
2181 in the process. */
2182
2183 /* The lookup table for transforming states. old_state is the
2184 Y-axis, event is the X-axis. */
2185 static const fts_row_state table[4][4] = {
2186 /* I M D N */
2187 /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
2188 /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
2189 /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
2190 /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2191 };
2192
2193 fts_row_state result;
2194
2195 ut_a(old_state < FTS_INVALID);
2196 ut_a(event < FTS_INVALID);
2197
2198 result = table[(int) old_state][(int) event];
2199 ut_a(result != FTS_INVALID);
2200
2201 return(result);
2202 }
2203
2204 /******************************************************************//**
2205 Create a savepoint instance.
2206 @return savepoint instance */
2207 static
2208 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2209 fts_savepoint_create(
2210 /*=================*/
2211 ib_vector_t* savepoints, /*!< out: InnoDB transaction */
2212 const char* name, /*!< in: savepoint name */
2213 mem_heap_t* heap) /*!< in: heap */
2214 {
2215 fts_savepoint_t* savepoint;
2216
2217 savepoint = static_cast<fts_savepoint_t*>(
2218 ib_vector_push(savepoints, NULL));
2219
2220 memset(savepoint, 0x0, sizeof(*savepoint));
2221
2222 if (name) {
2223 savepoint->name = mem_heap_strdup(heap, name);
2224 }
2225
2226 savepoint->tables = rbt_create(
2227 sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2228
2229 return(savepoint);
2230 }
2231
2232 /******************************************************************//**
2233 Create an FTS trx.
2234 @return FTS trx */
2235 fts_trx_t*
fts_trx_create(trx_t * trx)2236 fts_trx_create(
2237 /*===========*/
2238 trx_t* trx) /*!< in/out: InnoDB
2239 transaction */
2240 {
2241 fts_trx_t* ftt;
2242 ib_alloc_t* heap_alloc;
2243 mem_heap_t* heap = mem_heap_create(1024);
2244 trx_named_savept_t* savep;
2245
2246 ut_a(trx->fts_trx == NULL);
2247
2248 ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2249 ftt->trx = trx;
2250 ftt->heap = heap;
2251
2252 heap_alloc = ib_heap_allocator_create(heap);
2253
2254 ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2255 heap_alloc, sizeof(fts_savepoint_t), 4));
2256
2257 ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2258 heap_alloc, sizeof(fts_savepoint_t), 4));
2259
2260 /* Default instance has no name and no heap. */
2261 fts_savepoint_create(ftt->savepoints, NULL, NULL);
2262 fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2263
2264 /* Copy savepoints that already set before. */
2265 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2266 savep != NULL;
2267 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2268
2269 fts_savepoint_take(ftt, savep->name);
2270 }
2271
2272 return(ftt);
2273 }
2274
2275 /******************************************************************//**
2276 Create an FTS trx table.
2277 @return FTS trx table */
2278 static
2279 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2280 fts_trx_table_create(
2281 /*=================*/
2282 fts_trx_t* fts_trx, /*!< in: FTS trx */
2283 dict_table_t* table) /*!< in: table */
2284 {
2285 fts_trx_table_t* ftt;
2286
2287 ftt = static_cast<fts_trx_table_t*>(
2288 mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2289
2290 memset(ftt, 0x0, sizeof(*ftt));
2291
2292 ftt->table = table;
2293 ftt->fts_trx = fts_trx;
2294
2295 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2296
2297 return(ftt);
2298 }
2299
2300 /******************************************************************//**
2301 Clone an FTS trx table.
2302 @return FTS trx table */
2303 static
2304 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2305 fts_trx_table_clone(
2306 /*=================*/
2307 const fts_trx_table_t* ftt_src) /*!< in: FTS trx */
2308 {
2309 fts_trx_table_t* ftt;
2310
2311 ftt = static_cast<fts_trx_table_t*>(
2312 mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2313
2314 memset(ftt, 0x0, sizeof(*ftt));
2315
2316 ftt->table = ftt_src->table;
2317 ftt->fts_trx = ftt_src->fts_trx;
2318
2319 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2320
2321 /* Copy the rb tree values to the new savepoint. */
2322 rbt_merge_uniq(ftt->rows, ftt_src->rows);
2323
2324 /* These are only added on commit. At this stage we only have
2325 the updated row state. */
2326 ut_a(ftt_src->added_doc_ids == NULL);
2327
2328 return(ftt);
2329 }
2330
2331 /******************************************************************//**
2332 Initialize the FTS trx instance.
2333 @return FTS trx instance */
2334 static
2335 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2336 fts_trx_init(
2337 /*=========*/
2338 trx_t* trx, /*!< in: transaction */
2339 dict_table_t* table, /*!< in: FTS table instance */
2340 ib_vector_t* savepoints) /*!< in: Savepoints */
2341 {
2342 fts_trx_table_t* ftt;
2343 ib_rbt_bound_t parent;
2344 ib_rbt_t* tables;
2345 fts_savepoint_t* savepoint;
2346
2347 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2348
2349 tables = savepoint->tables;
2350 rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2351
2352 if (parent.result == 0) {
2353 fts_trx_table_t** fttp;
2354
2355 fttp = rbt_value(fts_trx_table_t*, parent.last);
2356 ftt = *fttp;
2357 } else {
2358 ftt = fts_trx_table_create(trx->fts_trx, table);
2359 rbt_add_node(tables, &parent, &ftt);
2360 }
2361
2362 ut_a(ftt->table == table);
2363
2364 return(ftt);
2365 }
2366
2367 /******************************************************************//**
2368 Notify the FTS system about an operation on an FTS-indexed table. */
2369 static
2370 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2371 fts_trx_table_add_op(
2372 /*=================*/
2373 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2374 doc_id_t doc_id, /*!< in: doc id */
2375 fts_row_state state, /*!< in: state of the row */
2376 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
2377 {
2378 ib_rbt_t* rows;
2379 ib_rbt_bound_t parent;
2380
2381 rows = ftt->rows;
2382 rbt_search(rows, &parent, &doc_id);
2383
2384 /* Row id found, update state, and if new state is FTS_NOTHING,
2385 we delete the row from our tree. */
2386 if (parent.result == 0) {
2387 fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
2388
2389 row->state = fts_trx_row_get_new_state(row->state, state);
2390
2391 if (row->state == FTS_NOTHING) {
2392 if (row->fts_indexes) {
2393 ib_vector_free(row->fts_indexes);
2394 }
2395
2396 ut_free(rbt_remove_node(rows, parent.last));
2397 row = NULL;
2398 } else if (row->fts_indexes != NULL) {
2399 ib_vector_free(row->fts_indexes);
2400 row->fts_indexes = fts_indexes;
2401 }
2402
2403 } else { /* Row-id not found, create a new one. */
2404 fts_trx_row_t row;
2405
2406 row.doc_id = doc_id;
2407 row.state = state;
2408 row.fts_indexes = fts_indexes;
2409
2410 rbt_add_node(rows, &parent, &row);
2411 }
2412 }
2413
2414 /******************************************************************//**
2415 Notify the FTS system about an operation on an FTS-indexed table. */
2416 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2417 fts_trx_add_op(
2418 /*===========*/
2419 trx_t* trx, /*!< in: InnoDB transaction */
2420 dict_table_t* table, /*!< in: table */
2421 doc_id_t doc_id, /*!< in: new doc id */
2422 fts_row_state state, /*!< in: state of the row */
2423 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
2424 (NULL=all) */
2425 {
2426 fts_trx_table_t* tran_ftt;
2427 fts_trx_table_t* stmt_ftt;
2428
2429 if (!trx->fts_trx) {
2430 trx->fts_trx = fts_trx_create(trx);
2431 }
2432
2433 tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2434 stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2435
2436 fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2437 fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2438 }
2439
2440 /******************************************************************//**
2441 Fetch callback that converts a textual document id to a binary value and
2442 stores it in the given place.
2443 @return always returns NULL */
2444 static
2445 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2446 fts_fetch_store_doc_id(
2447 /*===================*/
2448 void* row, /*!< in: sel_node_t* */
2449 void* user_arg) /*!< in: doc_id_t* to store
2450 doc_id in */
2451 {
2452 int n_parsed;
2453 sel_node_t* node = static_cast<sel_node_t*>(row);
2454 doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
2455 dfield_t* dfield = que_node_get_val(node->select_list);
2456 dtype_t* type = dfield_get_type(dfield);
2457 ulint len = dfield_get_len(dfield);
2458
2459 char buf[32];
2460
2461 ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2462 ut_a(len > 0 && len < sizeof(buf));
2463
2464 memcpy(buf, dfield_get_data(dfield), len);
2465 buf[len] = '\0';
2466
2467 n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2468 ut_a(n_parsed == 1);
2469
2470 return(FALSE);
2471 }
2472
#ifdef FTS_CACHE_SIZE_DEBUG
/** Get the max cache size in bytes.
The value is read from the FTS_MAX_CACHE_SIZE_IN_MB entry of the config
table. If reading fails, an error is logged and the lower limit is used.
A value outside [FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB,
FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB] is clamped to the nearer limit and a
warning is logged.
@param[in]	trx		transaction
@param[in]	fts_table	table instance
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
	trx_t*		trx,
	fts_table_t*	fts_table)
{
	dberr_t		error;
	fts_string_t	value;
	ulong		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. One
	extra byte is reserved for the terminator written below. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (UNIV_LIKELY(error == DB_SUCCESS)) {
		/* Terminate the text so that strtoul() can parse it. */
		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if (cache_size_in_mb
			   < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			/* The text matches the upper-limit warning; the
			space after "is" keeps the number apart from it. */
			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << error << ") reading max"
			" cache config value from config table "
			<< fts_table->table->name;
	}

	ut_free(value.f_str);

	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2542
2543 /*********************************************************************//**
2544 Update the next and last Doc ID in the CONFIG table to be the input
2545 "doc_id" value (+ 1). We would do so after each FTS index build or
2546 table truncate */
2547 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,doc_id_t doc_id)2548 fts_update_next_doc_id(
2549 /*===================*/
2550 trx_t* trx, /*!< in/out: transaction */
2551 const dict_table_t* table, /*!< in: table */
2552 doc_id_t doc_id) /*!< in: DOC ID to set */
2553 {
2554 table->fts->cache->synced_doc_id = doc_id;
2555 table->fts->cache->next_doc_id = doc_id + 1;
2556
2557 table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2558
2559 fts_update_sync_doc_id(
2560 table, table->fts->cache->synced_doc_id, trx);
2561
2562 }
2563
2564 /*********************************************************************//**
2565 Get the next available document id.
2566 @return DB_SUCCESS if OK */
2567 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2568 fts_get_next_doc_id(
2569 /*================*/
2570 const dict_table_t* table, /*!< in: table */
2571 doc_id_t* doc_id) /*!< out: new document id */
2572 {
2573 fts_cache_t* cache = table->fts->cache;
2574
2575 /* If the Doc ID system has not yet been initialized, we
2576 will consult the CONFIG table and user table to re-establish
2577 the initial value of the Doc ID */
2578 if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2579 fts_init_doc_id(table);
2580 }
2581
2582 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2583 *doc_id = FTS_NULL_DOC_ID;
2584 return(DB_SUCCESS);
2585 }
2586
2587 DEBUG_SYNC_C("get_next_FTS_DOC_ID");
2588 mutex_enter(&cache->doc_id_lock);
2589 *doc_id = cache->next_doc_id++;
2590 mutex_exit(&cache->doc_id_lock);
2591
2592 return(DB_SUCCESS);
2593 }
2594
2595 /*********************************************************************//**
2596 This function fetch the Doc ID from CONFIG table, and compare with
2597 the Doc ID supplied. And store the larger one to the CONFIG table.
2598 @return DB_SUCCESS if OK */
2599 static MY_ATTRIBUTE((nonnull))
2600 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t cmp_doc_id,ibool read_only,doc_id_t * doc_id)2601 fts_cmp_set_sync_doc_id(
2602 /*====================*/
2603 const dict_table_t* table, /*!< in: table */
2604 doc_id_t cmp_doc_id, /*!< in: Doc ID to compare */
2605 ibool read_only, /*!< in: TRUE if read the
2606 synced_doc_id only */
2607 doc_id_t* doc_id) /*!< out: larger document id
2608 after comparing "cmp_doc_id"
2609 to the one stored in CONFIG
2610 table */
2611 {
2612 trx_t* trx;
2613 pars_info_t* info;
2614 dberr_t error;
2615 fts_table_t fts_table;
2616 que_t* graph = NULL;
2617 fts_cache_t* cache = table->fts->cache;
2618 char table_name[MAX_FULL_NAME_LEN];
2619 retry:
2620 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2621
2622 fts_table.suffix = "CONFIG";
2623 fts_table.table_id = table->id;
2624 fts_table.type = FTS_COMMON_TABLE;
2625 fts_table.table = table;
2626
2627 trx = trx_create();
2628 if (srv_read_only_mode) {
2629 trx_start_internal_read_only(trx);
2630 } else {
2631 trx_start_internal(trx);
2632 }
2633
2634 trx->op_info = "update the next FTS document id";
2635
2636 info = pars_info_create();
2637
2638 pars_info_bind_function(
2639 info, "my_func", fts_fetch_store_doc_id, doc_id);
2640
2641 fts_get_table_name(&fts_table, table_name);
2642 pars_info_bind_id(info, "config_table", table_name);
2643
2644 graph = fts_parse_sql(
2645 &fts_table, info,
2646 "DECLARE FUNCTION my_func;\n"
2647 "DECLARE CURSOR c IS SELECT value FROM $config_table"
2648 " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2649 "BEGIN\n"
2650 ""
2651 "OPEN c;\n"
2652 "WHILE 1 = 1 LOOP\n"
2653 " FETCH c INTO my_func();\n"
2654 " IF c % NOTFOUND THEN\n"
2655 " EXIT;\n"
2656 " END IF;\n"
2657 "END LOOP;\n"
2658 "CLOSE c;");
2659
2660 *doc_id = 0;
2661
2662 error = fts_eval_sql(trx, graph);
2663
2664 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2665
2666 // FIXME: We need to retry deadlock errors
2667 if (error != DB_SUCCESS) {
2668 goto func_exit;
2669 }
2670
2671 if (read_only) {
2672 /* InnoDB stores actual synced_doc_id value + 1 in
2673 FTS_CONFIG table. Reduce the value by 1 while reading
2674 after startup. */
2675 if (*doc_id) *doc_id -= 1;
2676 goto func_exit;
2677 }
2678
2679 if (cmp_doc_id == 0 && *doc_id) {
2680 cache->synced_doc_id = *doc_id - 1;
2681 } else {
2682 cache->synced_doc_id = ut_max(cmp_doc_id, *doc_id);
2683 }
2684
2685 mutex_enter(&cache->doc_id_lock);
2686 /* For each sync operation, we will add next_doc_id by 1,
2687 so to mark a sync operation */
2688 if (cache->next_doc_id < cache->synced_doc_id + 1) {
2689 cache->next_doc_id = cache->synced_doc_id + 1;
2690 }
2691 mutex_exit(&cache->doc_id_lock);
2692
2693 if (cmp_doc_id > *doc_id) {
2694 error = fts_update_sync_doc_id(
2695 table, cache->synced_doc_id, trx);
2696 }
2697
2698 *doc_id = cache->next_doc_id;
2699
2700 func_exit:
2701
2702 if (UNIV_LIKELY(error == DB_SUCCESS)) {
2703 fts_sql_commit(trx);
2704 } else {
2705 *doc_id = 0;
2706
2707 ib::error() << "(" << error << ") while getting next doc id "
2708 "for table " << table->name;
2709 fts_sql_rollback(trx);
2710
2711 if (error == DB_DEADLOCK) {
2712 os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2713 goto retry;
2714 }
2715 }
2716
2717 trx->free();
2718
2719 return(error);
2720 }
2721
2722 /*********************************************************************//**
2723 Update the last document id. This function could create a new
2724 transaction to update the last document id.
2725 @return DB_SUCCESS if OK */
2726 static
2727 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,doc_id_t doc_id,trx_t * trx)2728 fts_update_sync_doc_id(
2729 /*===================*/
2730 const dict_table_t* table, /*!< in: table */
2731 doc_id_t doc_id, /*!< in: last document id */
2732 trx_t* trx) /*!< in: update trx, or NULL */
2733 {
2734 byte id[FTS_MAX_ID_LEN];
2735 pars_info_t* info;
2736 fts_table_t fts_table;
2737 ulint id_len;
2738 que_t* graph = NULL;
2739 dberr_t error;
2740 ibool local_trx = FALSE;
2741 fts_cache_t* cache = table->fts->cache;
2742 char fts_name[MAX_FULL_NAME_LEN];
2743
2744 if (srv_read_only_mode) {
2745 return DB_READ_ONLY;
2746 }
2747
2748 fts_table.suffix = "CONFIG";
2749 fts_table.table_id = table->id;
2750 fts_table.type = FTS_COMMON_TABLE;
2751 fts_table.table = table;
2752
2753 if (!trx) {
2754 trx = trx_create();
2755 trx_start_internal(trx);
2756
2757 trx->op_info = "setting last FTS document id";
2758 local_trx = TRUE;
2759 }
2760
2761 info = pars_info_create();
2762
2763 id_len = (ulint) snprintf(
2764 (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2765
2766 pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2767
2768 fts_get_table_name(&fts_table, fts_name,
2769 table->fts->dict_locked);
2770 pars_info_bind_id(info, "table_name", fts_name);
2771
2772 graph = fts_parse_sql(
2773 &fts_table, info,
2774 "BEGIN"
2775 " UPDATE $table_name SET value = :doc_id"
2776 " WHERE key = 'synced_doc_id';");
2777
2778 error = fts_eval_sql(trx, graph);
2779
2780 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2781
2782 if (local_trx) {
2783 if (UNIV_LIKELY(error == DB_SUCCESS)) {
2784 fts_sql_commit(trx);
2785 cache->synced_doc_id = doc_id;
2786 } else {
2787 ib::error() << "(" << error << ") while"
2788 " updating last doc id for table"
2789 << table->name;
2790
2791 fts_sql_rollback(trx);
2792 }
2793 trx->free();
2794 }
2795
2796 return(error);
2797 }
2798
2799 /*********************************************************************//**
2800 Create a new fts_doc_ids_t.
2801 @return new fts_doc_ids_t */
2802 fts_doc_ids_t*
fts_doc_ids_create(void)2803 fts_doc_ids_create(void)
2804 /*====================*/
2805 {
2806 fts_doc_ids_t* fts_doc_ids;
2807 mem_heap_t* heap = mem_heap_create(512);
2808
2809 fts_doc_ids = static_cast<fts_doc_ids_t*>(
2810 mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2811
2812 fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2813
2814 fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2815 fts_doc_ids->self_heap, sizeof(doc_id_t), 32));
2816
2817 return(fts_doc_ids);
2818 }
2819
2820 /*********************************************************************//**
2821 Do commit-phase steps necessary for the insertion of a new row. */
2822 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)2823 fts_add(
2824 /*====*/
2825 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2826 fts_trx_row_t* row) /*!< in: row */
2827 {
2828 dict_table_t* table = ftt->table;
2829 doc_id_t doc_id = row->doc_id;
2830
2831 ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
2832
2833 fts_add_doc_by_id(ftt, doc_id);
2834
2835 mutex_enter(&table->fts->cache->deleted_lock);
2836 ++table->fts->cache->added;
2837 mutex_exit(&table->fts->cache->deleted_lock);
2838
2839 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
2840 && doc_id >= table->fts->cache->next_doc_id) {
2841 table->fts->cache->next_doc_id = doc_id + 1;
2842 }
2843 }
2844
2845 /*********************************************************************//**
2846 Do commit-phase steps necessary for the deletion of a row.
2847 @return DB_SUCCESS or error code */
2848 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2849 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)2850 fts_delete(
2851 /*=======*/
2852 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2853 fts_trx_row_t* row) /*!< in: row */
2854 {
2855 que_t* graph;
2856 fts_table_t fts_table;
2857 dberr_t error = DB_SUCCESS;
2858 doc_id_t write_doc_id;
2859 dict_table_t* table = ftt->table;
2860 doc_id_t doc_id = row->doc_id;
2861 trx_t* trx = ftt->fts_trx->trx;
2862 pars_info_t* info = pars_info_create();
2863 fts_cache_t* cache = table->fts->cache;
2864
2865 /* we do not index Documents whose Doc ID value is 0 */
2866 if (doc_id == FTS_NULL_DOC_ID) {
2867 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
2868 return(error);
2869 }
2870
2871 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2872
2873 FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
2874
2875 /* Convert to "storage" byte order. */
2876 fts_write_doc_id((byte*) &write_doc_id, doc_id);
2877 fts_bind_doc_id(info, "doc_id", &write_doc_id);
2878
2879 /* It is possible we update a record that has not yet been sync-ed
2880 into cache from last crash (delete Doc will not initialize the
2881 sync). Avoid any added counter accounting until the FTS cache
2882 is re-established and sync-ed */
2883 if (table->fts->added_synced
2884 && doc_id > cache->synced_doc_id) {
2885 mutex_enter(&table->fts->cache->deleted_lock);
2886
2887 /* The Doc ID could belong to those left in
2888 ADDED table from last crash. So need to check
2889 if it is less than first_doc_id when we initialize
2890 the Doc ID system after reboot */
2891 if (doc_id >= table->fts->cache->first_doc_id
2892 && table->fts->cache->added > 0) {
2893 --table->fts->cache->added;
2894 }
2895
2896 mutex_exit(&table->fts->cache->deleted_lock);
2897
2898 /* Only if the row was really deleted. */
2899 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2900 }
2901
2902 /* Note the deleted document for OPTIMIZE to purge. */
2903 if (error == DB_SUCCESS) {
2904 char table_name[MAX_FULL_NAME_LEN];
2905
2906 trx->op_info = "adding doc id to FTS DELETED";
2907
2908 info->graph_owns_us = TRUE;
2909
2910 fts_table.suffix = "DELETED";
2911
2912 fts_get_table_name(&fts_table, table_name);
2913 pars_info_bind_id(info, "deleted", table_name);
2914
2915 graph = fts_parse_sql(
2916 &fts_table,
2917 info,
2918 "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
2919
2920 error = fts_eval_sql(trx, graph);
2921
2922 fts_que_graph_free(graph);
2923 } else {
2924 pars_info_free(info);
2925 }
2926
2927 /* Increment the total deleted count, this is used to calculate the
2928 number of documents indexed. */
2929 if (error == DB_SUCCESS) {
2930 mutex_enter(&table->fts->cache->deleted_lock);
2931
2932 ++table->fts->cache->deleted;
2933
2934 mutex_exit(&table->fts->cache->deleted_lock);
2935 }
2936
2937 return(error);
2938 }
2939
2940 /*********************************************************************//**
2941 Do commit-phase steps necessary for the modification of a row.
2942 @return DB_SUCCESS or error code */
2943 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2944 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)2945 fts_modify(
2946 /*=======*/
2947 fts_trx_table_t* ftt, /*!< in: FTS trx table */
2948 fts_trx_row_t* row) /*!< in: row */
2949 {
2950 dberr_t error;
2951
2952 ut_a(row->state == FTS_MODIFY);
2953
2954 error = fts_delete(ftt, row);
2955
2956 if (error == DB_SUCCESS) {
2957 fts_add(ftt, row);
2958 }
2959
2960 return(error);
2961 }
2962
2963 /*********************************************************************//**
2964 The given transaction is about to be committed; do whatever is necessary
2965 from the FTS system's POV.
2966 @return DB_SUCCESS or error code */
2967 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2968 dberr_t
fts_commit_table(fts_trx_table_t * ftt)2969 fts_commit_table(
2970 /*=============*/
2971 fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
2972 {
2973 if (srv_read_only_mode) {
2974 return DB_READ_ONLY;
2975 }
2976
2977 const ib_rbt_node_t* node;
2978 ib_rbt_t* rows;
2979 dberr_t error = DB_SUCCESS;
2980 fts_cache_t* cache = ftt->table->fts->cache;
2981 trx_t* trx = trx_create();
2982
2983 trx_start_internal(trx);
2984
2985 rows = ftt->rows;
2986
2987 ftt->fts_trx->trx = trx;
2988
2989 if (cache->get_docs == NULL) {
2990 rw_lock_x_lock(&cache->init_lock);
2991 if (cache->get_docs == NULL) {
2992 cache->get_docs = fts_get_docs_create(cache);
2993 }
2994 rw_lock_x_unlock(&cache->init_lock);
2995 }
2996
2997 for (node = rbt_first(rows);
2998 node != NULL && error == DB_SUCCESS;
2999 node = rbt_next(rows, node)) {
3000
3001 fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
3002
3003 switch (row->state) {
3004 case FTS_INSERT:
3005 fts_add(ftt, row);
3006 break;
3007
3008 case FTS_MODIFY:
3009 error = fts_modify(ftt, row);
3010 break;
3011
3012 case FTS_DELETE:
3013 error = fts_delete(ftt, row);
3014 break;
3015
3016 default:
3017 ut_error;
3018 }
3019 }
3020
3021 fts_sql_commit(trx);
3022
3023 trx->free();
3024
3025 return(error);
3026 }
3027
3028 /*********************************************************************//**
3029 The given transaction is about to be committed; do whatever is necessary
3030 from the FTS system's POV.
3031 @return DB_SUCCESS or error code */
3032 dberr_t
fts_commit(trx_t * trx)3033 fts_commit(
3034 /*=======*/
3035 trx_t* trx) /*!< in: transaction */
3036 {
3037 const ib_rbt_node_t* node;
3038 dberr_t error;
3039 ib_rbt_t* tables;
3040 fts_savepoint_t* savepoint;
3041
3042 savepoint = static_cast<fts_savepoint_t*>(
3043 ib_vector_last(trx->fts_trx->savepoints));
3044 tables = savepoint->tables;
3045
3046 for (node = rbt_first(tables), error = DB_SUCCESS;
3047 node != NULL && error == DB_SUCCESS;
3048 node = rbt_next(tables, node)) {
3049
3050 fts_trx_table_t** ftt;
3051
3052 ftt = rbt_value(fts_trx_table_t*, node);
3053
3054 error = fts_commit_table(*ftt);
3055 }
3056
3057 return(error);
3058 }
3059
3060 /*********************************************************************//**
3061 Initialize a document. */
3062 void
fts_doc_init(fts_doc_t * doc)3063 fts_doc_init(
3064 /*=========*/
3065 fts_doc_t* doc) /*!< in: doc to initialize */
3066 {
3067 mem_heap_t* heap = mem_heap_create(32);
3068
3069 memset(doc, 0, sizeof(*doc));
3070
3071 doc->self_heap = ib_heap_allocator_create(heap);
3072 }
3073
3074 /*********************************************************************//**
3075 Free document. */
3076 void
fts_doc_free(fts_doc_t * doc)3077 fts_doc_free(
3078 /*=========*/
3079 fts_doc_t* doc) /*!< in: document */
3080 {
3081 mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3082
3083 if (doc->tokens) {
3084 rbt_free(doc->tokens);
3085 }
3086
3087 ut_d(memset(doc, 0, sizeof(*doc)));
3088
3089 mem_heap_free(heap);
3090 }
3091
3092 /*********************************************************************//**
3093 Callback function for fetch that stores the text of an FTS document,
3094 converting each column to UTF-16.
3095 @return always FALSE */
3096 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3097 fts_query_expansion_fetch_doc(
3098 /*==========================*/
3099 void* row, /*!< in: sel_node_t* */
3100 void* user_arg) /*!< in: fts_doc_t* */
3101 {
3102 que_node_t* exp;
3103 sel_node_t* node = static_cast<sel_node_t*>(row);
3104 fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
3105 dfield_t* dfield;
3106 ulint len;
3107 ulint doc_len;
3108 fts_doc_t doc;
3109 CHARSET_INFO* doc_charset = NULL;
3110 ulint field_no = 0;
3111
3112 len = 0;
3113
3114 fts_doc_init(&doc);
3115 doc.found = TRUE;
3116
3117 exp = node->select_list;
3118 doc_len = 0;
3119
3120 doc_charset = result_doc->charset;
3121
3122 /* Copy each indexed column content into doc->text.f_str */
3123 while (exp) {
3124 dfield = que_node_get_val(exp);
3125 len = dfield_get_len(dfield);
3126
3127 /* NULL column */
3128 if (len == UNIV_SQL_NULL) {
3129 exp = que_node_get_next(exp);
3130 continue;
3131 }
3132
3133 if (!doc_charset) {
3134 doc_charset = fts_get_charset(dfield->type.prtype);
3135 }
3136
3137 doc.charset = doc_charset;
3138
3139 if (dfield_is_ext(dfield)) {
3140 /* We ignore columns that are stored externally, this
3141 could result in too many words to search */
3142 exp = que_node_get_next(exp);
3143 continue;
3144 } else {
3145 doc.text.f_n_char = 0;
3146
3147 doc.text.f_str = static_cast<byte*>(
3148 dfield_get_data(dfield));
3149
3150 doc.text.f_len = len;
3151 }
3152
3153 if (field_no == 0) {
3154 fts_tokenize_document(&doc, result_doc,
3155 result_doc->parser);
3156 } else {
3157 fts_tokenize_document_next(&doc, doc_len, result_doc,
3158 result_doc->parser);
3159 }
3160
3161 exp = que_node_get_next(exp);
3162
3163 doc_len += (exp) ? len + 1 : len;
3164
3165 field_no++;
3166 }
3167
3168 ut_ad(doc_charset);
3169
3170 if (!result_doc->charset) {
3171 result_doc->charset = doc_charset;
3172 }
3173
3174 fts_doc_free(&doc);
3175
3176 return(FALSE);
3177 }
3178
3179 /*********************************************************************//**
3180 fetch and tokenize the document. */
3181 static
3182 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,rec_offs * offsets,fts_doc_t * doc)3183 fts_fetch_doc_from_rec(
3184 /*===================*/
3185 fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
3186 dict_index_t* clust_index, /*!< in: cluster index */
3187 btr_pcur_t* pcur, /*!< in: cursor whose position
3188 has been stored */
3189 rec_offs* offsets, /*!< in: offsets */
3190 fts_doc_t* doc) /*!< out: fts doc to hold parsed
3191 documents */
3192 {
3193 dict_index_t* index;
3194 const rec_t* clust_rec;
3195 const dict_field_t* ifield;
3196 ulint clust_pos;
3197 ulint doc_len = 0;
3198 st_mysql_ftparser* parser;
3199
3200 if (!get_doc) {
3201 return;
3202 }
3203
3204 index = get_doc->index_cache->index;
3205 parser = get_doc->index_cache->index->parser;
3206
3207 clust_rec = btr_pcur_get_rec(pcur);
3208 ut_ad(!page_rec_is_comp(clust_rec)
3209 || rec_get_status(clust_rec) == REC_STATUS_ORDINARY);
3210
3211 for (ulint i = 0; i < index->n_fields; i++) {
3212 ifield = dict_index_get_nth_field(index, i);
3213 clust_pos = dict_col_get_clust_pos(ifield->col, clust_index);
3214
3215 if (!get_doc->index_cache->charset) {
3216 get_doc->index_cache->charset = fts_get_charset(
3217 ifield->col->prtype);
3218 }
3219
3220 if (rec_offs_nth_extern(offsets, clust_pos)) {
3221 doc->text.f_str =
3222 btr_rec_copy_externally_stored_field(
3223 clust_rec, offsets,
3224 btr_pcur_get_block(pcur)->zip_size(),
3225 clust_pos, &doc->text.f_len,
3226 static_cast<mem_heap_t*>(
3227 doc->self_heap->arg));
3228 } else {
3229 doc->text.f_str = (byte*) rec_get_nth_field(
3230 clust_rec, offsets, clust_pos,
3231 &doc->text.f_len);
3232 }
3233
3234 doc->found = TRUE;
3235 doc->charset = get_doc->index_cache->charset;
3236
3237 /* Null Field */
3238 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3239 continue;
3240 }
3241
3242 if (!doc_len) {
3243 fts_tokenize_document(doc, NULL, parser);
3244 } else {
3245 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3246 }
3247
3248 doc_len += doc->text.f_len + 1;
3249 }
3250 }
3251
3252 /** Fetch the data from tuple and tokenize the document.
3253 @param[in] get_doc FTS index's get_doc struct
3254 @param[in] tuple tuple should be arranged in table schema order
3255 @param[out] doc fts doc to hold parsed documents. */
3256 static
3257 void
fts_fetch_doc_from_tuple(fts_get_doc_t * get_doc,const dtuple_t * tuple,fts_doc_t * doc)3258 fts_fetch_doc_from_tuple(
3259 fts_get_doc_t* get_doc,
3260 const dtuple_t* tuple,
3261 fts_doc_t* doc)
3262 {
3263 dict_index_t* index;
3264 st_mysql_ftparser* parser;
3265 ulint doc_len = 0;
3266 ulint processed_doc = 0;
3267 ulint num_field;
3268
3269 if (get_doc == NULL) {
3270 return;
3271 }
3272
3273 index = get_doc->index_cache->index;
3274 parser = get_doc->index_cache->index->parser;
3275 num_field = dict_index_get_n_fields(index);
3276
3277 for (ulint i = 0; i < num_field; i++) {
3278 const dict_field_t* ifield;
3279 const dict_col_t* col;
3280 ulint pos;
3281
3282 ifield = dict_index_get_nth_field(index, i);
3283 col = dict_field_get_col(ifield);
3284 pos = dict_col_get_no(col);
3285 const dfield_t* field = dtuple_get_nth_field(tuple, pos);
3286
3287 if (!get_doc->index_cache->charset) {
3288 get_doc->index_cache->charset = fts_get_charset(
3289 ifield->col->prtype);
3290 }
3291
3292 ut_ad(!dfield_is_ext(field));
3293
3294 doc->text.f_str = (byte*) dfield_get_data(field);
3295 doc->text.f_len = dfield_get_len(field);
3296 doc->found = TRUE;
3297 doc->charset = get_doc->index_cache->charset;
3298
3299 /* field data is NULL. */
3300 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3301 continue;
3302 }
3303
3304 if (processed_doc == 0) {
3305 fts_tokenize_document(doc, NULL, parser);
3306 } else {
3307 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3308 }
3309
3310 processed_doc++;
3311 doc_len += doc->text.f_len + 1;
3312 }
3313 }
3314
3315 /** Fetch the document from tuple, tokenize the text data and
3316 insert the text data into fts auxiliary table and
3317 its cache. Moreover this tuple fields doesn't contain any information
3318 about externally stored field. This tuple contains data directly
3319 converted from mysql.
3320 @param[in] ftt FTS transaction table
3321 @param[in] doc_id doc id
3322 @param[in] tuple tuple from where data can be retrieved
3323 and tuple should be arranged in table
3324 schema order. */
3325 void
fts_add_doc_from_tuple(fts_trx_table_t * ftt,doc_id_t doc_id,const dtuple_t * tuple)3326 fts_add_doc_from_tuple(
3327 fts_trx_table_t*ftt,
3328 doc_id_t doc_id,
3329 const dtuple_t* tuple)
3330 {
3331 mtr_t mtr;
3332 fts_cache_t* cache = ftt->table->fts->cache;
3333
3334 ut_ad(cache->get_docs);
3335
3336 if (!ftt->table->fts->added_synced) {
3337 fts_init_index(ftt->table, FALSE);
3338 }
3339
3340 mtr_start(&mtr);
3341
3342 ulint num_idx = ib_vector_size(cache->get_docs);
3343
3344 for (ulint i = 0; i < num_idx; ++i) {
3345 fts_doc_t doc;
3346 dict_table_t* table;
3347 fts_get_doc_t* get_doc;
3348
3349 get_doc = static_cast<fts_get_doc_t*>(
3350 ib_vector_get(cache->get_docs, i));
3351 table = get_doc->index_cache->index->table;
3352
3353 fts_doc_init(&doc);
3354 fts_fetch_doc_from_tuple(
3355 get_doc, tuple, &doc);
3356
3357 if (doc.found) {
3358 mtr_commit(&mtr);
3359 rw_lock_x_lock(&table->fts->cache->lock);
3360
3361 if (table->fts->cache->stopword_info.status
3362 & STOPWORD_NOT_INIT) {
3363 fts_load_stopword(table, NULL, NULL,
3364 true, true);
3365 }
3366
3367 fts_cache_add_doc(
3368 table->fts->cache,
3369 get_doc->index_cache,
3370 doc_id, doc.tokens);
3371
3372 rw_lock_x_unlock(&table->fts->cache->lock);
3373
3374 if (cache->total_size > fts_max_cache_size / 5
3375 || fts_need_sync) {
3376 fts_sync(cache->sync, true, false);
3377 }
3378
3379 mtr_start(&mtr);
3380
3381 }
3382
3383 fts_doc_free(&doc);
3384 }
3385
3386 mtr_commit(&mtr);
3387 }
3388
/*********************************************************************//**
Look up the row with the given Doc ID through the table's FTS_DOC_ID
index, tokenize its FTS-indexed columns once per entry in the cache's
get_docs vector, and add the resulting tokens to the FTS cache. When the
cache has grown enough (or fts_need_sync is set) and no sync is in
progress, a sync of the table is requested through
fts_optimize_request_sync_table().
If no matching, non-delete-marked record is found, nothing is added.
@return always TRUE */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id)		/*!< in: doc id */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t	temp_doc_id;
	dict_index_t*	clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!ftt->table->fts->added_synced) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* The heap holds the search tuples and the record offsets; it is
	freed at the end of the function. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = table->fts_doc_id_index;

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The Doc ID is stored in the 8-byte format written by
	mach_write_to_8(); temp_doc_id is only a buffer for those bytes. */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		rec_offs*	offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The FTS_DOC_ID index is the clustered index, so
			the record found is already the full row. */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			/* Build a clustered index reference from the
			secondary index record and position a second
			cursor on the clustered index.
			NOTE(review): the result of this second lookup is
			not checked for an exact match — confirm that a
			missing clustered record cannot occur here. */
			dtuple_t*	clust_ref;
			ulint		n_fields;

			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL);

			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					  clust_index->n_core_fields,
					  ULINT_UNDEFINED, &heap);

		for (ulint i = 0; i < num_idx; ++i) {
			/* Note: table and get_doc below shadow the
			function-level variables of the same name. */
			fts_doc_t	doc;
			dict_table_t*	table;
			fts_get_doc_t*	get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Save the cursor position and commit the
				mini-transaction before taking the cache
				lock; the position is restored below if
				more indexes remain to be processed. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				rw_lock_x_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL,
							  NULL, true, true);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Decide, while still holding the cache
				lock, whether a sync should be requested:
				no sync may be in progress, and either
				fts_need_sync is set or the cache has grown
				by more than a tenth of fts_max_cache_size
				since total_size_at_sync was last recorded. */
				bool	need_sync = !cache->sync->in_progress
					&& (fts_need_sync
					    || (cache->total_size
						- cache->total_size_at_sync)
					    > fts_max_cache_size / 10);
				if (need_sync) {
					cache->total_size_at_sync =
						cache->total_size;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				/* Debug-only instrumentation hooks */
				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				mtr_start(&mtr);

				/* The cursor is only needed again if another
				index is still to be processed. */
				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		/* pcur itself is closed at func_exit; only the separate
		clustered index cursor needs closing here. */
		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}
3596
3597
3598 /*********************************************************************//**
3599 Callback function to read a single ulint column.
3600 return always returns TRUE */
3601 static
3602 ibool
fts_read_ulint(void * row,void * user_arg)3603 fts_read_ulint(
3604 /*===========*/
3605 void* row, /*!< in: sel_node_t* */
3606 void* user_arg) /*!< in: pointer to ulint */
3607 {
3608 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
3609 ulint* value = static_cast<ulint*>(user_arg);
3610 que_node_t* exp = sel_node->select_list;
3611 dfield_t* dfield = que_node_get_val(exp);
3612 void* data = dfield_get_data(dfield);
3613
3614 *value = mach_read_from_4(static_cast<const byte*>(data));
3615
3616 return(TRUE);
3617 }
3618
3619 /*********************************************************************//**
3620 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3621 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3622 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3623 fts_get_max_doc_id(
3624 /*===============*/
3625 dict_table_t* table) /*!< in: user table */
3626 {
3627 dict_index_t* index;
3628 dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL;
3629 doc_id_t doc_id = 0;
3630 mtr_t mtr;
3631 btr_pcur_t pcur;
3632
3633 index = table->fts_doc_id_index;
3634
3635 if (!index) {
3636 return(0);
3637 }
3638
3639 ut_ad(!index->is_instant());
3640
3641 dfield = dict_index_get_nth_field(index, 0);
3642
3643 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3644 ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3645 #endif
3646
3647 mtr_start(&mtr);
3648
3649 /* fetch the largest indexes value */
3650 btr_pcur_open_at_index_side(
3651 false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3652
3653 if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3654 const rec_t* rec = NULL;
3655
3656 do {
3657 rec = btr_pcur_get_rec(&pcur);
3658
3659 if (page_rec_is_user_rec(rec)) {
3660 break;
3661 }
3662 } while (btr_pcur_move_to_prev(&pcur, &mtr));
3663
3664 if (!rec || rec_is_metadata(rec, *index)) {
3665 goto func_exit;
3666 }
3667
3668 doc_id = fts_read_doc_id(rec);
3669 }
3670
3671 func_exit:
3672 btr_pcur_close(&pcur);
3673 mtr_commit(&mtr);
3674 return(doc_id);
3675 }
3676
/*********************************************************************//**
Fetch document(s) by document id through an internally parsed SQL
cursor, invoking the supplied callback for every fetched row.
With FTS_FETCH_DOC_BY_ID_EQUAL the row whose Doc ID equals doc_id is
selected; otherwise (FTS_FETCH_DOC_BY_ID_LARGE) all rows whose Doc ID is
greater than doc_id are selected and the Doc ID column is prepended to
the select list.
The index is resolved before get_doc is tested for NULL, so at least one
of get_doc and index_to_use must be non-NULL.
When get_doc is given, the parsed query graph is stored in
get_doc->get_document_graph and reused by later calls; the reuse does
not depend on the value of option.
@return DB_SUCCESS if OK else error */
dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
	fts_get_doc_t*	get_doc,	/*!< in: state */
	doc_id_t	doc_id,		/*!< in: id of document to
					fetch */
	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
					or NULL */
	ulint		option,		/*!< in: search option, if it is
					greater than doc_id or equal */
	fts_sql_callback
			callback,	/*!< in: callback to read */
	void*		arg)		/*!< in: callback arg */
{
	pars_info_t*	info;
	dberr_t		error;
	const char*	select_str;
	doc_id_t	write_doc_id;
	dict_index_t*	index;
	trx_t*		trx = trx_create();
	que_t*		graph;

	trx->op_info = "fetching indexed FTS document";

	/* The FTS index can be supplied by caller directly with
	"index_to_use", otherwise, get it from "get_doc" */
	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;

	/* Reuse the bind info of a cached graph if there is one */
	if (get_doc && get_doc->get_document_graph) {
		info = get_doc->get_document_graph->info;
	} else {
		info = pars_info_create();
	}

	/* Convert to "storage" byte order. */
	fts_write_doc_id((byte*) &write_doc_id, doc_id);
	fts_bind_doc_id(info, "doc_id", &write_doc_id);
	pars_info_bind_function(info, "my_func", callback, arg);

	select_str = fts_get_select_columns_str(index, info, info->heap);
	pars_info_bind_id(info, "table_name", index->table->name.m_name);

	/* Parse a new graph unless a cached one is available */
	if (!get_doc || !get_doc->get_document_graph) {
		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s FROM $table_name"
					" WHERE %s = :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					"  FETCH c INTO my_func();\n"
					"  IF c %% NOTFOUND THEN\n"
					"    EXIT;\n"
					"  END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					select_str, FTS_DOC_ID_COL_NAME));
		} else {
			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);

			/* This is used for crash recovery of table with
			hidden DOC ID or FTS indexes. We will scan the table
			to re-process user table rows whose DOC ID or
			FTS indexed documents have not been sync-ed to disk
			during a recent crash.
			In the case that all fulltext indexes are dropped
			for a table, we will keep the "hidden" FTS_DOC_ID
			column, and this scan is to retrieve the largest
			DOC ID being used in the table to determine the
			appropriate next DOC ID.
			In the case that there exist fulltext index(es), this
			operation will re-tokenize any docs that have not
			been sync-ed to the disk, and re-prime the FTS
			cache */
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s, %s FROM $table_name"
					" WHERE %s > :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					"  FETCH c INTO my_func();\n"
					"  IF c %% NOTFOUND THEN\n"
					"    EXIT;\n"
					"  END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					FTS_DOC_ID_COL_NAME,
					select_str, FTS_DOC_ID_COL_NAME));
		}
		/* Cache the graph for subsequent calls with this get_doc */
		if (get_doc) {
			get_doc->get_document_graph = graph;
		}
	} else {
		graph = get_doc->get_document_graph;
	}

	error = fts_eval_sql(trx, graph);
	/* The transaction is committed regardless of the evaluation
	result; the error code is still returned to the caller. */
	fts_sql_commit(trx);
	trx->free();

	/* Without a get_doc there is nowhere to cache the graph, so it
	is freed here. */
	if (!get_doc) {
		fts_que_graph_free(graph);
	}

	return(error);
}
3798
3799 /*********************************************************************//**
3800 Write out a single word's data as new entry/entries in the INDEX table.
3801 @return DB_SUCCESS if all OK. */
3802 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3803 fts_write_node(
3804 /*===========*/
3805 trx_t* trx, /*!< in: transaction */
3806 que_t** graph, /*!< in: query graph */
3807 fts_table_t* fts_table, /*!< in: aux table */
3808 fts_string_t* word, /*!< in: word in UTF-8 */
3809 fts_node_t* node) /*!< in: node columns */
3810 {
3811 pars_info_t* info;
3812 dberr_t error;
3813 ib_uint32_t doc_count;
3814 time_t start_time;
3815 doc_id_t last_doc_id;
3816 doc_id_t first_doc_id;
3817 char table_name[MAX_FULL_NAME_LEN];
3818
3819 ut_a(node->ilist != NULL);
3820
3821 if (*graph) {
3822 info = (*graph)->info;
3823 } else {
3824 info = pars_info_create();
3825
3826 fts_get_table_name(fts_table, table_name);
3827 pars_info_bind_id(info, "index_table_name", table_name);
3828 }
3829
3830 pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3831
3832 /* Convert to "storage" byte order. */
3833 fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3834 fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3835
3836 /* Convert to "storage" byte order. */
3837 fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3838 fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3839
3840 ut_a(node->last_doc_id >= node->first_doc_id);
3841
3842 /* Convert to "storage" byte order. */
3843 mach_write_to_4((byte*) &doc_count, node->doc_count);
3844 pars_info_bind_int4_literal(
3845 info, "doc_count", (const ib_uint32_t*) &doc_count);
3846
3847 /* Set copy_name to FALSE since it's a static. */
3848 pars_info_bind_literal(
3849 info, "ilist", node->ilist, node->ilist_size,
3850 DATA_BLOB, DATA_BINARY_TYPE);
3851
3852 if (!*graph) {
3853
3854 *graph = fts_parse_sql(
3855 fts_table,
3856 info,
3857 "BEGIN\n"
3858 "INSERT INTO $index_table_name VALUES"
3859 " (:token, :first_doc_id,"
3860 " :last_doc_id, :doc_count, :ilist);");
3861 }
3862
3863 start_time = time(NULL);
3864 error = fts_eval_sql(trx, *graph);
3865 elapsed_time += time(NULL) - start_time;
3866 ++n_nodes;
3867
3868 return(error);
3869 }
3870
3871 /*********************************************************************//**
3872 Add rows to the DELETED_CACHE table.
3873 @return DB_SUCCESS if all went well else error code*/
3874 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3875 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)3876 fts_sync_add_deleted_cache(
3877 /*=======================*/
3878 fts_sync_t* sync, /*!< in: sync state */
3879 ib_vector_t* doc_ids) /*!< in: doc ids to add */
3880 {
3881 ulint i;
3882 pars_info_t* info;
3883 que_t* graph;
3884 fts_table_t fts_table;
3885 char table_name[MAX_FULL_NAME_LEN];
3886 doc_id_t dummy = 0;
3887 dberr_t error = DB_SUCCESS;
3888 ulint n_elems = ib_vector_size(doc_ids);
3889
3890 ut_a(ib_vector_size(doc_ids) > 0);
3891
3892 ib_vector_sort(doc_ids, fts_doc_id_cmp);
3893
3894 info = pars_info_create();
3895
3896 fts_bind_doc_id(info, "doc_id", &dummy);
3897
3898 FTS_INIT_FTS_TABLE(
3899 &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
3900
3901 fts_get_table_name(&fts_table, table_name);
3902 pars_info_bind_id(info, "table_name", table_name);
3903
3904 graph = fts_parse_sql(
3905 &fts_table,
3906 info,
3907 "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
3908
3909 for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
3910 doc_id_t* update;
3911 doc_id_t write_doc_id;
3912
3913 update = static_cast<doc_id_t*>(ib_vector_get(doc_ids, i));
3914
3915 /* Convert to "storage" byte order. */
3916 fts_write_doc_id((byte*) &write_doc_id, *update);
3917 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3918
3919 error = fts_eval_sql(sync->trx, graph);
3920 }
3921
3922 fts_que_graph_free(graph);
3923
3924 return(error);
3925 }
3926
/** Write the words and ilist to disk.
Walks every word in the index cache and writes each of its nodes that is
not yet marked as synced to the auxiliary index table selected for the
word by fts_select_index(). Nodes are marked as synced before the write
is attempted, and they are marked even when the write is skipped because
an earlier write failed; after the first error no further writes are
issued, but the iteration continues over the remaining words and nodes.
@param[in,out]	trx		transaction
@param[in]	index_cache	index cache
@param[in]	unlock_cache	whether to release the cache lock while a
				node is being written (it is re-acquired
				right after the write)
@return DB_SUCCESS if all went well else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_write_words(
	trx_t*			trx,
	fts_index_cache_t*	index_cache,
	bool			unlock_cache)
{
	fts_table_t	fts_table;
	/* Local statistic for the diagnostic print below; it hides the
	file-level n_nodes counter that fts_write_node() increments. */
	ulint		n_nodes = 0;
	ulint		n_words = 0;
	const ib_rbt_node_t*	rbt_node;
	dberr_t		error = DB_SUCCESS;
	/* Set once the error has been logged, so it is reported only once */
	ibool		print_error = FALSE;
	dict_table_t*	table = index_cache->index->table;

	FTS_INIT_INDEX_TABLE(
		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);

	n_words = rbt_size(index_cache->words);

	/* We iterate over the entire tree, even if there is an error,
	since we want to free the memory used during caching. */
	for (rbt_node = rbt_first(index_cache->words);
	     rbt_node;
	     rbt_node = rbt_next(index_cache->words, rbt_node)) {

		ulint			i;
		ulint			selected;
		fts_tokenizer_word_t*	word;

		word = rbt_value(fts_tokenizer_word_t, rbt_node);

		DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
				os_thread_sleep(300000););

		/* Pick the auxiliary index table for this word and set
		the matching table name suffix. */
		selected = fts_select_index(
			index_cache->charset, word->text.f_str,
			word->text.f_len);

		fts_table.suffix = fts_get_suffix(selected);

		/* We iterate over all the nodes even if there was an error */
		for (i = 0; i < ib_vector_size(word->nodes); ++i) {

			fts_node_t* fts_node = static_cast<fts_node_t*>(
				ib_vector_get(word->nodes, i));

			/* Skip nodes already handled by an earlier pass;
			otherwise mark the node before writing it. */
			if (fts_node->synced) {
				continue;
			} else {
				fts_node->synced = true;
			}

			/*FIXME: we need to handle the error properly. */
			if (error == DB_SUCCESS) {
				/* Release the cache lock for the duration
				of the write when requested. The size of
				word->nodes is re-read on every iteration of
				the enclosing loop. */
				if (unlock_cache) {
					rw_lock_x_unlock(
						&table->fts->cache->lock);
				}

				error = fts_write_node(
					trx,
					&index_cache->ins_graph[selected],
					&fts_table, &word->text, fts_node);

				DEBUG_SYNC_C("fts_write_node");
				DBUG_EXECUTE_IF("fts_write_node_crash",
					DBUG_SUICIDE(););

				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
					os_thread_sleep(1000000);
				);

				if (unlock_cache) {
					rw_lock_x_lock(
						&table->fts->cache->lock);
				}
			}
		}

		/* Counts all nodes of the word, including those that were
		skipped above. */
		n_nodes += ib_vector_size(word->nodes);

		if (UNIV_UNLIKELY(error != DB_SUCCESS) && !print_error) {
			ib::error() << "(" << error << ") writing"
				" word node to FTS auxiliary index table "
				<< table->name;
			print_error = TRUE;
		}
	}

	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
		/* The divisor is clamped to 1 to avoid dividing by zero
		when the cache holds no words. */
		printf("Avg number of nodes: %lf\n",
		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
	}

	return(error);
}
4029
4030 /*********************************************************************//**
4031 Begin Sync, create transaction, acquire locks, etc. */
4032 static
4033 void
fts_sync_begin(fts_sync_t * sync)4034 fts_sync_begin(
4035 /*===========*/
4036 fts_sync_t* sync) /*!< in: sync state */
4037 {
4038 fts_cache_t* cache = sync->table->fts->cache;
4039
4040 n_nodes = 0;
4041 elapsed_time = 0;
4042
4043 sync->start_time = time(NULL);
4044
4045 sync->trx = trx_create();
4046 trx_start_internal(sync->trx);
4047
4048 if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4049 ib::info() << "FTS SYNC for table " << sync->table->name
4050 << ", deleted count: "
4051 << ib_vector_size(cache->deleted_doc_ids)
4052 << " size: " << cache->total_size << " bytes";
4053 }
4054 }
4055
4056 /*********************************************************************//**
4057 Run SYNC on the table, i.e., write out data from the index specific
4058 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4059 @return DB_SUCCESS if all OK */
4060 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4061 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4062 fts_sync_index(
4063 /*===========*/
4064 fts_sync_t* sync, /*!< in: sync state */
4065 fts_index_cache_t* index_cache) /*!< in: index cache */
4066 {
4067 trx_t* trx = sync->trx;
4068
4069 trx->op_info = "doing SYNC index";
4070
4071 if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4072 ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4073 }
4074
4075 ut_ad(rbt_validate(index_cache->words));
4076
4077 return(fts_sync_write_words(trx, index_cache, sync->unlock_cache));
4078 }
4079
4080 /** Check if index cache has been synced completely
4081 @param[in,out] index_cache index cache
4082 @return true if index is synced, otherwise false. */
4083 static
4084 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4085 fts_sync_index_check(
4086 fts_index_cache_t* index_cache)
4087 {
4088 const ib_rbt_node_t* rbt_node;
4089
4090 for (rbt_node = rbt_first(index_cache->words);
4091 rbt_node != NULL;
4092 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4093
4094 fts_tokenizer_word_t* word;
4095 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4096
4097 fts_node_t* fts_node;
4098 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4099
4100 if (!fts_node->synced) {
4101 return(false);
4102 }
4103 }
4104
4105 return(true);
4106 }
4107
4108 /** Reset synced flag in index cache when rollback
4109 @param[in,out] index_cache index cache */
4110 static
4111 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4112 fts_sync_index_reset(
4113 fts_index_cache_t* index_cache)
4114 {
4115 const ib_rbt_node_t* rbt_node;
4116
4117 for (rbt_node = rbt_first(index_cache->words);
4118 rbt_node != NULL;
4119 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4120
4121 fts_tokenizer_word_t* word;
4122 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4123
4124 fts_node_t* fts_node;
4125 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4126
4127 fts_node->synced = false;
4128 }
4129 }
4130
/** Commit the SYNC, change state of processed doc ids etc.
Records the synced max doc id, writes the cache's deleted doc ids to the
DELETED_CACHE table, then clears and re-initialises the cache and
releases the cache x-lock. The sync transaction is committed on success
and rolled back on error; in both cases it is freed before returning.
Note that the cache is cleared even when an error occurred.
The caller is expected to hold the cache x-lock on entry, since this
function releases it.
@param[in,out]	sync	sync state
@return DB_SUCCESS if all OK */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_commit(
	fts_sync_t*	sync)
{
	dberr_t		error;
	trx_t*		trx = sync->trx;
	fts_cache_t*	cache = sync->table->fts->cache;
	doc_id_t	last_doc_id;

	trx->op_info = "doing SYNC commit";

	/* After each Sync, update the CONFIG table about the max doc id
	we just sync-ed to index table */
	error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
					&last_doc_id);

	/* Get the list of deleted documents that are either in the
	cache or were headed there but were deleted before the add
	thread got to them. */

	if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {

		error = fts_sync_add_deleted_cache(
			sync, cache->deleted_doc_ids);
	}

	/* We need to do this within the deleted lock since fts_delete() can
	attempt to add a deleted doc id to the cache deleted id array.
	NOTE(review): the only lock visibly held at this point is
	cache->lock, released just below; the "deleted lock" wording looks
	stale — confirm. */
	fts_cache_clear(cache);
	DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
	fts_cache_init(cache);
	rw_lock_x_unlock(&cache->lock);

	if (UNIV_LIKELY(error == DB_SUCCESS)) {
		fts_sql_commit(trx);
	} else {
		fts_sql_rollback(trx);
		ib::error() << "(" << error << ") during SYNC of "
			"table " << sync->table->name;
	}

	/* elapsed_time is tested for non-zero because it is used as
	the divisor below. */
	if (UNIV_UNLIKELY(fts_enable_diag_print) && elapsed_time) {
		ib::info() << "SYNC for table " << sync->table->name
			<< ": SYNC time: "
			<< (time(NULL) - sync->start_time)
			<< " secs: elapsed "
			<< (double) n_nodes / elapsed_time
			<< " ins/sec";
	}

	/* Avoid assertion in trx_t::free(). */
	trx->dict_operation_lock_mode = 0;
	trx->free();

	return(error);
}
4191
4192 /** Rollback a sync operation
4193 @param[in,out] sync sync state */
4194 static
4195 void
fts_sync_rollback(fts_sync_t * sync)4196 fts_sync_rollback(
4197 fts_sync_t* sync)
4198 {
4199 trx_t* trx = sync->trx;
4200 fts_cache_t* cache = sync->table->fts->cache;
4201
4202 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4203 ulint j;
4204 fts_index_cache_t* index_cache;
4205
4206 index_cache = static_cast<fts_index_cache_t*>(
4207 ib_vector_get(cache->indexes, i));
4208
4209 /* Reset synced flag so nodes will not be skipped
4210 in the next sync, see fts_sync_write_words(). */
4211 fts_sync_index_reset(index_cache);
4212
4213 for (j = 0; fts_index_selector[j].value; ++j) {
4214
4215 if (index_cache->ins_graph[j] != NULL) {
4216
4217 fts_que_graph_free_check_lock(
4218 NULL, index_cache,
4219 index_cache->ins_graph[j]);
4220
4221 index_cache->ins_graph[j] = NULL;
4222 }
4223
4224 if (index_cache->sel_graph[j] != NULL) {
4225
4226 fts_que_graph_free_check_lock(
4227 NULL, index_cache,
4228 index_cache->sel_graph[j]);
4229
4230 index_cache->sel_graph[j] = NULL;
4231 }
4232 }
4233 }
4234
4235 rw_lock_x_unlock(&cache->lock);
4236
4237 fts_sql_rollback(trx);
4238
4239 /* Avoid assertion in trx_t::free(). */
4240 trx->dict_operation_lock_mode = 0;
4241 trx->free();
4242 }
4243
4244 /** Run SYNC on the table, i.e., write out data from the cache to the
4245 FTS auxiliary INDEX table and clear the cache at the end.
4246 @param[in,out] sync sync state
4247 @param[in] unlock_cache whether unlock cache lock when write node
4248 @param[in] wait whether wait when a sync is in progress
4249 @return DB_SUCCESS if all OK */
4250 static
4251 dberr_t
fts_sync(fts_sync_t * sync,bool unlock_cache,bool wait)4252 fts_sync(
4253 fts_sync_t* sync,
4254 bool unlock_cache,
4255 bool wait)
4256 {
4257 if (srv_read_only_mode) {
4258 return DB_READ_ONLY;
4259 }
4260
4261 ulint i;
4262 dberr_t error = DB_SUCCESS;
4263 fts_cache_t* cache = sync->table->fts->cache;
4264
4265 rw_lock_x_lock(&cache->lock);
4266
4267 /* Check if cache is being synced.
4268 Note: we release cache lock in fts_sync_write_words() to
4269 avoid long wait for the lock by other threads. */
4270 while (sync->in_progress) {
4271 rw_lock_x_unlock(&cache->lock);
4272
4273 if (wait) {
4274 os_event_wait(sync->event);
4275 } else {
4276 return(DB_SUCCESS);
4277 }
4278
4279 rw_lock_x_lock(&cache->lock);
4280 }
4281
4282 sync->unlock_cache = unlock_cache;
4283 sync->in_progress = true;
4284
4285 DEBUG_SYNC_C("fts_sync_begin");
4286 fts_sync_begin(sync);
4287
4288 begin_sync:
4289 if (cache->total_size > fts_max_cache_size) {
4290 /* Avoid the case: sync never finish when
4291 insert/update keeps comming. */
4292 ut_ad(sync->unlock_cache);
4293 sync->unlock_cache = false;
4294 }
4295
4296 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4297 fts_index_cache_t* index_cache;
4298
4299 index_cache = static_cast<fts_index_cache_t*>(
4300 ib_vector_get(cache->indexes, i));
4301
4302 if (index_cache->index->to_be_dropped
4303 || index_cache->index->table->to_be_dropped) {
4304 continue;
4305 }
4306
4307 DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
4308 os_thread_sleep(300000););
4309 index_cache->index->index_fts_syncing = true;
4310
4311 error = fts_sync_index(sync, index_cache);
4312
4313 if (error != DB_SUCCESS) {
4314 goto end_sync;
4315 }
4316 }
4317
4318 DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
4319 sync->interrupted = true;
4320 error = DB_INTERRUPTED;
4321 goto end_sync;
4322 );
4323
4324 /* Make sure all the caches are synced. */
4325 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4326 fts_index_cache_t* index_cache;
4327
4328 index_cache = static_cast<fts_index_cache_t*>(
4329 ib_vector_get(cache->indexes, i));
4330
4331 if (index_cache->index->to_be_dropped
4332 || index_cache->index->table->to_be_dropped
4333 || fts_sync_index_check(index_cache)) {
4334 continue;
4335 }
4336
4337 goto begin_sync;
4338 }
4339
4340 end_sync:
4341 if (error == DB_SUCCESS && !sync->interrupted) {
4342 error = fts_sync_commit(sync);
4343 } else {
4344 fts_sync_rollback(sync);
4345 }
4346
4347 rw_lock_x_lock(&cache->lock);
4348 /* Clear fts syncing flags of any indexes in case sync is
4349 interrupted */
4350 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4351 static_cast<fts_index_cache_t*>(
4352 ib_vector_get(cache->indexes, i))
4353 ->index->index_fts_syncing = false;
4354 }
4355
4356 sync->interrupted = false;
4357 sync->in_progress = false;
4358 os_event_set(sync->event);
4359 rw_lock_x_unlock(&cache->lock);
4360
4361 /* We need to check whether an optimize is required, for that
4362 we make copies of the two variables that control the trigger. These
4363 variables can change behind our back and we don't want to hold the
4364 lock for longer than is needed. */
4365 mutex_enter(&cache->deleted_lock);
4366
4367 cache->added = 0;
4368 cache->deleted = 0;
4369
4370 mutex_exit(&cache->deleted_lock);
4371
4372 return(error);
4373 }
4374
4375 /** Run SYNC on the table, i.e., write out data from the cache to the
4376 FTS auxiliary INDEX table and clear the cache at the end.
4377 @param[in,out] table fts table
4378 @param[in] wait whether wait for existing sync to finish
4379 @return DB_SUCCESS on success, error code on failure. */
fts_sync_table(dict_table_t * table,bool wait)4380 dberr_t fts_sync_table(dict_table_t* table, bool wait)
4381 {
4382 dberr_t err = DB_SUCCESS;
4383
4384 ut_ad(table->fts);
4385
4386 if (table->space && table->fts->cache
4387 && !dict_table_is_corrupted(table)) {
4388 err = fts_sync(table->fts->cache->sync, !wait, wait);
4389 }
4390
4391 return(err);
4392 }
4393
4394 /** Check if a fts token is a stopword or less than fts_min_token_size
4395 or greater than fts_max_token_size.
4396 @param[in] token token string
4397 @param[in] stopwords stopwords rb tree
4398 @param[in] cs token charset
4399 @retval true if it is not stopword and length in range
4400 @retval false if it is stopword or lenght not in range */
4401 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,const CHARSET_INFO * cs)4402 fts_check_token(
4403 const fts_string_t* token,
4404 const ib_rbt_t* stopwords,
4405 const CHARSET_INFO* cs)
4406 {
4407 ut_ad(cs != NULL || stopwords == NULL);
4408
4409 ib_rbt_bound_t parent;
4410
4411 return(token->f_n_char >= fts_min_token_size
4412 && token->f_n_char <= fts_max_token_size
4413 && (stopwords == NULL
4414 || rbt_search(stopwords, &parent, token) != 0));
4415 }
4416
4417 /** Add the token and its start position to the token's list of positions.
4418 @param[in,out] result_doc result doc rb tree
4419 @param[in] str token string
4420 @param[in] position token position */
4421 static
4422 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)4423 fts_add_token(
4424 fts_doc_t* result_doc,
4425 fts_string_t str,
4426 ulint position)
4427 {
4428 /* Ignore string whose character number is less than
4429 "fts_min_token_size" or more than "fts_max_token_size" */
4430
4431 if (fts_check_token(&str, NULL, result_doc->charset)) {
4432
4433 mem_heap_t* heap;
4434 fts_string_t t_str;
4435 fts_token_t* token;
4436 ib_rbt_bound_t parent;
4437 ulint newlen;
4438
4439 heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
4440
4441 t_str.f_n_char = str.f_n_char;
4442
4443 t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
4444
4445 t_str.f_str = static_cast<byte*>(
4446 mem_heap_alloc(heap, t_str.f_len));
4447
4448 /* For binary collations, a case sensitive search is
4449 performed. Hence don't convert to lower case. */
4450 if (my_binary_compare(result_doc->charset)) {
4451 memcpy(t_str.f_str, str.f_str, str.f_len);
4452 t_str.f_str[str.f_len]= 0;
4453 newlen= str.f_len;
4454 } else {
4455 newlen = innobase_fts_casedn_str(
4456 result_doc->charset, (char*) str.f_str, str.f_len,
4457 (char*) t_str.f_str, t_str.f_len);
4458 }
4459
4460 t_str.f_len = newlen;
4461 t_str.f_str[newlen] = 0;
4462
4463 /* Add the word to the document statistics. If the word
4464 hasn't been seen before we create a new entry for it. */
4465 if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4466 fts_token_t new_token;
4467
4468 new_token.text.f_len = newlen;
4469 new_token.text.f_str = t_str.f_str;
4470 new_token.text.f_n_char = t_str.f_n_char;
4471
4472 new_token.positions = ib_vector_create(
4473 result_doc->self_heap, sizeof(ulint), 32);
4474
4475 parent.last = rbt_add_node(
4476 result_doc->tokens, &parent, &new_token);
4477
4478 ut_ad(rbt_validate(result_doc->tokens));
4479 }
4480
4481 token = rbt_value(fts_token_t, parent.last);
4482 ib_vector_push(token->positions, &position);
4483 }
4484 }
4485
4486 /********************************************************************
4487 Process next token from document starting at the given position, i.e., add
4488 the token's start position to the token's list of positions.
4489 @return number of characters handled in this call */
4490 static
4491 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)4492 fts_process_token(
4493 /*==============*/
4494 fts_doc_t* doc, /* in/out: document to
4495 tokenize */
4496 fts_doc_t* result, /* out: if provided, save
4497 result here */
4498 ulint start_pos, /*!< in: start position in text */
4499 ulint add_pos) /*!< in: add this position to all
4500 tokens from this tokenization */
4501 {
4502 ulint ret;
4503 fts_string_t str;
4504 ulint position;
4505 fts_doc_t* result_doc;
4506 byte buf[FTS_MAX_WORD_LEN + 1];
4507
4508 str.f_str = buf;
4509
4510 /* Determine where to save the result. */
4511 result_doc = (result != NULL) ? result : doc;
4512
4513 /* The length of a string in characters is set here only. */
4514
4515 ret = innobase_mysql_fts_get_token(
4516 doc->charset, doc->text.f_str + start_pos,
4517 doc->text.f_str + doc->text.f_len, &str);
4518
4519 position = start_pos + ret - str.f_len + add_pos;
4520
4521 fts_add_token(result_doc, str, position);
4522
4523 return(ret);
4524 }
4525
4526 /*************************************************************//**
4527 Get token char size by charset
4528 @return token size */
4529 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)4530 fts_get_token_size(
4531 /*===============*/
4532 const CHARSET_INFO* cs, /*!< in: Character set */
4533 const char* token, /*!< in: token */
4534 ulint len) /*!< in: token length */
4535 {
4536 char* start;
4537 char* end;
4538 ulint size = 0;
4539
4540 /* const_cast is for reinterpret_cast below, or it will fail. */
4541 start = const_cast<char*>(token);
4542 end = start + len;
4543 while (start < end) {
4544 int ctype;
4545 int mbl;
4546
4547 mbl = cs->cset->ctype(
4548 cs, &ctype,
4549 reinterpret_cast<uchar*>(start),
4550 reinterpret_cast<uchar*>(end));
4551
4552 size++;
4553
4554 start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
4555 }
4556
4557 return(size);
4558 }
4559
4560 /*************************************************************//**
4561 FTS plugin parser 'myql_parser' callback function for document tokenize.
4562 Refer to 'st_mysql_ftparser_param' for more detail.
4563 @return always returns 0 */
4564 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,const char * doc,int len)4565 fts_tokenize_document_internal(
4566 /*===========================*/
4567 MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
4568 const char* doc,/*!< in/out: document */
4569 int len) /*!< in: document length */
4570 {
4571 fts_string_t str;
4572 byte buf[FTS_MAX_WORD_LEN + 1];
4573 /* JAN: TODO: MySQL 5.7
4574 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4575 { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
4576 */
4577 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4578 { FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0};
4579
4580 ut_ad(len >= 0);
4581
4582 str.f_str = buf;
4583
4584 for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
4585 inc = innobase_mysql_fts_get_token(
4586 const_cast<CHARSET_INFO*>(param->cs),
4587 (uchar*)(doc) + i,
4588 (uchar*)(doc) + len,
4589 &str);
4590
4591 if (str.f_len > 0) {
4592 /* JAN: TODO: MySQL 5.7
4593 bool_info.position =
4594 static_cast<int>(i + inc - str.f_len);
4595 ut_ad(bool_info.position >= 0);
4596 */
4597
4598 /* Stop when add word fails */
4599 if (param->mysql_add_word(
4600 param,
4601 reinterpret_cast<char*>(str.f_str),
4602 static_cast<int>(str.f_len),
4603 &bool_info)) {
4604 break;
4605 }
4606 }
4607 }
4608
4609 return(0);
4610 }
4611
4612 /******************************************************************//**
4613 FTS plugin parser 'myql_add_word' callback function for document tokenize.
4614 Refer to 'st_mysql_ftparser_param' for more detail.
4615 @return always returns 0 */
4616 static
4617 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO *)4618 fts_tokenize_add_word_for_parser(
4619 /*=============================*/
4620 MYSQL_FTPARSER_PARAM* param, /* in: parser paramter */
4621 const char* word, /* in: token word */
4622 int word_len, /* in: word len */
4623 MYSQL_FTPARSER_BOOLEAN_INFO*)
4624 {
4625 fts_string_t str;
4626 fts_tokenize_param_t* fts_param;
4627 fts_doc_t* result_doc;
4628 ulint position;
4629
4630 fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
4631 result_doc = fts_param->result_doc;
4632 ut_ad(result_doc != NULL);
4633
4634 str.f_str = (byte*)(word);
4635 str.f_len = ulint(word_len);
4636 str.f_n_char = fts_get_token_size(
4637 const_cast<CHARSET_INFO*>(param->cs), word, str.f_len);
4638
4639 /* JAN: TODO: MySQL 5.7 FTS
4640 ut_ad(boolean_info->position >= 0);
4641 position = boolean_info->position + fts_param->add_pos;
4642 */
4643 position = fts_param->add_pos++;
4644
4645 fts_add_token(result_doc, str, position);
4646
4647 return(0);
4648 }
4649
4650 /******************************************************************//**
4651 Parse a document using an external / user supplied parser */
4652 static
4653 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)4654 fts_tokenize_by_parser(
4655 /*===================*/
4656 fts_doc_t* doc, /* in/out: document to tokenize */
4657 st_mysql_ftparser* parser, /* in: plugin fts parser */
4658 fts_tokenize_param_t* fts_param) /* in: fts tokenize param */
4659 {
4660 MYSQL_FTPARSER_PARAM param;
4661
4662 ut_a(parser);
4663
4664 /* Set paramters for param */
4665 param.mysql_parse = fts_tokenize_document_internal;
4666 param.mysql_add_word = fts_tokenize_add_word_for_parser;
4667 param.mysql_ftparam = fts_param;
4668 param.cs = doc->charset;
4669 param.doc = reinterpret_cast<char*>(doc->text.f_str);
4670 param.length = static_cast<int>(doc->text.f_len);
4671 param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
4672
4673 PARSER_INIT(parser, ¶m);
4674 parser->parse(¶m);
4675 PARSER_DEINIT(parser, ¶m);
4676 }
4677
4678 /** Tokenize a document.
4679 @param[in,out] doc document to tokenize
4680 @param[out] result tokenization result
4681 @param[in] parser pluggable parser */
4682 static
4683 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)4684 fts_tokenize_document(
4685 fts_doc_t* doc,
4686 fts_doc_t* result,
4687 st_mysql_ftparser* parser)
4688 {
4689 ut_a(!doc->tokens);
4690 ut_a(doc->charset);
4691
4692 doc->tokens = rbt_create_arg_cmp(sizeof(fts_token_t),
4693 innobase_fts_text_cmp,
4694 (void*) doc->charset);
4695
4696 if (parser != NULL) {
4697 fts_tokenize_param_t fts_param;
4698 fts_param.result_doc = (result != NULL) ? result : doc;
4699 fts_param.add_pos = 0;
4700
4701 fts_tokenize_by_parser(doc, parser, &fts_param);
4702 } else {
4703 ulint inc;
4704
4705 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4706 inc = fts_process_token(doc, result, i, 0);
4707 ut_a(inc > 0);
4708 }
4709 }
4710 }
4711
4712 /** Continue to tokenize a document.
4713 @param[in,out] doc document to tokenize
4714 @param[in] add_pos add this position to all tokens from this tokenization
4715 @param[out] result tokenization result
4716 @param[in] parser pluggable parser */
4717 static
4718 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)4719 fts_tokenize_document_next(
4720 fts_doc_t* doc,
4721 ulint add_pos,
4722 fts_doc_t* result,
4723 st_mysql_ftparser* parser)
4724 {
4725 ut_a(doc->tokens);
4726
4727 if (parser) {
4728 fts_tokenize_param_t fts_param;
4729
4730 fts_param.result_doc = (result != NULL) ? result : doc;
4731 fts_param.add_pos = add_pos;
4732
4733 fts_tokenize_by_parser(doc, parser, &fts_param);
4734 } else {
4735 ulint inc;
4736
4737 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4738 inc = fts_process_token(doc, result, i, add_pos);
4739 ut_a(inc > 0);
4740 }
4741 }
4742 }
4743
4744 /** Create the vector of fts_get_doc_t instances.
4745 @param[in,out] cache fts cache
4746 @return vector of fts_get_doc_t instances */
4747 static
4748 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)4749 fts_get_docs_create(
4750 fts_cache_t* cache)
4751 {
4752 ib_vector_t* get_docs;
4753
4754 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
4755
4756 /* We need one instance of fts_get_doc_t per index. */
4757 get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
4758
4759 /* Create the get_doc instance, we need one of these
4760 per FTS index. */
4761 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4762
4763 dict_index_t** index;
4764 fts_get_doc_t* get_doc;
4765
4766 index = static_cast<dict_index_t**>(
4767 ib_vector_get(cache->indexes, i));
4768
4769 get_doc = static_cast<fts_get_doc_t*>(
4770 ib_vector_push(get_docs, NULL));
4771
4772 memset(get_doc, 0x0, sizeof(*get_doc));
4773
4774 get_doc->index_cache = fts_get_index_cache(cache, *index);
4775 get_doc->cache = cache;
4776
4777 /* Must find the index cache. */
4778 ut_a(get_doc->index_cache != NULL);
4779 }
4780
4781 return(get_docs);
4782 }
4783
4784 /********************************************************************
4785 Release any resources held by the fts_get_doc_t instances. */
4786 static
4787 void
fts_get_docs_clear(ib_vector_t * get_docs)4788 fts_get_docs_clear(
4789 /*===============*/
4790 ib_vector_t* get_docs) /*!< in: Doc retrieval vector */
4791 {
4792 ulint i;
4793
4794 /* Release the get doc graphs if any. */
4795 for (i = 0; i < ib_vector_size(get_docs); ++i) {
4796
4797 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(
4798 ib_vector_get(get_docs, i));
4799
4800 if (get_doc->get_document_graph != NULL) {
4801
4802 ut_a(get_doc->index_cache);
4803
4804 fts_que_graph_free(get_doc->get_document_graph);
4805 get_doc->get_document_graph = NULL;
4806 }
4807 }
4808 }
4809
4810 /*********************************************************************//**
4811 Get the initial Doc ID by consulting the CONFIG table
4812 @return initial Doc ID */
4813 doc_id_t
fts_init_doc_id(const dict_table_t * table)4814 fts_init_doc_id(
4815 /*============*/
4816 const dict_table_t* table) /*!< in: table */
4817 {
4818 doc_id_t max_doc_id = 0;
4819
4820 rw_lock_x_lock(&table->fts->cache->lock);
4821
4822 /* Return if the table is already initialized for DOC ID */
4823 if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
4824 rw_lock_x_unlock(&table->fts->cache->lock);
4825 return(0);
4826 }
4827
4828 DEBUG_SYNC_C("fts_initialize_doc_id");
4829
4830 /* Then compare this value with the ID value stored in the CONFIG
4831 table. The larger one will be our new initial Doc ID */
4832 fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
4833
4834 /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
4835 creating index (and add doc id column. No need to recovery
4836 documents */
4837 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
4838 fts_init_index((dict_table_t*) table, TRUE);
4839 }
4840
4841 table->fts->added_synced = true;
4842
4843 table->fts->cache->first_doc_id = max_doc_id;
4844
4845 rw_lock_x_unlock(&table->fts->cache->lock);
4846
4847 ut_ad(max_doc_id > 0);
4848
4849 return(max_doc_id);
4850 }
4851
#ifdef FTS_MULT_INDEX
/** Check if the index is in the affected set.
@param[in]	fts_indexes	affected FTS indexes
@param[in]	get_doc		info for reading document
@return TRUE if index is updated */
static
ibool
fts_is_index_updated(
	const ib_vector_t*	fts_indexes,
	const fts_get_doc_t*	get_doc)
{
	const dict_index_t*	wanted = get_doc->index_cache->index;

	/* Linear scan; indexes are compared by identity (pointer). */
	for (ulint i = 0; i < ib_vector_size(fts_indexes); ++i) {
		const dict_index_t*	candidate
			= static_cast<const dict_index_t*>(
				ib_vector_getp_const(fts_indexes, i));

		ut_a(candidate != NULL);

		if (candidate == wanted) {
			return(TRUE);
		}
	}

	return(FALSE);
}
#endif
4883
4884 /*********************************************************************//**
4885 Fetch COUNT(*) from specified table.
4886 @return the number of rows in the table */
4887 ulint
fts_get_rows_count(fts_table_t * fts_table)4888 fts_get_rows_count(
4889 /*===============*/
4890 fts_table_t* fts_table) /*!< in: fts table to read */
4891 {
4892 trx_t* trx;
4893 pars_info_t* info;
4894 que_t* graph;
4895 dberr_t error;
4896 ulint count = 0;
4897 char table_name[MAX_FULL_NAME_LEN];
4898
4899 trx = trx_create();
4900 trx->op_info = "fetching FT table rows count";
4901
4902 info = pars_info_create();
4903
4904 pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
4905
4906 fts_get_table_name(fts_table, table_name);
4907 pars_info_bind_id(info, "table_name", table_name);
4908
4909 graph = fts_parse_sql(
4910 fts_table,
4911 info,
4912 "DECLARE FUNCTION my_func;\n"
4913 "DECLARE CURSOR c IS"
4914 " SELECT COUNT(*)"
4915 " FROM $table_name;\n"
4916 "BEGIN\n"
4917 "\n"
4918 "OPEN c;\n"
4919 "WHILE 1 = 1 LOOP\n"
4920 " FETCH c INTO my_func();\n"
4921 " IF c % NOTFOUND THEN\n"
4922 " EXIT;\n"
4923 " END IF;\n"
4924 "END LOOP;\n"
4925 "CLOSE c;");
4926
4927 for (;;) {
4928 error = fts_eval_sql(trx, graph);
4929
4930 if (UNIV_LIKELY(error == DB_SUCCESS)) {
4931 fts_sql_commit(trx);
4932
4933 break; /* Exit the loop. */
4934 } else {
4935 fts_sql_rollback(trx);
4936
4937 if (error == DB_LOCK_WAIT_TIMEOUT) {
4938 ib::warn() << "lock wait timeout reading"
4939 " FTS table. Retrying!";
4940
4941 trx->error_state = DB_SUCCESS;
4942 } else {
4943 ib::error() << "(" << error
4944 << ") while reading FTS table "
4945 << table_name;
4946
4947 break; /* Exit the loop. */
4948 }
4949 }
4950 }
4951
4952 fts_que_graph_free(graph);
4953
4954 trx->free();
4955
4956 return(count);
4957 }
4958
#ifdef FTS_CACHE_SIZE_DEBUG
/** Read the max cache size parameter from the config table and store it
in sync->max_cache_size. A temporary transaction is used for the read.
@param[in,out]	sync	sync state */
static
void
fts_update_max_cache_size(
	fts_sync_t*	sync)
{
	fts_table_t	fts_table;
	trx_t*		trx = trx_create();

	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);

	/* The size returned is in bytes. */
	sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table);

	fts_sql_commit(trx);

	trx->free();
}
#endif /* FTS_CACHE_SIZE_DEBUG */
4983
4984 /*********************************************************************//**
4985 Free the modified rows of a table. */
4986 UNIV_INLINE
4987 void
fts_trx_table_rows_free(ib_rbt_t * rows)4988 fts_trx_table_rows_free(
4989 /*====================*/
4990 ib_rbt_t* rows) /*!< in: rbt of rows to free */
4991 {
4992 const ib_rbt_node_t* node;
4993
4994 for (node = rbt_first(rows); node; node = rbt_first(rows)) {
4995 fts_trx_row_t* row;
4996
4997 row = rbt_value(fts_trx_row_t, node);
4998
4999 if (row->fts_indexes != NULL) {
5000 /* This vector shouldn't be using the
5001 heap allocator. */
5002 ut_a(row->fts_indexes->allocator->arg == NULL);
5003
5004 ib_vector_free(row->fts_indexes);
5005 row->fts_indexes = NULL;
5006 }
5007
5008 ut_free(rbt_remove_node(rows, node));
5009 }
5010
5011 ut_a(rbt_empty(rows));
5012 rbt_free(rows);
5013 }
5014
5015 /*********************************************************************//**
5016 Free an FTS savepoint instance. */
5017 UNIV_INLINE
5018 void
fts_savepoint_free(fts_savepoint_t * savepoint)5019 fts_savepoint_free(
5020 /*===============*/
5021 fts_savepoint_t* savepoint) /*!< in: savepoint instance */
5022 {
5023 const ib_rbt_node_t* node;
5024 ib_rbt_t* tables = savepoint->tables;
5025
5026 /* Nothing to free! */
5027 if (tables == NULL) {
5028 return;
5029 }
5030
5031 for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5032 fts_trx_table_t* ftt;
5033 fts_trx_table_t** fttp;
5034
5035 fttp = rbt_value(fts_trx_table_t*, node);
5036 ftt = *fttp;
5037
5038 /* This can be NULL if a savepoint was released. */
5039 if (ftt->rows != NULL) {
5040 fts_trx_table_rows_free(ftt->rows);
5041 ftt->rows = NULL;
5042 }
5043
5044 /* This can be NULL if a savepoint was released. */
5045 if (ftt->added_doc_ids != NULL) {
5046 fts_doc_ids_free(ftt->added_doc_ids);
5047 ftt->added_doc_ids = NULL;
5048 }
5049
5050 /* The default savepoint name must be NULL. */
5051 if (ftt->docs_added_graph) {
5052 fts_que_graph_free(ftt->docs_added_graph);
5053 }
5054
5055 /* NOTE: We are responsible for free'ing the node */
5056 ut_free(rbt_remove_node(tables, node));
5057 }
5058
5059 ut_a(rbt_empty(tables));
5060 rbt_free(tables);
5061 savepoint->tables = NULL;
5062 }
5063
5064 /*********************************************************************//**
5065 Free an FTS trx. */
5066 void
fts_trx_free(fts_trx_t * fts_trx)5067 fts_trx_free(
5068 /*=========*/
5069 fts_trx_t* fts_trx) /* in, own: FTS trx */
5070 {
5071 ulint i;
5072
5073 for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5074 fts_savepoint_t* savepoint;
5075
5076 savepoint = static_cast<fts_savepoint_t*>(
5077 ib_vector_get(fts_trx->savepoints, i));
5078
5079 /* The default savepoint name must be NULL. */
5080 if (i == 0) {
5081 ut_a(savepoint->name == NULL);
5082 }
5083
5084 fts_savepoint_free(savepoint);
5085 }
5086
5087 for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5088 fts_savepoint_t* savepoint;
5089
5090 savepoint = static_cast<fts_savepoint_t*>(
5091 ib_vector_get(fts_trx->last_stmt, i));
5092
5093 /* The default savepoint name must be NULL. */
5094 if (i == 0) {
5095 ut_a(savepoint->name == NULL);
5096 }
5097
5098 fts_savepoint_free(savepoint);
5099 }
5100
5101 if (fts_trx->heap) {
5102 mem_heap_free(fts_trx->heap);
5103 }
5104 }
5105
5106 /*********************************************************************//**
5107 Extract the doc id from the FTS hidden column.
5108 @return doc id that was extracted from rec */
5109 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5110 fts_get_doc_id_from_row(
5111 /*====================*/
5112 dict_table_t* table, /*!< in: table */
5113 dtuple_t* row) /*!< in: row whose FTS doc id we
5114 want to extract.*/
5115 {
5116 dfield_t* field;
5117 doc_id_t doc_id = 0;
5118
5119 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5120
5121 field = dtuple_get_nth_field(row, table->fts->doc_col);
5122
5123 ut_a(dfield_get_len(field) == sizeof(doc_id));
5124 ut_a(dfield_get_type(field)->mtype == DATA_INT);
5125
5126 doc_id = fts_read_doc_id(
5127 static_cast<const byte*>(dfield_get_data(field)));
5128
5129 return(doc_id);
5130 }
5131
5132 /** Extract the doc id from the record that belongs to index.
5133 @param[in] rec record containing FTS_DOC_ID
5134 @param[in] index index of rec
5135 @param[in] offsets rec_get_offsets(rec,index)
5136 @return doc id that was extracted from rec */
5137 doc_id_t
fts_get_doc_id_from_rec(const rec_t * rec,const dict_index_t * index,const rec_offs * offsets)5138 fts_get_doc_id_from_rec(
5139 const rec_t* rec,
5140 const dict_index_t* index,
5141 const rec_offs* offsets)
5142 {
5143 ulint f = dict_col_get_index_pos(
5144 &index->table->cols[index->table->fts->doc_col], index);
5145 ulint len;
5146 doc_id_t doc_id = mach_read_from_8(
5147 rec_get_nth_field(rec, offsets, f, &len));
5148 ut_ad(len == 8);
5149 return doc_id;
5150 }
5151
5152 /*********************************************************************//**
5153 Search the index specific cache for a particular FTS index.
5154 @return the index specific cache else NULL */
5155 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5156 fts_find_index_cache(
5157 /*=================*/
5158 const fts_cache_t* cache, /*!< in: cache to search */
5159 const dict_index_t* index) /*!< in: index to search for */
5160 {
5161 /* We cast away the const because our internal function, takes
5162 non-const cache arg and returns a non-const pointer. */
5163 return(static_cast<fts_index_cache_t*>(
5164 fts_get_index_cache((fts_cache_t*) cache, index)));
5165 }
5166
5167 /*********************************************************************//**
5168 Search cache for word.
5169 @return the word node vector if found else NULL */
5170 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5171 fts_cache_find_word(
5172 /*================*/
5173 const fts_index_cache_t*index_cache, /*!< in: cache to search */
5174 const fts_string_t* text) /*!< in: word to search for */
5175 {
5176 ib_rbt_bound_t parent;
5177 const ib_vector_t* nodes = NULL;
5178 #ifdef UNIV_DEBUG
5179 dict_table_t* table = index_cache->index->table;
5180 fts_cache_t* cache = table->fts->cache;
5181
5182 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5183 #endif /* UNIV_DEBUG */
5184
5185 /* Lookup the word in the rb tree */
5186 if (rbt_search(index_cache->words, &parent, text) == 0) {
5187 const fts_tokenizer_word_t* word;
5188
5189 word = rbt_value(fts_tokenizer_word_t, parent.last);
5190
5191 nodes = word->nodes;
5192 }
5193
5194 return(nodes);
5195 }
5196
5197 /*********************************************************************//**
5198 Append deleted doc ids to vector. */
5199 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5200 fts_cache_append_deleted_doc_ids(
5201 /*=============================*/
5202 const fts_cache_t* cache, /*!< in: cache to use */
5203 ib_vector_t* vector) /*!< in: append to this vector */
5204 {
5205 mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5206
5207 if (cache->deleted_doc_ids == NULL) {
5208 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5209 return;
5210 }
5211
5212
5213 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5214 doc_id_t* update;
5215
5216 update = static_cast<doc_id_t*>(
5217 ib_vector_get(cache->deleted_doc_ids, i));
5218
5219 ib_vector_push(vector, &update);
5220 }
5221
5222 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5223 }
5224
5225 /*********************************************************************//**
5226 Add the FTS document id hidden column. */
5227 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5228 fts_add_doc_id_column(
5229 /*==================*/
5230 dict_table_t* table, /*!< in/out: Table with FTS index */
5231 mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
5232 {
5233 dict_mem_table_add_col(
5234 table, heap,
5235 FTS_DOC_ID_COL_NAME,
5236 DATA_INT,
5237 dtype_form_prtype(
5238 DATA_NOT_NULL | DATA_UNSIGNED
5239 | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5240 sizeof(doc_id_t));
5241 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5242 }
5243
5244 /** Add new fts doc id to the update vector.
5245 @param[in] table the table that contains the FTS index.
5246 @param[in,out] ufield the fts doc id field in the update vector.
5247 No new memory is allocated for this in this
5248 function.
5249 @param[in,out] next_doc_id the fts doc id that has been added to the
5250 update vector. If 0, a new fts doc id is
5251 automatically generated. The memory provided
5252 for this argument will be used by the update
5253 vector. Ensure that the life time of this
5254 memory matches that of the update vector.
5255 @return the fts doc id used in the update vector */
5256 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5257 fts_update_doc_id(
5258 dict_table_t* table,
5259 upd_field_t* ufield,
5260 doc_id_t* next_doc_id)
5261 {
5262 doc_id_t doc_id;
5263 dberr_t error = DB_SUCCESS;
5264
5265 if (*next_doc_id) {
5266 doc_id = *next_doc_id;
5267 } else {
5268 /* Get the new document id that will be added. */
5269 error = fts_get_next_doc_id(table, &doc_id);
5270 }
5271
5272 if (error == DB_SUCCESS) {
5273 dict_index_t* clust_index;
5274 dict_col_t* col = dict_table_get_nth_col(
5275 table, table->fts->doc_col);
5276
5277 ufield->exp = NULL;
5278
5279 ufield->new_val.len = sizeof(doc_id);
5280
5281 clust_index = dict_table_get_first_index(table);
5282
5283 ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5284 dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5285
5286 /* It is possible we update record that has
5287 not yet be sync-ed from last crash. */
5288
5289 /* Convert to storage byte order. */
5290 ut_a(doc_id != FTS_NULL_DOC_ID);
5291 fts_write_doc_id((byte*) next_doc_id, doc_id);
5292
5293 ufield->new_val.data = next_doc_id;
5294 ufield->new_val.ext = 0;
5295 }
5296
5297 return(doc_id);
5298 }
5299
5300 /** fts_t constructor.
5301 @param[in] table table with FTS indexes
5302 @param[in,out] heap memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)5303 fts_t::fts_t(
5304 const dict_table_t* table,
5305 mem_heap_t* heap)
5306 :
5307 added_synced(0), dict_locked(0),
5308 add_wq(NULL),
5309 cache(NULL),
5310 doc_col(ULINT_UNDEFINED), in_queue(false),
5311 fts_heap(heap)
5312 {
5313 ut_a(table->fts == NULL);
5314
5315 ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap);
5316
5317 indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
5318
5319 dict_table_get_all_fts_indexes(table, indexes);
5320 }
5321
5322 /** fts_t destructor. */
~fts_t()5323 fts_t::~fts_t()
5324 {
5325 ut_ad(add_wq == NULL);
5326
5327 if (cache != NULL) {
5328 fts_cache_clear(cache);
5329 fts_cache_destroy(cache);
5330 cache = NULL;
5331 }
5332
5333 /* There is no need to call ib_vector_free() on this->indexes
5334 because it is stored in this->fts_heap. */
5335 }
5336
5337 /*********************************************************************//**
5338 Create an instance of fts_t.
5339 @return instance of fts_t */
5340 fts_t*
fts_create(dict_table_t * table)5341 fts_create(
5342 /*=======*/
5343 dict_table_t* table) /*!< in/out: table with FTS indexes */
5344 {
5345 fts_t* fts;
5346 mem_heap_t* heap;
5347
5348 heap = mem_heap_create(512);
5349
5350 fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
5351
5352 new(fts) fts_t(table, heap);
5353
5354 return(fts);
5355 }
5356
5357 /*********************************************************************//**
5358 Free the FTS resources. */
5359 void
fts_free(dict_table_t * table)5360 fts_free(
5361 /*=====*/
5362 dict_table_t* table) /*!< in/out: table with FTS indexes */
5363 {
5364 fts_t* fts = table->fts;
5365
5366 fts->~fts_t();
5367
5368 mem_heap_free(fts->fts_heap);
5369
5370 table->fts = NULL;
5371 }
5372
5373 /*********************************************************************//**
5374 Take a FTS savepoint. */
5375 UNIV_INLINE
5376 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)5377 fts_savepoint_copy(
5378 /*===============*/
5379 const fts_savepoint_t* src, /*!< in: source savepoint */
5380 fts_savepoint_t* dst) /*!< out: destination savepoint */
5381 {
5382 const ib_rbt_node_t* node;
5383 const ib_rbt_t* tables;
5384
5385 tables = src->tables;
5386
5387 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5388
5389 fts_trx_table_t* ftt_dst;
5390 const fts_trx_table_t** ftt_src;
5391
5392 ftt_src = rbt_value(const fts_trx_table_t*, node);
5393
5394 ftt_dst = fts_trx_table_clone(*ftt_src);
5395
5396 rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5397 }
5398 }
5399
5400 /*********************************************************************//**
5401 Take a FTS savepoint. */
5402 void
fts_savepoint_take(fts_trx_t * fts_trx,const char * name)5403 fts_savepoint_take(
5404 /*===============*/
5405 fts_trx_t* fts_trx, /*!< in: fts transaction */
5406 const char* name) /*!< in: savepoint name */
5407 {
5408 mem_heap_t* heap;
5409 fts_savepoint_t* savepoint;
5410 fts_savepoint_t* last_savepoint;
5411
5412 ut_a(name != NULL);
5413
5414 heap = fts_trx->heap;
5415
5416 /* The implied savepoint must exist. */
5417 ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5418
5419 last_savepoint = static_cast<fts_savepoint_t*>(
5420 ib_vector_last(fts_trx->savepoints));
5421 savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5422
5423 if (last_savepoint->tables != NULL) {
5424 fts_savepoint_copy(last_savepoint, savepoint);
5425 }
5426 }
5427
5428 /*********************************************************************//**
5429 Lookup a savepoint instance by name.
5430 @return ULINT_UNDEFINED if not found */
5431 UNIV_INLINE
5432 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)5433 fts_savepoint_lookup(
5434 /*==================*/
5435 ib_vector_t* savepoints, /*!< in: savepoints */
5436 const char* name) /*!< in: savepoint name */
5437 {
5438 ulint i;
5439
5440 ut_a(ib_vector_size(savepoints) > 0);
5441
5442 for (i = 1; i < ib_vector_size(savepoints); ++i) {
5443 fts_savepoint_t* savepoint;
5444
5445 savepoint = static_cast<fts_savepoint_t*>(
5446 ib_vector_get(savepoints, i));
5447
5448 if (strcmp(name, savepoint->name) == 0) {
5449 return(i);
5450 }
5451 }
5452
5453 return(ULINT_UNDEFINED);
5454 }
5455
5456 /*********************************************************************//**
5457 Release the savepoint data identified by name. All savepoints created
5458 after the named savepoint are kept.
5459 @return DB_SUCCESS or error code */
5460 void
fts_savepoint_release(trx_t * trx,const char * name)5461 fts_savepoint_release(
5462 /*==================*/
5463 trx_t* trx, /*!< in: transaction */
5464 const char* name) /*!< in: savepoint name */
5465 {
5466 ut_a(name != NULL);
5467
5468 ib_vector_t* savepoints = trx->fts_trx->savepoints;
5469
5470 ut_a(ib_vector_size(savepoints) > 0);
5471
5472 ulint i = fts_savepoint_lookup(savepoints, name);
5473 if (i != ULINT_UNDEFINED) {
5474 ut_a(i >= 1);
5475
5476 fts_savepoint_t* savepoint;
5477 savepoint = static_cast<fts_savepoint_t*>(
5478 ib_vector_get(savepoints, i));
5479
5480 if (i == ib_vector_size(savepoints) - 1) {
5481 /* If the savepoint is the last, we save its
5482 tables to the previous savepoint. */
5483 fts_savepoint_t* prev_savepoint;
5484 prev_savepoint = static_cast<fts_savepoint_t*>(
5485 ib_vector_get(savepoints, i - 1));
5486
5487 ib_rbt_t* tables = savepoint->tables;
5488 savepoint->tables = prev_savepoint->tables;
5489 prev_savepoint->tables = tables;
5490 }
5491
5492 fts_savepoint_free(savepoint);
5493 ib_vector_remove(savepoints, *(void**)savepoint);
5494
5495 /* Make sure we don't delete the implied savepoint. */
5496 ut_a(ib_vector_size(savepoints) > 0);
5497 }
5498 }
5499
5500 /**********************************************************************//**
5501 Refresh last statement savepoint. */
5502 void
fts_savepoint_laststmt_refresh(trx_t * trx)5503 fts_savepoint_laststmt_refresh(
5504 /*===========================*/
5505 trx_t* trx) /*!< in: transaction */
5506 {
5507
5508 fts_trx_t* fts_trx;
5509 fts_savepoint_t* savepoint;
5510
5511 fts_trx = trx->fts_trx;
5512
5513 savepoint = static_cast<fts_savepoint_t*>(
5514 ib_vector_pop(fts_trx->last_stmt));
5515 fts_savepoint_free(savepoint);
5516
5517 ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5518 savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
5519 }
5520
5521 /********************************************************************
5522 Undo the Doc ID add/delete operations in last stmt */
5523 static
5524 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)5525 fts_undo_last_stmt(
5526 /*===============*/
5527 fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
5528 fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
5529 {
5530 ib_rbt_t* s_rows;
5531 ib_rbt_t* l_rows;
5532 const ib_rbt_node_t* node;
5533
5534 l_rows = l_ftt->rows;
5535 s_rows = s_ftt->rows;
5536
5537 for (node = rbt_first(l_rows);
5538 node;
5539 node = rbt_next(l_rows, node)) {
5540 fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
5541 ib_rbt_bound_t parent;
5542
5543 rbt_search(s_rows, &parent, &(l_row->doc_id));
5544
5545 if (parent.result == 0) {
5546 fts_trx_row_t* s_row = rbt_value(
5547 fts_trx_row_t, parent.last);
5548
5549 switch (l_row->state) {
5550 case FTS_INSERT:
5551 ut_free(rbt_remove_node(s_rows, parent.last));
5552 break;
5553
5554 case FTS_DELETE:
5555 if (s_row->state == FTS_NOTHING) {
5556 s_row->state = FTS_INSERT;
5557 } else if (s_row->state == FTS_DELETE) {
5558 ut_free(rbt_remove_node(
5559 s_rows, parent.last));
5560 }
5561 break;
5562
5563 /* FIXME: Check if FTS_MODIFY need to be addressed */
5564 case FTS_MODIFY:
5565 case FTS_NOTHING:
5566 break;
5567 default:
5568 ut_error;
5569 }
5570 }
5571 }
5572 }
5573
5574 /**********************************************************************//**
5575 Rollback to savepoint indentified by name.
5576 @return DB_SUCCESS or error code */
5577 void
fts_savepoint_rollback_last_stmt(trx_t * trx)5578 fts_savepoint_rollback_last_stmt(
5579 /*=============================*/
5580 trx_t* trx) /*!< in: transaction */
5581 {
5582 ib_vector_t* savepoints;
5583 fts_savepoint_t* savepoint;
5584 fts_savepoint_t* last_stmt;
5585 fts_trx_t* fts_trx;
5586 ib_rbt_bound_t parent;
5587 const ib_rbt_node_t* node;
5588 ib_rbt_t* l_tables;
5589 ib_rbt_t* s_tables;
5590
5591 fts_trx = trx->fts_trx;
5592 savepoints = fts_trx->savepoints;
5593
5594 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
5595 last_stmt = static_cast<fts_savepoint_t*>(
5596 ib_vector_last(fts_trx->last_stmt));
5597
5598 l_tables = last_stmt->tables;
5599 s_tables = savepoint->tables;
5600
5601 for (node = rbt_first(l_tables);
5602 node;
5603 node = rbt_next(l_tables, node)) {
5604
5605 fts_trx_table_t** l_ftt;
5606
5607 l_ftt = rbt_value(fts_trx_table_t*, node);
5608
5609 rbt_search_cmp(
5610 s_tables, &parent, &(*l_ftt)->table->id,
5611 fts_trx_table_id_cmp, NULL);
5612
5613 if (parent.result == 0) {
5614 fts_trx_table_t** s_ftt;
5615
5616 s_ftt = rbt_value(fts_trx_table_t*, parent.last);
5617
5618 fts_undo_last_stmt(*s_ftt, *l_ftt);
5619 }
5620 }
5621 }
5622
5623 /**********************************************************************//**
5624 Rollback to savepoint indentified by name.
5625 @return DB_SUCCESS or error code */
5626 void
fts_savepoint_rollback(trx_t * trx,const char * name)5627 fts_savepoint_rollback(
5628 /*===================*/
5629 trx_t* trx, /*!< in: transaction */
5630 const char* name) /*!< in: savepoint name */
5631 {
5632 ulint i;
5633 ib_vector_t* savepoints;
5634
5635 ut_a(name != NULL);
5636
5637 savepoints = trx->fts_trx->savepoints;
5638
5639 /* We pop all savepoints from the the top of the stack up to
5640 and including the instance that was found. */
5641 i = fts_savepoint_lookup(savepoints, name);
5642
5643 if (i != ULINT_UNDEFINED) {
5644 fts_savepoint_t* savepoint;
5645
5646 ut_a(i > 0);
5647
5648 while (ib_vector_size(savepoints) > i) {
5649 fts_savepoint_t* savepoint;
5650
5651 savepoint = static_cast<fts_savepoint_t*>(
5652 ib_vector_pop(savepoints));
5653
5654 if (savepoint->name != NULL) {
5655 /* Since name was allocated on the heap, the
5656 memory will be released when the transaction
5657 completes. */
5658 savepoint->name = NULL;
5659
5660 fts_savepoint_free(savepoint);
5661 }
5662 }
5663
5664 /* Pop all a elements from the top of the stack that may
5665 have been released. We have to be careful that we don't
5666 delete the implied savepoint. */
5667
5668 for (savepoint = static_cast<fts_savepoint_t*>(
5669 ib_vector_last(savepoints));
5670 ib_vector_size(savepoints) > 1
5671 && savepoint->name == NULL;
5672 savepoint = static_cast<fts_savepoint_t*>(
5673 ib_vector_last(savepoints))) {
5674
5675 ib_vector_pop(savepoints);
5676 }
5677
5678 /* Make sure we don't delete the implied savepoint. */
5679 ut_a(ib_vector_size(savepoints) > 0);
5680
5681 /* Restore the savepoint. */
5682 fts_savepoint_take(trx->fts_trx, name);
5683 }
5684 }
5685
fts_check_aux_table(const char * name,table_id_t * table_id,index_id_t * index_id)5686 bool fts_check_aux_table(const char *name,
5687 table_id_t *table_id,
5688 index_id_t *index_id)
5689 {
5690 ulint len= strlen(name);
5691 const char* ptr;
5692 const char* end= name + len;
5693
5694 ut_ad(len <= MAX_FULL_NAME_LEN);
5695 ptr= static_cast<const char*>(memchr(name, '/', len));
5696
5697 if (ptr != NULL)
5698 {
5699 /* We will start the match after the '/' */
5700 ++ptr;
5701 len = end - ptr;
5702 }
5703
5704 /* All auxiliary tables are prefixed with "FTS_" and the name
5705 length will be at the very least greater than 20 bytes. */
5706 if (ptr && len > 20 && !memcmp(ptr, "FTS_", 4))
5707 {
5708 /* Skip the prefix. */
5709 ptr+= 4;
5710 len-= 4;
5711
5712 const char *table_id_ptr= ptr;
5713 /* Skip the table id. */
5714 ptr= static_cast<const char*>(memchr(ptr, '_', len));
5715
5716 if (!ptr)
5717 return false;
5718
5719 /* Skip the underscore. */
5720 ++ptr;
5721 ut_ad(end > ptr);
5722 len= end - ptr;
5723
5724 sscanf(table_id_ptr, UINT64PFx, table_id);
5725 /* First search the common table suffix array. */
5726 for (ulint i = 0; fts_common_tables[i]; ++i)
5727 {
5728 if (!strncmp(ptr, fts_common_tables[i], len))
5729 return true;
5730 }
5731
5732 /* Could be obsolete common tables. */
5733 if ((len == 5 && !memcmp(ptr, "ADDED", len)) ||
5734 (len == 9 && !memcmp(ptr, "STOPWORDS", len)))
5735 return true;
5736
5737 const char* index_id_ptr= ptr;
5738 /* Skip the index id. */
5739 ptr= static_cast<const char*>(memchr(ptr, '_', len));
5740 if (!ptr)
5741 return false;
5742
5743 sscanf(index_id_ptr, UINT64PFx, index_id);
5744
5745 /* Skip the underscore. */
5746 ++ptr;
5747 ut_a(end > ptr);
5748 len= end - ptr;
5749
5750 if (len > 7)
5751 return false;
5752
5753 /* Search the FT index specific array. */
5754 for (ulint i = 0; i < FTS_NUM_AUX_INDEX; ++i)
5755 {
5756 if (!memcmp(ptr, "INDEX_", len - 1))
5757 return true;
5758 }
5759
5760 /* Other FT index specific table(s). */
5761 if (len == 6 && !memcmp(ptr, "DOC_ID", len))
5762 return true;
5763 }
5764
5765 return false;
5766 }
5767
/** (parent table id, FTS index id) pair describing an FTS auxiliary
table. fil_get_fts_spaces() stores index id 0 when
fts_check_aux_table() did not set one. */
typedef std::pair<table_id_t,index_id_t> fts_aux_id;
/** Set of auxiliary table identifiers, ordered by (table id, index id). */
typedef std::set<fts_aux_id> fts_space_set_t;
5770
5771 /** Iterate over all the spaces in the space list and fetch the
5772 fts parent table id and index id.
5773 @param[in,out] fts_space_set store the list of tablespace id and
5774 index id */
fil_get_fts_spaces(fts_space_set_t & fts_space_set)5775 static void fil_get_fts_spaces(fts_space_set_t& fts_space_set)
5776 {
5777 mutex_enter(&fil_system.mutex);
5778
5779 for (fil_space_t *space= UT_LIST_GET_FIRST(fil_system.space_list);
5780 space;
5781 space= UT_LIST_GET_NEXT(space_list, space))
5782 {
5783 index_id_t index_id= 0;
5784 table_id_t table_id= 0;
5785
5786 if (space->purpose == FIL_TYPE_TABLESPACE
5787 && fts_check_aux_table(space->name, &table_id, &index_id))
5788 fts_space_set.insert(std::make_pair(table_id, index_id));
5789 }
5790
5791 mutex_exit(&fil_system.mutex);
5792 }
5793
5794 /** Check whether the parent table id and index id of fts auxilary
5795 tables with SYS_INDEXES. If it exists then we can safely ignore the
5796 fts table from orphaned tables.
5797 @param[in,out] fts_space_set fts space set contains set of auxiliary
5798 table ids */
fts_check_orphaned_tables(fts_space_set_t & fts_space_set)5799 static void fts_check_orphaned_tables(fts_space_set_t& fts_space_set)
5800 {
5801 btr_pcur_t pcur;
5802 mtr_t mtr;
5803 trx_t* trx = trx_create();
5804 trx->op_info = "checking fts orphaned tables";
5805
5806 row_mysql_lock_data_dictionary(trx);
5807
5808 mtr.start();
5809 btr_pcur_open_at_index_side(
5810 true, dict_table_get_first_index(dict_sys.sys_indexes),
5811 BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
5812
5813 do
5814 {
5815 const rec_t *rec;
5816 const byte *tbl_field;
5817 const byte *index_field;
5818 ulint len;
5819
5820 btr_pcur_move_to_next_user_rec(&pcur, &mtr);
5821 if (!btr_pcur_is_on_user_rec(&pcur))
5822 break;
5823
5824 rec= btr_pcur_get_rec(&pcur);
5825 if (rec_get_deleted_flag(rec, 0))
5826 continue;
5827
5828 tbl_field= rec_get_nth_field_old(rec, 0, &len);
5829 if (len != 8)
5830 continue;
5831
5832 index_field= rec_get_nth_field_old(rec, 1, &len);
5833 if (len != 8)
5834 continue;
5835
5836 table_id_t table_id = mach_read_from_8(tbl_field);
5837 index_id_t index_id = mach_read_from_8(index_field);
5838
5839 fts_space_set_t::iterator it = fts_space_set.find(
5840 fts_aux_id(table_id, index_id));
5841
5842 if (it != fts_space_set.end())
5843 fts_space_set.erase(*it);
5844 else
5845 {
5846 it= fts_space_set.find(fts_aux_id(table_id, 0));
5847 if (it != fts_space_set.end())
5848 fts_space_set.erase(*it);
5849 }
5850 } while(!fts_space_set.empty());
5851
5852 btr_pcur_close(&pcur);
5853 mtr.commit();
5854 row_mysql_unlock_data_dictionary(trx);
5855 trx->free();
5856 }
5857
5858 /** Drop all fts auxilary table for the respective fts_id
5859 @param[in] fts_id fts auxilary table ids */
fts_drop_all_aux_tables(trx_t * trx,fts_table_t * fts_table)5860 static void fts_drop_all_aux_tables(trx_t *trx, fts_table_t *fts_table)
5861 {
5862 char fts_table_name[MAX_FULL_NAME_LEN];
5863 for (ulint i= 0;i < FTS_NUM_AUX_INDEX; i++)
5864 {
5865 fts_table->suffix= fts_get_suffix(i);
5866 fts_get_table_name(fts_table, fts_table_name, true);
5867
5868 /* Drop all fts aux and common table */
5869 dberr_t err= fts_drop_table(trx, fts_table_name);
5870
5871 if (err == DB_FAIL)
5872 {
5873 char *path= fil_make_filepath(NULL, fts_table_name, IBD, false);
5874
5875 if (path != NULL)
5876 {
5877 os_file_delete_if_exists(innodb_data_file_key, path , NULL);
5878 ut_free(path);
5879 }
5880 }
5881 }
5882 }
5883
5884 /** Drop all orphaned FTS auxiliary tables, those that don't have
5885 a parent table or FTS index defined on them. */
fts_drop_orphaned_tables()5886 void fts_drop_orphaned_tables()
5887 {
5888 fts_space_set_t fts_space_set;
5889 fil_get_fts_spaces(fts_space_set);
5890
5891 if (fts_space_set.empty())
5892 return;
5893
5894 fts_check_orphaned_tables(fts_space_set);
5895
5896 if (fts_space_set.empty())
5897 return;
5898
5899 trx_t* trx= trx_create();
5900 trx->op_info= "Drop orphaned aux FTS tables";
5901 row_mysql_lock_data_dictionary(trx);
5902
5903 for (fts_space_set_t::iterator it = fts_space_set.begin();
5904 it != fts_space_set.end(); it++)
5905 {
5906 fts_table_t fts_table;
5907 dict_table_t *table= dict_table_open_on_id(it->first, TRUE,
5908 DICT_TABLE_OP_NORMAL);
5909 if (!table)
5910 continue;
5911
5912 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
5913 fts_drop_common_tables(trx, &fts_table, true);
5914
5915 fts_table.type= FTS_INDEX_TABLE;
5916 fts_table.index_id= it->second;
5917 fts_drop_all_aux_tables(trx, &fts_table);
5918
5919 dict_table_close(table, true, false);
5920 }
5921 trx_commit_for_mysql(trx);
5922 row_mysql_unlock_data_dictionary(trx);
5923 trx->dict_operation_lock_mode= 0;
5924 trx->free();
5925 }
5926
5927 /**********************************************************************//**
5928 Check whether user supplied stopword table is of the right format.
5929 Caller is responsible to hold dictionary locks.
5930 @return the stopword column charset if qualifies */
5931 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)5932 fts_valid_stopword_table(
5933 /*=====================*/
5934 const char* stopword_table_name) /*!< in: Stopword table
5935 name */
5936 {
5937 dict_table_t* table;
5938 dict_col_t* col = NULL;
5939
5940 if (!stopword_table_name) {
5941 return(NULL);
5942 }
5943
5944 table = dict_table_get_low(stopword_table_name);
5945
5946 if (!table) {
5947 ib::error() << "User stopword table " << stopword_table_name
5948 << " does not exist.";
5949
5950 return(NULL);
5951 } else {
5952 const char* col_name;
5953
5954 col_name = dict_table_get_col_name(table, 0);
5955
5956 if (ut_strcmp(col_name, "value")) {
5957 ib::error() << "Invalid column name for stopword"
5958 " table " << stopword_table_name << ". Its"
5959 " first column must be named as 'value'.";
5960
5961 return(NULL);
5962 }
5963
5964 col = dict_table_get_nth_col(table, 0);
5965
5966 if (col->mtype != DATA_VARCHAR
5967 && col->mtype != DATA_VARMYSQL) {
5968 ib::error() << "Invalid column type for stopword"
5969 " table " << stopword_table_name << ". Its"
5970 " first column must be of varchar type";
5971
5972 return(NULL);
5973 }
5974 }
5975
5976 ut_ad(col);
5977
5978 return(fts_get_charset(col->prtype));
5979 }
5980
/**********************************************************************//**
This function loads the stopword into the FTS cache. It also
records/fetches stopword configuration to/from FTS configure
table, depending on whether we are creating or reloading the
FTS.
When not reloading, the function returns at once if the stopword state
of the cache no longer has STOPWORD_NOT_INIT set. If stopword
filtering is off, only the status is set to STOPWORD_OFF. Otherwise the
user stopword table is loaded if one is named and loadable, else the
system default list is loaded.
@return true if load operation is successful */
bool
fts_load_stopword(
/*==============*/
	const dict_table_t*
			table,		/*!< in: Table with FTS */
	trx_t*		trx,		/*!< in: Transaction to use, or
					NULL to run in an internal
					transaction that is created,
					committed or rolled back, and
					freed here */
	const char*	session_stopword_table,	/*!< in: Session stopword table
					name; only used when reload
					is false */
	bool		stopword_is_on,	/*!< in: Whether stopword
					option is turned on/off; only
					used when reload is false */
	bool		reload)		/*!< in: Whether it is
					for reloading FTS table */
{
	fts_table_t	fts_table;
	fts_string_t	str;
	dberr_t		error = DB_SUCCESS;
	ulint		use_stopword;
	fts_cache_t*	cache;
	const char*	stopword_to_use = NULL;
	ibool		new_trx = FALSE;
	/* Receives the stopword table name read from the CONFIG table. */
	byte		str_buffer[MAX_FULL_NAME_LEN + 1];

	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);

	cache = table->fts->cache;

	/* Already initialized and no reload requested: nothing to do.
	Note that this path does not go through the cleanup section. */
	if (!reload && !(cache->stopword_info.status & STOPWORD_NOT_INIT)) {
		return true;
	}

	/* Without a caller transaction, run in our own internal one;
	new_trx records that it must be finished and freed below. */
	if (!trx) {
		trx = trx_create();
		if (srv_read_only_mode) {
			trx_start_internal_read_only(trx);
		} else {
			trx_start_internal(trx);
		}
		trx->op_info = "upload FTS stopword";
		new_trx = TRUE;
	}

	/* First check whether stopword filtering is turned off */
	if (reload) {
		/* Reload: the stored setting wins. */
		error = fts_config_get_ulint(
			trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
	} else {
		/* Create: persist the setting passed by the caller. */
		use_stopword = (ulint) stopword_is_on;

		error = fts_config_set_ulint(
			trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
	}

	if (error != DB_SUCCESS) {
		goto cleanup;
	}

	/* If stopword is turned off, no need to continue to load the
	stopword into cache, but still need to do initialization */
	if (!use_stopword) {
		cache->stopword_info.status = STOPWORD_OFF;
		goto cleanup;
	}

	if (reload) {
		/* Fetch the stopword table name from FTS config
		table */
		str.f_n_char = 0;
		str.f_str = str_buffer;
		str.f_len = sizeof(str_buffer) - 1;

		error = fts_config_get_value(
			trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);

		if (error != DB_SUCCESS) {
			goto cleanup;
		}

		/* An empty stored name means no user stopword table. */
		if (*str.f_str) {
			stopword_to_use = (const char*) str.f_str;
		}
	} else {
		stopword_to_use = session_stopword_table;
	}

	if (stopword_to_use
	    && fts_load_user_stopword(table->fts, stopword_to_use,
				      &cache->stopword_info)) {
		/* Save the stopword table name to the configure
		table */
		if (!reload) {
			str.f_n_char = 0;
			str.f_str = (byte*) stopword_to_use;
			str.f_len = ut_strlen(stopword_to_use);

			error = fts_config_set_value(
				trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
		}
	} else {
		/* Load system default stopword list */
		fts_load_default_stopword(&cache->stopword_info);
	}

cleanup:
	/* Only a transaction created by this function is finished and
	freed here; a caller-supplied one is left as it is. */
	if (new_trx) {
		if (error == DB_SUCCESS) {
			fts_sql_commit(trx);
		} else {
			fts_sql_rollback(trx);
		}

		trx->free();
	}

	/* Every path through cleanup leaves the cache with a stopword
	tree; if none was loaded above, an empty one is created. */
	if (!cache->stopword_info.cached_stopword) {
		cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
			&my_charset_latin1);
	}

	return error == DB_SUCCESS;
}
6108
6109 /**********************************************************************//**
6110 Callback function when we initialize the FTS at the start up
6111 time. It recovers the maximum Doc IDs presented in the current table.
6112 @return: always returns TRUE */
6113 static
6114 ibool
fts_init_get_doc_id(void * row,void * user_arg)6115 fts_init_get_doc_id(
6116 /*================*/
6117 void* row, /*!< in: sel_node_t* */
6118 void* user_arg) /*!< in: fts cache */
6119 {
6120 doc_id_t doc_id = FTS_NULL_DOC_ID;
6121 sel_node_t* node = static_cast<sel_node_t*>(row);
6122 que_node_t* exp = node->select_list;
6123 fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
6124
6125 ut_ad(ib_vector_is_empty(cache->get_docs));
6126
6127 /* Copy each indexed column content into doc->text.f_str */
6128 if (exp) {
6129 dfield_t* dfield = que_node_get_val(exp);
6130 dtype_t* type = dfield_get_type(dfield);
6131 void* data = dfield_get_data(dfield);
6132
6133 ut_a(dtype_get_mtype(type) == DATA_INT);
6134
6135 doc_id = static_cast<doc_id_t>(mach_read_from_8(
6136 static_cast<const byte*>(data)));
6137
6138 if (doc_id >= cache->next_doc_id) {
6139 cache->next_doc_id = doc_id + 1;
6140 }
6141 }
6142
6143 return(TRUE);
6144 }
6145
/**********************************************************************//**
Callback function when we initialize the FTS at the start up
time. It recovers Doc IDs that have not sync-ed to the auxiliary
table, and require to bring them back into FTS index.
The first selected expression is read as the Doc ID; each following
non-NULL expression is tokenized as part of one document, which is then
added to the cache for get_doc->index_cache.
@return: always returns TRUE */
static
ibool
fts_init_recover_doc(
/*=================*/
	void*	row,			/*!< in: sel_node_t* */
	void*	user_arg)		/*!< in: fts_get_doc_t*, giving the
					cache and index cache to fill */
{

	fts_doc_t	doc;
	/* Offset passed to fts_tokenize_document_next() for the next
	column; see the accumulation at the end of the loop. */
	ulint		doc_len = 0;
	/* 0 while the Doc ID expression is pending; afterwards counts
	the tokenized (non-NULL) columns, starting at 1. */
	ulint		field_no = 0;
	fts_get_doc_t*	get_doc = static_cast<fts_get_doc_t*>(user_arg);
	doc_id_t	doc_id = FTS_NULL_DOC_ID;
	sel_node_t*	node = static_cast<sel_node_t*>(row);
	que_node_t*	exp = node->select_list;
	fts_cache_t*	cache = get_doc->cache;
	st_mysql_ftparser*	parser = get_doc->index_cache->index->parser;

	fts_doc_init(&doc);
	doc.found = TRUE;

	ut_ad(cache);

	/* Copy each indexed column content into doc->text.f_str */
	while (exp) {
		dfield_t*	dfield = que_node_get_val(exp);
		ulint		len = dfield_get_len(dfield);

		/* The first expression is the Doc ID, not text. */
		if (field_no == 0) {
			dtype_t*	type = dfield_get_type(dfield);
			void*		data = dfield_get_data(dfield);

			ut_a(dtype_get_mtype(type) == DATA_INT);

			doc_id = static_cast<doc_id_t>(mach_read_from_8(
				static_cast<const byte*>(data)));

			field_no++;
			exp = que_node_get_next(exp);
			continue;
		}

		/* NULL columns are skipped without advancing field_no
		or doc_len. */
		if (len == UNIV_SQL_NULL) {
			exp = que_node_get_next(exp);
			continue;
		}

		ut_ad(get_doc);

		/* The index cache charset is set from the first
		tokenized column if it is not known yet. */
		if (!get_doc->index_cache->charset) {
			get_doc->index_cache->charset = fts_get_charset(
				dfield->type.prtype);
		}

		doc.charset = get_doc->index_cache->charset;

		if (dfield_is_ext(dfield)) {
			/* Externally stored column: fetch a copy of the
			full value, allocated from the document's heap. */
			dict_table_t*	table = cache->sync->table;

			doc.text.f_str = btr_copy_externally_stored_field(
				&doc.text.f_len,
				static_cast<byte*>(dfield_get_data(dfield)),
				table->space->zip_size(), len,
				static_cast<mem_heap_t*>(doc.self_heap->arg));
		} else {
			/* Stored inline: point at the field data. */
			doc.text.f_str = static_cast<byte*>(
				dfield_get_data(dfield));

			doc.text.f_len = len;
		}

		/* The first text column starts the token set; later
		columns continue it at offset doc_len. */
		if (field_no == 1) {
			fts_tokenize_document(&doc, NULL, parser);
		} else {
			fts_tokenize_document_next(&doc, doc_len, NULL, parser);
		}

		exp = que_node_get_next(exp);

		/* Advance by the field length, plus one more when another
		expression follows. */
		doc_len += (exp) ? len + 1 : len;

		field_no++;
	}

	fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);

	fts_doc_free(&doc);

	cache->added++;

	/* Keep next_doc_id ahead of every recovered Doc ID. */
	if (doc_id >= cache->next_doc_id) {
		cache->next_doc_id = doc_id + 1;
	}

	return(TRUE);
}
6247
/**********************************************************************//**
This function brings FTS index in sync when FTS index is first
used. There are documents that have not yet sync-ed to auxiliary
tables from last server abnormally shutdown, we will need to bring
such document into FTS cache before any further operations.
The work is done only once per table: if table->fts->added_synced is
already set, only cache->get_docs is made sure to exist. The caller
must not hold dict_sys.mutex (debug assertion); it is acquired here to
register the table with the optimize thread.
@return always TRUE */
ibool
fts_init_index(
/*===========*/
	dict_table_t*	table,		/*!< in: Table with FTS */
	ibool		has_cache_lock)	/*!< in: Whether we already have
					cache lock; if not, cache->lock
					is x-locked for the duration of
					the initialization */
{
	dict_index_t*	index;
	doc_id_t	start_doc;
	fts_get_doc_t*	get_doc = NULL;
	fts_cache_t*	cache = table->fts->cache;
	/* Set once this call actually performs the initialization. */
	bool		need_init = false;

	ut_ad(!mutex_own(&dict_sys.mutex));

	/* First check cache->get_docs is initialized */
	if (!has_cache_lock) {
		rw_lock_x_lock(&cache->lock);
	}

	/* get_docs is created under init_lock. */
	rw_lock_x_lock(&cache->init_lock);
	if (cache->get_docs == NULL) {
		cache->get_docs = fts_get_docs_create(cache);
	}
	rw_lock_x_unlock(&cache->init_lock);

	/* Already brought in sync earlier. */
	if (table->fts->added_synced) {
		goto func_exit;
	}

	need_init = true;

	start_doc = cache->synced_doc_id;

	/* No synced Doc ID cached yet: obtain it through
	fts_cmp_set_sync_doc_id() and remember it. */
	if (!start_doc) {
		fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
		cache->synced_doc_id = start_doc;
	}

	/* No FTS index, this is the case when previous FTS index
	dropped, and we re-initialize the Doc ID system for subsequent
	insertion */
	if (ib_vector_is_empty(cache->get_docs)) {
		index = table->fts_doc_id_index;

		ut_a(index);

		/* Only next_doc_id is recovered in this case. */
		fts_doc_fetch_by_doc_id(NULL, start_doc, index,
					FTS_FETCH_DOC_BY_ID_LARGE,
					fts_init_get_doc_id, cache);
	} else {
		/* Make sure the stopwords are loaded before any
		document is tokenized. */
		if (table->fts->cache->stopword_info.status
		    & STOPWORD_NOT_INIT) {
			fts_load_stopword(table, NULL, NULL, true, true);
		}

		/* Fetch the documents for each FTS index and add them
		to the cache via fts_init_recover_doc(). */
		for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			index = get_doc->index_cache->index;

			fts_doc_fetch_by_doc_id(NULL, start_doc, index,
						FTS_FETCH_DOC_BY_ID_LARGE,
						fts_init_recover_doc, get_doc);
		}
	}

	table->fts->added_synced = true;

	fts_get_docs_clear(cache->get_docs);

func_exit:
	/* Release cache->lock only if it was taken by this function. */
	if (!has_cache_lock) {
		rw_lock_x_unlock(&cache->lock);
	}

	/* Registration happens after the cache lock taken here has been
	released, and only when this call did the initialization. */
	if (need_init) {
		mutex_enter(&dict_sys.mutex);
		/* Register the table with the optimize thread. */
		fts_optimize_add_table(table);
		mutex_exit(&dict_sys.mutex);
	}

	return(TRUE);
}
6340