1 /*****************************************************************************
2
3 Copyright (c) 2011, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2016, 2022, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file fts/fts0fts.cc
22 Full Text Search interface
23 ***********************************************************************/
24
25 #include "trx0roll.h"
26 #include "row0mysql.h"
27 #include "row0upd.h"
28 #include "dict0types.h"
29 #include "dict0stats_bg.h"
30 #include "row0sel.h"
31 #include "fts0fts.h"
32 #include "fts0priv.h"
33 #include "fts0types.h"
34 #include "fts0types.inl"
35 #include "fts0vlc.h"
36 #include "fts0plugin.h"
37 #include "dict0priv.h"
38 #include "dict0stats.h"
39 #include "btr0pcur.h"
40 #include "sync0sync.h"
41
42 static const ulint FTS_MAX_ID_LEN = 32;
43
44 /** Column name from the FTS config table */
45 #define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
46
/** Check whether an auxiliary table name belongs to an obsolete table.
The name is obsolete if it contains any of the key words "DOC_ID",
"ADDED" or "STOPWORDS" as a substring.
NOTE: the argument is expanded (and may be evaluated) up to three times. */
#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)		\
	(						\
		strstr((table_name), "DOC_ID") != NULL	\
		||					\
		strstr((table_name), "ADDED") != NULL	\
		||					\
		strstr((table_name), "STOPWORDS") != NULL \
	)
53
54 /** This is maximum FTS cache for each table and would be
55 a configurable variable */
56 ulong fts_max_cache_size;
57
58 /** Whether the total memory used for FTS cache is exhausted, and we will
59 need a sync to free some memory */
60 bool fts_need_sync = false;
61
62 /** Variable specifying the total memory allocated for FTS cache */
63 ulong fts_max_total_cache_size;
64
65 /** This is FTS result cache limit for each query and would be
66 a configurable variable */
67 size_t fts_result_cache_limit;
68
69 /** Variable specifying the maximum FTS max token size */
70 ulong fts_max_token_size;
71
72 /** Variable specifying the minimum FTS max token size */
73 ulong fts_min_token_size;
74
75
76 // FIXME: testing
77 static time_t elapsed_time;
78 static ulint n_nodes;
79
80 #ifdef FTS_CACHE_SIZE_DEBUG
81 /** The cache size permissible lower limit (1K) */
82 static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
83
84 /** The cache size permissible upper limit (1G) */
85 static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
86 #endif
87
88 /** Time to sleep after DEADLOCK error before retrying operation. */
89 static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
90
/** InnoDB default stopword list, terminated by a NULL entry.
There are different versions of stopword lists; the words below come
from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision.
NOTE: "the" is listed twice; the order and contents of the array are
kept exactly as they are. */
const char *fts_default_stopword[] =
{
	"a", "about", "an", "are", "as", "at",
	"be", "by",
	"com",
	"de",
	"en",
	"for", "from",
	"how",
	"i", "in", "is", "it",
	"la",
	"of", "on", "or",
	"that", "the", "this", "to",
	"was", "what", "when", "where", "who", "will", "with",
	"und",
	"the",
	"www",
	NULL
};
137
138 /** For storing table info when checking for orphaned tables. */
139 struct fts_aux_table_t {
140 table_id_t id; /*!< Table id */
141 table_id_t parent_id; /*!< Parent table id */
142 table_id_t index_id; /*!< Table FT index id */
143 char* name; /*!< Name of the table */
144 };
145
/** Suffixes of the FTS auxiliary tables that are common to all FT
indexes of a table. The list is terminated by a NULL entry. */
const char* fts_common_tables[] = {
	"BEING_DELETED", "BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED", "DELETED_CACHE",
	NULL
};
155
156 /** FTS auxiliary INDEX split intervals. */
157 const fts_index_selector_t fts_index_selector[] = {
158 { 9, "INDEX_1" },
159 { 65, "INDEX_2" },
160 { 70, "INDEX_3" },
161 { 75, "INDEX_4" },
162 { 80, "INDEX_5" },
163 { 85, "INDEX_6" },
164 { 0 , NULL }
165 };
166
167 /** Default config values for FTS indexes on a table. */
168 static const char* fts_config_table_insert_values_sql =
169 "BEGIN\n"
170 "\n"
171 "INSERT INTO $config_table VALUES('"
172 FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
173 ""
174 "INSERT INTO $config_table VALUES('"
175 FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
176 ""
177 "INSERT INTO $config_table VALUES ('"
178 FTS_SYNCED_DOC_ID "', '0');\n"
179 ""
180 "INSERT INTO $config_table VALUES ('"
181 FTS_TOTAL_DELETED_COUNT "', '0');\n"
182 "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
183 "INSERT INTO $config_table VALUES ('"
184 FTS_TABLE_STATE "', '0');\n";
185
186 /** FTS tokenize parmameter for plugin parser */
187 struct fts_tokenize_param_t {
188 fts_doc_t* result_doc; /*!< Result doc for tokens */
189 ulint add_pos; /*!< Added position for tokens */
190 };
191
192 /** Run SYNC on the table, i.e., write out data from the cache to the
193 FTS auxiliary INDEX table and clear the cache at the end.
194 @param[in,out] sync sync state
195 @param[in] unlock_cache whether unlock cache lock when write node
196 @param[in] wait whether wait when a sync is in progress
197 @return DB_SUCCESS if all OK */
198 static
199 dberr_t
200 fts_sync(
201 fts_sync_t* sync,
202 bool unlock_cache,
203 bool wait);
204
205 /****************************************************************//**
Release all resources held by the words rb tree, e.g., the node ilist. */
207 static
208 void
209 fts_words_free(
210 /*===========*/
211 ib_rbt_t* words) /*!< in: rb tree of words */
212 MY_ATTRIBUTE((nonnull));
213 #ifdef FTS_CACHE_SIZE_DEBUG
214 /****************************************************************//**
215 Read the max cache size parameter from the config table. */
216 static
217 void
218 fts_update_max_cache_size(
219 /*======================*/
220 fts_sync_t* sync); /*!< in: sync state */
221 #endif
222
223 /*********************************************************************//**
224 This function fetches the document just inserted right before
225 we commit the transaction, and tokenize the inserted text data
226 and insert into FTS auxiliary table and its cache.
227 @return TRUE if successful */
228 static
229 ulint
230 fts_add_doc_by_id(
231 /*==============*/
232 fts_trx_table_t*ftt, /*!< in: FTS trx table */
233 doc_id_t doc_id); /*!< in: doc id */
234 /******************************************************************//**
235 Update the last document id. This function could create a new
236 transaction to update the last document id.
237 @return DB_SUCCESS if OK */
238 static
239 dberr_t
240 fts_update_sync_doc_id(
241 /*===================*/
242 const dict_table_t* table, /*!< in: table */
243 doc_id_t doc_id, /*!< in: last document id */
244 trx_t* trx) /*!< in: update trx, or NULL */
245 MY_ATTRIBUTE((nonnull(1)));
246
247 /** Tokenize a document.
248 @param[in,out] doc document to tokenize
249 @param[out] result tokenization result
250 @param[in] parser pluggable parser */
251 static
252 void
253 fts_tokenize_document(
254 fts_doc_t* doc,
255 fts_doc_t* result,
256 st_mysql_ftparser* parser);
257
258 /** Continue to tokenize a document.
259 @param[in,out] doc document to tokenize
260 @param[in] add_pos add this position to all tokens from this tokenization
261 @param[out] result tokenization result
262 @param[in] parser pluggable parser */
263 static
264 void
265 fts_tokenize_document_next(
266 fts_doc_t* doc,
267 ulint add_pos,
268 fts_doc_t* result,
269 st_mysql_ftparser* parser);
270
271 /** Create the vector of fts_get_doc_t instances.
272 @param[in,out] cache fts cache
273 @return vector of fts_get_doc_t instances */
274 static
275 ib_vector_t*
276 fts_get_docs_create(
277 fts_cache_t* cache);
278
279 /** Free the FTS cache.
280 @param[in,out] cache to be freed */
281 static
282 void
fts_cache_destroy(fts_cache_t * cache)283 fts_cache_destroy(fts_cache_t* cache)
284 {
285 rw_lock_free(&cache->lock);
286 rw_lock_free(&cache->init_lock);
287 mutex_free(&cache->deleted_lock);
288 mutex_free(&cache->doc_id_lock);
289 os_event_destroy(cache->sync->event);
290
291 if (cache->stopword_info.cached_stopword) {
292 rbt_free(cache->stopword_info.cached_stopword);
293 }
294
295 if (cache->sync_heap->arg) {
296 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
297 }
298
299 mem_heap_free(cache->cache_heap);
300 }
301
302 /** Get a character set based on precise type.
303 @param prtype precise type
304 @return the corresponding character set */
305 UNIV_INLINE
306 CHARSET_INFO*
fts_get_charset(ulint prtype)307 fts_get_charset(ulint prtype)
308 {
309 #ifdef UNIV_DEBUG
310 switch (prtype & DATA_MYSQL_TYPE_MASK) {
311 case MYSQL_TYPE_BIT:
312 case MYSQL_TYPE_STRING:
313 case MYSQL_TYPE_VAR_STRING:
314 case MYSQL_TYPE_TINY_BLOB:
315 case MYSQL_TYPE_MEDIUM_BLOB:
316 case MYSQL_TYPE_BLOB:
317 case MYSQL_TYPE_LONG_BLOB:
318 case MYSQL_TYPE_VARCHAR:
319 break;
320 default:
321 ut_error;
322 }
323 #endif /* UNIV_DEBUG */
324
325 uint cs_num = (uint) dtype_get_charset_coll(prtype);
326
327 if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
328 return(cs);
329 }
330
331 ib::fatal() << "Unable to find charset-collation " << cs_num;
332 return(NULL);
333 }
334
335 /****************************************************************//**
336 This function loads the default InnoDB stopword list */
337 static
338 void
fts_load_default_stopword(fts_stopword_t * stopword_info)339 fts_load_default_stopword(
340 /*======================*/
341 fts_stopword_t* stopword_info) /*!< in: stopword info */
342 {
343 fts_string_t str;
344 mem_heap_t* heap;
345 ib_alloc_t* allocator;
346 ib_rbt_t* stop_words;
347
348 allocator = stopword_info->heap;
349 heap = static_cast<mem_heap_t*>(allocator->arg);
350
351 if (!stopword_info->cached_stopword) {
352 stopword_info->cached_stopword = rbt_create_arg_cmp(
353 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
354 &my_charset_latin1);
355 }
356
357 stop_words = stopword_info->cached_stopword;
358
359 str.f_n_char = 0;
360
361 for (ulint i = 0; fts_default_stopword[i]; ++i) {
362 char* word;
363 fts_tokenizer_word_t new_word;
364
365 /* We are going to duplicate the value below. */
366 word = const_cast<char*>(fts_default_stopword[i]);
367
368 new_word.nodes = ib_vector_create(
369 allocator, sizeof(fts_node_t), 4);
370
371 str.f_len = ut_strlen(word);
372 str.f_str = reinterpret_cast<byte*>(word);
373
374 fts_string_dup(&new_word.text, &str, heap);
375
376 rbt_insert(stop_words, &new_word, &new_word);
377 }
378
379 stopword_info->status = STOPWORD_FROM_DEFAULT;
380 }
381
382 /****************************************************************//**
383 Callback function to read a single stopword value.
384 @return Always return TRUE */
385 static
386 ibool
fts_read_stopword(void * row,void * user_arg)387 fts_read_stopword(
388 /*==============*/
389 void* row, /*!< in: sel_node_t* */
390 void* user_arg) /*!< in: pointer to ib_vector_t */
391 {
392 ib_alloc_t* allocator;
393 fts_stopword_t* stopword_info;
394 sel_node_t* sel_node;
395 que_node_t* exp;
396 ib_rbt_t* stop_words;
397 dfield_t* dfield;
398 fts_string_t str;
399 mem_heap_t* heap;
400 ib_rbt_bound_t parent;
401
402 sel_node = static_cast<sel_node_t*>(row);
403 stopword_info = static_cast<fts_stopword_t*>(user_arg);
404
405 stop_words = stopword_info->cached_stopword;
406 allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
407 heap = static_cast<mem_heap_t*>(allocator->arg);
408
409 exp = sel_node->select_list;
410
411 /* We only need to read the first column */
412 dfield = que_node_get_val(exp);
413
414 str.f_n_char = 0;
415 str.f_str = static_cast<byte*>(dfield_get_data(dfield));
416 str.f_len = dfield_get_len(dfield);
417
418 /* Only create new node if it is a value not already existed */
419 if (str.f_len != UNIV_SQL_NULL
420 && rbt_search(stop_words, &parent, &str) != 0) {
421
422 fts_tokenizer_word_t new_word;
423
424 new_word.nodes = ib_vector_create(
425 allocator, sizeof(fts_node_t), 4);
426
427 new_word.text.f_str = static_cast<byte*>(
428 mem_heap_alloc(heap, str.f_len + 1));
429
430 memcpy(new_word.text.f_str, str.f_str, str.f_len);
431
432 new_word.text.f_n_char = 0;
433 new_word.text.f_len = str.f_len;
434 new_word.text.f_str[str.f_len] = 0;
435
436 rbt_insert(stop_words, &new_word, &new_word);
437 }
438
439 return(TRUE);
440 }
441
442 /******************************************************************//**
443 Load user defined stopword from designated user table
444 @return whether the operation is successful */
445 static
446 bool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)447 fts_load_user_stopword(
448 /*===================*/
449 fts_t* fts, /*!< in: FTS struct */
450 const char* stopword_table_name, /*!< in: Stopword table
451 name */
452 fts_stopword_t* stopword_info) /*!< in: Stopword info */
453 {
454 if (!fts->dict_locked) {
455 mutex_enter(&dict_sys->mutex);
456 }
457
458 /* Validate the user table existence in the right format */
459 bool ret= false;
460 stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
461 if (!stopword_info->charset) {
462 cleanup:
463 if (!fts->dict_locked) {
464 mutex_exit(&dict_sys->mutex);
465 }
466
467 return ret;
468 }
469
470 trx_t* trx = trx_create();
471 trx->op_info = "Load user stopword table into FTS cache";
472
473 if (!stopword_info->cached_stopword) {
474 /* Create the stopword RB tree with the stopword column
475 charset. All comparison will use this charset */
476 stopword_info->cached_stopword = rbt_create_arg_cmp(
477 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
478 (void*)stopword_info->charset);
479
480 }
481
482 pars_info_t* info = pars_info_create();
483
484 pars_info_bind_id(info, "table_stopword", stopword_table_name);
485
486 pars_info_bind_function(info, "my_func", fts_read_stopword,
487 stopword_info);
488
489 que_t* graph = fts_parse_sql_no_dict_lock(
490 info,
491 "DECLARE FUNCTION my_func;\n"
492 "DECLARE CURSOR c IS"
493 " SELECT value"
494 " FROM $table_stopword;\n"
495 "BEGIN\n"
496 "\n"
497 "OPEN c;\n"
498 "WHILE 1 = 1 LOOP\n"
499 " FETCH c INTO my_func();\n"
500 " IF c % NOTFOUND THEN\n"
501 " EXIT;\n"
502 " END IF;\n"
503 "END LOOP;\n"
504 "CLOSE c;");
505
506 for (;;) {
507 dberr_t error = fts_eval_sql(trx, graph);
508
509 if (UNIV_LIKELY(error == DB_SUCCESS)) {
510 fts_sql_commit(trx);
511 stopword_info->status = STOPWORD_USER_TABLE;
512 break;
513 } else {
514 fts_sql_rollback(trx);
515
516 if (error == DB_LOCK_WAIT_TIMEOUT) {
517 ib::warn() << "Lock wait timeout reading user"
518 " stopword table. Retrying!";
519
520 trx->error_state = DB_SUCCESS;
521 } else {
522 ib::error() << "Error '" << error
523 << "' while reading user stopword"
524 " table.";
525 ret = FALSE;
526 break;
527 }
528 }
529 }
530
531 que_graph_free(graph);
532 trx->free();
533 ret = true;
534 goto cleanup;
535 }
536
537 /******************************************************************//**
538 Initialize the index cache. */
539 static
540 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)541 fts_index_cache_init(
542 /*=================*/
543 ib_alloc_t* allocator, /*!< in: the allocator to use */
544 fts_index_cache_t* index_cache) /*!< in: index cache */
545 {
546 ulint i;
547
548 ut_a(index_cache->words == NULL);
549
550 index_cache->words = rbt_create_arg_cmp(
551 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
552 (void*) index_cache->charset);
553
554 ut_a(index_cache->doc_stats == NULL);
555
556 index_cache->doc_stats = ib_vector_create(
557 allocator, sizeof(fts_doc_stats_t), 4);
558
559 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
560 ut_a(index_cache->ins_graph[i] == NULL);
561 ut_a(index_cache->sel_graph[i] == NULL);
562 }
563 }
564
565 /*********************************************************************//**
566 Initialize FTS cache. */
567 void
fts_cache_init(fts_cache_t * cache)568 fts_cache_init(
569 /*===========*/
570 fts_cache_t* cache) /*!< in: cache to initialize */
571 {
572 ulint i;
573
574 /* Just to make sure */
575 ut_a(cache->sync_heap->arg == NULL);
576
577 cache->sync_heap->arg = mem_heap_create(1024);
578
579 cache->total_size = 0;
580 cache->total_size_at_sync = 0;
581
582 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
583 cache->deleted_doc_ids = ib_vector_create(
584 cache->sync_heap, sizeof(doc_id_t), 4);
585 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
586
587 /* Reset the cache data for all the FTS indexes. */
588 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
589 fts_index_cache_t* index_cache;
590
591 index_cache = static_cast<fts_index_cache_t*>(
592 ib_vector_get(cache->indexes, i));
593
594 fts_index_cache_init(cache->sync_heap, index_cache);
595 }
596 }
597
598 /****************************************************************//**
599 Create a FTS cache. */
600 fts_cache_t*
fts_cache_create(dict_table_t * table)601 fts_cache_create(
602 /*=============*/
603 dict_table_t* table) /*!< in: table owns the FTS cache */
604 {
605 mem_heap_t* heap;
606 fts_cache_t* cache;
607
608 heap = static_cast<mem_heap_t*>(mem_heap_create(512));
609
610 cache = static_cast<fts_cache_t*>(
611 mem_heap_zalloc(heap, sizeof(*cache)));
612
613 cache->cache_heap = heap;
614
615 rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
616
617 rw_lock_create(
618 fts_cache_init_rw_lock_key, &cache->init_lock,
619 SYNC_FTS_CACHE_INIT);
620
621 mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
622
623 mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
624
625 /* This is the heap used to create the cache itself. */
626 cache->self_heap = ib_heap_allocator_create(heap);
627
628 /* This is a transient heap, used for storing sync data. */
629 cache->sync_heap = ib_heap_allocator_create(heap);
630 cache->sync_heap->arg = NULL;
631
632 cache->sync = static_cast<fts_sync_t*>(
633 mem_heap_zalloc(heap, sizeof(fts_sync_t)));
634
635 cache->sync->table = table;
636 cache->sync->event = os_event_create(0);
637
638 /* Create the index cache vector that will hold the inverted indexes. */
639 cache->indexes = ib_vector_create(
640 cache->self_heap, sizeof(fts_index_cache_t), 2);
641
642 fts_cache_init(cache);
643
644 cache->stopword_info.cached_stopword = NULL;
645 cache->stopword_info.charset = NULL;
646
647 cache->stopword_info.heap = cache->self_heap;
648
649 cache->stopword_info.status = STOPWORD_NOT_INIT;
650
651 return(cache);
652 }
653
654 /*******************************************************************//**
655 Add a newly create index into FTS cache */
656 void
fts_add_index(dict_index_t * index,dict_table_t * table)657 fts_add_index(
658 /*==========*/
659 dict_index_t* index, /*!< FTS index to be added */
660 dict_table_t* table) /*!< table */
661 {
662 fts_t* fts = table->fts;
663 fts_cache_t* cache;
664 fts_index_cache_t* index_cache;
665
666 ut_ad(fts);
667 cache = table->fts->cache;
668
669 rw_lock_x_lock(&cache->init_lock);
670
671 ib_vector_push(fts->indexes, &index);
672
673 index_cache = fts_find_index_cache(cache, index);
674
675 if (!index_cache) {
676 /* Add new index cache structure */
677 index_cache = fts_cache_index_cache_create(table, index);
678 }
679
680 rw_lock_x_unlock(&cache->init_lock);
681 }
682
683 /*******************************************************************//**
684 recalibrate get_doc structure after index_cache in cache->indexes changed */
685 static
686 void
fts_reset_get_doc(fts_cache_t * cache)687 fts_reset_get_doc(
688 /*==============*/
689 fts_cache_t* cache) /*!< in: FTS index cache */
690 {
691 fts_get_doc_t* get_doc;
692 ulint i;
693
694 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
695
696 ib_vector_reset(cache->get_docs);
697
698 for (i = 0; i < ib_vector_size(cache->indexes); i++) {
699 fts_index_cache_t* ind_cache;
700
701 ind_cache = static_cast<fts_index_cache_t*>(
702 ib_vector_get(cache->indexes, i));
703
704 get_doc = static_cast<fts_get_doc_t*>(
705 ib_vector_push(cache->get_docs, NULL));
706
707 memset(get_doc, 0x0, sizeof(*get_doc));
708
709 get_doc->index_cache = ind_cache;
710 get_doc->cache = cache;
711 }
712
713 ut_ad(ib_vector_size(cache->get_docs)
714 == ib_vector_size(cache->indexes));
715 }
716
717 /*******************************************************************//**
718 Check an index is in the table->indexes list
719 @return TRUE if it exists */
720 static
721 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)722 fts_in_dict_index(
723 /*==============*/
724 dict_table_t* table, /*!< in: Table */
725 dict_index_t* index_check) /*!< in: index to be checked */
726 {
727 dict_index_t* index;
728
729 for (index = dict_table_get_first_index(table);
730 index != NULL;
731 index = dict_table_get_next_index(index)) {
732
733 if (index == index_check) {
734 return(TRUE);
735 }
736 }
737
738 return(FALSE);
739 }
740
741 /*******************************************************************//**
742 Check an index is in the fts->cache->indexes list
743 @return TRUE if it exists */
744 static
745 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)746 fts_in_index_cache(
747 /*===============*/
748 dict_table_t* table, /*!< in: Table */
749 dict_index_t* index) /*!< in: index to be checked */
750 {
751 ulint i;
752
753 for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
754 fts_index_cache_t* index_cache;
755
756 index_cache = static_cast<fts_index_cache_t*>(
757 ib_vector_get(table->fts->cache->indexes, i));
758
759 if (index_cache->index == index) {
760 return(TRUE);
761 }
762 }
763
764 return(FALSE);
765 }
766
767 /*******************************************************************//**
768 Check indexes in the fts->indexes is also present in index cache and
769 table->indexes list
770 @return TRUE if all indexes match */
771 ibool
fts_check_cached_index(dict_table_t * table)772 fts_check_cached_index(
773 /*===================*/
774 dict_table_t* table) /*!< in: Table where indexes are dropped */
775 {
776 ulint i;
777
778 if (!table->fts || !table->fts->cache) {
779 return(TRUE);
780 }
781
782 ut_a(ib_vector_size(table->fts->indexes)
783 == ib_vector_size(table->fts->cache->indexes));
784
785 for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
786 dict_index_t* index;
787
788 index = static_cast<dict_index_t*>(
789 ib_vector_getp(table->fts->indexes, i));
790
791 if (!fts_in_index_cache(table, index)) {
792 return(FALSE);
793 }
794
795 if (!fts_in_dict_index(table, index)) {
796 return(FALSE);
797 }
798 }
799
800 return(TRUE);
801 }
802
803 /** Clear all fts resources when there is no internal DOC_ID
804 and there are no new fts index to add.
805 @param[in,out] table table where fts is to be freed
806 @param[in] trx transaction to drop all fts tables */
fts_clear_all(dict_table_t * table,trx_t * trx)807 void fts_clear_all(dict_table_t *table, trx_t *trx)
808 {
809 if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) ||
810 !table->fts ||
811 !ib_vector_is_empty(table->fts->indexes))
812 return;
813
814 for (const dict_index_t *index= dict_table_get_first_index(table);
815 index; index= dict_table_get_next_index(index))
816 if (index->type & DICT_FTS)
817 return;
818
819 fts_optimize_remove_table(table);
820
821 fts_drop_tables(trx, table);
822 fts_free(table);
823 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
824 }
825
826 /*******************************************************************//**
827 Drop auxiliary tables related to an FTS index
828 @return DB_SUCCESS or error number */
829 dberr_t
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx)830 fts_drop_index(
831 /*===========*/
832 dict_table_t* table, /*!< in: Table where indexes are dropped */
833 dict_index_t* index, /*!< in: Index to be dropped */
834 trx_t* trx) /*!< in: Transaction for the drop */
835 {
836 ib_vector_t* indexes = table->fts->indexes;
837 dberr_t err = DB_SUCCESS;
838
839 ut_a(indexes);
840
841 if ((ib_vector_size(indexes) == 1
842 && (index == static_cast<dict_index_t*>(
843 ib_vector_getp(table->fts->indexes, 0)))
844 && DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
845 || ib_vector_is_empty(indexes)) {
846 doc_id_t current_doc_id;
847 doc_id_t first_doc_id;
848
849 /* If we are dropping the only FTS index of the table,
850 remove it from optimize thread */
851 fts_optimize_remove_table(table);
852
853 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
854
855 while (index->index_fts_syncing
856 && !trx_is_interrupted(trx)) {
857 DICT_BG_YIELD(trx);
858 }
859
860 current_doc_id = table->fts->cache->next_doc_id;
861 first_doc_id = table->fts->cache->first_doc_id;
862 fts_cache_clear(table->fts->cache);
863 fts_cache_destroy(table->fts->cache);
864 table->fts->cache = fts_cache_create(table);
865 table->fts->cache->next_doc_id = current_doc_id;
866 table->fts->cache->first_doc_id = first_doc_id;
867 } else {
868 fts_cache_t* cache = table->fts->cache;
869 fts_index_cache_t* index_cache;
870
871 rw_lock_x_lock(&cache->init_lock);
872
873 index_cache = fts_find_index_cache(cache, index);
874
875 if (index_cache != NULL) {
876 while (index->index_fts_syncing
877 && !trx_is_interrupted(trx)) {
878 DICT_BG_YIELD(trx);
879 }
880 if (index_cache->words) {
881 fts_words_free(index_cache->words);
882 rbt_free(index_cache->words);
883 }
884
885 ib_vector_remove(cache->indexes, *(void**) index_cache);
886 }
887
888 if (cache->get_docs) {
889 fts_reset_get_doc(cache);
890 }
891
892 rw_lock_x_unlock(&cache->init_lock);
893 }
894
895 err = fts_drop_index_tables(trx, index);
896
897 ib_vector_remove(indexes, (const void*) index);
898
899 return(err);
900 }
901
902 /****************************************************************//**
903 Free the query graph but check whether dict_sys->mutex is already
904 held */
905 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)906 fts_que_graph_free_check_lock(
907 /*==========================*/
908 fts_table_t* fts_table, /*!< in: FTS table */
909 const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
910 que_t* graph) /*!< in: query graph */
911 {
912 bool has_dict = FALSE;
913
914 if (fts_table && fts_table->table) {
915 ut_ad(fts_table->table->fts);
916
917 has_dict = fts_table->table->fts->dict_locked;
918 } else if (index_cache) {
919 ut_ad(index_cache->index->table->fts);
920
921 has_dict = index_cache->index->table->fts->dict_locked;
922 }
923
924 if (!has_dict) {
925 mutex_enter(&dict_sys->mutex);
926 }
927
928 ut_ad(mutex_own(&dict_sys->mutex));
929
930 que_graph_free(graph);
931
932 if (!has_dict) {
933 mutex_exit(&dict_sys->mutex);
934 }
935 }
936
937 /****************************************************************//**
938 Create an FTS index cache. */
939 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)940 fts_index_get_charset(
941 /*==================*/
942 dict_index_t* index) /*!< in: FTS index */
943 {
944 CHARSET_INFO* charset = NULL;
945 dict_field_t* field;
946 ulint prtype;
947
948 field = dict_index_get_nth_field(index, 0);
949 prtype = field->col->prtype;
950
951 charset = fts_get_charset(prtype);
952
953 #ifdef FTS_DEBUG
954 /* Set up charset info for this index. Please note all
955 field of the FTS index should have the same charset */
956 for (i = 1; i < index->n_fields; i++) {
957 CHARSET_INFO* fld_charset;
958
959 field = dict_index_get_nth_field(index, i);
960 prtype = field->col->prtype;
961
962 fld_charset = fts_get_charset(prtype);
963
964 /* All FTS columns should have the same charset */
965 if (charset) {
966 ut_a(charset == fld_charset);
967 } else {
968 charset = fld_charset;
969 }
970 }
971 #endif
972
973 return(charset);
974
975 }
976 /****************************************************************//**
977 Create an FTS index cache.
978 @return Index Cache */
979 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)980 fts_cache_index_cache_create(
981 /*=========================*/
982 dict_table_t* table, /*!< in: table with FTS index */
983 dict_index_t* index) /*!< in: FTS index */
984 {
985 ulint n_bytes;
986 fts_index_cache_t* index_cache;
987 fts_cache_t* cache = table->fts->cache;
988
989 ut_a(cache != NULL);
990
991 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
992
993 /* Must not already exist in the cache vector. */
994 ut_a(fts_find_index_cache(cache, index) == NULL);
995
996 index_cache = static_cast<fts_index_cache_t*>(
997 ib_vector_push(cache->indexes, NULL));
998
999 memset(index_cache, 0x0, sizeof(*index_cache));
1000
1001 index_cache->index = index;
1002
1003 index_cache->charset = fts_index_get_charset(index);
1004
1005 n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1006
1007 index_cache->ins_graph = static_cast<que_t**>(
1008 mem_heap_zalloc(static_cast<mem_heap_t*>(
1009 cache->self_heap->arg), n_bytes));
1010
1011 index_cache->sel_graph = static_cast<que_t**>(
1012 mem_heap_zalloc(static_cast<mem_heap_t*>(
1013 cache->self_heap->arg), n_bytes));
1014
1015 fts_index_cache_init(cache->sync_heap, index_cache);
1016
1017 if (cache->get_docs) {
1018 fts_reset_get_doc(cache);
1019 }
1020
1021 return(index_cache);
1022 }
1023
1024 /****************************************************************//**
1025 Release all resources help by the words rb tree e.g., the node ilist. */
1026 static
1027 void
fts_words_free(ib_rbt_t * words)1028 fts_words_free(
1029 /*===========*/
1030 ib_rbt_t* words) /*!< in: rb tree of words */
1031 {
1032 const ib_rbt_node_t* rbt_node;
1033
1034 /* Free the resources held by a word. */
1035 for (rbt_node = rbt_first(words);
1036 rbt_node != NULL;
1037 rbt_node = rbt_first(words)) {
1038
1039 ulint i;
1040 fts_tokenizer_word_t* word;
1041
1042 word = rbt_value(fts_tokenizer_word_t, rbt_node);
1043
1044 /* Free the ilists of this word. */
1045 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1046
1047 fts_node_t* fts_node = static_cast<fts_node_t*>(
1048 ib_vector_get(word->nodes, i));
1049
1050 ut_free(fts_node->ilist);
1051 fts_node->ilist = NULL;
1052 }
1053
1054 /* NOTE: We are responsible for free'ing the node */
1055 ut_free(rbt_remove_node(words, rbt_node));
1056 }
1057 }
1058
1059 /** Clear cache.
1060 @param[in,out] cache fts cache */
1061 void
fts_cache_clear(fts_cache_t * cache)1062 fts_cache_clear(
1063 fts_cache_t* cache)
1064 {
1065 ulint i;
1066
1067 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1068 ulint j;
1069 fts_index_cache_t* index_cache;
1070
1071 index_cache = static_cast<fts_index_cache_t*>(
1072 ib_vector_get(cache->indexes, i));
1073
1074 fts_words_free(index_cache->words);
1075
1076 rbt_free(index_cache->words);
1077
1078 index_cache->words = NULL;
1079
1080 for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1081
1082 if (index_cache->ins_graph[j] != NULL) {
1083
1084 fts_que_graph_free_check_lock(
1085 NULL, index_cache,
1086 index_cache->ins_graph[j]);
1087
1088 index_cache->ins_graph[j] = NULL;
1089 }
1090
1091 if (index_cache->sel_graph[j] != NULL) {
1092
1093 fts_que_graph_free_check_lock(
1094 NULL, index_cache,
1095 index_cache->sel_graph[j]);
1096
1097 index_cache->sel_graph[j] = NULL;
1098 }
1099 }
1100
1101 index_cache->doc_stats = NULL;
1102 }
1103
1104 fts_need_sync = false;
1105
1106 cache->total_size = 0;
1107
1108 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1109 cache->deleted_doc_ids = NULL;
1110 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1111
1112 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1113 cache->sync_heap->arg = NULL;
1114 }
1115
1116 /*********************************************************************//**
1117 Search the index specific cache for a particular FTS index.
1118 @return the index cache else NULL */
1119 UNIV_INLINE
1120 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1121 fts_get_index_cache(
1122 /*================*/
1123 fts_cache_t* cache, /*!< in: cache to search */
1124 const dict_index_t* index) /*!< in: index to search for */
1125 {
1126 ulint i;
1127
1128 ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1129 || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1130
1131 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1132 fts_index_cache_t* index_cache;
1133
1134 index_cache = static_cast<fts_index_cache_t*>(
1135 ib_vector_get(cache->indexes, i));
1136
1137 if (index_cache->index == index) {
1138
1139 return(index_cache);
1140 }
1141 }
1142
1143 return(NULL);
1144 }
1145
#ifdef FTS_DEBUG
/** Look up the get_doc structure whose index cache refers to a given
index. A debug assertion checks that cache->init_lock is held in X mode.
@param[in]	cache	cache to search
@param[in]	index	index to search for
@return the matching fts_get_doc_t item, else NULL */
static
fts_get_doc_t*
fts_get_index_get_doc(
	fts_cache_t*		cache,
	const dict_index_t*	index)
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	fts_get_doc_t*	found = NULL;
	ulint		pos = 0;

	/* Linear scan; stop at the first entry for this index. */
	while (found == NULL && pos < ib_vector_size(cache->get_docs)) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			found = candidate;
		}

		++pos;
	}

	return(found);
}
#endif
1176
1177 /**********************************************************************//**
1178 Find an existing word, or if not found, create one and return it.
1179 @return specified word token */
1180 static
1181 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1182 fts_tokenizer_word_get(
1183 /*===================*/
1184 fts_cache_t* cache, /*!< in: cache */
1185 fts_index_cache_t*
1186 index_cache, /*!< in: index cache */
1187 fts_string_t* text) /*!< in: node text */
1188 {
1189 fts_tokenizer_word_t* word;
1190 ib_rbt_bound_t parent;
1191
1192 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1193
1194 /* If it is a stopword, do not index it */
1195 if (!fts_check_token(text,
1196 cache->stopword_info.cached_stopword,
1197 index_cache->charset)) {
1198
1199 return(NULL);
1200 }
1201
1202 /* Check if we found a match, if not then add word to tree. */
1203 if (rbt_search(index_cache->words, &parent, text) != 0) {
1204 mem_heap_t* heap;
1205 fts_tokenizer_word_t new_word;
1206
1207 heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1208
1209 new_word.nodes = ib_vector_create(
1210 cache->sync_heap, sizeof(fts_node_t), 4);
1211
1212 fts_string_dup(&new_word.text, text, heap);
1213
1214 parent.last = rbt_add_node(
1215 index_cache->words, &parent, &new_word);
1216
1217 /* Take into account the RB tree memory use and the vector. */
1218 cache->total_size += sizeof(new_word)
1219 + sizeof(ib_rbt_node_t)
1220 + text->f_len
1221 + (sizeof(fts_node_t) * 4)
1222 + sizeof(*new_word.nodes);
1223
1224 ut_ad(rbt_validate(index_cache->words));
1225 }
1226
1227 word = rbt_value(fts_tokenizer_word_t, parent.last);
1228
1229 return(word);
1230 }
1231
1232 /**********************************************************************//**
1233 Add the given doc_id/word positions to the given node's ilist. */
1234 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1235 fts_cache_node_add_positions(
1236 /*=========================*/
1237 fts_cache_t* cache, /*!< in: cache */
1238 fts_node_t* node, /*!< in: word node */
1239 doc_id_t doc_id, /*!< in: doc id */
1240 ib_vector_t* positions) /*!< in: fts_token_t::positions */
1241 {
1242 ulint i;
1243 byte* ptr;
1244 byte* ilist;
1245 ulint enc_len;
1246 ulint last_pos;
1247 byte* ptr_start;
1248 doc_id_t doc_id_delta;
1249
1250 #ifdef UNIV_DEBUG
1251 if (cache) {
1252 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1253 }
1254 #endif /* UNIV_DEBUG */
1255
1256 ut_ad(doc_id >= node->last_doc_id);
1257
1258 /* Calculate the space required to store the ilist. */
1259 doc_id_delta = doc_id - node->last_doc_id;
1260 enc_len = fts_get_encoded_len(doc_id_delta);
1261
1262 last_pos = 0;
1263 for (i = 0; i < ib_vector_size(positions); i++) {
1264 ulint pos = *(static_cast<ulint*>(
1265 ib_vector_get(positions, i)));
1266
1267 ut_ad(last_pos == 0 || pos > last_pos);
1268
1269 enc_len += fts_get_encoded_len(pos - last_pos);
1270 last_pos = pos;
1271 }
1272
1273 /* The 0x00 byte at the end of the token positions list. */
1274 enc_len++;
1275
1276 if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1277 /* No need to allocate more space, we can fit in the new
1278 data at the end of the old one. */
1279 ilist = NULL;
1280 ptr = node->ilist + node->ilist_size;
1281 } else {
1282 ulint new_size = node->ilist_size + enc_len;
1283
1284 /* Over-reserve space by a fixed size for small lengths and
1285 by 20% for lengths >= 48 bytes. */
1286 if (new_size < 16) {
1287 new_size = 16;
1288 } else if (new_size < 32) {
1289 new_size = 32;
1290 } else if (new_size < 48) {
1291 new_size = 48;
1292 } else {
1293 new_size = (ulint)(1.2 * new_size);
1294 }
1295
1296 ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1297 ptr = ilist + node->ilist_size;
1298
1299 node->ilist_size_alloc = new_size;
1300 if (cache) {
1301 cache->total_size += new_size;
1302 }
1303 }
1304
1305 ptr_start = ptr;
1306
1307 /* Encode the new fragment. */
1308 ptr = fts_encode_int(doc_id_delta, ptr);
1309
1310 last_pos = 0;
1311 for (i = 0; i < ib_vector_size(positions); i++) {
1312 ulint pos = *(static_cast<ulint*>(
1313 ib_vector_get(positions, i)));
1314
1315 ptr = fts_encode_int(pos - last_pos, ptr);
1316 last_pos = pos;
1317 }
1318
1319 *ptr++ = 0;
1320
1321 ut_a(enc_len == (ulint)(ptr - ptr_start));
1322
1323 if (ilist) {
1324 /* Copy old ilist to the start of the new one and switch the
1325 new one into place in the node. */
1326 if (node->ilist_size > 0) {
1327 memcpy(ilist, node->ilist, node->ilist_size);
1328 ut_free(node->ilist);
1329 if (cache) {
1330 cache->total_size -= node->ilist_size;
1331 }
1332 }
1333
1334 node->ilist = ilist;
1335 }
1336
1337 node->ilist_size += enc_len;
1338
1339 if (node->first_doc_id == FTS_NULL_DOC_ID) {
1340 node->first_doc_id = doc_id;
1341 }
1342
1343 node->last_doc_id = doc_id;
1344 ++node->doc_count;
1345 }
1346
1347 /**********************************************************************//**
1348 Add document to the cache. */
1349 static
1350 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1351 fts_cache_add_doc(
1352 /*==============*/
1353 fts_cache_t* cache, /*!< in: cache */
1354 fts_index_cache_t*
1355 index_cache, /*!< in: index cache */
1356 doc_id_t doc_id, /*!< in: doc id to add */
1357 ib_rbt_t* tokens) /*!< in: document tokens */
1358 {
1359 const ib_rbt_node_t* node;
1360 ulint n_words;
1361 fts_doc_stats_t* doc_stats;
1362
1363 if (!tokens) {
1364 return;
1365 }
1366
1367 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1368
1369 n_words = rbt_size(tokens);
1370
1371 for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1372
1373 fts_tokenizer_word_t* word;
1374 fts_node_t* fts_node = NULL;
1375 fts_token_t* token = rbt_value(fts_token_t, node);
1376
1377 /* Find and/or add token to the cache. */
1378 word = fts_tokenizer_word_get(
1379 cache, index_cache, &token->text);
1380
1381 if (!word) {
1382 ut_free(rbt_remove_node(tokens, node));
1383 continue;
1384 }
1385
1386 if (ib_vector_size(word->nodes) > 0) {
1387 fts_node = static_cast<fts_node_t*>(
1388 ib_vector_last(word->nodes));
1389 }
1390
1391 if (fts_node == NULL || fts_node->synced
1392 || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1393 || doc_id < fts_node->last_doc_id) {
1394
1395 fts_node = static_cast<fts_node_t*>(
1396 ib_vector_push(word->nodes, NULL));
1397
1398 memset(fts_node, 0x0, sizeof(*fts_node));
1399
1400 cache->total_size += sizeof(*fts_node);
1401 }
1402
1403 fts_cache_node_add_positions(
1404 cache, fts_node, doc_id, token->positions);
1405
1406 ut_free(rbt_remove_node(tokens, node));
1407 }
1408
1409 ut_a(rbt_empty(tokens));
1410
1411 /* Add to doc ids processed so far. */
1412 doc_stats = static_cast<fts_doc_stats_t*>(
1413 ib_vector_push(index_cache->doc_stats, NULL));
1414
1415 doc_stats->doc_id = doc_id;
1416 doc_stats->word_count = n_words;
1417
1418 /* Add the doc stats memory usage too. */
1419 cache->total_size += sizeof(*doc_stats);
1420
1421 if (doc_id > cache->sync->max_doc_id) {
1422 cache->sync->max_doc_id = doc_id;
1423 }
1424 }
1425
1426 /****************************************************************//**
1427 Drops a table. If the table can't be found we return a SUCCESS code.
1428 @return DB_SUCCESS or error code */
1429 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1430 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1431 fts_drop_table(
1432 /*===========*/
1433 trx_t* trx, /*!< in: transaction */
1434 const char* table_name) /*!< in: table to drop */
1435 {
1436 dict_table_t* table;
1437 dberr_t error = DB_SUCCESS;
1438
1439 /* Check that the table exists in our data dictionary.
1440 Similar to regular drop table case, we will open table with
1441 DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1442 table = dict_table_open_on_name(
1443 table_name, TRUE, FALSE,
1444 static_cast<dict_err_ignore_t>(
1445 DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1446
1447 if (table != 0) {
1448
1449 dict_table_close(table, TRUE, FALSE);
1450
1451 /* Pass nonatomic=false (dont allow data dict unlock),
1452 because the transaction may hold locks on SYS_* tables from
1453 previous calls to fts_drop_table(). */
1454 error = row_drop_table_for_mysql(table_name, trx,
1455 SQLCOM_DROP_DB, false, false);
1456
1457 if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
1458 ib::error() << "Unable to drop FTS index aux table "
1459 << table_name << ": " << error;
1460 }
1461 } else {
1462 error = DB_FAIL;
1463 }
1464
1465 return(error);
1466 }
1467
1468 /****************************************************************//**
1469 Rename a single auxiliary table due to database name change.
1470 @return DB_SUCCESS or error code */
1471 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1472 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1473 fts_rename_one_aux_table(
1474 /*=====================*/
1475 const char* new_name, /*!< in: new parent tbl name */
1476 const char* fts_table_old_name, /*!< in: old aux tbl name */
1477 trx_t* trx) /*!< in: transaction */
1478 {
1479 char fts_table_new_name[MAX_TABLE_NAME_LEN];
1480 ulint new_db_name_len = dict_get_db_name_len(new_name);
1481 ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1482 ulint table_new_name_len = strlen(fts_table_old_name)
1483 + new_db_name_len - old_db_name_len;
1484
1485 /* Check if the new and old database names are the same, if so,
1486 nothing to do */
1487 ut_ad((new_db_name_len != old_db_name_len)
1488 || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1489
1490 /* Get the database name from "new_name", and table name
1491 from the fts_table_old_name */
1492 strncpy(fts_table_new_name, new_name, new_db_name_len);
1493 strncpy(fts_table_new_name + new_db_name_len,
1494 strchr(fts_table_old_name, '/'),
1495 table_new_name_len - new_db_name_len);
1496 fts_table_new_name[table_new_name_len] = 0;
1497
1498 return row_rename_table_for_mysql(
1499 fts_table_old_name, fts_table_new_name, trx, false, false);
1500 }
1501
1502 /****************************************************************//**
1503 Rename auxiliary tables for all fts index for a table. This(rename)
1504 is due to database name change
1505 @return DB_SUCCESS or error code */
1506 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1507 fts_rename_aux_tables(
1508 /*==================*/
1509 dict_table_t* table, /*!< in: user Table */
1510 const char* new_name, /*!< in: new table name */
1511 trx_t* trx) /*!< in: transaction */
1512 {
1513 ulint i;
1514 fts_table_t fts_table;
1515
1516 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1517
1518 dberr_t err = DB_SUCCESS;
1519 char old_table_name[MAX_FULL_NAME_LEN];
1520
1521 /* Rename common auxiliary tables */
1522 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1523 fts_table.suffix = fts_common_tables[i];
1524 fts_get_table_name(&fts_table, old_table_name, true);
1525
1526 err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1527
1528 if (err != DB_SUCCESS) {
1529 return(err);
1530 }
1531 }
1532
1533 fts_t* fts = table->fts;
1534
1535 /* Rename index specific auxiliary tables */
1536 for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1537 ++i) {
1538 dict_index_t* index;
1539
1540 index = static_cast<dict_index_t*>(
1541 ib_vector_getp(fts->indexes, i));
1542
1543 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1544
1545 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1546 fts_table.suffix = fts_get_suffix(j);
1547 fts_get_table_name(&fts_table, old_table_name, true);
1548
1549 err = fts_rename_one_aux_table(
1550 new_name, old_table_name, trx);
1551
1552 DBUG_EXECUTE_IF("fts_rename_failure",
1553 err = DB_DEADLOCK;
1554 fts_sql_rollback(trx););
1555
1556 if (err != DB_SUCCESS) {
1557 return(err);
1558 }
1559 }
1560 }
1561
1562 return(DB_SUCCESS);
1563 }
1564
1565 /** Drops the common ancillary tables needed for supporting an FTS index
1566 on the given table. row_mysql_lock_data_dictionary must have been called
1567 before this.
1568 @param[in] trx transaction to drop fts common table
1569 @param[in] fts_table table with an FTS index
1570 @param[in] drop_orphan True if the function is used to drop
1571 orphaned table
1572 @return DB_SUCCESS or error code */
1573 static dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table,bool drop_orphan=false)1574 fts_drop_common_tables(
1575 trx_t* trx,
1576 fts_table_t* fts_table,
1577 bool drop_orphan=false)
1578 {
1579 ulint i;
1580 dberr_t error = DB_SUCCESS;
1581
1582 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1583 dberr_t err;
1584 char table_name[MAX_FULL_NAME_LEN];
1585
1586 fts_table->suffix = fts_common_tables[i];
1587 fts_get_table_name(fts_table, table_name, true);
1588
1589 err = fts_drop_table(trx, table_name);
1590
1591 /* We only return the status of the last error. */
1592 if (err != DB_SUCCESS && err != DB_FAIL) {
1593 error = err;
1594 }
1595
1596 if (drop_orphan && err == DB_FAIL) {
1597 char* path = fil_make_filepath(
1598 NULL, table_name, IBD, false);
1599 if (path != NULL) {
1600 os_file_delete_if_exists(
1601 innodb_data_file_key, path, NULL);
1602 ut_free(path);
1603 }
1604 }
1605 }
1606
1607 return(error);
1608 }
1609
1610 /****************************************************************//**
1611 Since we do a horizontal split on the index table, we need to drop
1612 all the split tables.
1613 @return DB_SUCCESS or error code */
1614 static
1615 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1616 fts_drop_index_split_tables(
1617 /*========================*/
1618 trx_t* trx, /*!< in: transaction */
1619 dict_index_t* index) /*!< in: fts instance */
1620
1621 {
1622 ulint i;
1623 fts_table_t fts_table;
1624 dberr_t error = DB_SUCCESS;
1625
1626 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1627
1628 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1629 dberr_t err;
1630 char table_name[MAX_FULL_NAME_LEN];
1631
1632 fts_table.suffix = fts_get_suffix(i);
1633 fts_get_table_name(&fts_table, table_name, true);
1634
1635 err = fts_drop_table(trx, table_name);
1636
1637 /* We only return the status of the last error. */
1638 if (err != DB_SUCCESS && err != DB_FAIL) {
1639 error = err;
1640 }
1641 }
1642
1643 return(error);
1644 }
1645
1646 /****************************************************************//**
1647 Drops FTS auxiliary tables for an FTS index
1648 @return DB_SUCCESS or error code */
1649 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1650 fts_drop_index_tables(
1651 /*==================*/
1652 trx_t* trx, /*!< in: transaction */
1653 dict_index_t* index) /*!< in: Index to drop */
1654 {
1655 return(fts_drop_index_split_tables(trx, index));
1656 }
1657
1658 /****************************************************************//**
1659 Drops FTS ancillary tables needed for supporting an FTS index
1660 on the given table. row_mysql_lock_data_dictionary must have been called
1661 before this.
1662 @return DB_SUCCESS or error code */
1663 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1664 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1665 fts_drop_all_index_tables(
1666 /*======================*/
1667 trx_t* trx, /*!< in: transaction */
1668 fts_t* fts) /*!< in: fts instance */
1669 {
1670 dberr_t error = DB_SUCCESS;
1671
1672 for (ulint i = 0;
1673 fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1674 ++i) {
1675
1676 dberr_t err;
1677 dict_index_t* index;
1678
1679 index = static_cast<dict_index_t*>(
1680 ib_vector_getp(fts->indexes, i));
1681
1682 err = fts_drop_index_tables(trx, index);
1683
1684 if (err != DB_SUCCESS) {
1685 error = err;
1686 }
1687 }
1688
1689 return(error);
1690 }
1691
1692 /*********************************************************************//**
1693 Drops the ancillary tables needed for supporting an FTS index on a
1694 given table. row_mysql_lock_data_dictionary must have been called before
1695 this.
1696 @return DB_SUCCESS or error code */
1697 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1698 fts_drop_tables(
1699 /*============*/
1700 trx_t* trx, /*!< in: transaction */
1701 dict_table_t* table) /*!< in: table has the FTS index */
1702 {
1703 dberr_t error;
1704 fts_table_t fts_table;
1705
1706 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1707
1708 /* TODO: This is not atomic and can cause problems during recovery. */
1709
1710 error = fts_drop_common_tables(trx, &fts_table);
1711
1712 if (error == DB_SUCCESS && table->fts) {
1713 error = fts_drop_all_index_tables(trx, table->fts);
1714 }
1715
1716 return(error);
1717 }
1718
1719 /** Create dict_table_t object for FTS Aux tables.
1720 @param[in] aux_table_name FTS Aux table name
1721 @param[in] table table object of FTS Index
1722 @param[in] n_cols number of columns for FTS Aux table
1723 @return table object for FTS Aux table */
1724 static
1725 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1726 fts_create_in_mem_aux_table(
1727 const char* aux_table_name,
1728 const dict_table_t* table,
1729 ulint n_cols)
1730 {
1731 dict_table_t* new_table = dict_mem_table_create(
1732 aux_table_name, NULL, n_cols, 0, table->flags,
1733 table->space_id == TRX_SYS_SPACE
1734 ? 0 : table->space_id == SRV_TMP_SPACE_ID
1735 ? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE);
1736
1737 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1738 ut_ad(table->data_dir_path != NULL);
1739 new_table->data_dir_path = mem_heap_strdup(
1740 new_table->heap, table->data_dir_path);
1741 }
1742
1743 return(new_table);
1744 }
1745
1746 /** Function to create on FTS common table.
1747 @param[in,out] trx InnoDB transaction
1748 @param[in] table Table that has FTS Index
1749 @param[in] fts_table_name FTS AUX table name
1750 @param[in] fts_suffix FTS AUX table suffix
1751 @param[in,out] heap temporary memory heap
1752 @return table object if created, else NULL */
1753 static
1754 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1755 fts_create_one_common_table(
1756 trx_t* trx,
1757 const dict_table_t* table,
1758 const char* fts_table_name,
1759 const char* fts_suffix,
1760 mem_heap_t* heap)
1761 {
1762 dict_table_t* new_table;
1763 dberr_t error;
1764 bool is_config = strcmp(fts_suffix, "CONFIG") == 0;
1765
1766 if (!is_config) {
1767
1768 new_table = fts_create_in_mem_aux_table(
1769 fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1770
1771 dict_mem_table_add_col(
1772 new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1773 FTS_DELETED_TABLE_COL_LEN);
1774 } else {
1775 /* Config table has different schema. */
1776 new_table = fts_create_in_mem_aux_table(
1777 fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1778
1779 dict_mem_table_add_col(
1780 new_table, heap, "key", DATA_VARCHAR, 0,
1781 FTS_CONFIG_TABLE_KEY_COL_LEN);
1782
1783 dict_mem_table_add_col(
1784 new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1785 FTS_CONFIG_TABLE_VALUE_COL_LEN);
1786 }
1787
1788 dict_table_add_system_columns(new_table, heap);
1789 error = row_create_table_for_mysql(new_table, trx,
1790 FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
1791 if (error == DB_SUCCESS) {
1792
1793 dict_index_t* index = dict_mem_index_create(
1794 new_table, "FTS_COMMON_TABLE_IND",
1795 DICT_UNIQUE|DICT_CLUSTERED, 1);
1796
1797 if (!is_config) {
1798 dict_mem_index_add_field(index, "doc_id", 0);
1799 } else {
1800 dict_mem_index_add_field(index, "key", 0);
1801 }
1802
1803 /* We save and restore trx->dict_operation because
1804 row_create_index_for_mysql() changes the operation to
1805 TRX_DICT_OP_TABLE. */
1806 trx_dict_op_t op = trx_get_dict_operation(trx);
1807
1808 error = row_create_index_for_mysql(index, trx, NULL);
1809
1810 trx->dict_operation = op;
1811 } else {
1812 err_exit:
1813 new_table = NULL;
1814 ib::warn() << "Failed to create FTS common table "
1815 << fts_table_name;
1816 trx->error_state = error;
1817 return NULL;
1818 }
1819
1820 if (error != DB_SUCCESS) {
1821 dict_mem_table_free(new_table);
1822 trx->error_state = DB_SUCCESS;
1823 row_drop_table_for_mysql(fts_table_name, trx, SQLCOM_DROP_DB);
1824 goto err_exit;
1825 }
1826
1827 return(new_table);
1828 }
1829
1830 /** Creates the common auxiliary tables needed for supporting an FTS index
1831 on the given table. row_mysql_lock_data_dictionary must have been called
1832 before this.
1833 The following tables are created.
1834 CREATE TABLE $FTS_PREFIX_DELETED
1835 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1836 CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1837 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1838 CREATE TABLE $FTS_PREFIX_BEING_DELETED
1839 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1840 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1841 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1842 CREATE TABLE $FTS_PREFIX_CONFIG
1843 (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1844 @param[in,out] trx transaction
1845 @param[in,out] table table with FTS index
1846 @param[in] skip_doc_id_index Skip index on doc id
1847 @return DB_SUCCESS if succeed */
1848 dberr_t
fts_create_common_tables(trx_t * trx,dict_table_t * table,bool skip_doc_id_index)1849 fts_create_common_tables(
1850 trx_t* trx,
1851 dict_table_t* table,
1852 bool skip_doc_id_index)
1853 {
1854 dberr_t error;
1855 que_t* graph;
1856 fts_table_t fts_table;
1857 mem_heap_t* heap = mem_heap_create(1024);
1858 pars_info_t* info;
1859 char fts_name[MAX_FULL_NAME_LEN];
1860 char full_name[sizeof(fts_common_tables) / sizeof(char*)]
1861 [MAX_FULL_NAME_LEN];
1862
1863 dict_index_t* index = NULL;
1864 trx_dict_op_t op;
1865 /* common_tables vector is used for dropping FTS common tables
1866 on error condition. */
1867 std::vector<dict_table_t*> common_tables;
1868 std::vector<dict_table_t*>::const_iterator it;
1869
1870 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1871
1872 op = trx_get_dict_operation(trx);
1873
1874 error = fts_drop_common_tables(trx, &fts_table);
1875
1876 if (error != DB_SUCCESS) {
1877
1878 goto func_exit;
1879 }
1880
1881 /* Create the FTS tables that are common to an FTS index. */
1882 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
1883
1884 fts_table.suffix = fts_common_tables[i];
1885 fts_get_table_name(&fts_table, full_name[i], true);
1886 dict_table_t* common_table = fts_create_one_common_table(
1887 trx, table, full_name[i], fts_table.suffix, heap);
1888
1889 if (!common_table) {
1890 trx->error_state = DB_SUCCESS;
1891 error = DB_ERROR;
1892 goto func_exit;
1893 } else {
1894 common_tables.push_back(common_table);
1895 }
1896
1897 mem_heap_empty(heap);
1898
1899 DBUG_EXECUTE_IF("ib_fts_aux_table_error",
1900 /* Return error after creating FTS_AUX_CONFIG table. */
1901 if (i == 4) {
1902 error = DB_ERROR;
1903 goto func_exit;
1904 }
1905 );
1906
1907 }
1908
1909 /* Write the default settings to the config table. */
1910 info = pars_info_create();
1911
1912 fts_table.suffix = "CONFIG";
1913 fts_get_table_name(&fts_table, fts_name, true);
1914 pars_info_bind_id(info, "config_table", fts_name);
1915
1916 graph = fts_parse_sql_no_dict_lock(
1917 info, fts_config_table_insert_values_sql);
1918
1919 error = fts_eval_sql(trx, graph);
1920
1921 que_graph_free(graph);
1922
1923 if (error != DB_SUCCESS || skip_doc_id_index) {
1924
1925 goto func_exit;
1926 }
1927
1928 index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME,
1929 DICT_UNIQUE, 1);
1930 dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
1931
1932 op = trx_get_dict_operation(trx);
1933
1934 error = row_create_index_for_mysql(index, trx, NULL);
1935
1936 func_exit:
1937 if (error != DB_SUCCESS) {
1938 for (it = common_tables.begin(); it != common_tables.end();
1939 ++it) {
1940 row_drop_table_for_mysql((*it)->name.m_name, trx,
1941 SQLCOM_DROP_DB);
1942 }
1943 }
1944
1945 trx->dict_operation = op;
1946
1947 common_tables.clear();
1948 mem_heap_free(heap);
1949
1950 return(error);
1951 }
1952
1953 /** Create one FTS auxiliary index table for an FTS index.
1954 @param[in,out] trx transaction
1955 @param[in] index the index instance
1956 @param[in] fts_table fts_table structure
1957 @param[in,out] heap temporary memory heap
1958 @see row_merge_create_fts_sort_index()
1959 @return DB_SUCCESS or error code */
1960 static
1961 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,const fts_table_t * fts_table,mem_heap_t * heap)1962 fts_create_one_index_table(
1963 trx_t* trx,
1964 const dict_index_t* index,
1965 const fts_table_t* fts_table,
1966 mem_heap_t* heap)
1967 {
1968 dict_field_t* field;
1969 dict_table_t* new_table;
1970 char table_name[MAX_FULL_NAME_LEN];
1971 dberr_t error;
1972 CHARSET_INFO* charset;
1973
1974 ut_ad(index->type & DICT_FTS);
1975
1976 fts_get_table_name(fts_table, table_name, true);
1977
1978 new_table = fts_create_in_mem_aux_table(
1979 table_name, fts_table->table,
1980 FTS_AUX_INDEX_TABLE_NUM_COLS);
1981
1982 field = dict_index_get_nth_field(index, 0);
1983 charset = fts_get_charset(field->col->prtype);
1984
1985 dict_mem_table_add_col(new_table, heap, "word",
1986 charset == &my_charset_latin1
1987 ? DATA_VARCHAR : DATA_VARMYSQL,
1988 field->col->prtype,
1989 FTS_MAX_WORD_LEN_IN_CHAR
1990 * unsigned(field->col->mbmaxlen));
1991
1992 dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
1993 DATA_NOT_NULL | DATA_UNSIGNED,
1994 FTS_INDEX_FIRST_DOC_ID_LEN);
1995
1996 dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
1997 DATA_NOT_NULL | DATA_UNSIGNED,
1998 FTS_INDEX_LAST_DOC_ID_LEN);
1999
2000 dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2001 DATA_NOT_NULL | DATA_UNSIGNED,
2002 FTS_INDEX_DOC_COUNT_LEN);
2003
2004 /* The precise type calculation is as follows:
2005 least signficiant byte: MySQL type code (not applicable for sys cols)
2006 second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2007 third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2008
2009 dict_mem_table_add_col(
2010 new_table, heap, "ilist", DATA_BLOB,
2011 (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2012 FTS_INDEX_ILIST_LEN);
2013
2014 dict_table_add_system_columns(new_table, heap);
2015 error = row_create_table_for_mysql(new_table, trx,
2016 FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
2017
2018 if (error == DB_SUCCESS) {
2019 dict_index_t* index = dict_mem_index_create(
2020 new_table, "FTS_INDEX_TABLE_IND",
2021 DICT_UNIQUE|DICT_CLUSTERED, 2);
2022 dict_mem_index_add_field(index, "word", 0);
2023 dict_mem_index_add_field(index, "first_doc_id", 0);
2024
2025 trx_dict_op_t op = trx_get_dict_operation(trx);
2026
2027 error = row_create_index_for_mysql(index, trx, NULL);
2028
2029 trx->dict_operation = op;
2030 } else {
2031 err_exit:
2032 new_table = NULL;
2033 ib::warn() << "Failed to create FTS index table "
2034 << table_name;
2035 trx->error_state = error;
2036 return NULL;
2037 }
2038
2039 if (error != DB_SUCCESS) {
2040 dict_mem_table_free(new_table);
2041 trx->error_state = DB_SUCCESS;
2042 row_drop_table_for_mysql(table_name, trx, SQLCOM_DROP_DB);
2043 goto err_exit;
2044 }
2045
2046 return(new_table);
2047 }
2048
2049 /** Creates the column specific ancillary tables needed for supporting an
2050 FTS index on the given table. row_mysql_lock_data_dictionary must have
2051 been called before this.
2052
2053 All FTS AUX Index tables have the following schema.
2054 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2055 word VARCHAR(FTS_MAX_WORD_LEN),
2056 first_doc_id INT NOT NULL,
2057 last_doc_id UNSIGNED NOT NULL,
2058 doc_count UNSIGNED INT NOT NULL,
2059 ilist VARBINARY NOT NULL,
2060 UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2061 @param[in,out] trx dictionary transaction
2062 @param[in] index fulltext index
2063 @param[in] id table id
2064 @return DB_SUCCESS or error code */
2065 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index,table_id_t id)2066 fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
2067 {
2068 ulint i;
2069 fts_table_t fts_table;
2070 dberr_t error = DB_SUCCESS;
2071 mem_heap_t* heap = mem_heap_create(1024);
2072
2073 fts_table.type = FTS_INDEX_TABLE;
2074 fts_table.index_id = index->id;
2075 fts_table.table_id = id;
2076 fts_table.table = index->table;
2077
2078 /* aux_idx_tables vector is used for dropping FTS AUX INDEX
2079 tables on error condition. */
2080 std::vector<dict_table_t*> aux_idx_tables;
2081 std::vector<dict_table_t*>::const_iterator it;
2082
2083 for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2084 dict_table_t* new_table;
2085
2086 /* Create the FTS auxiliary tables that are specific
2087 to an FTS index. We need to preserve the table_id %s
2088 which fts_parse_sql_no_dict_lock() will fill in for us. */
2089 fts_table.suffix = fts_get_suffix(i);
2090
2091 new_table = fts_create_one_index_table(
2092 trx, index, &fts_table, heap);
2093
2094 if (new_table == NULL) {
2095 error = DB_FAIL;
2096 break;
2097 } else {
2098 aux_idx_tables.push_back(new_table);
2099 }
2100
2101 mem_heap_empty(heap);
2102
2103 DBUG_EXECUTE_IF("ib_fts_index_table_error",
2104 /* Return error after creating FTS_INDEX_5
2105 aux table. */
2106 if (i == 4) {
2107 error = DB_FAIL;
2108 break;
2109 }
2110 );
2111 }
2112
2113 if (error != DB_SUCCESS) {
2114
2115 for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2116 ++it) {
2117 row_drop_table_for_mysql((*it)->name.m_name, trx,
2118 SQLCOM_DROP_DB);
2119 }
2120 }
2121
2122 aux_idx_tables.clear();
2123 mem_heap_free(heap);
2124
2125 return(error);
2126 }
2127
2128 /******************************************************************//**
2129 Calculate the new state of a row given the existing state and a new event.
2130 @return new state of row */
2131 static
2132 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2133 fts_trx_row_get_new_state(
2134 /*======================*/
2135 fts_row_state old_state, /*!< in: existing state of row */
2136 fts_row_state event) /*!< in: new event */
2137 {
2138 /* The rules for transforming states:
2139
2140 I = inserted
2141 M = modified
2142 D = deleted
2143 N = nothing
2144
2145 M+D -> D:
2146
2147 If the row existed before the transaction started and it is modified
2148 during the transaction, followed by a deletion of the row, only the
2149 deletion will be signaled.
2150
2151 M+ -> M:
2152
2153 If the row existed before the transaction started and it is modified
2154 more than once during the transaction, only the last modification
2155 will be signaled.
2156
2157 IM*D -> N:
2158
2159 If a new row is added during the transaction (and possibly modified
2160 after its initial insertion) but it is deleted before the end of the
2161 transaction, nothing will be signaled.
2162
2163 IM* -> I:
2164
2165 If a new row is added during the transaction and modified after its
2166 initial insertion, only the addition will be signaled.
2167
2168 M*DI -> M:
2169
2170 If the row existed before the transaction started and it is deleted,
2171 then re-inserted, only a modification will be signaled. Note that
2172 this case is only possible if the table is using the row's primary
2173 key for FTS row ids, since those can be re-inserted by the user,
2174 which is not true for InnoDB generated row ids.
2175
2176 It is easily seen that the above rules decompose such that we do not
2177 need to store the row's entire history of events. Instead, we can
2178 store just one state for the row and update that when new events
2179 arrive. Then we can implement the above rules as a two-dimensional
2180 look-up table, and get checking of invalid combinations "for free"
2181 in the process. */
2182
2183 /* The lookup table for transforming states. old_state is the
2184 Y-axis, event is the X-axis. */
2185 static const fts_row_state table[4][4] = {
2186 /* I M D N */
2187 /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
2188 /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
2189 /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
2190 /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2191 };
2192
2193 fts_row_state result;
2194
2195 ut_a(old_state < FTS_INVALID);
2196 ut_a(event < FTS_INVALID);
2197
2198 result = table[(int) old_state][(int) event];
2199 ut_a(result != FTS_INVALID);
2200
2201 return(result);
2202 }
2203
2204 /******************************************************************//**
2205 Create a savepoint instance.
2206 @return savepoint instance */
2207 static
2208 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2209 fts_savepoint_create(
2210 /*=================*/
2211 ib_vector_t* savepoints, /*!< out: InnoDB transaction */
2212 const char* name, /*!< in: savepoint name */
2213 mem_heap_t* heap) /*!< in: heap */
2214 {
2215 fts_savepoint_t* savepoint;
2216
2217 savepoint = static_cast<fts_savepoint_t*>(
2218 ib_vector_push(savepoints, NULL));
2219
2220 memset(savepoint, 0x0, sizeof(*savepoint));
2221
2222 if (name) {
2223 savepoint->name = mem_heap_strdup(heap, name);
2224 }
2225
2226 savepoint->tables = rbt_create(
2227 sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2228
2229 return(savepoint);
2230 }
2231
2232 /******************************************************************//**
2233 Create an FTS trx.
2234 @return FTS trx */
2235 fts_trx_t*
fts_trx_create(trx_t * trx)2236 fts_trx_create(
2237 /*===========*/
2238 trx_t* trx) /*!< in/out: InnoDB
2239 transaction */
2240 {
2241 fts_trx_t* ftt;
2242 ib_alloc_t* heap_alloc;
2243 mem_heap_t* heap = mem_heap_create(1024);
2244 trx_named_savept_t* savep;
2245
2246 ut_a(trx->fts_trx == NULL);
2247
2248 ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2249 ftt->trx = trx;
2250 ftt->heap = heap;
2251
2252 heap_alloc = ib_heap_allocator_create(heap);
2253
2254 ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2255 heap_alloc, sizeof(fts_savepoint_t), 4));
2256
2257 ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2258 heap_alloc, sizeof(fts_savepoint_t), 4));
2259
2260 /* Default instance has no name and no heap. */
2261 fts_savepoint_create(ftt->savepoints, NULL, NULL);
2262 fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2263
2264 /* Copy savepoints that already set before. */
2265 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2266 savep != NULL;
2267 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2268
2269 fts_savepoint_take(ftt, savep->name);
2270 }
2271
2272 return(ftt);
2273 }
2274
2275 /******************************************************************//**
2276 Create an FTS trx table.
2277 @return FTS trx table */
2278 static
2279 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2280 fts_trx_table_create(
2281 /*=================*/
2282 fts_trx_t* fts_trx, /*!< in: FTS trx */
2283 dict_table_t* table) /*!< in: table */
2284 {
2285 fts_trx_table_t* ftt;
2286
2287 ftt = static_cast<fts_trx_table_t*>(
2288 mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2289
2290 memset(ftt, 0x0, sizeof(*ftt));
2291
2292 ftt->table = table;
2293 ftt->fts_trx = fts_trx;
2294
2295 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2296
2297 return(ftt);
2298 }
2299
2300 /******************************************************************//**
2301 Clone an FTS trx table.
2302 @return FTS trx table */
2303 static
2304 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2305 fts_trx_table_clone(
2306 /*=================*/
2307 const fts_trx_table_t* ftt_src) /*!< in: FTS trx */
2308 {
2309 fts_trx_table_t* ftt;
2310
2311 ftt = static_cast<fts_trx_table_t*>(
2312 mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2313
2314 memset(ftt, 0x0, sizeof(*ftt));
2315
2316 ftt->table = ftt_src->table;
2317 ftt->fts_trx = ftt_src->fts_trx;
2318
2319 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2320
2321 /* Copy the rb tree values to the new savepoint. */
2322 rbt_merge_uniq(ftt->rows, ftt_src->rows);
2323
2324 /* These are only added on commit. At this stage we only have
2325 the updated row state. */
2326 ut_a(ftt_src->added_doc_ids == NULL);
2327
2328 return(ftt);
2329 }
2330
2331 /******************************************************************//**
2332 Initialize the FTS trx instance.
2333 @return FTS trx instance */
2334 static
2335 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2336 fts_trx_init(
2337 /*=========*/
2338 trx_t* trx, /*!< in: transaction */
2339 dict_table_t* table, /*!< in: FTS table instance */
2340 ib_vector_t* savepoints) /*!< in: Savepoints */
2341 {
2342 fts_trx_table_t* ftt;
2343 ib_rbt_bound_t parent;
2344 ib_rbt_t* tables;
2345 fts_savepoint_t* savepoint;
2346
2347 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2348
2349 tables = savepoint->tables;
2350 rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2351
2352 if (parent.result == 0) {
2353 fts_trx_table_t** fttp;
2354
2355 fttp = rbt_value(fts_trx_table_t*, parent.last);
2356 ftt = *fttp;
2357 } else {
2358 ftt = fts_trx_table_create(trx->fts_trx, table);
2359 rbt_add_node(tables, &parent, &ftt);
2360 }
2361
2362 ut_a(ftt->table == table);
2363
2364 return(ftt);
2365 }
2366
2367 /******************************************************************//**
2368 Notify the FTS system about an operation on an FTS-indexed table. */
2369 static
2370 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2371 fts_trx_table_add_op(
2372 /*=================*/
2373 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2374 doc_id_t doc_id, /*!< in: doc id */
2375 fts_row_state state, /*!< in: state of the row */
2376 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
2377 {
2378 ib_rbt_t* rows;
2379 ib_rbt_bound_t parent;
2380
2381 rows = ftt->rows;
2382 rbt_search(rows, &parent, &doc_id);
2383
2384 /* Row id found, update state, and if new state is FTS_NOTHING,
2385 we delete the row from our tree. */
2386 if (parent.result == 0) {
2387 fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
2388
2389 row->state = fts_trx_row_get_new_state(row->state, state);
2390
2391 if (row->state == FTS_NOTHING) {
2392 if (row->fts_indexes) {
2393 ib_vector_free(row->fts_indexes);
2394 }
2395
2396 ut_free(rbt_remove_node(rows, parent.last));
2397 row = NULL;
2398 } else if (row->fts_indexes != NULL) {
2399 ib_vector_free(row->fts_indexes);
2400 row->fts_indexes = fts_indexes;
2401 }
2402
2403 } else { /* Row-id not found, create a new one. */
2404 fts_trx_row_t row;
2405
2406 row.doc_id = doc_id;
2407 row.state = state;
2408 row.fts_indexes = fts_indexes;
2409
2410 rbt_add_node(rows, &parent, &row);
2411 }
2412 }
2413
2414 /******************************************************************//**
2415 Notify the FTS system about an operation on an FTS-indexed table. */
2416 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2417 fts_trx_add_op(
2418 /*===========*/
2419 trx_t* trx, /*!< in: InnoDB transaction */
2420 dict_table_t* table, /*!< in: table */
2421 doc_id_t doc_id, /*!< in: new doc id */
2422 fts_row_state state, /*!< in: state of the row */
2423 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
2424 (NULL=all) */
2425 {
2426 fts_trx_table_t* tran_ftt;
2427 fts_trx_table_t* stmt_ftt;
2428
2429 if (!trx->fts_trx) {
2430 trx->fts_trx = fts_trx_create(trx);
2431 }
2432
2433 tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2434 stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2435
2436 fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2437 fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2438 }
2439
2440 /******************************************************************//**
2441 Fetch callback that converts a textual document id to a binary value and
2442 stores it in the given place.
2443 @return always returns NULL */
2444 static
2445 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2446 fts_fetch_store_doc_id(
2447 /*===================*/
2448 void* row, /*!< in: sel_node_t* */
2449 void* user_arg) /*!< in: doc_id_t* to store
2450 doc_id in */
2451 {
2452 int n_parsed;
2453 sel_node_t* node = static_cast<sel_node_t*>(row);
2454 doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
2455 dfield_t* dfield = que_node_get_val(node->select_list);
2456 dtype_t* type = dfield_get_type(dfield);
2457 ulint len = dfield_get_len(dfield);
2458
2459 char buf[32];
2460
2461 ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2462 ut_a(len > 0 && len < sizeof(buf));
2463
2464 memcpy(buf, dfield_get_data(dfield), len);
2465 buf[len] = '\0';
2466
2467 n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2468 ut_a(n_parsed == 1);
2469
2470 return(FALSE);
2471 }
2472
#ifdef FTS_CACHE_SIZE_DEBUG
/******************************************************************//**
Get the max cache size in bytes. The value is read from the
"cache_size_in_mb" entry of the FTS config table and clamped to
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB].
If there is an error reading the value we simply print an error message
here and return the default value (the lower limit) to the caller.
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
/*===================*/
	trx_t*		trx,		/*!< in: transaction */
	fts_table_t*	fts_table)	/*!< in: table instance */
{
	dberr_t		error;
	fts_string_t	value;
	ulong		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	/* The explicit cast is needed in C++ when the allocator hands
	back an untyped pointer. */
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (UNIV_LIKELY(error == DB_SUCCESS)) {
		/* NUL-terminate the value before parsing it. */
		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if (cache_size_in_mb
			   < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << error << ") reading max"
			" cache config value from config table "
			<< fts_table->table->name;
	}

	ut_free(value.f_str);

	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2542
2543 /*********************************************************************//**
2544 Update the next and last Doc ID in the CONFIG table to be the input
2545 "doc_id" value (+ 1). We would do so after each FTS index build or
2546 table truncate */
2547 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,doc_id_t doc_id)2548 fts_update_next_doc_id(
2549 /*===================*/
2550 trx_t* trx, /*!< in/out: transaction */
2551 const dict_table_t* table, /*!< in: table */
2552 doc_id_t doc_id) /*!< in: DOC ID to set */
2553 {
2554 table->fts->cache->synced_doc_id = doc_id;
2555 table->fts->cache->next_doc_id = doc_id + 1;
2556
2557 table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2558
2559 fts_update_sync_doc_id(
2560 table, table->fts->cache->synced_doc_id, trx);
2561
2562 }
2563
2564 /*********************************************************************//**
2565 Get the next available document id.
2566 @return DB_SUCCESS if OK */
2567 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2568 fts_get_next_doc_id(
2569 /*================*/
2570 const dict_table_t* table, /*!< in: table */
2571 doc_id_t* doc_id) /*!< out: new document id */
2572 {
2573 fts_cache_t* cache = table->fts->cache;
2574
2575 /* If the Doc ID system has not yet been initialized, we
2576 will consult the CONFIG table and user table to re-establish
2577 the initial value of the Doc ID */
2578 if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2579 fts_init_doc_id(table);
2580 }
2581
2582 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2583 *doc_id = FTS_NULL_DOC_ID;
2584 return(DB_SUCCESS);
2585 }
2586
2587 DEBUG_SYNC_C("get_next_FTS_DOC_ID");
2588 mutex_enter(&cache->doc_id_lock);
2589 *doc_id = cache->next_doc_id++;
2590 mutex_exit(&cache->doc_id_lock);
2591
2592 return(DB_SUCCESS);
2593 }
2594
2595 /*********************************************************************//**
2596 This function fetch the Doc ID from CONFIG table, and compare with
2597 the Doc ID supplied. And store the larger one to the CONFIG table.
2598 @return DB_SUCCESS if OK */
2599 static MY_ATTRIBUTE((nonnull))
2600 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t cmp_doc_id,ibool read_only,doc_id_t * doc_id)2601 fts_cmp_set_sync_doc_id(
2602 /*====================*/
2603 const dict_table_t* table, /*!< in: table */
2604 doc_id_t cmp_doc_id, /*!< in: Doc ID to compare */
2605 ibool read_only, /*!< in: TRUE if read the
2606 synced_doc_id only */
2607 doc_id_t* doc_id) /*!< out: larger document id
2608 after comparing "cmp_doc_id"
2609 to the one stored in CONFIG
2610 table */
2611 {
2612 trx_t* trx;
2613 pars_info_t* info;
2614 dberr_t error;
2615 fts_table_t fts_table;
2616 que_t* graph = NULL;
2617 fts_cache_t* cache = table->fts->cache;
2618 char table_name[MAX_FULL_NAME_LEN];
2619 retry:
2620 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2621
2622 fts_table.suffix = "CONFIG";
2623 fts_table.table_id = table->id;
2624 fts_table.type = FTS_COMMON_TABLE;
2625 fts_table.table = table;
2626
2627 trx = trx_create();
2628 if (srv_read_only_mode) {
2629 trx_start_internal_read_only(trx);
2630 } else {
2631 trx_start_internal(trx);
2632 }
2633
2634 trx->op_info = "update the next FTS document id";
2635
2636 info = pars_info_create();
2637
2638 pars_info_bind_function(
2639 info, "my_func", fts_fetch_store_doc_id, doc_id);
2640
2641 fts_get_table_name(&fts_table, table_name);
2642 pars_info_bind_id(info, "config_table", table_name);
2643
2644 graph = fts_parse_sql(
2645 &fts_table, info,
2646 "DECLARE FUNCTION my_func;\n"
2647 "DECLARE CURSOR c IS SELECT value FROM $config_table"
2648 " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2649 "BEGIN\n"
2650 ""
2651 "OPEN c;\n"
2652 "WHILE 1 = 1 LOOP\n"
2653 " FETCH c INTO my_func();\n"
2654 " IF c % NOTFOUND THEN\n"
2655 " EXIT;\n"
2656 " END IF;\n"
2657 "END LOOP;\n"
2658 "CLOSE c;");
2659
2660 *doc_id = 0;
2661
2662 error = fts_eval_sql(trx, graph);
2663
2664 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2665
2666 // FIXME: We need to retry deadlock errors
2667 if (error != DB_SUCCESS) {
2668 goto func_exit;
2669 }
2670
2671 if (read_only) {
2672 /* InnoDB stores actual synced_doc_id value + 1 in
2673 FTS_CONFIG table. Reduce the value by 1 while reading
2674 after startup. */
2675 if (*doc_id) *doc_id -= 1;
2676 goto func_exit;
2677 }
2678
2679 if (cmp_doc_id == 0 && *doc_id) {
2680 cache->synced_doc_id = *doc_id - 1;
2681 } else {
2682 cache->synced_doc_id = ut_max(cmp_doc_id, *doc_id);
2683 }
2684
2685 mutex_enter(&cache->doc_id_lock);
2686 /* For each sync operation, we will add next_doc_id by 1,
2687 so to mark a sync operation */
2688 if (cache->next_doc_id < cache->synced_doc_id + 1) {
2689 cache->next_doc_id = cache->synced_doc_id + 1;
2690 }
2691 mutex_exit(&cache->doc_id_lock);
2692
2693 if (cmp_doc_id > *doc_id) {
2694 error = fts_update_sync_doc_id(
2695 table, cache->synced_doc_id, trx);
2696 }
2697
2698 *doc_id = cache->next_doc_id;
2699
2700 func_exit:
2701
2702 if (UNIV_LIKELY(error == DB_SUCCESS)) {
2703 fts_sql_commit(trx);
2704 } else {
2705 *doc_id = 0;
2706
2707 ib::error() << "(" << error << ") while getting next doc id "
2708 "for table " << table->name;
2709 fts_sql_rollback(trx);
2710
2711 if (error == DB_DEADLOCK) {
2712 os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2713 goto retry;
2714 }
2715 }
2716
2717 trx->free();
2718
2719 return(error);
2720 }
2721
2722 /*********************************************************************//**
2723 Update the last document id. This function could create a new
2724 transaction to update the last document id.
2725 @return DB_SUCCESS if OK */
2726 static
2727 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,doc_id_t doc_id,trx_t * trx)2728 fts_update_sync_doc_id(
2729 /*===================*/
2730 const dict_table_t* table, /*!< in: table */
2731 doc_id_t doc_id, /*!< in: last document id */
2732 trx_t* trx) /*!< in: update trx, or NULL */
2733 {
2734 byte id[FTS_MAX_ID_LEN];
2735 pars_info_t* info;
2736 fts_table_t fts_table;
2737 ulint id_len;
2738 que_t* graph = NULL;
2739 dberr_t error;
2740 ibool local_trx = FALSE;
2741 fts_cache_t* cache = table->fts->cache;
2742 char fts_name[MAX_FULL_NAME_LEN];
2743
2744 if (srv_read_only_mode) {
2745 return DB_READ_ONLY;
2746 }
2747
2748 fts_table.suffix = "CONFIG";
2749 fts_table.table_id = table->id;
2750 fts_table.type = FTS_COMMON_TABLE;
2751 fts_table.table = table;
2752
2753 if (!trx) {
2754 trx = trx_create();
2755 trx_start_internal(trx);
2756
2757 trx->op_info = "setting last FTS document id";
2758 local_trx = TRUE;
2759 }
2760
2761 info = pars_info_create();
2762
2763 id_len = (ulint) snprintf(
2764 (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2765
2766 pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2767
2768 fts_get_table_name(&fts_table, fts_name,
2769 table->fts->dict_locked);
2770 pars_info_bind_id(info, "table_name", fts_name);
2771
2772 graph = fts_parse_sql(
2773 &fts_table, info,
2774 "BEGIN"
2775 " UPDATE $table_name SET value = :doc_id"
2776 " WHERE key = 'synced_doc_id';");
2777
2778 error = fts_eval_sql(trx, graph);
2779
2780 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2781
2782 if (local_trx) {
2783 if (UNIV_LIKELY(error == DB_SUCCESS)) {
2784 fts_sql_commit(trx);
2785 cache->synced_doc_id = doc_id;
2786 } else {
2787 ib::error() << "(" << error << ") while"
2788 " updating last doc id for table"
2789 << table->name;
2790
2791 fts_sql_rollback(trx);
2792 }
2793 trx->free();
2794 }
2795
2796 return(error);
2797 }
2798
2799 /*********************************************************************//**
2800 Create a new fts_doc_ids_t.
2801 @return new fts_doc_ids_t */
2802 fts_doc_ids_t*
fts_doc_ids_create(void)2803 fts_doc_ids_create(void)
2804 /*====================*/
2805 {
2806 fts_doc_ids_t* fts_doc_ids;
2807 mem_heap_t* heap = mem_heap_create(512);
2808
2809 fts_doc_ids = static_cast<fts_doc_ids_t*>(
2810 mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2811
2812 fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2813
2814 fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2815 fts_doc_ids->self_heap, sizeof(doc_id_t), 32));
2816
2817 return(fts_doc_ids);
2818 }
2819
2820 /*********************************************************************//**
2821 Do commit-phase steps necessary for the insertion of a new row. */
2822 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)2823 fts_add(
2824 /*====*/
2825 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2826 fts_trx_row_t* row) /*!< in: row */
2827 {
2828 dict_table_t* table = ftt->table;
2829 doc_id_t doc_id = row->doc_id;
2830
2831 ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
2832
2833 fts_add_doc_by_id(ftt, doc_id);
2834
2835 mutex_enter(&table->fts->cache->deleted_lock);
2836 ++table->fts->cache->added;
2837 mutex_exit(&table->fts->cache->deleted_lock);
2838
2839 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
2840 && doc_id >= table->fts->cache->next_doc_id) {
2841 table->fts->cache->next_doc_id = doc_id + 1;
2842 }
2843 }
2844
2845 /*********************************************************************//**
2846 Do commit-phase steps necessary for the deletion of a row.
2847 @return DB_SUCCESS or error code */
2848 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2849 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)2850 fts_delete(
2851 /*=======*/
2852 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2853 fts_trx_row_t* row) /*!< in: row */
2854 {
2855 que_t* graph;
2856 fts_table_t fts_table;
2857 dberr_t error = DB_SUCCESS;
2858 doc_id_t write_doc_id;
2859 dict_table_t* table = ftt->table;
2860 doc_id_t doc_id = row->doc_id;
2861 trx_t* trx = ftt->fts_trx->trx;
2862 pars_info_t* info = pars_info_create();
2863 fts_cache_t* cache = table->fts->cache;
2864
2865 /* we do not index Documents whose Doc ID value is 0 */
2866 if (doc_id == FTS_NULL_DOC_ID) {
2867 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
2868 return(error);
2869 }
2870
2871 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2872
2873 FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
2874
2875 /* Convert to "storage" byte order. */
2876 fts_write_doc_id((byte*) &write_doc_id, doc_id);
2877 fts_bind_doc_id(info, "doc_id", &write_doc_id);
2878
2879 /* It is possible we update a record that has not yet been sync-ed
2880 into cache from last crash (delete Doc will not initialize the
2881 sync). Avoid any added counter accounting until the FTS cache
2882 is re-established and sync-ed */
2883 if (table->fts->added_synced
2884 && doc_id > cache->synced_doc_id) {
2885 mutex_enter(&table->fts->cache->deleted_lock);
2886
2887 /* The Doc ID could belong to those left in
2888 ADDED table from last crash. So need to check
2889 if it is less than first_doc_id when we initialize
2890 the Doc ID system after reboot */
2891 if (doc_id >= table->fts->cache->first_doc_id
2892 && table->fts->cache->added > 0) {
2893 --table->fts->cache->added;
2894 }
2895
2896 mutex_exit(&table->fts->cache->deleted_lock);
2897
2898 /* Only if the row was really deleted. */
2899 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2900 }
2901
2902 /* Note the deleted document for OPTIMIZE to purge. */
2903 if (error == DB_SUCCESS) {
2904 char table_name[MAX_FULL_NAME_LEN];
2905
2906 trx->op_info = "adding doc id to FTS DELETED";
2907
2908 info->graph_owns_us = TRUE;
2909
2910 fts_table.suffix = "DELETED";
2911
2912 fts_get_table_name(&fts_table, table_name);
2913 pars_info_bind_id(info, "deleted", table_name);
2914
2915 graph = fts_parse_sql(
2916 &fts_table,
2917 info,
2918 "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
2919
2920 error = fts_eval_sql(trx, graph);
2921
2922 fts_que_graph_free(graph);
2923 } else {
2924 pars_info_free(info);
2925 }
2926
2927 /* Increment the total deleted count, this is used to calculate the
2928 number of documents indexed. */
2929 if (error == DB_SUCCESS) {
2930 mutex_enter(&table->fts->cache->deleted_lock);
2931
2932 ++table->fts->cache->deleted;
2933
2934 mutex_exit(&table->fts->cache->deleted_lock);
2935 }
2936
2937 return(error);
2938 }
2939
2940 /*********************************************************************//**
2941 Do commit-phase steps necessary for the modification of a row.
2942 @return DB_SUCCESS or error code */
2943 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2944 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)2945 fts_modify(
2946 /*=======*/
2947 fts_trx_table_t* ftt, /*!< in: FTS trx table */
2948 fts_trx_row_t* row) /*!< in: row */
2949 {
2950 dberr_t error;
2951
2952 ut_a(row->state == FTS_MODIFY);
2953
2954 error = fts_delete(ftt, row);
2955
2956 if (error == DB_SUCCESS) {
2957 fts_add(ftt, row);
2958 }
2959
2960 return(error);
2961 }
2962
2963 /*********************************************************************//**
2964 The given transaction is about to be committed; do whatever is necessary
2965 from the FTS system's POV.
2966 @return DB_SUCCESS or error code */
2967 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2968 dberr_t
fts_commit_table(fts_trx_table_t * ftt)2969 fts_commit_table(
2970 /*=============*/
2971 fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
2972 {
2973 if (srv_read_only_mode) {
2974 return DB_READ_ONLY;
2975 }
2976
2977 const ib_rbt_node_t* node;
2978 ib_rbt_t* rows;
2979 dberr_t error = DB_SUCCESS;
2980 fts_cache_t* cache = ftt->table->fts->cache;
2981 trx_t* trx = trx_create();
2982
2983 trx_start_internal(trx);
2984
2985 rows = ftt->rows;
2986
2987 ftt->fts_trx->trx = trx;
2988
2989 if (cache->get_docs == NULL) {
2990 rw_lock_x_lock(&cache->init_lock);
2991 if (cache->get_docs == NULL) {
2992 cache->get_docs = fts_get_docs_create(cache);
2993 }
2994 rw_lock_x_unlock(&cache->init_lock);
2995 }
2996
2997 for (node = rbt_first(rows);
2998 node != NULL && error == DB_SUCCESS;
2999 node = rbt_next(rows, node)) {
3000
3001 fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
3002
3003 switch (row->state) {
3004 case FTS_INSERT:
3005 fts_add(ftt, row);
3006 break;
3007
3008 case FTS_MODIFY:
3009 error = fts_modify(ftt, row);
3010 break;
3011
3012 case FTS_DELETE:
3013 error = fts_delete(ftt, row);
3014 break;
3015
3016 default:
3017 ut_error;
3018 }
3019 }
3020
3021 fts_sql_commit(trx);
3022
3023 trx->free();
3024
3025 return(error);
3026 }
3027
3028 /*********************************************************************//**
3029 The given transaction is about to be committed; do whatever is necessary
3030 from the FTS system's POV.
3031 @return DB_SUCCESS or error code */
3032 dberr_t
fts_commit(trx_t * trx)3033 fts_commit(
3034 /*=======*/
3035 trx_t* trx) /*!< in: transaction */
3036 {
3037 const ib_rbt_node_t* node;
3038 dberr_t error;
3039 ib_rbt_t* tables;
3040 fts_savepoint_t* savepoint;
3041
3042 savepoint = static_cast<fts_savepoint_t*>(
3043 ib_vector_last(trx->fts_trx->savepoints));
3044 tables = savepoint->tables;
3045
3046 for (node = rbt_first(tables), error = DB_SUCCESS;
3047 node != NULL && error == DB_SUCCESS;
3048 node = rbt_next(tables, node)) {
3049
3050 fts_trx_table_t** ftt;
3051
3052 ftt = rbt_value(fts_trx_table_t*, node);
3053
3054 error = fts_commit_table(*ftt);
3055 }
3056
3057 return(error);
3058 }
3059
3060 /*********************************************************************//**
3061 Initialize a document. */
3062 void
fts_doc_init(fts_doc_t * doc)3063 fts_doc_init(
3064 /*=========*/
3065 fts_doc_t* doc) /*!< in: doc to initialize */
3066 {
3067 mem_heap_t* heap = mem_heap_create(32);
3068
3069 memset(doc, 0, sizeof(*doc));
3070
3071 doc->self_heap = ib_heap_allocator_create(heap);
3072 }
3073
3074 /*********************************************************************//**
3075 Free document. */
3076 void
fts_doc_free(fts_doc_t * doc)3077 fts_doc_free(
3078 /*=========*/
3079 fts_doc_t* doc) /*!< in: document */
3080 {
3081 mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3082
3083 if (doc->tokens) {
3084 rbt_free(doc->tokens);
3085 }
3086
3087 ut_d(memset(doc, 0, sizeof(*doc)));
3088
3089 mem_heap_free(heap);
3090 }
3091
3092 /*********************************************************************//**
3093 Callback function for fetch that stores the text of an FTS document,
3094 converting each column to UTF-16.
3095 @return always FALSE */
3096 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3097 fts_query_expansion_fetch_doc(
3098 /*==========================*/
3099 void* row, /*!< in: sel_node_t* */
3100 void* user_arg) /*!< in: fts_doc_t* */
3101 {
3102 que_node_t* exp;
3103 sel_node_t* node = static_cast<sel_node_t*>(row);
3104 fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
3105 dfield_t* dfield;
3106 ulint len;
3107 ulint doc_len;
3108 fts_doc_t doc;
3109 CHARSET_INFO* doc_charset = NULL;
3110 ulint field_no = 0;
3111
3112 len = 0;
3113
3114 fts_doc_init(&doc);
3115 doc.found = TRUE;
3116
3117 exp = node->select_list;
3118 doc_len = 0;
3119
3120 doc_charset = result_doc->charset;
3121
3122 /* Copy each indexed column content into doc->text.f_str */
3123 while (exp) {
3124 dfield = que_node_get_val(exp);
3125 len = dfield_get_len(dfield);
3126
3127 /* NULL column */
3128 if (len == UNIV_SQL_NULL) {
3129 exp = que_node_get_next(exp);
3130 continue;
3131 }
3132
3133 if (!doc_charset) {
3134 doc_charset = fts_get_charset(dfield->type.prtype);
3135 }
3136
3137 doc.charset = doc_charset;
3138
3139 if (dfield_is_ext(dfield)) {
3140 /* We ignore columns that are stored externally, this
3141 could result in too many words to search */
3142 exp = que_node_get_next(exp);
3143 continue;
3144 } else {
3145 doc.text.f_n_char = 0;
3146
3147 doc.text.f_str = static_cast<byte*>(
3148 dfield_get_data(dfield));
3149
3150 doc.text.f_len = len;
3151 }
3152
3153 if (field_no == 0) {
3154 fts_tokenize_document(&doc, result_doc,
3155 result_doc->parser);
3156 } else {
3157 fts_tokenize_document_next(&doc, doc_len, result_doc,
3158 result_doc->parser);
3159 }
3160
3161 exp = que_node_get_next(exp);
3162
3163 doc_len += (exp) ? len + 1 : len;
3164
3165 field_no++;
3166 }
3167
3168 ut_ad(doc_charset);
3169
3170 if (!result_doc->charset) {
3171 result_doc->charset = doc_charset;
3172 }
3173
3174 fts_doc_free(&doc);
3175
3176 return(FALSE);
3177 }
3178
3179 /*********************************************************************//**
3180 fetch and tokenize the document. */
3181 static
3182 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,rec_offs * offsets,fts_doc_t * doc)3183 fts_fetch_doc_from_rec(
3184 /*===================*/
3185 fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
3186 dict_index_t* clust_index, /*!< in: cluster index */
3187 btr_pcur_t* pcur, /*!< in: cursor whose position
3188 has been stored */
3189 rec_offs* offsets, /*!< in: offsets */
3190 fts_doc_t* doc) /*!< out: fts doc to hold parsed
3191 documents */
3192 {
3193 dict_index_t* index;
3194 dict_table_t* table;
3195 const rec_t* clust_rec;
3196 ulint num_field;
3197 const dict_field_t* ifield;
3198 const dict_col_t* col;
3199 ulint clust_pos;
3200 ulint i;
3201 ulint doc_len = 0;
3202 ulint processed_doc = 0;
3203 st_mysql_ftparser* parser;
3204
3205 if (!get_doc) {
3206 return;
3207 }
3208
3209 index = get_doc->index_cache->index;
3210 table = get_doc->index_cache->index->table;
3211 parser = get_doc->index_cache->index->parser;
3212
3213 clust_rec = btr_pcur_get_rec(pcur);
3214 ut_ad(!page_rec_is_comp(clust_rec)
3215 || rec_get_status(clust_rec) == REC_STATUS_ORDINARY);
3216
3217 num_field = dict_index_get_n_fields(index);
3218
3219 for (i = 0; i < num_field; i++) {
3220 ifield = dict_index_get_nth_field(index, i);
3221 col = dict_field_get_col(ifield);
3222 clust_pos = dict_col_get_clust_pos(col, clust_index);
3223
3224 if (!get_doc->index_cache->charset) {
3225 get_doc->index_cache->charset = fts_get_charset(
3226 ifield->col->prtype);
3227 }
3228
3229 if (rec_offs_nth_extern(offsets, clust_pos)) {
3230 doc->text.f_str =
3231 btr_rec_copy_externally_stored_field(
3232 clust_rec, offsets,
3233 dict_table_page_size(table),
3234 clust_pos, &doc->text.f_len,
3235 static_cast<mem_heap_t*>(
3236 doc->self_heap->arg));
3237 } else {
3238 doc->text.f_str = (byte*) rec_get_nth_field(
3239 clust_rec, offsets, clust_pos,
3240 &doc->text.f_len);
3241 }
3242
3243 doc->found = TRUE;
3244 doc->charset = get_doc->index_cache->charset;
3245
3246 /* Null Field */
3247 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3248 continue;
3249 }
3250
3251 if (processed_doc == 0) {
3252 fts_tokenize_document(doc, NULL, parser);
3253 } else {
3254 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3255 }
3256
3257 processed_doc++;
3258 doc_len += doc->text.f_len + 1;
3259 }
3260 }
3261
3262 /** Fetch the data from tuple and tokenize the document.
3263 @param[in] get_doc FTS index's get_doc struct
3264 @param[in] tuple tuple should be arranged in table schema order
3265 @param[out] doc fts doc to hold parsed documents. */
3266 static
3267 void
fts_fetch_doc_from_tuple(fts_get_doc_t * get_doc,const dtuple_t * tuple,fts_doc_t * doc)3268 fts_fetch_doc_from_tuple(
3269 fts_get_doc_t* get_doc,
3270 const dtuple_t* tuple,
3271 fts_doc_t* doc)
3272 {
3273 dict_index_t* index;
3274 st_mysql_ftparser* parser;
3275 ulint doc_len = 0;
3276 ulint processed_doc = 0;
3277 ulint num_field;
3278
3279 if (get_doc == NULL) {
3280 return;
3281 }
3282
3283 index = get_doc->index_cache->index;
3284 parser = get_doc->index_cache->index->parser;
3285 num_field = dict_index_get_n_fields(index);
3286
3287 for (ulint i = 0; i < num_field; i++) {
3288 const dict_field_t* ifield;
3289 const dict_col_t* col;
3290 ulint pos;
3291
3292 ifield = dict_index_get_nth_field(index, i);
3293 col = dict_field_get_col(ifield);
3294 pos = dict_col_get_no(col);
3295 const dfield_t* field = dtuple_get_nth_field(tuple, pos);
3296
3297 if (!get_doc->index_cache->charset) {
3298 get_doc->index_cache->charset = fts_get_charset(
3299 ifield->col->prtype);
3300 }
3301
3302 ut_ad(!dfield_is_ext(field));
3303
3304 doc->text.f_str = (byte*) dfield_get_data(field);
3305 doc->text.f_len = dfield_get_len(field);
3306 doc->found = TRUE;
3307 doc->charset = get_doc->index_cache->charset;
3308
3309 /* field data is NULL. */
3310 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3311 continue;
3312 }
3313
3314 if (processed_doc == 0) {
3315 fts_tokenize_document(doc, NULL, parser);
3316 } else {
3317 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3318 }
3319
3320 processed_doc++;
3321 doc_len += doc->text.f_len + 1;
3322 }
3323 }
3324
3325 /** Fetch the document from tuple, tokenize the text data and
3326 insert the text data into fts auxiliary table and
3327 its cache. Moreover this tuple fields doesn't contain any information
3328 about externally stored field. This tuple contains data directly
3329 converted from mysql.
3330 @param[in] ftt FTS transaction table
3331 @param[in] doc_id doc id
3332 @param[in] tuple tuple from where data can be retrieved
3333 and tuple should be arranged in table
3334 schema order. */
3335 void
fts_add_doc_from_tuple(fts_trx_table_t * ftt,doc_id_t doc_id,const dtuple_t * tuple)3336 fts_add_doc_from_tuple(
3337 fts_trx_table_t*ftt,
3338 doc_id_t doc_id,
3339 const dtuple_t* tuple)
3340 {
3341 mtr_t mtr;
3342 fts_cache_t* cache = ftt->table->fts->cache;
3343
3344 ut_ad(cache->get_docs);
3345
3346 if (!ftt->table->fts->added_synced) {
3347 fts_init_index(ftt->table, FALSE);
3348 }
3349
3350 mtr_start(&mtr);
3351
3352 ulint num_idx = ib_vector_size(cache->get_docs);
3353
3354 for (ulint i = 0; i < num_idx; ++i) {
3355 fts_doc_t doc;
3356 dict_table_t* table;
3357 fts_get_doc_t* get_doc;
3358
3359 get_doc = static_cast<fts_get_doc_t*>(
3360 ib_vector_get(cache->get_docs, i));
3361 table = get_doc->index_cache->index->table;
3362
3363 fts_doc_init(&doc);
3364 fts_fetch_doc_from_tuple(
3365 get_doc, tuple, &doc);
3366
3367 if (doc.found) {
3368 mtr_commit(&mtr);
3369 rw_lock_x_lock(&table->fts->cache->lock);
3370
3371 if (table->fts->cache->stopword_info.status
3372 & STOPWORD_NOT_INIT) {
3373 fts_load_stopword(table, NULL, NULL,
3374 true, true);
3375 }
3376
3377 fts_cache_add_doc(
3378 table->fts->cache,
3379 get_doc->index_cache,
3380 doc_id, doc.tokens);
3381
3382 rw_lock_x_unlock(&table->fts->cache->lock);
3383
3384 if (cache->total_size > fts_max_cache_size / 5
3385 || fts_need_sync) {
3386 fts_sync(cache->sync, true, false);
3387 }
3388
3389 mtr_start(&mtr);
3390
3391 }
3392
3393 fts_doc_free(&doc);
3394 }
3395
3396 mtr_commit(&mtr);
3397 }
3398
/*********************************************************************//**
This function fetches the document inserted during the committing
transaction, and tokenize the inserted text data and insert into
FTS auxiliary table and its cache.

The row is located through table->fts_doc_id_index; when that index is
not the clustered index, the clustered record is looked up through a
second cursor. The record is then tokenized once per entry of
cache->get_docs and the tokens are added to the FTS cache.
@return always TRUE (also when no matching record is found) */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id)		/*!< in: doc id */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t	temp_doc_id;
	dict_index_t*	clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!ftt->table->fts->added_synced) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* The heap holds the search tuples and, if needed, the record
	offsets; it is freed at func_exit. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = table->fts_doc_id_index;

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key is the Doc ID written as 8 bytes by
	mach_write_to_8(). */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		rec_offs*	offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The Doc ID index is the clustered index: the
			record just found is the row itself. */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			dtuple_t*	clust_ref;
			ulint		n_fields;

			/* Build a row reference from the Doc ID index
			record and use it to position a second cursor on
			the clustered index. */
			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL);

			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					  clust_index->n_core_fields,
					  ULINT_UNDEFINED, &heap);

		/* Tokenize the record once per get_doc entry. */
		for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t	doc;
			/* NOTE: these two locals shadow the function-level
			"table" and "get_doc" for the rest of the loop body. */
			dict_table_t*	table;
			fts_get_doc_t*	get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Remember the cursor position and commit the
				mini-transaction before the cache lock is
				taken; the position is restored below if more
				indexes remain to be processed. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				rw_lock_x_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL,
							  NULL, true, true);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Request a sync only when none is in progress
				and either a sync was asked for or the cache
				grew by more than a tenth of its maximum size
				since total_size_at_sync was last recorded. */
				bool	need_sync = !cache->sync->in_progress
					&& (fts_need_sync
					    || (cache->total_size
						- cache->total_size_at_sync)
					    > fts_max_cache_size / 10);
				if (need_sync) {
					cache->total_size_at_sync =
						cache->total_size;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				/* Debug-only injection points. */
				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				mtr_start(&mtr);

				/* The cursor is only needed again when another
				index still has to read the record. */
				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		/* The second cursor exists only in the non-clustered
		case; pcur itself is closed at func_exit. */
		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}
3606
3607
3608 /*********************************************************************//**
3609 Callback function to read a single ulint column.
3610 return always returns TRUE */
3611 static
3612 ibool
fts_read_ulint(void * row,void * user_arg)3613 fts_read_ulint(
3614 /*===========*/
3615 void* row, /*!< in: sel_node_t* */
3616 void* user_arg) /*!< in: pointer to ulint */
3617 {
3618 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
3619 ulint* value = static_cast<ulint*>(user_arg);
3620 que_node_t* exp = sel_node->select_list;
3621 dfield_t* dfield = que_node_get_val(exp);
3622 void* data = dfield_get_data(dfield);
3623
3624 *value = static_cast<ulint>(mach_read_from_4(
3625 static_cast<const byte*>(data)));
3626
3627 return(TRUE);
3628 }
3629
3630 /*********************************************************************//**
3631 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3632 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3633 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3634 fts_get_max_doc_id(
3635 /*===============*/
3636 dict_table_t* table) /*!< in: user table */
3637 {
3638 dict_index_t* index;
3639 dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL;
3640 doc_id_t doc_id = 0;
3641 mtr_t mtr;
3642 btr_pcur_t pcur;
3643
3644 index = table->fts_doc_id_index;
3645
3646 if (!index) {
3647 return(0);
3648 }
3649
3650 ut_ad(!index->is_instant());
3651
3652 dfield = dict_index_get_nth_field(index, 0);
3653
3654 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3655 ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3656 #endif
3657
3658 mtr_start(&mtr);
3659
3660 /* fetch the largest indexes value */
3661 btr_pcur_open_at_index_side(
3662 false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3663
3664 if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3665 const rec_t* rec = NULL;
3666 rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
3667 rec_offs* offsets = offsets_;
3668 mem_heap_t* heap = NULL;
3669 ulint len;
3670 const void* data;
3671
3672 rec_offs_init(offsets_);
3673
3674 do {
3675 rec = btr_pcur_get_rec(&pcur);
3676
3677 if (page_rec_is_user_rec(rec)) {
3678 break;
3679 }
3680 } while (btr_pcur_move_to_prev(&pcur, &mtr));
3681
3682 if (!rec) {
3683 goto func_exit;
3684 }
3685
3686 ut_ad(!rec_is_metadata(rec, index));
3687 offsets = rec_get_offsets(
3688 rec, index, offsets, index->n_core_fields,
3689 ULINT_UNDEFINED, &heap);
3690
3691 data = rec_get_nth_field(rec, offsets, 0, &len);
3692
3693 doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3694 static_cast<const byte*>(data)));
3695 }
3696
3697 func_exit:
3698 btr_pcur_close(&pcur);
3699 mtr_commit(&mtr);
3700 return(doc_id);
3701 }
3702
/*********************************************************************//**
Fetch document with the given document id.

Runs an internal SQL cursor over the user table and passes every
fetched row to "callback". With FTS_FETCH_DOC_BY_ID_EQUAL the cursor
selects rows whose Doc ID equals doc_id; otherwise (expected to be
FTS_FETCH_DOC_BY_ID_LARGE) it selects rows whose Doc ID is greater than
doc_id and additionally selects the Doc ID column itself.

When get_doc is not NULL the parsed query graph is cached in
get_doc->get_document_graph and reused by later calls; when get_doc is
NULL the graph is freed before returning. Note that get_doc is
dereferenced when index_to_use is NULL, so at least one of the two must
be supplied.
@return DB_SUCCESS if OK else error */
dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
	fts_get_doc_t*	get_doc,	/*!< in: state */
	doc_id_t	doc_id,		/*!< in: id of document to
					fetch */
	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
					or NULL */
	ulint		option,		/*!< in: search option, if it is
					greater than doc_id or equal */
	fts_sql_callback
			callback,	/*!< in: callback to read */
	void*		arg)		/*!< in: callback arg */
{
	pars_info_t*	info;
	dberr_t		error;
	const char*	select_str;
	doc_id_t	write_doc_id;
	dict_index_t*	index;
	trx_t*		trx = trx_create();
	que_t*		graph;

	trx->op_info = "fetching indexed FTS document";

	/* The FTS index can be supplied by caller directly with
	"index_to_use", otherwise, get it from "get_doc" */
	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;

	/* Reuse the bind info of a cached graph when there is one. */
	if (get_doc && get_doc->get_document_graph) {
		info = get_doc->get_document_graph->info;
	} else {
		info = pars_info_create();
	}

	/* Convert to "storage" byte order. */
	fts_write_doc_id((byte*) &write_doc_id, doc_id);
	fts_bind_doc_id(info, "doc_id", &write_doc_id);
	pars_info_bind_function(info, "my_func", callback, arg);

	select_str = fts_get_select_columns_str(index, info, info->heap);
	pars_info_bind_id(info, "table_name", index->table->name.m_name);

	/* Parse a new graph only when no cached one is available. */
	if (!get_doc || !get_doc->get_document_graph) {
		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s FROM $table_name"
					" WHERE %s = :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					" FETCH c INTO my_func();\n"
					" IF c %% NOTFOUND THEN\n"
					" EXIT;\n"
					" END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					select_str, FTS_DOC_ID_COL_NAME));
		} else {
			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);

			/* This is used for crash recovery of table with
			hidden DOC ID or FTS indexes. We will scan the table
			to re-processing user table rows whose DOC ID or
			FTS indexed documents have not been sync-ed to disc
			during recent crash.
			In the case that all fulltext indexes are dropped
			for a table, we will keep the "hidden" FTS_DOC_ID
			column, and this scan is to retrieve the largest
			DOC ID being used in the table to determine the
			appropriate next DOC ID.
			In the case of there exists fulltext index(es), this
			operation will re-tokenize any docs that have not
			been sync-ed to the disk, and re-prime the FTS
			cached */
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s, %s FROM $table_name"
					" WHERE %s > :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					" FETCH c INTO my_func();\n"
					" IF c %% NOTFOUND THEN\n"
					" EXIT;\n"
					" END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					FTS_DOC_ID_COL_NAME,
					select_str, FTS_DOC_ID_COL_NAME));
		}
		/* Cache the graph for later calls with the same get_doc.
		NOTE(review): the cached graph is reused regardless of
		"option" on later calls - confirm callers never mix
		options on one get_doc. */
		if (get_doc) {
			get_doc->get_document_graph = graph;
		}
	} else {
		graph = get_doc->get_document_graph;
	}

	/* The transaction is committed whether or not the evaluation
	succeeded; the evaluation result is what gets returned. */
	error = fts_eval_sql(trx, graph);
	fts_sql_commit(trx);
	trx->free();

	/* Without a get_doc nothing holds on to the graph. */
	if (!get_doc) {
		fts_que_graph_free(graph);
	}

	return(error);
}
3824
3825 /*********************************************************************//**
3826 Write out a single word's data as new entry/entries in the INDEX table.
3827 @return DB_SUCCESS if all OK. */
3828 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3829 fts_write_node(
3830 /*===========*/
3831 trx_t* trx, /*!< in: transaction */
3832 que_t** graph, /*!< in: query graph */
3833 fts_table_t* fts_table, /*!< in: aux table */
3834 fts_string_t* word, /*!< in: word in UTF-8 */
3835 fts_node_t* node) /*!< in: node columns */
3836 {
3837 pars_info_t* info;
3838 dberr_t error;
3839 ib_uint32_t doc_count;
3840 time_t start_time;
3841 doc_id_t last_doc_id;
3842 doc_id_t first_doc_id;
3843 char table_name[MAX_FULL_NAME_LEN];
3844
3845 ut_a(node->ilist != NULL);
3846
3847 if (*graph) {
3848 info = (*graph)->info;
3849 } else {
3850 info = pars_info_create();
3851
3852 fts_get_table_name(fts_table, table_name);
3853 pars_info_bind_id(info, "index_table_name", table_name);
3854 }
3855
3856 pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3857
3858 /* Convert to "storage" byte order. */
3859 fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3860 fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3861
3862 /* Convert to "storage" byte order. */
3863 fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3864 fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3865
3866 ut_a(node->last_doc_id >= node->first_doc_id);
3867
3868 /* Convert to "storage" byte order. */
3869 mach_write_to_4((byte*) &doc_count, node->doc_count);
3870 pars_info_bind_int4_literal(
3871 info, "doc_count", (const ib_uint32_t*) &doc_count);
3872
3873 /* Set copy_name to FALSE since it's a static. */
3874 pars_info_bind_literal(
3875 info, "ilist", node->ilist, node->ilist_size,
3876 DATA_BLOB, DATA_BINARY_TYPE);
3877
3878 if (!*graph) {
3879
3880 *graph = fts_parse_sql(
3881 fts_table,
3882 info,
3883 "BEGIN\n"
3884 "INSERT INTO $index_table_name VALUES"
3885 " (:token, :first_doc_id,"
3886 " :last_doc_id, :doc_count, :ilist);");
3887 }
3888
3889 start_time = time(NULL);
3890 error = fts_eval_sql(trx, *graph);
3891 elapsed_time += time(NULL) - start_time;
3892 ++n_nodes;
3893
3894 return(error);
3895 }
3896
3897 /*********************************************************************//**
3898 Add rows to the DELETED_CACHE table.
3899 @return DB_SUCCESS if all went well else error code*/
3900 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3901 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)3902 fts_sync_add_deleted_cache(
3903 /*=======================*/
3904 fts_sync_t* sync, /*!< in: sync state */
3905 ib_vector_t* doc_ids) /*!< in: doc ids to add */
3906 {
3907 ulint i;
3908 pars_info_t* info;
3909 que_t* graph;
3910 fts_table_t fts_table;
3911 char table_name[MAX_FULL_NAME_LEN];
3912 doc_id_t dummy = 0;
3913 dberr_t error = DB_SUCCESS;
3914 ulint n_elems = ib_vector_size(doc_ids);
3915
3916 ut_a(ib_vector_size(doc_ids) > 0);
3917
3918 ib_vector_sort(doc_ids, fts_doc_id_cmp);
3919
3920 info = pars_info_create();
3921
3922 fts_bind_doc_id(info, "doc_id", &dummy);
3923
3924 FTS_INIT_FTS_TABLE(
3925 &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
3926
3927 fts_get_table_name(&fts_table, table_name);
3928 pars_info_bind_id(info, "table_name", table_name);
3929
3930 graph = fts_parse_sql(
3931 &fts_table,
3932 info,
3933 "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
3934
3935 for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
3936 doc_id_t* update;
3937 doc_id_t write_doc_id;
3938
3939 update = static_cast<doc_id_t*>(ib_vector_get(doc_ids, i));
3940
3941 /* Convert to "storage" byte order. */
3942 fts_write_doc_id((byte*) &write_doc_id, *update);
3943 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3944
3945 error = fts_eval_sql(sync->trx, graph);
3946 }
3947
3948 fts_que_graph_free(graph);
3949
3950 return(error);
3951 }
3952
/** Write the words and ilist to disk.
Walks every word of the index cache and writes each node that is not
yet marked as synced to the auxiliary index table selected for that
word. A node is marked as synced before the write is attempted, and
after the first error no further writes are attempted (the remaining
nodes are still marked as synced).
@param[in,out]	trx		transaction
@param[in]	index_cache	index cache
@param[in]	unlock_cache	whether unlock cache when write node
@return DB_SUCCESS if all went well else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_write_words(
	trx_t*			trx,
	fts_index_cache_t*	index_cache,
	bool			unlock_cache)
{
	fts_table_t	fts_table;
	/* Local statistics for the diagnostic print below; this n_nodes
	is a local variable of this function only. */
	ulint		n_nodes = 0;
	ulint		n_words = 0;
	const ib_rbt_node_t*	rbt_node;
	dberr_t		error = DB_SUCCESS;
	/* Set once the error has been logged, so it is reported once. */
	ibool		print_error = FALSE;
	dict_table_t*	table = index_cache->index->table;

	FTS_INIT_INDEX_TABLE(
		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);

	n_words = rbt_size(index_cache->words);

	/* We iterate over the entire tree, even if there is an error,
	since we want to free the memory used during caching. */
	for (rbt_node = rbt_first(index_cache->words);
	     rbt_node;
	     rbt_node = rbt_next(index_cache->words, rbt_node)) {

		ulint			i;
		ulint			selected;
		fts_tokenizer_word_t*	word;

		word = rbt_value(fts_tokenizer_word_t, rbt_node);

		DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
				os_thread_sleep(300000););

		/* Pick the auxiliary index table for this word; the same
		number indexes the cached insert graphs. */
		selected = fts_select_index(
			index_cache->charset, word->text.f_str,
			word->text.f_len);

		fts_table.suffix = fts_get_suffix(selected);

		/* We iterate over all the nodes even if there was an error */
		for (i = 0; i < ib_vector_size(word->nodes); ++i) {

			fts_node_t* fts_node = static_cast<fts_node_t*>(
				ib_vector_get(word->nodes, i));

			/* Skip nodes already handled; mark the others as
			synced before attempting the write. */
			if (fts_node->synced) {
				continue;
			} else {
				fts_node->synced = true;
			}

			/*FIXME: we need to handle the error properly. */
			if (error == DB_SUCCESS) {
				/* Optionally release the cache lock for the
				duration of the write; it is taken again
				right after. */
				if (unlock_cache) {
					rw_lock_x_unlock(
						&table->fts->cache->lock);
				}

				error = fts_write_node(
					trx,
					&index_cache->ins_graph[selected],
					&fts_table, &word->text, fts_node);

				DEBUG_SYNC_C("fts_write_node");
				DBUG_EXECUTE_IF("fts_write_node_crash",
					DBUG_SUICIDE(););

				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
					os_thread_sleep(1000000);
				);

				if (unlock_cache) {
					rw_lock_x_lock(
						&table->fts->cache->lock);
				}
			}
		}

		/* Counts every node of the word, including the ones that
		were skipped above. */
		n_nodes += ib_vector_size(word->nodes);

		if (UNIV_UNLIKELY(error != DB_SUCCESS) && !print_error) {
			ib::error() << "(" << error << ") writing"
				" word node to FTS auxiliary index table "
				<< table->name;
			print_error = TRUE;
		}
	}

	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
		/* The divisor is clamped to 1 so that an empty cache
		does not divide by zero. */
		printf("Avg number of nodes: %lf\n",
		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
	}

	return(error);
}
4055
4056 /*********************************************************************//**
4057 Begin Sync, create transaction, acquire locks, etc. */
4058 static
4059 void
fts_sync_begin(fts_sync_t * sync)4060 fts_sync_begin(
4061 /*===========*/
4062 fts_sync_t* sync) /*!< in: sync state */
4063 {
4064 fts_cache_t* cache = sync->table->fts->cache;
4065
4066 n_nodes = 0;
4067 elapsed_time = 0;
4068
4069 sync->start_time = time(NULL);
4070
4071 sync->trx = trx_create();
4072 trx_start_internal(sync->trx);
4073
4074 if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4075 ib::info() << "FTS SYNC for table " << sync->table->name
4076 << ", deleted count: "
4077 << ib_vector_size(cache->deleted_doc_ids)
4078 << " size: " << cache->total_size << " bytes";
4079 }
4080 }
4081
4082 /*********************************************************************//**
4083 Run SYNC on the table, i.e., write out data from the index specific
4084 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4085 @return DB_SUCCESS if all OK */
4086 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4087 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4088 fts_sync_index(
4089 /*===========*/
4090 fts_sync_t* sync, /*!< in: sync state */
4091 fts_index_cache_t* index_cache) /*!< in: index cache */
4092 {
4093 trx_t* trx = sync->trx;
4094
4095 trx->op_info = "doing SYNC index";
4096
4097 if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4098 ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4099 }
4100
4101 ut_ad(rbt_validate(index_cache->words));
4102
4103 return(fts_sync_write_words(trx, index_cache, sync->unlock_cache));
4104 }
4105
4106 /** Check if index cache has been synced completely
4107 @param[in,out] index_cache index cache
4108 @return true if index is synced, otherwise false. */
4109 static
4110 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4111 fts_sync_index_check(
4112 fts_index_cache_t* index_cache)
4113 {
4114 const ib_rbt_node_t* rbt_node;
4115
4116 for (rbt_node = rbt_first(index_cache->words);
4117 rbt_node != NULL;
4118 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4119
4120 fts_tokenizer_word_t* word;
4121 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4122
4123 fts_node_t* fts_node;
4124 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4125
4126 if (!fts_node->synced) {
4127 return(false);
4128 }
4129 }
4130
4131 return(true);
4132 }
4133
4134 /** Reset synced flag in index cache when rollback
4135 @param[in,out] index_cache index cache */
4136 static
4137 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4138 fts_sync_index_reset(
4139 fts_index_cache_t* index_cache)
4140 {
4141 const ib_rbt_node_t* rbt_node;
4142
4143 for (rbt_node = rbt_first(index_cache->words);
4144 rbt_node != NULL;
4145 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4146
4147 fts_tokenizer_word_t* word;
4148 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4149
4150 fts_node_t* fts_node;
4151 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4152
4153 fts_node->synced = false;
4154 }
4155 }
4156
4157 /** Commit the SYNC, change state of processed doc ids etc.
4158 @param[in,out] sync sync state
4159 @return DB_SUCCESS if all OK */
4160 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4161 dberr_t
fts_sync_commit(fts_sync_t * sync)4162 fts_sync_commit(
4163 fts_sync_t* sync)
4164 {
4165 dberr_t error;
4166 trx_t* trx = sync->trx;
4167 fts_cache_t* cache = sync->table->fts->cache;
4168 doc_id_t last_doc_id;
4169
4170 trx->op_info = "doing SYNC commit";
4171
4172 /* After each Sync, update the CONFIG table about the max doc id
4173 we just sync-ed to index table */
4174 error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4175 &last_doc_id);
4176
4177 /* Get the list of deleted documents that are either in the
4178 cache or were headed there but were deleted before the add
4179 thread got to them. */
4180
4181 if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4182
4183 error = fts_sync_add_deleted_cache(
4184 sync, cache->deleted_doc_ids);
4185 }
4186
4187 /* We need to do this within the deleted lock since fts_delete() can
4188 attempt to add a deleted doc id to the cache deleted id array. */
4189 fts_cache_clear(cache);
4190 DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4191 fts_cache_init(cache);
4192 rw_lock_x_unlock(&cache->lock);
4193
4194 if (UNIV_LIKELY(error == DB_SUCCESS)) {
4195 fts_sql_commit(trx);
4196 } else {
4197 fts_sql_rollback(trx);
4198 ib::error() << "(" << error << ") during SYNC of "
4199 "table " << sync->table->name;
4200 }
4201
4202 if (UNIV_UNLIKELY(fts_enable_diag_print) && elapsed_time) {
4203 ib::info() << "SYNC for table " << sync->table->name
4204 << ": SYNC time: "
4205 << (time(NULL) - sync->start_time)
4206 << " secs: elapsed "
4207 << (double) n_nodes / elapsed_time
4208 << " ins/sec";
4209 }
4210
4211 /* Avoid assertion in trx_t::free(). */
4212 trx->dict_operation_lock_mode = 0;
4213 trx->free();
4214
4215 return(error);
4216 }
4217
4218 /** Rollback a sync operation
4219 @param[in,out] sync sync state */
4220 static
4221 void
fts_sync_rollback(fts_sync_t * sync)4222 fts_sync_rollback(
4223 fts_sync_t* sync)
4224 {
4225 trx_t* trx = sync->trx;
4226 fts_cache_t* cache = sync->table->fts->cache;
4227
4228 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4229 ulint j;
4230 fts_index_cache_t* index_cache;
4231
4232 index_cache = static_cast<fts_index_cache_t*>(
4233 ib_vector_get(cache->indexes, i));
4234
4235 /* Reset synced flag so nodes will not be skipped
4236 in the next sync, see fts_sync_write_words(). */
4237 fts_sync_index_reset(index_cache);
4238
4239 for (j = 0; fts_index_selector[j].value; ++j) {
4240
4241 if (index_cache->ins_graph[j] != NULL) {
4242
4243 fts_que_graph_free_check_lock(
4244 NULL, index_cache,
4245 index_cache->ins_graph[j]);
4246
4247 index_cache->ins_graph[j] = NULL;
4248 }
4249
4250 if (index_cache->sel_graph[j] != NULL) {
4251
4252 fts_que_graph_free_check_lock(
4253 NULL, index_cache,
4254 index_cache->sel_graph[j]);
4255
4256 index_cache->sel_graph[j] = NULL;
4257 }
4258 }
4259 }
4260
4261 rw_lock_x_unlock(&cache->lock);
4262
4263 fts_sql_rollback(trx);
4264
4265 /* Avoid assertion in trx_t::free(). */
4266 trx->dict_operation_lock_mode = 0;
4267 trx->free();
4268 }
4269
4270 /** Run SYNC on the table, i.e., write out data from the cache to the
4271 FTS auxiliary INDEX table and clear the cache at the end.
4272 @param[in,out] sync sync state
4273 @param[in] unlock_cache whether unlock cache lock when write node
4274 @param[in] wait whether wait when a sync is in progress
4275 @return DB_SUCCESS if all OK */
4276 static
4277 dberr_t
fts_sync(fts_sync_t * sync,bool unlock_cache,bool wait)4278 fts_sync(
4279 fts_sync_t* sync,
4280 bool unlock_cache,
4281 bool wait)
4282 {
4283 if (srv_read_only_mode) {
4284 return DB_READ_ONLY;
4285 }
4286
4287 ulint i;
4288 dberr_t error = DB_SUCCESS;
4289 fts_cache_t* cache = sync->table->fts->cache;
4290
4291 rw_lock_x_lock(&cache->lock);
4292
4293 /* Check if cache is being synced.
4294 Note: we release cache lock in fts_sync_write_words() to
4295 avoid long wait for the lock by other threads. */
4296 while (sync->in_progress) {
4297 rw_lock_x_unlock(&cache->lock);
4298
4299 if (wait) {
4300 os_event_wait(sync->event);
4301 } else {
4302 return(DB_SUCCESS);
4303 }
4304
4305 rw_lock_x_lock(&cache->lock);
4306 }
4307
4308 sync->unlock_cache = unlock_cache;
4309 sync->in_progress = true;
4310
4311 DEBUG_SYNC_C("fts_sync_begin");
4312 fts_sync_begin(sync);
4313
4314 begin_sync:
4315 if (cache->total_size > fts_max_cache_size) {
4316 /* Avoid the case: sync never finish when
4317 insert/update keeps comming. */
4318 ut_ad(sync->unlock_cache);
4319 sync->unlock_cache = false;
4320 }
4321
4322 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4323 fts_index_cache_t* index_cache;
4324
4325 index_cache = static_cast<fts_index_cache_t*>(
4326 ib_vector_get(cache->indexes, i));
4327
4328 if (index_cache->index->to_be_dropped
4329 || index_cache->index->table->to_be_dropped) {
4330 continue;
4331 }
4332
4333 DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
4334 os_thread_sleep(300000););
4335 index_cache->index->index_fts_syncing = true;
4336
4337 error = fts_sync_index(sync, index_cache);
4338
4339 if (error != DB_SUCCESS) {
4340 goto end_sync;
4341 }
4342 }
4343
4344 DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
4345 sync->interrupted = true;
4346 error = DB_INTERRUPTED;
4347 goto end_sync;
4348 );
4349
4350 /* Make sure all the caches are synced. */
4351 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4352 fts_index_cache_t* index_cache;
4353
4354 index_cache = static_cast<fts_index_cache_t*>(
4355 ib_vector_get(cache->indexes, i));
4356
4357 if (index_cache->index->to_be_dropped
4358 || index_cache->index->table->to_be_dropped
4359 || fts_sync_index_check(index_cache)) {
4360 continue;
4361 }
4362
4363 goto begin_sync;
4364 }
4365
4366 end_sync:
4367 if (error == DB_SUCCESS && !sync->interrupted) {
4368 error = fts_sync_commit(sync);
4369 } else {
4370 fts_sync_rollback(sync);
4371 }
4372
4373 rw_lock_x_lock(&cache->lock);
4374 /* Clear fts syncing flags of any indexes in case sync is
4375 interrupted */
4376 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4377 static_cast<fts_index_cache_t*>(
4378 ib_vector_get(cache->indexes, i))
4379 ->index->index_fts_syncing = false;
4380 }
4381
4382 sync->interrupted = false;
4383 sync->in_progress = false;
4384 os_event_set(sync->event);
4385 rw_lock_x_unlock(&cache->lock);
4386
4387 /* We need to check whether an optimize is required, for that
4388 we make copies of the two variables that control the trigger. These
4389 variables can change behind our back and we don't want to hold the
4390 lock for longer than is needed. */
4391 mutex_enter(&cache->deleted_lock);
4392
4393 cache->added = 0;
4394 cache->deleted = 0;
4395
4396 mutex_exit(&cache->deleted_lock);
4397
4398 return(error);
4399 }
4400
4401 /** Run SYNC on the table, i.e., write out data from the cache to the
4402 FTS auxiliary INDEX table and clear the cache at the end.
4403 @param[in,out] table fts table
4404 @param[in] wait whether wait for existing sync to finish
4405 @return DB_SUCCESS on success, error code on failure. */
fts_sync_table(dict_table_t * table,bool wait)4406 dberr_t fts_sync_table(dict_table_t* table, bool wait)
4407 {
4408 dberr_t err = DB_SUCCESS;
4409
4410 ut_ad(table->fts);
4411
4412 if (table->space && table->fts->cache
4413 && !dict_table_is_corrupted(table)) {
4414 err = fts_sync(table->fts->cache->sync, !wait, wait);
4415 }
4416
4417 return(err);
4418 }
4419
4420 /** Check if a fts token is a stopword or less than fts_min_token_size
4421 or greater than fts_max_token_size.
4422 @param[in] token token string
4423 @param[in] stopwords stopwords rb tree
4424 @param[in] cs token charset
4425 @retval true if it is not stopword and length in range
4426 @retval false if it is stopword or lenght not in range */
4427 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,const CHARSET_INFO * cs)4428 fts_check_token(
4429 const fts_string_t* token,
4430 const ib_rbt_t* stopwords,
4431 const CHARSET_INFO* cs)
4432 {
4433 ut_ad(cs != NULL || stopwords == NULL);
4434
4435 ib_rbt_bound_t parent;
4436
4437 return(token->f_n_char >= fts_min_token_size
4438 && token->f_n_char <= fts_max_token_size
4439 && (stopwords == NULL
4440 || rbt_search(stopwords, &parent, token) != 0));
4441 }
4442
4443 /** Add the token and its start position to the token's list of positions.
4444 @param[in,out] result_doc result doc rb tree
4445 @param[in] str token string
4446 @param[in] position token position */
4447 static
4448 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)4449 fts_add_token(
4450 fts_doc_t* result_doc,
4451 fts_string_t str,
4452 ulint position)
4453 {
4454 /* Ignore string whose character number is less than
4455 "fts_min_token_size" or more than "fts_max_token_size" */
4456
4457 if (fts_check_token(&str, NULL, result_doc->charset)) {
4458
4459 mem_heap_t* heap;
4460 fts_string_t t_str;
4461 fts_token_t* token;
4462 ib_rbt_bound_t parent;
4463 ulint newlen;
4464
4465 heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
4466
4467 t_str.f_n_char = str.f_n_char;
4468
4469 t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
4470
4471 t_str.f_str = static_cast<byte*>(
4472 mem_heap_alloc(heap, t_str.f_len));
4473
4474 /* For binary collations, a case sensitive search is
4475 performed. Hence don't convert to lower case. */
4476 if (my_binary_compare(result_doc->charset)) {
4477 memcpy(t_str.f_str, str.f_str, str.f_len);
4478 t_str.f_str[str.f_len]= 0;
4479 newlen= str.f_len;
4480 } else {
4481 newlen = innobase_fts_casedn_str(
4482 result_doc->charset, (char*) str.f_str, str.f_len,
4483 (char*) t_str.f_str, t_str.f_len);
4484 }
4485
4486 t_str.f_len = newlen;
4487 t_str.f_str[newlen] = 0;
4488
4489 /* Add the word to the document statistics. If the word
4490 hasn't been seen before we create a new entry for it. */
4491 if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4492 fts_token_t new_token;
4493
4494 new_token.text.f_len = newlen;
4495 new_token.text.f_str = t_str.f_str;
4496 new_token.text.f_n_char = t_str.f_n_char;
4497
4498 new_token.positions = ib_vector_create(
4499 result_doc->self_heap, sizeof(ulint), 32);
4500
4501 parent.last = rbt_add_node(
4502 result_doc->tokens, &parent, &new_token);
4503
4504 ut_ad(rbt_validate(result_doc->tokens));
4505 }
4506
4507 token = rbt_value(fts_token_t, parent.last);
4508 ib_vector_push(token->positions, &position);
4509 }
4510 }
4511
4512 /********************************************************************
4513 Process next token from document starting at the given position, i.e., add
4514 the token's start position to the token's list of positions.
4515 @return number of characters handled in this call */
4516 static
4517 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)4518 fts_process_token(
4519 /*==============*/
4520 fts_doc_t* doc, /* in/out: document to
4521 tokenize */
4522 fts_doc_t* result, /* out: if provided, save
4523 result here */
4524 ulint start_pos, /*!< in: start position in text */
4525 ulint add_pos) /*!< in: add this position to all
4526 tokens from this tokenization */
4527 {
4528 ulint ret;
4529 fts_string_t str;
4530 ulint position;
4531 fts_doc_t* result_doc;
4532 byte buf[FTS_MAX_WORD_LEN + 1];
4533
4534 str.f_str = buf;
4535
4536 /* Determine where to save the result. */
4537 result_doc = (result != NULL) ? result : doc;
4538
4539 /* The length of a string in characters is set here only. */
4540
4541 ret = innobase_mysql_fts_get_token(
4542 doc->charset, doc->text.f_str + start_pos,
4543 doc->text.f_str + doc->text.f_len, &str);
4544
4545 position = start_pos + ret - str.f_len + add_pos;
4546
4547 fts_add_token(result_doc, str, position);
4548
4549 return(ret);
4550 }
4551
4552 /*************************************************************//**
4553 Get token char size by charset
4554 @return token size */
4555 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)4556 fts_get_token_size(
4557 /*===============*/
4558 const CHARSET_INFO* cs, /*!< in: Character set */
4559 const char* token, /*!< in: token */
4560 ulint len) /*!< in: token length */
4561 {
4562 char* start;
4563 char* end;
4564 ulint size = 0;
4565
4566 /* const_cast is for reinterpret_cast below, or it will fail. */
4567 start = const_cast<char*>(token);
4568 end = start + len;
4569 while (start < end) {
4570 int ctype;
4571 int mbl;
4572
4573 mbl = cs->cset->ctype(
4574 cs, &ctype,
4575 reinterpret_cast<uchar*>(start),
4576 reinterpret_cast<uchar*>(end));
4577
4578 size++;
4579
4580 start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
4581 }
4582
4583 return(size);
4584 }
4585
4586 /*************************************************************//**
4587 FTS plugin parser 'myql_parser' callback function for document tokenize.
4588 Refer to 'st_mysql_ftparser_param' for more detail.
4589 @return always returns 0 */
4590 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,const char * doc,int len)4591 fts_tokenize_document_internal(
4592 /*===========================*/
4593 MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
4594 const char* doc,/*!< in/out: document */
4595 int len) /*!< in: document length */
4596 {
4597 fts_string_t str;
4598 byte buf[FTS_MAX_WORD_LEN + 1];
4599 /* JAN: TODO: MySQL 5.7
4600 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4601 { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
4602 */
4603 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4604 { FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0};
4605
4606 ut_ad(len >= 0);
4607
4608 str.f_str = buf;
4609
4610 for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
4611 inc = innobase_mysql_fts_get_token(
4612 const_cast<CHARSET_INFO*>(param->cs),
4613 (uchar*)(doc) + i,
4614 (uchar*)(doc) + len,
4615 &str);
4616
4617 if (str.f_len > 0) {
4618 /* JAN: TODO: MySQL 5.7
4619 bool_info.position =
4620 static_cast<int>(i + inc - str.f_len);
4621 ut_ad(bool_info.position >= 0);
4622 */
4623
4624 /* Stop when add word fails */
4625 if (param->mysql_add_word(
4626 param,
4627 reinterpret_cast<char*>(str.f_str),
4628 static_cast<int>(str.f_len),
4629 &bool_info)) {
4630 break;
4631 }
4632 }
4633 }
4634
4635 return(0);
4636 }
4637
4638 /******************************************************************//**
4639 FTS plugin parser 'myql_add_word' callback function for document tokenize.
4640 Refer to 'st_mysql_ftparser_param' for more detail.
4641 @return always returns 0 */
4642 static
4643 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO *)4644 fts_tokenize_add_word_for_parser(
4645 /*=============================*/
4646 MYSQL_FTPARSER_PARAM* param, /* in: parser paramter */
4647 const char* word, /* in: token word */
4648 int word_len, /* in: word len */
4649 MYSQL_FTPARSER_BOOLEAN_INFO*)
4650 {
4651 fts_string_t str;
4652 fts_tokenize_param_t* fts_param;
4653 fts_doc_t* result_doc;
4654 ulint position;
4655
4656 fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
4657 result_doc = fts_param->result_doc;
4658 ut_ad(result_doc != NULL);
4659
4660 str.f_str = (byte*)(word);
4661 str.f_len = ulint(word_len);
4662 str.f_n_char = fts_get_token_size(
4663 const_cast<CHARSET_INFO*>(param->cs), word, str.f_len);
4664
4665 /* JAN: TODO: MySQL 5.7 FTS
4666 ut_ad(boolean_info->position >= 0);
4667 position = boolean_info->position + fts_param->add_pos;
4668 */
4669 position = fts_param->add_pos++;
4670
4671 fts_add_token(result_doc, str, position);
4672
4673 return(0);
4674 }
4675
4676 /******************************************************************//**
4677 Parse a document using an external / user supplied parser */
4678 static
4679 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)4680 fts_tokenize_by_parser(
4681 /*===================*/
4682 fts_doc_t* doc, /* in/out: document to tokenize */
4683 st_mysql_ftparser* parser, /* in: plugin fts parser */
4684 fts_tokenize_param_t* fts_param) /* in: fts tokenize param */
4685 {
4686 MYSQL_FTPARSER_PARAM param;
4687
4688 ut_a(parser);
4689
4690 /* Set paramters for param */
4691 param.mysql_parse = fts_tokenize_document_internal;
4692 param.mysql_add_word = fts_tokenize_add_word_for_parser;
4693 param.mysql_ftparam = fts_param;
4694 param.cs = doc->charset;
4695 param.doc = reinterpret_cast<char*>(doc->text.f_str);
4696 param.length = static_cast<int>(doc->text.f_len);
4697 param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
4698
4699 PARSER_INIT(parser, ¶m);
4700 parser->parse(¶m);
4701 PARSER_DEINIT(parser, ¶m);
4702 }
4703
4704 /** Tokenize a document.
4705 @param[in,out] doc document to tokenize
4706 @param[out] result tokenization result
4707 @param[in] parser pluggable parser */
4708 static
4709 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)4710 fts_tokenize_document(
4711 fts_doc_t* doc,
4712 fts_doc_t* result,
4713 st_mysql_ftparser* parser)
4714 {
4715 ut_a(!doc->tokens);
4716 ut_a(doc->charset);
4717
4718 doc->tokens = rbt_create_arg_cmp(sizeof(fts_token_t),
4719 innobase_fts_text_cmp,
4720 (void*) doc->charset);
4721
4722 if (parser != NULL) {
4723 fts_tokenize_param_t fts_param;
4724 fts_param.result_doc = (result != NULL) ? result : doc;
4725 fts_param.add_pos = 0;
4726
4727 fts_tokenize_by_parser(doc, parser, &fts_param);
4728 } else {
4729 ulint inc;
4730
4731 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4732 inc = fts_process_token(doc, result, i, 0);
4733 ut_a(inc > 0);
4734 }
4735 }
4736 }
4737
4738 /** Continue to tokenize a document.
4739 @param[in,out] doc document to tokenize
4740 @param[in] add_pos add this position to all tokens from this tokenization
4741 @param[out] result tokenization result
4742 @param[in] parser pluggable parser */
4743 static
4744 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)4745 fts_tokenize_document_next(
4746 fts_doc_t* doc,
4747 ulint add_pos,
4748 fts_doc_t* result,
4749 st_mysql_ftparser* parser)
4750 {
4751 ut_a(doc->tokens);
4752
4753 if (parser) {
4754 fts_tokenize_param_t fts_param;
4755
4756 fts_param.result_doc = (result != NULL) ? result : doc;
4757 fts_param.add_pos = add_pos;
4758
4759 fts_tokenize_by_parser(doc, parser, &fts_param);
4760 } else {
4761 ulint inc;
4762
4763 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4764 inc = fts_process_token(doc, result, i, add_pos);
4765 ut_a(inc > 0);
4766 }
4767 }
4768 }
4769
4770 /** Create the vector of fts_get_doc_t instances.
4771 @param[in,out] cache fts cache
4772 @return vector of fts_get_doc_t instances */
4773 static
4774 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)4775 fts_get_docs_create(
4776 fts_cache_t* cache)
4777 {
4778 ib_vector_t* get_docs;
4779
4780 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
4781
4782 /* We need one instance of fts_get_doc_t per index. */
4783 get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
4784
4785 /* Create the get_doc instance, we need one of these
4786 per FTS index. */
4787 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4788
4789 dict_index_t** index;
4790 fts_get_doc_t* get_doc;
4791
4792 index = static_cast<dict_index_t**>(
4793 ib_vector_get(cache->indexes, i));
4794
4795 get_doc = static_cast<fts_get_doc_t*>(
4796 ib_vector_push(get_docs, NULL));
4797
4798 memset(get_doc, 0x0, sizeof(*get_doc));
4799
4800 get_doc->index_cache = fts_get_index_cache(cache, *index);
4801 get_doc->cache = cache;
4802
4803 /* Must find the index cache. */
4804 ut_a(get_doc->index_cache != NULL);
4805 }
4806
4807 return(get_docs);
4808 }
4809
4810 /********************************************************************
4811 Release any resources held by the fts_get_doc_t instances. */
4812 static
4813 void
fts_get_docs_clear(ib_vector_t * get_docs)4814 fts_get_docs_clear(
4815 /*===============*/
4816 ib_vector_t* get_docs) /*!< in: Doc retrieval vector */
4817 {
4818 ulint i;
4819
4820 /* Release the get doc graphs if any. */
4821 for (i = 0; i < ib_vector_size(get_docs); ++i) {
4822
4823 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(
4824 ib_vector_get(get_docs, i));
4825
4826 if (get_doc->get_document_graph != NULL) {
4827
4828 ut_a(get_doc->index_cache);
4829
4830 fts_que_graph_free(get_doc->get_document_graph);
4831 get_doc->get_document_graph = NULL;
4832 }
4833 }
4834 }
4835
4836 /*********************************************************************//**
4837 Get the initial Doc ID by consulting the CONFIG table
4838 @return initial Doc ID */
4839 doc_id_t
fts_init_doc_id(const dict_table_t * table)4840 fts_init_doc_id(
4841 /*============*/
4842 const dict_table_t* table) /*!< in: table */
4843 {
4844 doc_id_t max_doc_id = 0;
4845
4846 rw_lock_x_lock(&table->fts->cache->lock);
4847
4848 /* Return if the table is already initialized for DOC ID */
4849 if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
4850 rw_lock_x_unlock(&table->fts->cache->lock);
4851 return(0);
4852 }
4853
4854 DEBUG_SYNC_C("fts_initialize_doc_id");
4855
4856 /* Then compare this value with the ID value stored in the CONFIG
4857 table. The larger one will be our new initial Doc ID */
4858 fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
4859
4860 /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
4861 creating index (and add doc id column. No need to recovery
4862 documents */
4863 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
4864 fts_init_index((dict_table_t*) table, TRUE);
4865 }
4866
4867 table->fts->added_synced = true;
4868
4869 table->fts->cache->first_doc_id = max_doc_id;
4870
4871 rw_lock_x_unlock(&table->fts->cache->lock);
4872
4873 ut_ad(max_doc_id > 0);
4874
4875 return(max_doc_id);
4876 }
4877
#ifdef FTS_MULT_INDEX
/*********************************************************************//**
Check if the index of a document reader is in the affected set.
@return TRUE if index is updated */
static
ibool
fts_is_index_updated(
/*=================*/
	const ib_vector_t*	fts_indexes,	/*!< in: affected FTS indexes */
	const fts_get_doc_t*	get_doc)	/*!< in: info for reading
						document */
{
	const dict_index_t*	wanted = get_doc->index_cache->index;
	const ulint		n_indexes = ib_vector_size(fts_indexes);

	for (ulint pos = 0; pos < n_indexes; ++pos) {
		const dict_index_t*	candidate
			= static_cast<const dict_index_t*>(
				ib_vector_getp_const(fts_indexes, pos));

		ut_a(candidate != NULL);

		if (candidate == wanted) {
			return(TRUE);
		}
	}

	return(FALSE);
}
#endif
4909
4910 /*********************************************************************//**
4911 Fetch COUNT(*) from specified table.
4912 @return the number of rows in the table */
4913 ulint
fts_get_rows_count(fts_table_t * fts_table)4914 fts_get_rows_count(
4915 /*===============*/
4916 fts_table_t* fts_table) /*!< in: fts table to read */
4917 {
4918 trx_t* trx;
4919 pars_info_t* info;
4920 que_t* graph;
4921 dberr_t error;
4922 ulint count = 0;
4923 char table_name[MAX_FULL_NAME_LEN];
4924
4925 trx = trx_create();
4926 trx->op_info = "fetching FT table rows count";
4927
4928 info = pars_info_create();
4929
4930 pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
4931
4932 fts_get_table_name(fts_table, table_name);
4933 pars_info_bind_id(info, "table_name", table_name);
4934
4935 graph = fts_parse_sql(
4936 fts_table,
4937 info,
4938 "DECLARE FUNCTION my_func;\n"
4939 "DECLARE CURSOR c IS"
4940 " SELECT COUNT(*)"
4941 " FROM $table_name;\n"
4942 "BEGIN\n"
4943 "\n"
4944 "OPEN c;\n"
4945 "WHILE 1 = 1 LOOP\n"
4946 " FETCH c INTO my_func();\n"
4947 " IF c % NOTFOUND THEN\n"
4948 " EXIT;\n"
4949 " END IF;\n"
4950 "END LOOP;\n"
4951 "CLOSE c;");
4952
4953 for (;;) {
4954 error = fts_eval_sql(trx, graph);
4955
4956 if (UNIV_LIKELY(error == DB_SUCCESS)) {
4957 fts_sql_commit(trx);
4958
4959 break; /* Exit the loop. */
4960 } else {
4961 fts_sql_rollback(trx);
4962
4963 if (error == DB_LOCK_WAIT_TIMEOUT) {
4964 ib::warn() << "lock wait timeout reading"
4965 " FTS table. Retrying!";
4966
4967 trx->error_state = DB_SUCCESS;
4968 } else {
4969 ib::error() << "(" << error
4970 << ") while reading FTS table "
4971 << table_name;
4972
4973 break; /* Exit the loop. */
4974 }
4975 }
4976 }
4977
4978 fts_que_graph_free(graph);
4979
4980 trx->free();
4981
4982 return(count);
4983 }
4984
#ifdef FTS_CACHE_SIZE_DEBUG
/*********************************************************************//**
Read the max cache size parameter from the config table and store it in
the sync state. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync)			/*!< in: sync state */
{
	trx_t*		config_trx = trx_create();
	fts_table_t	config_table;

	FTS_INIT_FTS_TABLE(&config_table, "CONFIG", FTS_COMMON_TABLE,
			   sync->table);

	/* The size returned is in bytes. */
	sync->max_cache_size = fts_get_max_cache_size(
		config_trx, &config_table);

	fts_sql_commit(config_trx);

	config_trx->free();
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5009
5010 /*********************************************************************//**
5011 Free the modified rows of a table. */
5012 UNIV_INLINE
5013 void
fts_trx_table_rows_free(ib_rbt_t * rows)5014 fts_trx_table_rows_free(
5015 /*====================*/
5016 ib_rbt_t* rows) /*!< in: rbt of rows to free */
5017 {
5018 const ib_rbt_node_t* node;
5019
5020 for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5021 fts_trx_row_t* row;
5022
5023 row = rbt_value(fts_trx_row_t, node);
5024
5025 if (row->fts_indexes != NULL) {
5026 /* This vector shouldn't be using the
5027 heap allocator. */
5028 ut_a(row->fts_indexes->allocator->arg == NULL);
5029
5030 ib_vector_free(row->fts_indexes);
5031 row->fts_indexes = NULL;
5032 }
5033
5034 ut_free(rbt_remove_node(rows, node));
5035 }
5036
5037 ut_a(rbt_empty(rows));
5038 rbt_free(rows);
5039 }
5040
5041 /*********************************************************************//**
5042 Free an FTS savepoint instance. */
5043 UNIV_INLINE
5044 void
fts_savepoint_free(fts_savepoint_t * savepoint)5045 fts_savepoint_free(
5046 /*===============*/
5047 fts_savepoint_t* savepoint) /*!< in: savepoint instance */
5048 {
5049 const ib_rbt_node_t* node;
5050 ib_rbt_t* tables = savepoint->tables;
5051
5052 /* Nothing to free! */
5053 if (tables == NULL) {
5054 return;
5055 }
5056
5057 for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5058 fts_trx_table_t* ftt;
5059 fts_trx_table_t** fttp;
5060
5061 fttp = rbt_value(fts_trx_table_t*, node);
5062 ftt = *fttp;
5063
5064 /* This can be NULL if a savepoint was released. */
5065 if (ftt->rows != NULL) {
5066 fts_trx_table_rows_free(ftt->rows);
5067 ftt->rows = NULL;
5068 }
5069
5070 /* This can be NULL if a savepoint was released. */
5071 if (ftt->added_doc_ids != NULL) {
5072 fts_doc_ids_free(ftt->added_doc_ids);
5073 ftt->added_doc_ids = NULL;
5074 }
5075
5076 /* The default savepoint name must be NULL. */
5077 if (ftt->docs_added_graph) {
5078 fts_que_graph_free(ftt->docs_added_graph);
5079 }
5080
5081 /* NOTE: We are responsible for free'ing the node */
5082 ut_free(rbt_remove_node(tables, node));
5083 }
5084
5085 ut_a(rbt_empty(tables));
5086 rbt_free(tables);
5087 savepoint->tables = NULL;
5088 }
5089
5090 /*********************************************************************//**
5091 Free an FTS trx. */
5092 void
fts_trx_free(fts_trx_t * fts_trx)5093 fts_trx_free(
5094 /*=========*/
5095 fts_trx_t* fts_trx) /* in, own: FTS trx */
5096 {
5097 ulint i;
5098
5099 for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5100 fts_savepoint_t* savepoint;
5101
5102 savepoint = static_cast<fts_savepoint_t*>(
5103 ib_vector_get(fts_trx->savepoints, i));
5104
5105 /* The default savepoint name must be NULL. */
5106 if (i == 0) {
5107 ut_a(savepoint->name == NULL);
5108 }
5109
5110 fts_savepoint_free(savepoint);
5111 }
5112
5113 for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5114 fts_savepoint_t* savepoint;
5115
5116 savepoint = static_cast<fts_savepoint_t*>(
5117 ib_vector_get(fts_trx->last_stmt, i));
5118
5119 /* The default savepoint name must be NULL. */
5120 if (i == 0) {
5121 ut_a(savepoint->name == NULL);
5122 }
5123
5124 fts_savepoint_free(savepoint);
5125 }
5126
5127 if (fts_trx->heap) {
5128 mem_heap_free(fts_trx->heap);
5129 }
5130 }
5131
5132 /*********************************************************************//**
5133 Extract the doc id from the FTS hidden column.
5134 @return doc id that was extracted from rec */
5135 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5136 fts_get_doc_id_from_row(
5137 /*====================*/
5138 dict_table_t* table, /*!< in: table */
5139 dtuple_t* row) /*!< in: row whose FTS doc id we
5140 want to extract.*/
5141 {
5142 dfield_t* field;
5143 doc_id_t doc_id = 0;
5144
5145 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5146
5147 field = dtuple_get_nth_field(row, table->fts->doc_col);
5148
5149 ut_a(dfield_get_len(field) == sizeof(doc_id));
5150 ut_a(dfield_get_type(field)->mtype == DATA_INT);
5151
5152 doc_id = fts_read_doc_id(
5153 static_cast<const byte*>(dfield_get_data(field)));
5154
5155 return(doc_id);
5156 }
5157
5158 /** Extract the doc id from the record that belongs to index.
5159 @param[in] table table
5160 @param[in] rec record contains FTS_DOC_ID
5161 @param[in] index index of rec
5162 @param[in] heap heap memory
5163 @return doc id that was extracted from rec */
5164 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5165 fts_get_doc_id_from_rec(
5166 dict_table_t* table,
5167 const rec_t* rec,
5168 const dict_index_t* index,
5169 mem_heap_t* heap)
5170 {
5171 ulint len;
5172 const byte* data;
5173 ulint col_no;
5174 doc_id_t doc_id = 0;
5175 rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
5176 rec_offs* offsets = offsets_;
5177 mem_heap_t* my_heap = heap;
5178
5179 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5180
5181 rec_offs_init(offsets_);
5182
5183 offsets = rec_get_offsets(
5184 rec, index, offsets, index->n_core_fields,
5185 ULINT_UNDEFINED, &my_heap);
5186
5187 col_no = dict_col_get_index_pos(
5188 &table->cols[table->fts->doc_col], index);
5189
5190 ut_ad(col_no != ULINT_UNDEFINED);
5191
5192 data = rec_get_nth_field(rec, offsets, col_no, &len);
5193
5194 ut_a(len == 8);
5195 ut_ad(8 == sizeof(doc_id));
5196 doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5197
5198 if (my_heap && !heap) {
5199 mem_heap_free(my_heap);
5200 }
5201
5202 return(doc_id);
5203 }
5204
5205 /*********************************************************************//**
5206 Search the index specific cache for a particular FTS index.
5207 @return the index specific cache else NULL */
5208 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5209 fts_find_index_cache(
5210 /*=================*/
5211 const fts_cache_t* cache, /*!< in: cache to search */
5212 const dict_index_t* index) /*!< in: index to search for */
5213 {
5214 /* We cast away the const because our internal function, takes
5215 non-const cache arg and returns a non-const pointer. */
5216 return(static_cast<fts_index_cache_t*>(
5217 fts_get_index_cache((fts_cache_t*) cache, index)));
5218 }
5219
5220 /*********************************************************************//**
5221 Search cache for word.
5222 @return the word node vector if found else NULL */
5223 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5224 fts_cache_find_word(
5225 /*================*/
5226 const fts_index_cache_t*index_cache, /*!< in: cache to search */
5227 const fts_string_t* text) /*!< in: word to search for */
5228 {
5229 ib_rbt_bound_t parent;
5230 const ib_vector_t* nodes = NULL;
5231 #ifdef UNIV_DEBUG
5232 dict_table_t* table = index_cache->index->table;
5233 fts_cache_t* cache = table->fts->cache;
5234
5235 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5236 #endif /* UNIV_DEBUG */
5237
5238 /* Lookup the word in the rb tree */
5239 if (rbt_search(index_cache->words, &parent, text) == 0) {
5240 const fts_tokenizer_word_t* word;
5241
5242 word = rbt_value(fts_tokenizer_word_t, parent.last);
5243
5244 nodes = word->nodes;
5245 }
5246
5247 return(nodes);
5248 }
5249
5250 /*********************************************************************//**
5251 Append deleted doc ids to vector. */
5252 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5253 fts_cache_append_deleted_doc_ids(
5254 /*=============================*/
5255 const fts_cache_t* cache, /*!< in: cache to use */
5256 ib_vector_t* vector) /*!< in: append to this vector */
5257 {
5258 mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5259
5260 if (cache->deleted_doc_ids == NULL) {
5261 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5262 return;
5263 }
5264
5265
5266 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5267 doc_id_t* update;
5268
5269 update = static_cast<doc_id_t*>(
5270 ib_vector_get(cache->deleted_doc_ids, i));
5271
5272 ib_vector_push(vector, &update);
5273 }
5274
5275 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5276 }
5277
5278 /*********************************************************************//**
5279 Add the FTS document id hidden column. */
5280 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5281 fts_add_doc_id_column(
5282 /*==================*/
5283 dict_table_t* table, /*!< in/out: Table with FTS index */
5284 mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
5285 {
5286 dict_mem_table_add_col(
5287 table, heap,
5288 FTS_DOC_ID_COL_NAME,
5289 DATA_INT,
5290 dtype_form_prtype(
5291 DATA_NOT_NULL | DATA_UNSIGNED
5292 | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5293 sizeof(doc_id_t));
5294 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5295 }
5296
5297 /** Add new fts doc id to the update vector.
5298 @param[in] table the table that contains the FTS index.
5299 @param[in,out] ufield the fts doc id field in the update vector.
5300 No new memory is allocated for this in this
5301 function.
5302 @param[in,out] next_doc_id the fts doc id that has been added to the
5303 update vector. If 0, a new fts doc id is
5304 automatically generated. The memory provided
5305 for this argument will be used by the update
5306 vector. Ensure that the life time of this
5307 memory matches that of the update vector.
5308 @return the fts doc id used in the update vector */
5309 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5310 fts_update_doc_id(
5311 dict_table_t* table,
5312 upd_field_t* ufield,
5313 doc_id_t* next_doc_id)
5314 {
5315 doc_id_t doc_id;
5316 dberr_t error = DB_SUCCESS;
5317
5318 if (*next_doc_id) {
5319 doc_id = *next_doc_id;
5320 } else {
5321 /* Get the new document id that will be added. */
5322 error = fts_get_next_doc_id(table, &doc_id);
5323 }
5324
5325 if (error == DB_SUCCESS) {
5326 dict_index_t* clust_index;
5327 dict_col_t* col = dict_table_get_nth_col(
5328 table, table->fts->doc_col);
5329
5330 ufield->exp = NULL;
5331
5332 ufield->new_val.len = sizeof(doc_id);
5333
5334 clust_index = dict_table_get_first_index(table);
5335
5336 ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5337 dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5338
5339 /* It is possible we update record that has
5340 not yet be sync-ed from last crash. */
5341
5342 /* Convert to storage byte order. */
5343 ut_a(doc_id != FTS_NULL_DOC_ID);
5344 fts_write_doc_id((byte*) next_doc_id, doc_id);
5345
5346 ufield->new_val.data = next_doc_id;
5347 ufield->new_val.ext = 0;
5348 }
5349
5350 return(doc_id);
5351 }
5352
5353 /** fts_t constructor.
5354 @param[in] table table with FTS indexes
5355 @param[in,out] heap memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)5356 fts_t::fts_t(
5357 const dict_table_t* table,
5358 mem_heap_t* heap)
5359 :
5360 added_synced(0), dict_locked(0),
5361 add_wq(NULL),
5362 cache(NULL),
5363 doc_col(ULINT_UNDEFINED), in_queue(false),
5364 fts_heap(heap)
5365 {
5366 ut_a(table->fts == NULL);
5367
5368 ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap);
5369
5370 indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
5371
5372 dict_table_get_all_fts_indexes(table, indexes);
5373 }
5374
5375 /** fts_t destructor. */
~fts_t()5376 fts_t::~fts_t()
5377 {
5378 ut_ad(add_wq == NULL);
5379
5380 if (cache != NULL) {
5381 fts_cache_clear(cache);
5382 fts_cache_destroy(cache);
5383 cache = NULL;
5384 }
5385
5386 /* There is no need to call ib_vector_free() on this->indexes
5387 because it is stored in this->fts_heap. */
5388 }
5389
5390 /*********************************************************************//**
5391 Create an instance of fts_t.
5392 @return instance of fts_t */
5393 fts_t*
fts_create(dict_table_t * table)5394 fts_create(
5395 /*=======*/
5396 dict_table_t* table) /*!< in/out: table with FTS indexes */
5397 {
5398 fts_t* fts;
5399 mem_heap_t* heap;
5400
5401 heap = mem_heap_create(512);
5402
5403 fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
5404
5405 new(fts) fts_t(table, heap);
5406
5407 return(fts);
5408 }
5409
5410 /*********************************************************************//**
5411 Free the FTS resources. */
5412 void
fts_free(dict_table_t * table)5413 fts_free(
5414 /*=====*/
5415 dict_table_t* table) /*!< in/out: table with FTS indexes */
5416 {
5417 fts_t* fts = table->fts;
5418
5419 fts->~fts_t();
5420
5421 mem_heap_free(fts->fts_heap);
5422
5423 table->fts = NULL;
5424 }
5425
5426 /*********************************************************************//**
5427 Take a FTS savepoint. */
5428 UNIV_INLINE
5429 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)5430 fts_savepoint_copy(
5431 /*===============*/
5432 const fts_savepoint_t* src, /*!< in: source savepoint */
5433 fts_savepoint_t* dst) /*!< out: destination savepoint */
5434 {
5435 const ib_rbt_node_t* node;
5436 const ib_rbt_t* tables;
5437
5438 tables = src->tables;
5439
5440 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5441
5442 fts_trx_table_t* ftt_dst;
5443 const fts_trx_table_t** ftt_src;
5444
5445 ftt_src = rbt_value(const fts_trx_table_t*, node);
5446
5447 ftt_dst = fts_trx_table_clone(*ftt_src);
5448
5449 rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5450 }
5451 }
5452
5453 /*********************************************************************//**
5454 Take a FTS savepoint. */
5455 void
fts_savepoint_take(fts_trx_t * fts_trx,const char * name)5456 fts_savepoint_take(
5457 /*===============*/
5458 fts_trx_t* fts_trx, /*!< in: fts transaction */
5459 const char* name) /*!< in: savepoint name */
5460 {
5461 mem_heap_t* heap;
5462 fts_savepoint_t* savepoint;
5463 fts_savepoint_t* last_savepoint;
5464
5465 ut_a(name != NULL);
5466
5467 heap = fts_trx->heap;
5468
5469 /* The implied savepoint must exist. */
5470 ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5471
5472 last_savepoint = static_cast<fts_savepoint_t*>(
5473 ib_vector_last(fts_trx->savepoints));
5474 savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5475
5476 if (last_savepoint->tables != NULL) {
5477 fts_savepoint_copy(last_savepoint, savepoint);
5478 }
5479 }
5480
5481 /*********************************************************************//**
5482 Lookup a savepoint instance by name.
5483 @return ULINT_UNDEFINED if not found */
5484 UNIV_INLINE
5485 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)5486 fts_savepoint_lookup(
5487 /*==================*/
5488 ib_vector_t* savepoints, /*!< in: savepoints */
5489 const char* name) /*!< in: savepoint name */
5490 {
5491 ulint i;
5492
5493 ut_a(ib_vector_size(savepoints) > 0);
5494
5495 for (i = 1; i < ib_vector_size(savepoints); ++i) {
5496 fts_savepoint_t* savepoint;
5497
5498 savepoint = static_cast<fts_savepoint_t*>(
5499 ib_vector_get(savepoints, i));
5500
5501 if (strcmp(name, savepoint->name) == 0) {
5502 return(i);
5503 }
5504 }
5505
5506 return(ULINT_UNDEFINED);
5507 }
5508
5509 /*********************************************************************//**
5510 Release the savepoint data identified by name. All savepoints created
5511 after the named savepoint are kept.
5512 @return DB_SUCCESS or error code */
5513 void
fts_savepoint_release(trx_t * trx,const char * name)5514 fts_savepoint_release(
5515 /*==================*/
5516 trx_t* trx, /*!< in: transaction */
5517 const char* name) /*!< in: savepoint name */
5518 {
5519 ut_a(name != NULL);
5520
5521 ib_vector_t* savepoints = trx->fts_trx->savepoints;
5522
5523 ut_a(ib_vector_size(savepoints) > 0);
5524
5525 ulint i = fts_savepoint_lookup(savepoints, name);
5526 if (i != ULINT_UNDEFINED) {
5527 ut_a(i >= 1);
5528
5529 fts_savepoint_t* savepoint;
5530 savepoint = static_cast<fts_savepoint_t*>(
5531 ib_vector_get(savepoints, i));
5532
5533 if (i == ib_vector_size(savepoints) - 1) {
5534 /* If the savepoint is the last, we save its
5535 tables to the previous savepoint. */
5536 fts_savepoint_t* prev_savepoint;
5537 prev_savepoint = static_cast<fts_savepoint_t*>(
5538 ib_vector_get(savepoints, i - 1));
5539
5540 ib_rbt_t* tables = savepoint->tables;
5541 savepoint->tables = prev_savepoint->tables;
5542 prev_savepoint->tables = tables;
5543 }
5544
5545 fts_savepoint_free(savepoint);
5546 ib_vector_remove(savepoints, *(void**)savepoint);
5547
5548 /* Make sure we don't delete the implied savepoint. */
5549 ut_a(ib_vector_size(savepoints) > 0);
5550 }
5551 }
5552
5553 /**********************************************************************//**
5554 Refresh last statement savepoint. */
5555 void
fts_savepoint_laststmt_refresh(trx_t * trx)5556 fts_savepoint_laststmt_refresh(
5557 /*===========================*/
5558 trx_t* trx) /*!< in: transaction */
5559 {
5560
5561 fts_trx_t* fts_trx;
5562 fts_savepoint_t* savepoint;
5563
5564 fts_trx = trx->fts_trx;
5565
5566 savepoint = static_cast<fts_savepoint_t*>(
5567 ib_vector_pop(fts_trx->last_stmt));
5568 fts_savepoint_free(savepoint);
5569
5570 ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5571 savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
5572 }
5573
5574 /********************************************************************
5575 Undo the Doc ID add/delete operations in last stmt */
5576 static
5577 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)5578 fts_undo_last_stmt(
5579 /*===============*/
5580 fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
5581 fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
5582 {
5583 ib_rbt_t* s_rows;
5584 ib_rbt_t* l_rows;
5585 const ib_rbt_node_t* node;
5586
5587 l_rows = l_ftt->rows;
5588 s_rows = s_ftt->rows;
5589
5590 for (node = rbt_first(l_rows);
5591 node;
5592 node = rbt_next(l_rows, node)) {
5593 fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
5594 ib_rbt_bound_t parent;
5595
5596 rbt_search(s_rows, &parent, &(l_row->doc_id));
5597
5598 if (parent.result == 0) {
5599 fts_trx_row_t* s_row = rbt_value(
5600 fts_trx_row_t, parent.last);
5601
5602 switch (l_row->state) {
5603 case FTS_INSERT:
5604 ut_free(rbt_remove_node(s_rows, parent.last));
5605 break;
5606
5607 case FTS_DELETE:
5608 if (s_row->state == FTS_NOTHING) {
5609 s_row->state = FTS_INSERT;
5610 } else if (s_row->state == FTS_DELETE) {
5611 ut_free(rbt_remove_node(
5612 s_rows, parent.last));
5613 }
5614 break;
5615
5616 /* FIXME: Check if FTS_MODIFY need to be addressed */
5617 case FTS_MODIFY:
5618 case FTS_NOTHING:
5619 break;
5620 default:
5621 ut_error;
5622 }
5623 }
5624 }
5625 }
5626
5627 /**********************************************************************//**
5628 Rollback to savepoint indentified by name.
5629 @return DB_SUCCESS or error code */
5630 void
fts_savepoint_rollback_last_stmt(trx_t * trx)5631 fts_savepoint_rollback_last_stmt(
5632 /*=============================*/
5633 trx_t* trx) /*!< in: transaction */
5634 {
5635 ib_vector_t* savepoints;
5636 fts_savepoint_t* savepoint;
5637 fts_savepoint_t* last_stmt;
5638 fts_trx_t* fts_trx;
5639 ib_rbt_bound_t parent;
5640 const ib_rbt_node_t* node;
5641 ib_rbt_t* l_tables;
5642 ib_rbt_t* s_tables;
5643
5644 fts_trx = trx->fts_trx;
5645 savepoints = fts_trx->savepoints;
5646
5647 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
5648 last_stmt = static_cast<fts_savepoint_t*>(
5649 ib_vector_last(fts_trx->last_stmt));
5650
5651 l_tables = last_stmt->tables;
5652 s_tables = savepoint->tables;
5653
5654 for (node = rbt_first(l_tables);
5655 node;
5656 node = rbt_next(l_tables, node)) {
5657
5658 fts_trx_table_t** l_ftt;
5659
5660 l_ftt = rbt_value(fts_trx_table_t*, node);
5661
5662 rbt_search_cmp(
5663 s_tables, &parent, &(*l_ftt)->table->id,
5664 fts_trx_table_id_cmp, NULL);
5665
5666 if (parent.result == 0) {
5667 fts_trx_table_t** s_ftt;
5668
5669 s_ftt = rbt_value(fts_trx_table_t*, parent.last);
5670
5671 fts_undo_last_stmt(*s_ftt, *l_ftt);
5672 }
5673 }
5674 }
5675
5676 /**********************************************************************//**
5677 Rollback to savepoint indentified by name.
5678 @return DB_SUCCESS or error code */
5679 void
fts_savepoint_rollback(trx_t * trx,const char * name)5680 fts_savepoint_rollback(
5681 /*===================*/
5682 trx_t* trx, /*!< in: transaction */
5683 const char* name) /*!< in: savepoint name */
5684 {
5685 ulint i;
5686 ib_vector_t* savepoints;
5687
5688 ut_a(name != NULL);
5689
5690 savepoints = trx->fts_trx->savepoints;
5691
5692 /* We pop all savepoints from the the top of the stack up to
5693 and including the instance that was found. */
5694 i = fts_savepoint_lookup(savepoints, name);
5695
5696 if (i != ULINT_UNDEFINED) {
5697 fts_savepoint_t* savepoint;
5698
5699 ut_a(i > 0);
5700
5701 while (ib_vector_size(savepoints) > i) {
5702 fts_savepoint_t* savepoint;
5703
5704 savepoint = static_cast<fts_savepoint_t*>(
5705 ib_vector_pop(savepoints));
5706
5707 if (savepoint->name != NULL) {
5708 /* Since name was allocated on the heap, the
5709 memory will be released when the transaction
5710 completes. */
5711 savepoint->name = NULL;
5712
5713 fts_savepoint_free(savepoint);
5714 }
5715 }
5716
5717 /* Pop all a elements from the top of the stack that may
5718 have been released. We have to be careful that we don't
5719 delete the implied savepoint. */
5720
5721 for (savepoint = static_cast<fts_savepoint_t*>(
5722 ib_vector_last(savepoints));
5723 ib_vector_size(savepoints) > 1
5724 && savepoint->name == NULL;
5725 savepoint = static_cast<fts_savepoint_t*>(
5726 ib_vector_last(savepoints))) {
5727
5728 ib_vector_pop(savepoints);
5729 }
5730
5731 /* Make sure we don't delete the implied savepoint. */
5732 ut_a(ib_vector_size(savepoints) > 0);
5733
5734 /* Restore the savepoint. */
5735 fts_savepoint_take(trx->fts_trx, name);
5736 }
5737 }
5738
fts_check_aux_table(const char * name,table_id_t * table_id,index_id_t * index_id)5739 bool fts_check_aux_table(const char *name,
5740 table_id_t *table_id,
5741 index_id_t *index_id)
5742 {
5743 ulint len= strlen(name);
5744 const char* ptr;
5745 const char* end= name + len;
5746
5747 ut_ad(len <= MAX_FULL_NAME_LEN);
5748 ptr= static_cast<const char*>(memchr(name, '/', len));
5749
5750 if (ptr != NULL)
5751 {
5752 /* We will start the match after the '/' */
5753 ++ptr;
5754 len = end - ptr;
5755 }
5756
5757 /* All auxiliary tables are prefixed with "FTS_" and the name
5758 length will be at the very least greater than 20 bytes. */
5759 if (ptr && len > 20 && !memcmp(ptr, "FTS_", 4))
5760 {
5761 /* Skip the prefix. */
5762 ptr+= 4;
5763 len-= 4;
5764
5765 const char *table_id_ptr= ptr;
5766 /* Skip the table id. */
5767 ptr= static_cast<const char*>(memchr(ptr, '_', len));
5768
5769 if (!ptr)
5770 return false;
5771
5772 /* Skip the underscore. */
5773 ++ptr;
5774 ut_ad(end > ptr);
5775 len= end - ptr;
5776
5777 sscanf(table_id_ptr, UINT64PFx, table_id);
5778 /* First search the common table suffix array. */
5779 for (ulint i = 0; fts_common_tables[i]; ++i)
5780 {
5781 if (!strncmp(ptr, fts_common_tables[i], len))
5782 return true;
5783 }
5784
5785 /* Could be obsolete common tables. */
5786 if ((len == 5 && !memcmp(ptr, "ADDED", len)) ||
5787 (len == 9 && !memcmp(ptr, "STOPWORDS", len)))
5788 return true;
5789
5790 const char* index_id_ptr= ptr;
5791 /* Skip the index id. */
5792 ptr= static_cast<const char*>(memchr(ptr, '_', len));
5793 if (!ptr)
5794 return false;
5795
5796 sscanf(index_id_ptr, UINT64PFx, index_id);
5797
5798 /* Skip the underscore. */
5799 ++ptr;
5800 ut_a(end > ptr);
5801 len= end - ptr;
5802
5803 if (len > 7)
5804 return false;
5805
5806 /* Search the FT index specific array. */
5807 for (ulint i = 0; i < FTS_NUM_AUX_INDEX; ++i)
5808 {
5809 if (!memcmp(ptr, "INDEX_", len - 1))
5810 return true;
5811 }
5812
5813 /* Other FT index specific table(s). */
5814 if (len == 6 && !memcmp(ptr, "DOC_ID", len))
5815 return true;
5816 }
5817
5818 return false;
5819 }
5820
5821 typedef std::pair<table_id_t,index_id_t> fts_aux_id;
5822 typedef std::set<fts_aux_id> fts_space_set_t;
5823
5824 /** Iterate over all the spaces in the space list and fetch the
5825 fts parent table id and index id.
5826 @param[in,out] fts_space_set store the list of tablespace id and
5827 index id */
fil_get_fts_spaces(fts_space_set_t & fts_space_set)5828 static void fil_get_fts_spaces(fts_space_set_t& fts_space_set)
5829 {
5830 mutex_enter(&fil_system.mutex);
5831
5832 for (fil_space_t *space= UT_LIST_GET_FIRST(fil_system.space_list);
5833 space;
5834 space= UT_LIST_GET_NEXT(space_list, space))
5835 {
5836 index_id_t index_id= 0;
5837 table_id_t table_id= 0;
5838
5839 if (space->purpose == FIL_TYPE_TABLESPACE
5840 && fts_check_aux_table(space->name, &table_id, &index_id))
5841 fts_space_set.insert(std::make_pair(table_id, index_id));
5842 }
5843
5844 mutex_exit(&fil_system.mutex);
5845 }
5846
5847 /** Check whether the parent table id and index id of fts auxilary
5848 tables with SYS_INDEXES. If it exists then we can safely ignore the
5849 fts table from orphaned tables.
5850 @param[in,out] fts_space_set fts space set contains set of auxiliary
5851 table ids */
fts_check_orphaned_tables(fts_space_set_t & fts_space_set)5852 static void fts_check_orphaned_tables(fts_space_set_t& fts_space_set)
5853 {
5854 btr_pcur_t pcur;
5855 mtr_t mtr;
5856 trx_t* trx = trx_create();
5857 trx->op_info = "checking fts orphaned tables";
5858
5859 row_mysql_lock_data_dictionary(trx);
5860
5861 mtr.start();
5862 btr_pcur_open_at_index_side(
5863 true, dict_table_get_first_index(dict_sys->sys_indexes),
5864 BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
5865
5866 do
5867 {
5868 const rec_t *rec;
5869 const byte *tbl_field;
5870 const byte *index_field;
5871 ulint len;
5872
5873 btr_pcur_move_to_next_user_rec(&pcur, &mtr);
5874 if (!btr_pcur_is_on_user_rec(&pcur))
5875 break;
5876
5877 rec= btr_pcur_get_rec(&pcur);
5878 if (rec_get_deleted_flag(rec, 0))
5879 continue;
5880
5881 tbl_field= rec_get_nth_field_old(rec, 0, &len);
5882 if (len != 8)
5883 continue;
5884
5885 index_field= rec_get_nth_field_old(rec, 1, &len);
5886 if (len != 8)
5887 continue;
5888
5889 table_id_t table_id = mach_read_from_8(tbl_field);
5890 index_id_t index_id = mach_read_from_8(index_field);
5891
5892 fts_space_set_t::iterator it = fts_space_set.find(
5893 fts_aux_id(table_id, index_id));
5894
5895 if (it != fts_space_set.end())
5896 fts_space_set.erase(*it);
5897 else
5898 {
5899 it= fts_space_set.find(fts_aux_id(table_id, 0));
5900 if (it != fts_space_set.end())
5901 fts_space_set.erase(*it);
5902 }
5903 } while(!fts_space_set.empty());
5904
5905 btr_pcur_close(&pcur);
5906 mtr.commit();
5907 row_mysql_unlock_data_dictionary(trx);
5908 trx->free();
5909 }
5910
5911 /** Drop all fts auxilary table for the respective fts_id
5912 @param[in] fts_id fts auxilary table ids */
fts_drop_all_aux_tables(trx_t * trx,fts_table_t * fts_table)5913 static void fts_drop_all_aux_tables(trx_t *trx, fts_table_t *fts_table)
5914 {
5915 char fts_table_name[MAX_FULL_NAME_LEN];
5916 for (ulint i= 0;i < FTS_NUM_AUX_INDEX; i++)
5917 {
5918 fts_table->suffix= fts_get_suffix(i);
5919 fts_get_table_name(fts_table, fts_table_name, true);
5920
5921 /* Drop all fts aux and common table */
5922 dberr_t err= fts_drop_table(trx, fts_table_name);
5923
5924 if (err == DB_FAIL)
5925 {
5926 char *path= fil_make_filepath(NULL, fts_table_name, IBD, false);
5927
5928 if (path != NULL)
5929 {
5930 os_file_delete_if_exists(innodb_data_file_key, path , NULL);
5931 ut_free(path);
5932 }
5933 }
5934 }
5935 }
5936
5937 /** Drop all orphaned FTS auxiliary tables, those that don't have
5938 a parent table or FTS index defined on them. */
fts_drop_orphaned_tables()5939 void fts_drop_orphaned_tables()
5940 {
5941 fts_space_set_t fts_space_set;
5942 fil_get_fts_spaces(fts_space_set);
5943
5944 if (fts_space_set.empty())
5945 return;
5946
5947 fts_check_orphaned_tables(fts_space_set);
5948
5949 if (fts_space_set.empty())
5950 return;
5951
5952 trx_t* trx= trx_create();
5953 trx->op_info= "Drop orphaned aux FTS tables";
5954 row_mysql_lock_data_dictionary(trx);
5955
5956 for (fts_space_set_t::iterator it = fts_space_set.begin();
5957 it != fts_space_set.end(); it++)
5958 {
5959 fts_table_t fts_table;
5960 dict_table_t *table= dict_table_open_on_id(it->first, TRUE,
5961 DICT_TABLE_OP_NORMAL);
5962 if (!table)
5963 continue;
5964
5965 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
5966 fts_drop_common_tables(trx, &fts_table, true);
5967
5968 fts_table.type= FTS_INDEX_TABLE;
5969 fts_table.index_id= it->second;
5970 fts_drop_all_aux_tables(trx, &fts_table);
5971
5972 dict_table_close(table, true, false);
5973 }
5974 trx_commit_for_mysql(trx);
5975 row_mysql_unlock_data_dictionary(trx);
5976 trx->dict_operation_lock_mode= 0;
5977 trx->free();
5978 }
5979
5980 /**********************************************************************//**
5981 Check whether user supplied stopword table is of the right format.
5982 Caller is responsible to hold dictionary locks.
5983 @return the stopword column charset if qualifies */
5984 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)5985 fts_valid_stopword_table(
5986 /*=====================*/
5987 const char* stopword_table_name) /*!< in: Stopword table
5988 name */
5989 {
5990 dict_table_t* table;
5991 dict_col_t* col = NULL;
5992
5993 if (!stopword_table_name) {
5994 return(NULL);
5995 }
5996
5997 table = dict_table_get_low(stopword_table_name);
5998
5999 if (!table) {
6000 ib::error() << "User stopword table " << stopword_table_name
6001 << " does not exist.";
6002
6003 return(NULL);
6004 } else {
6005 const char* col_name;
6006
6007 col_name = dict_table_get_col_name(table, 0);
6008
6009 if (ut_strcmp(col_name, "value")) {
6010 ib::error() << "Invalid column name for stopword"
6011 " table " << stopword_table_name << ". Its"
6012 " first column must be named as 'value'.";
6013
6014 return(NULL);
6015 }
6016
6017 col = dict_table_get_nth_col(table, 0);
6018
6019 if (col->mtype != DATA_VARCHAR
6020 && col->mtype != DATA_VARMYSQL) {
6021 ib::error() << "Invalid column type for stopword"
6022 " table " << stopword_table_name << ". Its"
6023 " first column must be of varchar type";
6024
6025 return(NULL);
6026 }
6027 }
6028
6029 ut_ad(col);
6030
6031 return(fts_get_charset(col->prtype));
6032 }
6033
6034 /**********************************************************************//**
6035 This function loads the stopword into the FTS cache. It also
6036 records/fetches stopword configuration to/from FTS configure
6037 table, depending on whether we are creating or reloading the
6038 FTS.
6039 @return true if load operation is successful */
6040 bool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * session_stopword_table,bool stopword_is_on,bool reload)6041 fts_load_stopword(
6042 /*==============*/
6043 const dict_table_t*
6044 table, /*!< in: Table with FTS */
6045 trx_t* trx, /*!< in: Transactions */
6046 const char* session_stopword_table, /*!< in: Session stopword table
6047 name */
6048 bool stopword_is_on, /*!< in: Whether stopword
6049 option is turned on/off */
6050 bool reload) /*!< in: Whether it is
6051 for reloading FTS table */
6052 {
6053 fts_table_t fts_table;
6054 fts_string_t str;
6055 dberr_t error = DB_SUCCESS;
6056 ulint use_stopword;
6057 fts_cache_t* cache;
6058 const char* stopword_to_use = NULL;
6059 ibool new_trx = FALSE;
6060 byte str_buffer[MAX_FULL_NAME_LEN + 1];
6061
6062 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
6063
6064 cache = table->fts->cache;
6065
6066 if (!reload && !(cache->stopword_info.status & STOPWORD_NOT_INIT)) {
6067 return true;
6068 }
6069
6070 if (!trx) {
6071 trx = trx_create();
6072 if (srv_read_only_mode) {
6073 trx_start_internal_read_only(trx);
6074 } else {
6075 trx_start_internal(trx);
6076 }
6077 trx->op_info = "upload FTS stopword";
6078 new_trx = TRUE;
6079 }
6080
6081 /* First check whether stopword filtering is turned off */
6082 if (reload) {
6083 error = fts_config_get_ulint(
6084 trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
6085 } else {
6086 use_stopword = (ulint) stopword_is_on;
6087
6088 error = fts_config_set_ulint(
6089 trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
6090 }
6091
6092 if (error != DB_SUCCESS) {
6093 goto cleanup;
6094 }
6095
6096 /* If stopword is turned off, no need to continue to load the
6097 stopword into cache, but still need to do initialization */
6098 if (!use_stopword) {
6099 cache->stopword_info.status = STOPWORD_OFF;
6100 goto cleanup;
6101 }
6102
6103 if (reload) {
6104 /* Fetch the stopword table name from FTS config
6105 table */
6106 str.f_n_char = 0;
6107 str.f_str = str_buffer;
6108 str.f_len = sizeof(str_buffer) - 1;
6109
6110 error = fts_config_get_value(
6111 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
6112
6113 if (error != DB_SUCCESS) {
6114 goto cleanup;
6115 }
6116
6117 if (*str.f_str) {
6118 stopword_to_use = (const char*) str.f_str;
6119 }
6120 } else {
6121 stopword_to_use = session_stopword_table;
6122 }
6123
6124 if (stopword_to_use
6125 && fts_load_user_stopword(table->fts, stopword_to_use,
6126 &cache->stopword_info)) {
6127 /* Save the stopword table name to the configure
6128 table */
6129 if (!reload) {
6130 str.f_n_char = 0;
6131 str.f_str = (byte*) stopword_to_use;
6132 str.f_len = ut_strlen(stopword_to_use);
6133
6134 error = fts_config_set_value(
6135 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
6136 }
6137 } else {
6138 /* Load system default stopword list */
6139 fts_load_default_stopword(&cache->stopword_info);
6140 }
6141
6142 cleanup:
6143 if (new_trx) {
6144 if (error == DB_SUCCESS) {
6145 fts_sql_commit(trx);
6146 } else {
6147 fts_sql_rollback(trx);
6148 }
6149
6150 trx->free();
6151 }
6152
6153 if (!cache->stopword_info.cached_stopword) {
6154 cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
6155 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
6156 &my_charset_latin1);
6157 }
6158
6159 return error == DB_SUCCESS;
6160 }
6161
6162 /**********************************************************************//**
6163 Callback function when we initialize the FTS at the start up
6164 time. It recovers the maximum Doc IDs presented in the current table.
6165 @return: always returns TRUE */
6166 static
6167 ibool
fts_init_get_doc_id(void * row,void * user_arg)6168 fts_init_get_doc_id(
6169 /*================*/
6170 void* row, /*!< in: sel_node_t* */
6171 void* user_arg) /*!< in: fts cache */
6172 {
6173 doc_id_t doc_id = FTS_NULL_DOC_ID;
6174 sel_node_t* node = static_cast<sel_node_t*>(row);
6175 que_node_t* exp = node->select_list;
6176 fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
6177
6178 ut_ad(ib_vector_is_empty(cache->get_docs));
6179
6180 /* Copy each indexed column content into doc->text.f_str */
6181 if (exp) {
6182 dfield_t* dfield = que_node_get_val(exp);
6183 dtype_t* type = dfield_get_type(dfield);
6184 void* data = dfield_get_data(dfield);
6185
6186 ut_a(dtype_get_mtype(type) == DATA_INT);
6187
6188 doc_id = static_cast<doc_id_t>(mach_read_from_8(
6189 static_cast<const byte*>(data)));
6190
6191 if (doc_id >= cache->next_doc_id) {
6192 cache->next_doc_id = doc_id + 1;
6193 }
6194 }
6195
6196 return(TRUE);
6197 }
6198
6199 /**********************************************************************//**
6200 Callback function when we initialize the FTS at the start up
6201 time. It recovers Doc IDs that have not sync-ed to the auxiliary
6202 table, and require to bring them back into FTS index.
6203 @return: always returns TRUE */
6204 static
6205 ibool
fts_init_recover_doc(void * row,void * user_arg)6206 fts_init_recover_doc(
6207 /*=================*/
6208 void* row, /*!< in: sel_node_t* */
6209 void* user_arg) /*!< in: fts cache */
6210 {
6211
6212 fts_doc_t doc;
6213 ulint doc_len = 0;
6214 ulint field_no = 0;
6215 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
6216 doc_id_t doc_id = FTS_NULL_DOC_ID;
6217 sel_node_t* node = static_cast<sel_node_t*>(row);
6218 que_node_t* exp = node->select_list;
6219 fts_cache_t* cache = get_doc->cache;
6220 st_mysql_ftparser* parser = get_doc->index_cache->index->parser;
6221
6222 fts_doc_init(&doc);
6223 doc.found = TRUE;
6224
6225 ut_ad(cache);
6226
6227 /* Copy each indexed column content into doc->text.f_str */
6228 while (exp) {
6229 dfield_t* dfield = que_node_get_val(exp);
6230 ulint len = dfield_get_len(dfield);
6231
6232 if (field_no == 0) {
6233 dtype_t* type = dfield_get_type(dfield);
6234 void* data = dfield_get_data(dfield);
6235
6236 ut_a(dtype_get_mtype(type) == DATA_INT);
6237
6238 doc_id = static_cast<doc_id_t>(mach_read_from_8(
6239 static_cast<const byte*>(data)));
6240
6241 field_no++;
6242 exp = que_node_get_next(exp);
6243 continue;
6244 }
6245
6246 if (len == UNIV_SQL_NULL) {
6247 exp = que_node_get_next(exp);
6248 continue;
6249 }
6250
6251 ut_ad(get_doc);
6252
6253 if (!get_doc->index_cache->charset) {
6254 get_doc->index_cache->charset = fts_get_charset(
6255 dfield->type.prtype);
6256 }
6257
6258 doc.charset = get_doc->index_cache->charset;
6259
6260 if (dfield_is_ext(dfield)) {
6261 dict_table_t* table = cache->sync->table;
6262
6263 doc.text.f_str = btr_copy_externally_stored_field(
6264 &doc.text.f_len,
6265 static_cast<byte*>(dfield_get_data(dfield)),
6266 dict_table_page_size(table), len,
6267 static_cast<mem_heap_t*>(doc.self_heap->arg));
6268 } else {
6269 doc.text.f_str = static_cast<byte*>(
6270 dfield_get_data(dfield));
6271
6272 doc.text.f_len = len;
6273 }
6274
6275 if (field_no == 1) {
6276 fts_tokenize_document(&doc, NULL, parser);
6277 } else {
6278 fts_tokenize_document_next(&doc, doc_len, NULL, parser);
6279 }
6280
6281 exp = que_node_get_next(exp);
6282
6283 doc_len += (exp) ? len + 1 : len;
6284
6285 field_no++;
6286 }
6287
6288 fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
6289
6290 fts_doc_free(&doc);
6291
6292 cache->added++;
6293
6294 if (doc_id >= cache->next_doc_id) {
6295 cache->next_doc_id = doc_id + 1;
6296 }
6297
6298 return(TRUE);
6299 }
6300
6301 /**********************************************************************//**
6302 This function brings FTS index in sync when FTS index is first
6303 used. There are documents that have not yet sync-ed to auxiliary
6304 tables from last server abnormally shutdown, we will need to bring
6305 such document into FTS cache before any further operations
6306 @return TRUE if all OK */
6307 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)6308 fts_init_index(
6309 /*===========*/
6310 dict_table_t* table, /*!< in: Table with FTS */
6311 ibool has_cache_lock) /*!< in: Whether we already have
6312 cache lock */
6313 {
6314 dict_index_t* index;
6315 doc_id_t start_doc;
6316 fts_get_doc_t* get_doc = NULL;
6317 fts_cache_t* cache = table->fts->cache;
6318 bool need_init = false;
6319
6320 ut_ad(!mutex_own(&dict_sys->mutex));
6321
6322 /* First check cache->get_docs is initialized */
6323 if (!has_cache_lock) {
6324 rw_lock_x_lock(&cache->lock);
6325 }
6326
6327 rw_lock_x_lock(&cache->init_lock);
6328 if (cache->get_docs == NULL) {
6329 cache->get_docs = fts_get_docs_create(cache);
6330 }
6331 rw_lock_x_unlock(&cache->init_lock);
6332
6333 if (table->fts->added_synced) {
6334 goto func_exit;
6335 }
6336
6337 need_init = true;
6338
6339 start_doc = cache->synced_doc_id;
6340
6341 if (!start_doc) {
6342 fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
6343 cache->synced_doc_id = start_doc;
6344 }
6345
6346 /* No FTS index, this is the case when previous FTS index
6347 dropped, and we re-initialize the Doc ID system for subsequent
6348 insertion */
6349 if (ib_vector_is_empty(cache->get_docs)) {
6350 index = table->fts_doc_id_index;
6351
6352 ut_a(index);
6353
6354 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
6355 FTS_FETCH_DOC_BY_ID_LARGE,
6356 fts_init_get_doc_id, cache);
6357 } else {
6358 if (table->fts->cache->stopword_info.status
6359 & STOPWORD_NOT_INIT) {
6360 fts_load_stopword(table, NULL, NULL, true, true);
6361 }
6362
6363 for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
6364 get_doc = static_cast<fts_get_doc_t*>(
6365 ib_vector_get(cache->get_docs, i));
6366
6367 index = get_doc->index_cache->index;
6368
6369 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
6370 FTS_FETCH_DOC_BY_ID_LARGE,
6371 fts_init_recover_doc, get_doc);
6372 }
6373 }
6374
6375 table->fts->added_synced = true;
6376
6377 fts_get_docs_clear(cache->get_docs);
6378
6379 func_exit:
6380 if (!has_cache_lock) {
6381 rw_lock_x_unlock(&cache->lock);
6382 }
6383
6384 if (need_init) {
6385 mutex_enter(&dict_sys->mutex);
6386 /* Register the table with the optimize thread. */
6387 fts_optimize_add_table(table);
6388 mutex_exit(&dict_sys->mutex);
6389 }
6390
6391 return(TRUE);
6392 }
6393
6394 /** Check if the all the auxillary tables associated with FTS index are in
6395 consistent state. For now consistency is check only by ensuring
6396 index->page_no != FIL_NULL
6397 @param[out] base_table table has host fts index
6398 @param[in,out] trx trx handler */
6399 void
fts_check_corrupt(dict_table_t * base_table,trx_t * trx)6400 fts_check_corrupt(
6401 dict_table_t* base_table,
6402 trx_t* trx)
6403 {
6404 bool sane = true;
6405 fts_table_t fts_table;
6406
6407 /* Iterate over the common table and check for their sanity. */
6408 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
6409
6410 for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
6411
6412 char table_name[MAX_FULL_NAME_LEN];
6413
6414 fts_table.suffix = fts_common_tables[i];
6415 fts_get_table_name(&fts_table, table_name);
6416
6417 dict_table_t* aux_table = dict_table_open_on_name(
6418 table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
6419
6420 if (aux_table == NULL) {
6421 dict_set_corrupted(
6422 dict_table_get_first_index(base_table),
6423 trx, "FTS_SANITY_CHECK");
6424 ut_ad(base_table->corrupted == TRUE);
6425 sane = false;
6426 continue;
6427 }
6428
6429 for (dict_index_t* aux_table_index =
6430 UT_LIST_GET_FIRST(aux_table->indexes);
6431 aux_table_index != NULL;
6432 aux_table_index =
6433 UT_LIST_GET_NEXT(indexes, aux_table_index)) {
6434
6435 /* Check if auxillary table needed for FTS is sane. */
6436 if (aux_table_index->page == FIL_NULL) {
6437 dict_set_corrupted(
6438 dict_table_get_first_index(base_table),
6439 trx, "FTS_SANITY_CHECK");
6440 ut_ad(base_table->corrupted == TRUE);
6441 sane = false;
6442 }
6443 }
6444
6445 dict_table_close(aux_table, FALSE, FALSE);
6446 }
6447 }
6448