1 /*****************************************************************************
2
3 Copyright (c) 2011, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file fts/fts0fts.cc
29 Full Text Search interface
30 ***********************************************************************/
31
32 #include "trx0roll.h"
33 #include "row0mysql.h"
34 #include "row0upd.h"
35 #include "dict0types.h"
36 #include "dict0stats_bg.h"
37 #include "row0sel.h"
38
39 #include "fts0fts.h"
40 #include "fts0priv.h"
41 #include "fts0types.h"
42
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "dict0priv.h"
46 #include "dict0stats.h"
47 #include "btr0pcur.h"
48 #include <vector>
49
50 #include "ha_prototypes.h"
51
52 #define FTS_MAX_ID_LEN 32
53
54 /** Column name from the FTS config table */
55 #define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
56
/** Check whether an aux table name is that of an obsolete table,
by looking for the key words used in obsolete table names */
59 #define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \
60 (strstr((table_name), "DOC_ID") != NULL \
61 || strstr((table_name), "ADDED") != NULL \
62 || strstr((table_name), "STOPWORDS") != NULL)
63
64 /** This is maximum FTS cache for each table and would be
65 a configurable variable */
66 UNIV_INTERN ulong fts_max_cache_size;
67
68 /** Whether the total memory used for FTS cache is exhausted, and we will
69 need a sync to free some memory */
70 UNIV_INTERN bool fts_need_sync = false;
71
72 /** Variable specifying the total memory allocated for FTS cache */
73 UNIV_INTERN ulong fts_max_total_cache_size;
74
75 /** This is FTS result cache limit for each query and would be
76 a configurable variable */
77 UNIV_INTERN ulong fts_result_cache_limit;
78
/** Variable specifying the maximum FTS token size */
80 UNIV_INTERN ulong fts_max_token_size;
81
/** Variable specifying the minimum FTS token size */
83 UNIV_INTERN ulong fts_min_token_size;
84
85
86 // FIXME: testing
87 ib_time_t elapsed_time = 0;
88 ulint n_nodes = 0;
89
90 /** Error condition reported by fts_utf8_decode() */
91 const ulint UTF8_ERROR = 0xFFFFFFFF;
92
93 #ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit (1M) */
95 static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
96
97 /** The cache size permissible upper limit (1G) */
98 static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
99 #endif /* FTS_CACHE_SIZE_DEBUG */
100
101 /** Time to sleep after DEADLOCK error before retrying operation. */
102 static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
103
104 #ifdef UNIV_PFS_RWLOCK
105 UNIV_INTERN mysql_pfs_key_t fts_cache_rw_lock_key;
106 UNIV_INTERN mysql_pfs_key_t fts_cache_init_rw_lock_key;
107 #endif /* UNIV_PFS_RWLOCK */
108
109 #ifdef UNIV_PFS_MUTEX
110 UNIV_INTERN mysql_pfs_key_t fts_delete_mutex_key;
111 UNIV_INTERN mysql_pfs_key_t fts_optimize_mutex_key;
112 UNIV_INTERN mysql_pfs_key_t fts_bg_threads_mutex_key;
113 UNIV_INTERN mysql_pfs_key_t fts_doc_id_mutex_key;
114 UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key;
115 #endif /* UNIV_PFS_MUTEX */
116
117 /** variable to record innodb_fts_internal_tbl_name for information
118 schema table INNODB_FTS_INSERTED etc. */
119 UNIV_INTERN char* fts_internal_tbl_name = NULL;
120
/** InnoDB default stopword list, terminated by a NULL entry.
There are different versions of stopword lists; the stop words listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision.
NOTE(review): "the" is listed twice below. Confirm whether anything
depends on the exact number of entries before removing the duplicate. */
const char *fts_default_stopword[] =
{
	"a",
	"about",
	"an",
	"are",
	"as",
	"at",
	"be",
	"by",
	"com",
	"de",
	"en",
	"for",
	"from",
	"how",
	"i",
	"in",
	"is",
	"it",
	"la",
	"of",
	"on",
	"or",
	"that",
	"the",
	"this",
	"to",
	"was",
	"what",
	"when",
	"where",
	"who",
	"will",
	"with",
	"und",
	"the",
	"www",
	NULL
};
167
/** For storing table info when checking for orphaned tables.
NOTE(review): who allocates and frees `name` is not visible from this
definition; confirm against the code that fills in the struct. */
struct fts_aux_table_t {
	table_id_t	id;		/*!< Table id */
	table_id_t	parent_id;	/*!< Parent table id */
	table_id_t	index_id;	/*!< Table FT index id */
	char*		name;		/*!< Name of the table */
};
175
/** SQL statements for creating the ancillary common FTS tables.
The template holds ten %s placeholders: two for each of the five tables
(DELETED, DELETED_CACHE, BEING_DELETED, BEING_DELETED_CACHE and CONFIG),
one in the CREATE TABLE statement and one in the matching CREATE INDEX.
NOTE(review): the code expanding the placeholders is not in view;
presumably every %s receives the same table name prefix - confirm. */
static const char* fts_create_common_tables_sql = {
	"BEGIN\n"
	""
	"CREATE TABLE \"%s_DELETED\" (\n"
	" doc_id BIGINT UNSIGNED\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DELETED\"(doc_id);\n"
	""
	"CREATE TABLE \"%s_DELETED_CACHE\" (\n"
	" doc_id BIGINT UNSIGNED\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND "
	"ON \"%s_DELETED_CACHE\"(doc_id);\n"
	""
	"CREATE TABLE \"%s_BEING_DELETED\" (\n"
	" doc_id BIGINT UNSIGNED\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND "
	"ON \"%s_BEING_DELETED\"(doc_id);\n"
	""
	"CREATE TABLE \"%s_BEING_DELETED_CACHE\" (\n"
	" doc_id BIGINT UNSIGNED\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND "
	"ON \"%s_BEING_DELETED_CACHE\"(doc_id);\n"
	""
	"CREATE TABLE \"%s_CONFIG\" (\n"
	" key CHAR(50),\n"
	" value CHAR(200) NOT NULL\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_CONFIG\"(key);\n"
};
209
#ifdef FTS_DOC_STATS_DEBUG
/** Template for creating the FTS auxiliary index specific tables. This is
mainly designed for the statistics work in the future.
Only compiled when FTS_DOC_STATS_DEBUG is defined. The template holds two
%s placeholders, both used as the prefix of the "_DOC_ID" table name. */
static const char* fts_create_index_tables_sql = {
	"BEGIN\n"
	""
	"CREATE TABLE \"%s_DOC_ID\" (\n"
	" doc_id BIGINT UNSIGNED,\n"
	" word_count INTEGER UNSIGNED NOT NULL\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DOC_ID\"(doc_id);\n"
};
#endif
223
/** Template for creating the clustered index of an ancillary FTS word
index table. The single %s placeholder is the name of the table on which
the unique index over (word, first_doc_id) is created. */
static const char* fts_create_index_sql = {
	"BEGIN\n"
	""
	"CREATE UNIQUE CLUSTERED INDEX FTS_INDEX_TABLE_IND "
	"ON \"%s\"(word, first_doc_id);\n"
};
231
/** FTS auxiliary table suffixes that are common to all FT indexes.
The list is terminated by a NULL entry. The suffixes are the same ones
that appear after "%s_" in fts_create_common_tables_sql. */
static const char* fts_common_tables[] = {
	"BEING_DELETED",
	"BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED",
	"DELETED_CACHE",
	NULL
};
241
/** FTS auxiliary INDEX split intervals.
Each entry pairs a value with the suffix of one auxiliary INDEX table.
The array ends with a { 0, NULL } sentinel: the loops in this file walk it
until they reach an entry whose value is 0, and use the position of an
entry as the slot number in the per-index ins_graph[] / sel_graph[] arrays.
NOTE(review): how the values map words onto the tables is not visible
here; confirm against the selector lookup code. */
const fts_index_selector_t fts_index_selector[] = {
	{ 9, "INDEX_1" },
	{ 65, "INDEX_2" },
	{ 70, "INDEX_3" },
	{ 75, "INDEX_4" },
	{ 80, "INDEX_5" },
	{ 85, "INDEX_6" },
	{ 0 , NULL }
};
252
/** Default config values for FTS indexes on a table.
The template holds five %s placeholders, one per INSERT statement, each
naming the table that receives the key/value row. The keys come from the
FTS_* string macros that are concatenated into the statements. */
static const char* fts_config_table_insert_values_sql =
	"BEGIN\n"
	"\n"
	"INSERT INTO \"%s\" VALUES('"
	FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
	""
	"INSERT INTO \"%s\" VALUES('"
	FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
	""
	"INSERT INTO \"%s\" VALUES ('"
	FTS_SYNCED_DOC_ID "', '0');\n"
	""
	"INSERT INTO \"%s\" VALUES ('"
	FTS_TOTAL_DELETED_COUNT "', '0');\n"
	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
	"INSERT INTO \"%s\" VALUES ('"
	FTS_TABLE_STATE "', '0');\n";
271
272 /** Run SYNC on the table, i.e., write out data from the cache to the
273 FTS auxiliary INDEX table and clear the cache at the end.
274 @param[in,out] sync sync state
275 @param[in] unlock_cache whether unlock cache lock when write node
276 @param[in] wait whether wait when a sync is in progress
277 @param[in] has_dict whether has dict operation lock
278 @return DB_SUCCESS if all OK */
279 static
280 dberr_t
281 fts_sync(
282 fts_sync_t* sync,
283 bool unlock_cache,
284 bool wait,
285 bool has_dict);
286
287 /****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist. */
289 static
290 void
291 fts_words_free(
292 /*===========*/
293 ib_rbt_t* words) /*!< in: rb tree of words */
294 MY_ATTRIBUTE((nonnull));
295 #ifdef FTS_CACHE_SIZE_DEBUG
296 /****************************************************************//**
297 Read the max cache size parameter from the config table. */
298 static
299 void
300 fts_update_max_cache_size(
301 /*======================*/
302 fts_sync_t* sync); /*!< in: sync state */
303 #endif
304
305 /*********************************************************************//**
306 This function fetches the document just inserted right before
307 we commit the transaction, and tokenize the inserted text data
308 and insert into FTS auxiliary table and its cache.
309 @return TRUE if successful */
310 static
311 ulint
312 fts_add_doc_by_id(
313 /*==============*/
314 fts_trx_table_t*ftt, /*!< in: FTS trx table */
315 doc_id_t doc_id, /*!< in: doc id */
316 ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)));
317 /*!< in: affected fts indexes */
318 #ifdef FTS_DOC_STATS_DEBUG
319 /****************************************************************//**
320 Check whether a particular word (term) exists in the FTS index.
321 @return DB_SUCCESS if all went fine */
322 static
323 dberr_t
324 fts_is_word_in_index(
325 /*=================*/
326 trx_t* trx, /*!< in: FTS query state */
327 que_t** graph, /*!< out: Query graph */
328 fts_table_t* fts_table, /*!< in: table instance */
329 const fts_string_t* word, /*!< in: the word to check */
330 ibool* found) /*!< out: TRUE if exists */
331 MY_ATTRIBUTE((nonnull, warn_unused_result));
332 #endif /* FTS_DOC_STATS_DEBUG */
333
334 /******************************************************************//**
335 Update the last document id. This function could create a new
336 transaction to update the last document id.
337 @return DB_SUCCESS if OK */
338 static
339 dberr_t
340 fts_update_sync_doc_id(
341 /*===================*/
342 const dict_table_t* table, /*!< in: table */
343 const char* table_name, /*!< in: table name, or NULL */
344 doc_id_t doc_id, /*!< in: last document id */
345 trx_t* trx) /*!< in: update trx, or NULL */
346 MY_ATTRIBUTE((nonnull(1)));
347
348 /****************************************************************//**
349 This function loads the default InnoDB stopword list */
350 static
351 void
fts_load_default_stopword(fts_stopword_t * stopword_info)352 fts_load_default_stopword(
353 /*======================*/
354 fts_stopword_t* stopword_info) /*!< in: stopword info */
355 {
356 fts_string_t str;
357 mem_heap_t* heap;
358 ib_alloc_t* allocator;
359 ib_rbt_t* stop_words;
360
361 allocator = stopword_info->heap;
362 heap = static_cast<mem_heap_t*>(allocator->arg);
363
364 if (!stopword_info->cached_stopword) {
365 /* For default stopword, we always use fts_utf8_string_cmp() */
366 stopword_info->cached_stopword = rbt_create(
367 sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
368 }
369
370 stop_words = stopword_info->cached_stopword;
371
372 str.f_n_char = 0;
373
374 for (ulint i = 0; fts_default_stopword[i]; ++i) {
375 char* word;
376 fts_tokenizer_word_t new_word;
377
378 /* We are going to duplicate the value below. */
379 word = const_cast<char*>(fts_default_stopword[i]);
380
381 new_word.nodes = ib_vector_create(
382 allocator, sizeof(fts_node_t), 4);
383
384 str.f_len = ut_strlen(word);
385 str.f_str = reinterpret_cast<byte*>(word);
386
387 fts_utf8_string_dup(&new_word.text, &str, heap);
388
389 rbt_insert(stop_words, &new_word, &new_word);
390 }
391
392 stopword_info->status = STOPWORD_FROM_DEFAULT;
393 }
394
395 /****************************************************************//**
396 Callback function to read a single stopword value.
397 @return Always return TRUE */
398 static
399 ibool
fts_read_stopword(void * row,void * user_arg)400 fts_read_stopword(
401 /*==============*/
402 void* row, /*!< in: sel_node_t* */
403 void* user_arg) /*!< in: pointer to ib_vector_t */
404 {
405 ib_alloc_t* allocator;
406 fts_stopword_t* stopword_info;
407 sel_node_t* sel_node;
408 que_node_t* exp;
409 ib_rbt_t* stop_words;
410 dfield_t* dfield;
411 fts_string_t str;
412 mem_heap_t* heap;
413 ib_rbt_bound_t parent;
414
415 sel_node = static_cast<sel_node_t*>(row);
416 stopword_info = static_cast<fts_stopword_t*>(user_arg);
417
418 stop_words = stopword_info->cached_stopword;
419 allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
420 heap = static_cast<mem_heap_t*>(allocator->arg);
421
422 exp = sel_node->select_list;
423
424 /* We only need to read the first column */
425 dfield = que_node_get_val(exp);
426
427 str.f_n_char = 0;
428 str.f_str = static_cast<byte*>(dfield_get_data(dfield));
429 str.f_len = dfield_get_len(dfield);
430
431 /* Only create new node if it is a value not already existed */
432 if (str.f_len != UNIV_SQL_NULL
433 && rbt_search(stop_words, &parent, &str) != 0) {
434
435 fts_tokenizer_word_t new_word;
436
437 new_word.nodes = ib_vector_create(
438 allocator, sizeof(fts_node_t), 4);
439
440 new_word.text.f_str = static_cast<byte*>(
441 mem_heap_alloc(heap, str.f_len + 1));
442
443 memcpy(new_word.text.f_str, str.f_str, str.f_len);
444
445 new_word.text.f_n_char = 0;
446 new_word.text.f_len = str.f_len;
447 new_word.text.f_str[str.f_len] = 0;
448
449 rbt_insert(stop_words, &new_word, &new_word);
450 }
451
452 return(TRUE);
453 }
454
455 /******************************************************************//**
456 Load user defined stopword from designated user table
457 @return TRUE if load operation is successful */
458 static
459 ibool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)460 fts_load_user_stopword(
461 /*===================*/
462 fts_t* fts, /*!< in: FTS struct */
463 const char* stopword_table_name, /*!< in: Stopword table
464 name */
465 fts_stopword_t* stopword_info) /*!< in: Stopword info */
466 {
467 pars_info_t* info;
468 que_t* graph;
469 dberr_t error = DB_SUCCESS;
470 ibool ret = TRUE;
471 trx_t* trx;
472 ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED;
473
474 trx = trx_allocate_for_background();
475 trx->op_info = "Load user stopword table into FTS cache";
476
477 if (!has_lock) {
478 mutex_enter(&dict_sys->mutex);
479 }
480
481 /* Validate the user table existence and in the right
482 format */
483 stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
484 if (!stopword_info->charset) {
485 ret = FALSE;
486 goto cleanup;
487 } else if (!stopword_info->cached_stopword) {
488 /* Create the stopword RB tree with the stopword column
489 charset. All comparison will use this charset */
490 stopword_info->cached_stopword = rbt_create_arg_cmp(
491 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
492 stopword_info->charset);
493
494 }
495
496 info = pars_info_create();
497
498 pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
499
500 pars_info_bind_function(info, "my_func", fts_read_stopword,
501 stopword_info);
502
503 graph = fts_parse_sql_no_dict_lock(
504 NULL,
505 info,
506 "DECLARE FUNCTION my_func;\n"
507 "DECLARE CURSOR c IS"
508 " SELECT value "
509 " FROM $table_stopword;\n"
510 "BEGIN\n"
511 "\n"
512 "OPEN c;\n"
513 "WHILE 1 = 1 LOOP\n"
514 " FETCH c INTO my_func();\n"
515 " IF c % NOTFOUND THEN\n"
516 " EXIT;\n"
517 " END IF;\n"
518 "END LOOP;\n"
519 "CLOSE c;");
520
521 for (;;) {
522 error = fts_eval_sql(trx, graph);
523
524 if (error == DB_SUCCESS) {
525 fts_sql_commit(trx);
526 stopword_info->status = STOPWORD_USER_TABLE;
527 break;
528 } else {
529
530 fts_sql_rollback(trx);
531
532 ut_print_timestamp(stderr);
533
534 if (error == DB_LOCK_WAIT_TIMEOUT) {
535 fprintf(stderr, " InnoDB: Warning: lock wait "
536 "timeout reading user stopword table. "
537 "Retrying!\n");
538
539 trx->error_state = DB_SUCCESS;
540 } else {
541 fprintf(stderr, " InnoDB: Error '%s' "
542 "while reading user stopword table.\n",
543 ut_strerr(error));
544 ret = FALSE;
545 break;
546 }
547 }
548 }
549
550 que_graph_free(graph);
551
552 cleanup:
553 if (!has_lock) {
554 mutex_exit(&dict_sys->mutex);
555 }
556
557 trx_free_for_background(trx);
558 return(ret);
559 }
560
561 /******************************************************************//**
562 Initialize the index cache. */
563 static
564 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)565 fts_index_cache_init(
566 /*=================*/
567 ib_alloc_t* allocator, /*!< in: the allocator to use */
568 fts_index_cache_t* index_cache) /*!< in: index cache */
569 {
570 ulint i;
571
572 ut_a(index_cache->words == NULL);
573
574 index_cache->words = rbt_create_arg_cmp(
575 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
576 index_cache->charset);
577
578 ut_a(index_cache->doc_stats == NULL);
579
580 index_cache->doc_stats = ib_vector_create(
581 allocator, sizeof(fts_doc_stats_t), 4);
582
583 for (i = 0; fts_index_selector[i].value; ++i) {
584 ut_a(index_cache->ins_graph[i] == NULL);
585 ut_a(index_cache->sel_graph[i] == NULL);
586 }
587 }
588
589 /*********************************************************************//**
590 Initialize FTS cache. */
591 UNIV_INTERN
592 void
fts_cache_init(fts_cache_t * cache)593 fts_cache_init(
594 /*===========*/
595 fts_cache_t* cache) /*!< in: cache to initialize */
596 {
597 ulint i;
598
599 /* Just to make sure */
600 ut_a(cache->sync_heap->arg == NULL);
601
602 cache->sync_heap->arg = mem_heap_create(1024);
603
604 cache->total_size = 0;
605
606 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
607 cache->deleted_doc_ids = ib_vector_create(
608 cache->sync_heap, sizeof(fts_update_t), 4);
609 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
610
611 /* Reset the cache data for all the FTS indexes. */
612 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
613 fts_index_cache_t* index_cache;
614
615 index_cache = static_cast<fts_index_cache_t*>(
616 ib_vector_get(cache->indexes, i));
617
618 fts_index_cache_init(cache->sync_heap, index_cache);
619 }
620 }
621
622 /****************************************************************//**
623 Create a FTS cache. */
624 UNIV_INTERN
625 fts_cache_t*
fts_cache_create(dict_table_t * table)626 fts_cache_create(
627 /*=============*/
628 dict_table_t* table) /*!< in: table owns the FTS cache */
629 {
630 mem_heap_t* heap;
631 fts_cache_t* cache;
632
633 heap = static_cast<mem_heap_t*>(mem_heap_create(512));
634
635 cache = static_cast<fts_cache_t*>(
636 mem_heap_zalloc(heap, sizeof(*cache)));
637
638 cache->cache_heap = heap;
639
640 rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
641
642 rw_lock_create(
643 fts_cache_init_rw_lock_key, &cache->init_lock,
644 SYNC_FTS_CACHE_INIT);
645
646 mutex_create(
647 fts_delete_mutex_key, &cache->deleted_lock, SYNC_FTS_OPTIMIZE);
648
649 mutex_create(
650 fts_optimize_mutex_key, &cache->optimize_lock,
651 SYNC_FTS_OPTIMIZE);
652
653 mutex_create(
654 fts_doc_id_mutex_key, &cache->doc_id_lock, SYNC_FTS_OPTIMIZE);
655
656 /* This is the heap used to create the cache itself. */
657 cache->self_heap = ib_heap_allocator_create(heap);
658
659 /* This is a transient heap, used for storing sync data. */
660 cache->sync_heap = ib_heap_allocator_create(heap);
661 cache->sync_heap->arg = NULL;
662
663 fts_need_sync = false;
664
665 cache->sync = static_cast<fts_sync_t*>(
666 mem_heap_zalloc(heap, sizeof(fts_sync_t)));
667
668 cache->sync->table = table;
669 cache->sync->event = os_event_create();
670
671 /* Create the index cache vector that will hold the inverted indexes. */
672 cache->indexes = ib_vector_create(
673 cache->self_heap, sizeof(fts_index_cache_t), 2);
674
675 fts_cache_init(cache);
676
677 cache->stopword_info.cached_stopword = NULL;
678 cache->stopword_info.charset = NULL;
679
680 cache->stopword_info.heap = cache->self_heap;
681
682 cache->stopword_info.status = STOPWORD_NOT_INIT;
683
684 return(cache);
685 }
686
687 /*******************************************************************//**
688 Add a newly create index into FTS cache */
689 UNIV_INTERN
690 void
fts_add_index(dict_index_t * index,dict_table_t * table)691 fts_add_index(
692 /*==========*/
693 dict_index_t* index, /*!< FTS index to be added */
694 dict_table_t* table) /*!< table */
695 {
696 fts_t* fts = table->fts;
697 fts_cache_t* cache;
698 fts_index_cache_t* index_cache;
699
700 ut_ad(fts);
701 cache = table->fts->cache;
702
703 rw_lock_x_lock(&cache->init_lock);
704
705 ib_vector_push(fts->indexes, &index);
706
707 index_cache = fts_find_index_cache(cache, index);
708
709 if (!index_cache) {
710 /* Add new index cache structure */
711 index_cache = fts_cache_index_cache_create(table, index);
712 }
713
714 rw_lock_x_unlock(&cache->init_lock);
715 }
716
717 /*******************************************************************//**
718 recalibrate get_doc structure after index_cache in cache->indexes changed */
719 static
720 void
fts_reset_get_doc(fts_cache_t * cache)721 fts_reset_get_doc(
722 /*==============*/
723 fts_cache_t* cache) /*!< in: FTS index cache */
724 {
725 fts_get_doc_t* get_doc;
726 ulint i;
727
728 #ifdef UNIV_SYNC_DEBUG
729 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
730 #endif
731 ib_vector_reset(cache->get_docs);
732
733 for (i = 0; i < ib_vector_size(cache->indexes); i++) {
734 fts_index_cache_t* ind_cache;
735
736 ind_cache = static_cast<fts_index_cache_t*>(
737 ib_vector_get(cache->indexes, i));
738
739 get_doc = static_cast<fts_get_doc_t*>(
740 ib_vector_push(cache->get_docs, NULL));
741
742 memset(get_doc, 0x0, sizeof(*get_doc));
743
744 get_doc->index_cache = ind_cache;
745 }
746
747 ut_ad(ib_vector_size(cache->get_docs)
748 == ib_vector_size(cache->indexes));
749 }
750
751 /*******************************************************************//**
752 Check an index is in the table->indexes list
753 @return TRUE if it exists */
754 static
755 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)756 fts_in_dict_index(
757 /*==============*/
758 dict_table_t* table, /*!< in: Table */
759 dict_index_t* index_check) /*!< in: index to be checked */
760 {
761 dict_index_t* index;
762
763 for (index = dict_table_get_first_index(table);
764 index != NULL;
765 index = dict_table_get_next_index(index)) {
766
767 if (index == index_check) {
768 return(TRUE);
769 }
770 }
771
772 return(FALSE);
773 }
774
775 /*******************************************************************//**
776 Check an index is in the fts->cache->indexes list
777 @return TRUE if it exists */
778 static
779 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)780 fts_in_index_cache(
781 /*===============*/
782 dict_table_t* table, /*!< in: Table */
783 dict_index_t* index) /*!< in: index to be checked */
784 {
785 ulint i;
786
787 for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
788 fts_index_cache_t* index_cache;
789
790 index_cache = static_cast<fts_index_cache_t*>(
791 ib_vector_get(table->fts->cache->indexes, i));
792
793 if (index_cache->index == index) {
794 return(TRUE);
795 }
796 }
797
798 return(FALSE);
799 }
800
801 /*******************************************************************//**
802 Check indexes in the fts->indexes is also present in index cache and
803 table->indexes list
804 @return TRUE if all indexes match */
805 UNIV_INTERN
806 ibool
fts_check_cached_index(dict_table_t * table)807 fts_check_cached_index(
808 /*===================*/
809 dict_table_t* table) /*!< in: Table where indexes are dropped */
810 {
811 ulint i;
812
813 if (!table->fts || !table->fts->cache) {
814 return(TRUE);
815 }
816
817 ut_a(ib_vector_size(table->fts->indexes)
818 == ib_vector_size(table->fts->cache->indexes));
819
820 for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
821 dict_index_t* index;
822
823 index = static_cast<dict_index_t*>(
824 ib_vector_getp(table->fts->indexes, i));
825
826 if (!fts_in_index_cache(table, index)) {
827 return(FALSE);
828 }
829
830 if (!fts_in_dict_index(table, index)) {
831 return(FALSE);
832 }
833 }
834
835 return(TRUE);
836 }
837
838 /*******************************************************************//**
839 Drop auxiliary tables related to an FTS index
840 @return DB_SUCCESS or error number */
841 UNIV_INTERN
842 dberr_t
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx)843 fts_drop_index(
844 /*===========*/
845 dict_table_t* table, /*!< in: Table where indexes are dropped */
846 dict_index_t* index, /*!< in: Index to be dropped */
847 trx_t* trx) /*!< in: Transaction for the drop */
848 {
849 ib_vector_t* indexes = table->fts->indexes;
850 dberr_t err = DB_SUCCESS;
851
852 ut_a(indexes);
853
854 if ((ib_vector_size(indexes) == 1
855 && (index == static_cast<dict_index_t*>(
856 ib_vector_getp(table->fts->indexes, 0))))
857 || ib_vector_is_empty(indexes)) {
858 doc_id_t current_doc_id;
859 doc_id_t first_doc_id;
860
861 /* If we are dropping the only FTS index of the table,
862 remove it from optimize thread */
863 fts_optimize_remove_table(table);
864
865 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
866
867 /* If Doc ID column is not added internally by FTS index,
868 we can drop all FTS auxiliary tables. Otherwise, we will
869 need to keep some common table such as CONFIG table, so
870 as to keep track of incrementing Doc IDs */
871 if (!DICT_TF2_FLAG_IS_SET(
872 table, DICT_TF2_FTS_HAS_DOC_ID)) {
873
874 err = fts_drop_tables(trx, table);
875
876 err = fts_drop_index_tables(trx, index);
877
878 while (index->index_fts_syncing
879 && !trx_is_interrupted(trx)) {
880 DICT_BG_YIELD(trx);
881 }
882
883 fts_free(table);
884
885 return(err);
886 }
887
888 while (index->index_fts_syncing
889 && !trx_is_interrupted(trx)) {
890 DICT_BG_YIELD(trx);
891 }
892
893 current_doc_id = table->fts->cache->next_doc_id;
894 first_doc_id = table->fts->cache->first_doc_id;
895 fts_cache_clear(table->fts->cache);
896 fts_cache_destroy(table->fts->cache);
897 table->fts->cache = fts_cache_create(table);
898 table->fts->cache->next_doc_id = current_doc_id;
899 table->fts->cache->first_doc_id = first_doc_id;
900 } else {
901 fts_cache_t* cache = table->fts->cache;
902 fts_index_cache_t* index_cache;
903
904 rw_lock_x_lock(&cache->init_lock);
905
906 index_cache = fts_find_index_cache(cache, index);
907
908 if (index_cache != NULL) {
909 while (index->index_fts_syncing
910 && !trx_is_interrupted(trx)) {
911 DICT_BG_YIELD(trx);
912 }
913 if (index_cache->words) {
914 fts_words_free(index_cache->words);
915 rbt_free(index_cache->words);
916 }
917
918 ib_vector_remove(cache->indexes, *(void**) index_cache);
919 }
920
921 if (cache->get_docs) {
922 fts_reset_get_doc(cache);
923 }
924
925 rw_lock_x_unlock(&cache->init_lock);
926 }
927
928 err = fts_drop_index_tables(trx, index);
929
930 ib_vector_remove(indexes, (const void*) index);
931
932 return(err);
933 }
934
935 /****************************************************************//**
936 Free the query graph but check whether dict_sys->mutex is already
937 held */
938 UNIV_INTERN
939 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)940 fts_que_graph_free_check_lock(
941 /*==========================*/
942 fts_table_t* fts_table, /*!< in: FTS table */
943 const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
944 que_t* graph) /*!< in: query graph */
945 {
946 ibool has_dict = FALSE;
947
948 if (fts_table && fts_table->table) {
949 ut_ad(fts_table->table->fts);
950
951 has_dict = fts_table->table->fts->fts_status
952 & TABLE_DICT_LOCKED;
953 } else if (index_cache) {
954 ut_ad(index_cache->index->table->fts);
955
956 has_dict = index_cache->index->table->fts->fts_status
957 & TABLE_DICT_LOCKED;
958 }
959
960 if (!has_dict) {
961 mutex_enter(&dict_sys->mutex);
962 }
963
964 ut_ad(mutex_own(&dict_sys->mutex));
965
966 que_graph_free(graph);
967
968 if (!has_dict) {
969 mutex_exit(&dict_sys->mutex);
970 }
971 }
972
973 /****************************************************************//**
974 Create an FTS index cache. */
975 UNIV_INTERN
976 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)977 fts_index_get_charset(
978 /*==================*/
979 dict_index_t* index) /*!< in: FTS index */
980 {
981 CHARSET_INFO* charset = NULL;
982 dict_field_t* field;
983 ulint prtype;
984
985 field = dict_index_get_nth_field(index, 0);
986 prtype = field->col->prtype;
987
988 charset = innobase_get_fts_charset(
989 (int) (prtype & DATA_MYSQL_TYPE_MASK),
990 (uint) dtype_get_charset_coll(prtype));
991
992 #ifdef FTS_DEBUG
993 /* Set up charset info for this index. Please note all
994 field of the FTS index should have the same charset */
995 for (i = 1; i < index->n_fields; i++) {
996 CHARSET_INFO* fld_charset;
997
998 field = dict_index_get_nth_field(index, i);
999 prtype = field->col->prtype;
1000
1001 fld_charset = innobase_get_fts_charset(
1002 (int)(prtype & DATA_MYSQL_TYPE_MASK),
1003 (uint) dtype_get_charset_coll(prtype));
1004
1005 /* All FTS columns should have the same charset */
1006 if (charset) {
1007 ut_a(charset == fld_charset);
1008 } else {
1009 charset = fld_charset;
1010 }
1011 }
1012 #endif
1013
1014 return(charset);
1015
1016 }
1017 /****************************************************************//**
1018 Create an FTS index cache.
1019 @return Index Cache */
1020 UNIV_INTERN
1021 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)1022 fts_cache_index_cache_create(
1023 /*=========================*/
1024 dict_table_t* table, /*!< in: table with FTS index */
1025 dict_index_t* index) /*!< in: FTS index */
1026 {
1027 ulint n_bytes;
1028 fts_index_cache_t* index_cache;
1029 fts_cache_t* cache = table->fts->cache;
1030
1031 ut_a(cache != NULL);
1032
1033 #ifdef UNIV_SYNC_DEBUG
1034 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
1035 #endif
1036
1037 /* Must not already exist in the cache vector. */
1038 ut_a(fts_find_index_cache(cache, index) == NULL);
1039
1040 index_cache = static_cast<fts_index_cache_t*>(
1041 ib_vector_push(cache->indexes, NULL));
1042
1043 memset(index_cache, 0x0, sizeof(*index_cache));
1044
1045 index_cache->index = index;
1046
1047 index_cache->charset = fts_index_get_charset(index);
1048
1049 n_bytes = sizeof(que_t*) * sizeof(fts_index_selector);
1050
1051 index_cache->ins_graph = static_cast<que_t**>(
1052 mem_heap_zalloc(static_cast<mem_heap_t*>(
1053 cache->self_heap->arg), n_bytes));
1054
1055 index_cache->sel_graph = static_cast<que_t**>(
1056 mem_heap_zalloc(static_cast<mem_heap_t*>(
1057 cache->self_heap->arg), n_bytes));
1058
1059 fts_index_cache_init(cache->sync_heap, index_cache);
1060
1061 if (cache->get_docs) {
1062 fts_reset_get_doc(cache);
1063 }
1064
1065 return(index_cache);
1066 }
1067
1068 /****************************************************************//**
1069 Release all resources help by the words rb tree e.g., the node ilist. */
1070 static
1071 void
fts_words_free(ib_rbt_t * words)1072 fts_words_free(
1073 /*===========*/
1074 ib_rbt_t* words) /*!< in: rb tree of words */
1075 {
1076 const ib_rbt_node_t* rbt_node;
1077
1078 /* Free the resources held by a word. */
1079 for (rbt_node = rbt_first(words);
1080 rbt_node != NULL;
1081 rbt_node = rbt_first(words)) {
1082
1083 ulint i;
1084 fts_tokenizer_word_t* word;
1085
1086 word = rbt_value(fts_tokenizer_word_t, rbt_node);
1087
1088 /* Free the ilists of this word. */
1089 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1090
1091 fts_node_t* fts_node = static_cast<fts_node_t*>(
1092 ib_vector_get(word->nodes, i));
1093
1094 ut_free(fts_node->ilist);
1095 fts_node->ilist = NULL;
1096 }
1097
1098 /* NOTE: We are responsible for free'ing the node */
1099 ut_free(rbt_remove_node(words, rbt_node));
1100 }
1101 }
1102
1103 /** Clear cache.
1104 @param[in,out] cache fts cache */
1105 UNIV_INTERN
1106 void
fts_cache_clear(fts_cache_t * cache)1107 fts_cache_clear(
1108 fts_cache_t* cache)
1109 {
1110 ulint i;
1111
1112 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1113 ulint j;
1114 fts_index_cache_t* index_cache;
1115
1116 index_cache = static_cast<fts_index_cache_t*>(
1117 ib_vector_get(cache->indexes, i));
1118
1119 fts_words_free(index_cache->words);
1120
1121 rbt_free(index_cache->words);
1122
1123 index_cache->words = NULL;
1124
1125 for (j = 0; fts_index_selector[j].value; ++j) {
1126
1127 if (index_cache->ins_graph[j] != NULL) {
1128
1129 fts_que_graph_free_check_lock(
1130 NULL, index_cache,
1131 index_cache->ins_graph[j]);
1132
1133 index_cache->ins_graph[j] = NULL;
1134 }
1135
1136 if (index_cache->sel_graph[j] != NULL) {
1137
1138 fts_que_graph_free_check_lock(
1139 NULL, index_cache,
1140 index_cache->sel_graph[j]);
1141
1142 index_cache->sel_graph[j] = NULL;
1143 }
1144 }
1145
1146 index_cache->doc_stats = NULL;
1147 }
1148
1149 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1150 cache->sync_heap->arg = NULL;
1151
1152 cache->total_size = 0;
1153
1154 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1155 cache->deleted_doc_ids = NULL;
1156 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1157 }
1158
1159 /*********************************************************************//**
1160 Search the index specific cache for a particular FTS index.
1161 @return the index cache else NULL */
1162 UNIV_INLINE
1163 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1164 fts_get_index_cache(
1165 /*================*/
1166 fts_cache_t* cache, /*!< in: cache to search */
1167 const dict_index_t* index) /*!< in: index to search for */
1168 {
1169 ulint i;
1170
1171 #ifdef UNIV_SYNC_DEBUG
1172 ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX)
1173 || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX));
1174 #endif
1175
1176 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1177 fts_index_cache_t* index_cache;
1178
1179 index_cache = static_cast<fts_index_cache_t*>(
1180 ib_vector_get(cache->indexes, i));
1181
1182 if (index_cache->index == index) {
1183
1184 return(index_cache);
1185 }
1186 }
1187
1188 return(NULL);
1189 }
1190
#ifdef FTS_DEBUG
/** Search the cache's get_docs vector for the entry that belongs to a
given FTS index. Only compiled in FTS_DEBUG builds.
@param[in]	cache	cache to search
@param[in]	index	index to search for
@return the fts_get_doc_t item else NULL */
static
fts_get_doc_t*
fts_get_index_get_doc(
	fts_cache_t*		cache,
	const dict_index_t*	index)
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX));
#endif

	fts_get_doc_t*	match = NULL;

	/* Linear scan; stop at the first entry whose index cache refers
	to the requested index. */
	for (ulint pos = 0;
	     match == NULL && pos < ib_vector_size(cache->get_docs);
	     ++pos) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			match = candidate;
		}
	}

	return(match);
}
#endif
1223
1224 /**********************************************************************//**
1225 Free the FTS cache. */
1226 UNIV_INTERN
1227 void
fts_cache_destroy(fts_cache_t * cache)1228 fts_cache_destroy(
1229 /*==============*/
1230 fts_cache_t* cache) /*!< in: cache*/
1231 {
1232 rw_lock_free(&cache->lock);
1233 rw_lock_free(&cache->init_lock);
1234 mutex_free(&cache->optimize_lock);
1235 mutex_free(&cache->deleted_lock);
1236 mutex_free(&cache->doc_id_lock);
1237 os_event_free(cache->sync->event);
1238
1239 if (cache->stopword_info.cached_stopword) {
1240 rbt_free(cache->stopword_info.cached_stopword);
1241 }
1242
1243 if (cache->sync_heap->arg) {
1244 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1245 }
1246
1247 mem_heap_free(cache->cache_heap);
1248 }
1249
1250 /**********************************************************************//**
1251 Find an existing word, or if not found, create one and return it.
1252 @return specified word token */
1253 static
1254 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1255 fts_tokenizer_word_get(
1256 /*===================*/
1257 fts_cache_t* cache, /*!< in: cache */
1258 fts_index_cache_t*
1259 index_cache, /*!< in: index cache */
1260 fts_string_t* text) /*!< in: node text */
1261 {
1262 fts_tokenizer_word_t* word;
1263 ib_rbt_bound_t parent;
1264
1265 #ifdef UNIV_SYNC_DEBUG
1266 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
1267 #endif
1268
1269 /* If it is a stopword, do not index it */
1270 if (cache->stopword_info.cached_stopword != NULL
1271 && rbt_search(cache->stopword_info.cached_stopword,
1272 &parent, text) == 0) {
1273
1274 return(NULL);
1275 }
1276
1277 /* Check if we found a match, if not then add word to tree. */
1278 if (rbt_search(index_cache->words, &parent, text) != 0) {
1279 mem_heap_t* heap;
1280 fts_tokenizer_word_t new_word;
1281
1282 heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1283
1284 new_word.nodes = ib_vector_create(
1285 cache->sync_heap, sizeof(fts_node_t), 4);
1286
1287 fts_utf8_string_dup(&new_word.text, text, heap);
1288
1289 parent.last = rbt_add_node(
1290 index_cache->words, &parent, &new_word);
1291
1292 /* Take into account the RB tree memory use and the vector. */
1293 cache->total_size += sizeof(new_word)
1294 + sizeof(ib_rbt_node_t)
1295 + text->f_len
1296 + (sizeof(fts_node_t) * 4)
1297 + sizeof(*new_word.nodes);
1298
1299 ut_ad(rbt_validate(index_cache->words));
1300 }
1301
1302 word = rbt_value(fts_tokenizer_word_t, parent.last);
1303
1304 return(word);
1305 }
1306
1307 /**********************************************************************//**
1308 Add the given doc_id/word positions to the given node's ilist. */
1309 UNIV_INTERN
1310 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1311 fts_cache_node_add_positions(
1312 /*=========================*/
1313 fts_cache_t* cache, /*!< in: cache */
1314 fts_node_t* node, /*!< in: word node */
1315 doc_id_t doc_id, /*!< in: doc id */
1316 ib_vector_t* positions) /*!< in: fts_token_t::positions */
1317 {
1318 ulint i;
1319 byte* ptr;
1320 byte* ilist;
1321 ulint enc_len;
1322 ulint last_pos;
1323 byte* ptr_start;
1324 ulint doc_id_delta;
1325
1326 #ifdef UNIV_SYNC_DEBUG
1327 if (cache) {
1328 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
1329 }
1330 #endif
1331 ut_ad(doc_id >= node->last_doc_id);
1332
1333 /* Calculate the space required to store the ilist. */
1334 doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1335 enc_len = fts_get_encoded_len(doc_id_delta);
1336
1337 last_pos = 0;
1338 for (i = 0; i < ib_vector_size(positions); i++) {
1339 ulint pos = *(static_cast<ulint*>(
1340 ib_vector_get(positions, i)));
1341
1342 ut_ad(last_pos == 0 || pos > last_pos);
1343
1344 enc_len += fts_get_encoded_len(pos - last_pos);
1345 last_pos = pos;
1346 }
1347
1348 /* The 0x00 byte at the end of the token positions list. */
1349 enc_len++;
1350
1351 if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1352 /* No need to allocate more space, we can fit in the new
1353 data at the end of the old one. */
1354 ilist = NULL;
1355 ptr = node->ilist + node->ilist_size;
1356 } else {
1357 ulint new_size = node->ilist_size + enc_len;
1358
1359 /* Over-reserve space by a fixed size for small lengths and
1360 by 20% for lengths >= 48 bytes. */
1361 if (new_size < 16) {
1362 new_size = 16;
1363 } else if (new_size < 32) {
1364 new_size = 32;
1365 } else if (new_size < 48) {
1366 new_size = 48;
1367 } else {
1368 new_size = (ulint)(1.2 * new_size);
1369 }
1370
1371 ilist = static_cast<byte*>(ut_malloc(new_size));
1372 ptr = ilist + node->ilist_size;
1373
1374 node->ilist_size_alloc = new_size;
1375 }
1376
1377 ptr_start = ptr;
1378
1379 /* Encode the new fragment. */
1380 ptr += fts_encode_int(doc_id_delta, ptr);
1381
1382 last_pos = 0;
1383 for (i = 0; i < ib_vector_size(positions); i++) {
1384 ulint pos = *(static_cast<ulint*>(
1385 ib_vector_get(positions, i)));
1386
1387 ptr += fts_encode_int(pos - last_pos, ptr);
1388 last_pos = pos;
1389 }
1390
1391 *ptr++ = 0;
1392
1393 ut_a(enc_len == (ulint)(ptr - ptr_start));
1394
1395 if (ilist) {
1396 /* Copy old ilist to the start of the new one and switch the
1397 new one into place in the node. */
1398 if (node->ilist_size > 0) {
1399 memcpy(ilist, node->ilist, node->ilist_size);
1400 ut_free(node->ilist);
1401 }
1402
1403 node->ilist = ilist;
1404 }
1405
1406 node->ilist_size += enc_len;
1407
1408 if (cache) {
1409 cache->total_size += enc_len;
1410 }
1411
1412 if (node->first_doc_id == FTS_NULL_DOC_ID) {
1413 node->first_doc_id = doc_id;
1414 }
1415
1416 node->last_doc_id = doc_id;
1417 ++node->doc_count;
1418 }
1419
1420 /**********************************************************************//**
1421 Add document to the cache. */
1422 static
1423 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1424 fts_cache_add_doc(
1425 /*==============*/
1426 fts_cache_t* cache, /*!< in: cache */
1427 fts_index_cache_t*
1428 index_cache, /*!< in: index cache */
1429 doc_id_t doc_id, /*!< in: doc id to add */
1430 ib_rbt_t* tokens) /*!< in: document tokens */
1431 {
1432 const ib_rbt_node_t* node;
1433 ulint n_words;
1434 fts_doc_stats_t* doc_stats;
1435
1436 if (!tokens) {
1437 return;
1438 }
1439
1440 #ifdef UNIV_SYNC_DEBUG
1441 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
1442 #endif
1443
1444 n_words = rbt_size(tokens);
1445
1446 for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1447
1448 fts_tokenizer_word_t* word;
1449 fts_node_t* fts_node = NULL;
1450 fts_token_t* token = rbt_value(fts_token_t, node);
1451
1452 /* Find and/or add token to the cache. */
1453 word = fts_tokenizer_word_get(
1454 cache, index_cache, &token->text);
1455
1456 if (!word) {
1457 ut_free(rbt_remove_node(tokens, node));
1458 continue;
1459 }
1460
1461 if (ib_vector_size(word->nodes) > 0) {
1462 fts_node = static_cast<fts_node_t*>(
1463 ib_vector_last(word->nodes));
1464 }
1465
1466 if (fts_node == NULL || fts_node->synced
1467 || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1468 || doc_id < fts_node->last_doc_id) {
1469
1470 fts_node = static_cast<fts_node_t*>(
1471 ib_vector_push(word->nodes, NULL));
1472
1473 memset(fts_node, 0x0, sizeof(*fts_node));
1474
1475 cache->total_size += sizeof(*fts_node);
1476 }
1477
1478 fts_cache_node_add_positions(
1479 cache, fts_node, doc_id, token->positions);
1480
1481 ut_free(rbt_remove_node(tokens, node));
1482 }
1483
1484 ut_a(rbt_empty(tokens));
1485
1486 /* Add to doc ids processed so far. */
1487 doc_stats = static_cast<fts_doc_stats_t*>(
1488 ib_vector_push(index_cache->doc_stats, NULL));
1489
1490 doc_stats->doc_id = doc_id;
1491 doc_stats->word_count = n_words;
1492
1493 /* Add the doc stats memory usage too. */
1494 cache->total_size += sizeof(*doc_stats);
1495
1496 if (doc_id > cache->sync->max_doc_id) {
1497 cache->sync->max_doc_id = doc_id;
1498 }
1499 }
1500
1501 /****************************************************************//**
1502 Drops a table. If the table can't be found we return a SUCCESS code.
1503 @return DB_SUCCESS or error code */
1504 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1505 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1506 fts_drop_table(
1507 /*===========*/
1508 trx_t* trx, /*!< in: transaction */
1509 const char* table_name) /*!< in: table to drop */
1510 {
1511 dict_table_t* table;
1512 dberr_t error = DB_SUCCESS;
1513
1514 /* Check that the table exists in our data dictionary.
1515 Similar to regular drop table case, we will open table with
1516 DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1517 table = dict_table_open_on_name(
1518 table_name, TRUE, FALSE,
1519 static_cast<dict_err_ignore_t>(
1520 DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1521
1522 if (table != 0) {
1523
1524 dict_table_close(table, TRUE, FALSE);
1525
1526 /* Pass nonatomic=false (dont allow data dict unlock),
1527 because the transaction may hold locks on SYS_* tables from
1528 previous calls to fts_drop_table(). */
1529 error = row_drop_table_for_mysql(table_name, trx, true, false);
1530
1531 if (error != DB_SUCCESS) {
1532 ib_logf(IB_LOG_LEVEL_ERROR,
1533 "Unable to drop FTS index aux table %s: %s",
1534 table_name, ut_strerr(error));
1535 }
1536 } else {
1537 error = DB_FAIL;
1538 }
1539
1540 return(error);
1541 }
1542
1543 /****************************************************************//**
1544 Rename a single auxiliary table due to database name change.
1545 @return DB_SUCCESS or error code */
1546 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1547 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1548 fts_rename_one_aux_table(
1549 /*=====================*/
1550 const char* new_name, /*!< in: new parent tbl name */
1551 const char* fts_table_old_name, /*!< in: old aux tbl name */
1552 trx_t* trx) /*!< in: transaction */
1553 {
1554 char fts_table_new_name[MAX_TABLE_NAME_LEN];
1555 ulint new_db_name_len = dict_get_db_name_len(new_name);
1556 ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1557 ulint table_new_name_len = strlen(fts_table_old_name)
1558 + new_db_name_len - old_db_name_len;
1559
1560 /* Check if the new and old database names are the same, if so,
1561 nothing to do */
1562 ut_ad((new_db_name_len != old_db_name_len)
1563 || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1564
1565 /* Get the database name from "new_name", and table name
1566 from the fts_table_old_name */
1567 strncpy(fts_table_new_name, new_name, new_db_name_len);
1568 strncpy(fts_table_new_name + new_db_name_len,
1569 strchr(fts_table_old_name, '/'),
1570 table_new_name_len - new_db_name_len);
1571 fts_table_new_name[table_new_name_len] = 0;
1572
1573 return(row_rename_table_for_mysql(
1574 fts_table_old_name, fts_table_new_name, trx, false));
1575 }
1576
1577 /****************************************************************//**
1578 Rename auxiliary tables for all fts index for a table. This(rename)
1579 is due to database name change
1580 @return DB_SUCCESS or error code */
1581
1582 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1583 fts_rename_aux_tables(
1584 /*==================*/
1585 dict_table_t* table, /*!< in: user Table */
1586 const char* new_name, /*!< in: new table name */
1587 trx_t* trx) /*!< in: transaction */
1588 {
1589 ulint i;
1590 fts_table_t fts_table;
1591
1592 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1593
1594 /* Rename common auxiliary tables */
1595 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1596 char* old_table_name;
1597 dberr_t err = DB_SUCCESS;
1598
1599 fts_table.suffix = fts_common_tables[i];
1600
1601 old_table_name = fts_get_table_name(&fts_table);
1602
1603 err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1604
1605 mem_free(old_table_name);
1606
1607 if (err != DB_SUCCESS) {
1608 return(err);
1609 }
1610 }
1611
1612 fts_t* fts = table->fts;
1613
1614 /* Rename index specific auxiliary tables */
1615 for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1616 ++i) {
1617 dict_index_t* index;
1618
1619 index = static_cast<dict_index_t*>(
1620 ib_vector_getp(fts->indexes, i));
1621
1622 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1623
1624 for (ulint j = 0; fts_index_selector[j].value; ++j) {
1625 dberr_t err;
1626 char* old_table_name;
1627
1628 fts_table.suffix = fts_get_suffix(j);
1629
1630 old_table_name = fts_get_table_name(&fts_table);
1631
1632 err = fts_rename_one_aux_table(
1633 new_name, old_table_name, trx);
1634
1635 DBUG_EXECUTE_IF("fts_rename_failure",
1636 err = DB_DEADLOCK;
1637 fts_sql_rollback(trx););
1638
1639 mem_free(old_table_name);
1640
1641 if (err != DB_SUCCESS) {
1642 return(err);
1643 }
1644 }
1645 }
1646
1647 return(DB_SUCCESS);
1648 }
1649
1650 /****************************************************************//**
1651 Drops the common ancillary tables needed for supporting an FTS index
1652 on the given table. row_mysql_lock_data_dictionary must have been called
1653 before this.
1654 @return DB_SUCCESS or error code */
1655 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1656 dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table)1657 fts_drop_common_tables(
1658 /*===================*/
1659 trx_t* trx, /*!< in: transaction */
1660 fts_table_t* fts_table) /*!< in: table with an FTS
1661 index */
1662 {
1663 ulint i;
1664 dberr_t error = DB_SUCCESS;
1665
1666 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1667 dberr_t err;
1668 char* table_name;
1669
1670 fts_table->suffix = fts_common_tables[i];
1671
1672 table_name = fts_get_table_name(fts_table);
1673
1674 err = fts_drop_table(trx, table_name);
1675
1676 /* We only return the status of the last error. */
1677 if (err != DB_SUCCESS && err != DB_FAIL) {
1678 error = err;
1679 }
1680
1681 mem_free(table_name);
1682 }
1683
1684 return(error);
1685 }
1686
1687 /****************************************************************//**
1688 Since we do a horizontal split on the index table, we need to drop
1689 all the split tables.
1690 @return DB_SUCCESS or error code */
1691 UNIV_INTERN
1692 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1693 fts_drop_index_split_tables(
1694 /*========================*/
1695 trx_t* trx, /*!< in: transaction */
1696 dict_index_t* index) /*!< in: fts instance */
1697
1698 {
1699 ulint i;
1700 fts_table_t fts_table;
1701 dberr_t error = DB_SUCCESS;
1702
1703 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1704
1705 for (i = 0; fts_index_selector[i].value; ++i) {
1706 dberr_t err;
1707 char* table_name;
1708
1709 fts_table.suffix = fts_get_suffix(i);
1710
1711 table_name = fts_get_table_name(&fts_table);
1712
1713 err = fts_drop_table(trx, table_name);
1714
1715 /* We only return the status of the last error. */
1716 if (err != DB_SUCCESS && err != DB_FAIL) {
1717 error = err;
1718 }
1719
1720 mem_free(table_name);
1721 }
1722
1723 return(error);
1724 }
1725
1726 /****************************************************************//**
1727 Drops FTS auxiliary tables for an FTS index
1728 @return DB_SUCCESS or error code */
1729 UNIV_INTERN
1730 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1731 fts_drop_index_tables(
1732 /*==================*/
1733 trx_t* trx, /*!< in: transaction */
1734 dict_index_t* index) /*!< in: Index to drop */
1735 {
1736 dberr_t error = DB_SUCCESS;
1737
1738 #ifdef FTS_DOC_STATS_DEBUG
1739 fts_table_t fts_table;
1740 static const char* index_tables[] = {
1741 "DOC_ID",
1742 NULL
1743 };
1744 #endif /* FTS_DOC_STATS_DEBUG */
1745
1746 dberr_t err = fts_drop_index_split_tables(trx, index);
1747
1748 /* We only return the status of the last error. */
1749 if (err != DB_SUCCESS) {
1750 error = err;
1751 }
1752
1753 #ifdef FTS_DOC_STATS_DEBUG
1754 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1755
1756 for (ulint i = 0; index_tables[i] != NULL; ++i) {
1757 char* table_name;
1758
1759 fts_table.suffix = index_tables[i];
1760
1761 table_name = fts_get_table_name(&fts_table);
1762
1763 err = fts_drop_table(trx, table_name);
1764
1765 /* We only return the status of the last error. */
1766 if (err != DB_SUCCESS && err != DB_FAIL) {
1767 error = err;
1768 }
1769
1770 mem_free(table_name);
1771 }
1772 #endif /* FTS_DOC_STATS_DEBUG */
1773
1774 return(error);
1775 }
1776
1777 /****************************************************************//**
1778 Drops FTS ancillary tables needed for supporting an FTS index
1779 on the given table. row_mysql_lock_data_dictionary must have been called
1780 before this.
1781 @return DB_SUCCESS or error code */
1782 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1783 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1784 fts_drop_all_index_tables(
1785 /*======================*/
1786 trx_t* trx, /*!< in: transaction */
1787 fts_t* fts) /*!< in: fts instance */
1788 {
1789 dberr_t error = DB_SUCCESS;
1790
1791 for (ulint i = 0;
1792 fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1793 ++i) {
1794
1795 dberr_t err;
1796 dict_index_t* index;
1797
1798 index = static_cast<dict_index_t*>(
1799 ib_vector_getp(fts->indexes, i));
1800
1801 err = fts_drop_index_tables(trx, index);
1802
1803 if (err != DB_SUCCESS) {
1804 error = err;
1805 }
1806 }
1807
1808 return(error);
1809 }
1810
1811 /*********************************************************************//**
1812 Drops the ancillary tables needed for supporting an FTS index on a
1813 given table. row_mysql_lock_data_dictionary must have been called before
1814 this.
1815 @return DB_SUCCESS or error code */
1816 UNIV_INTERN
1817 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1818 fts_drop_tables(
1819 /*============*/
1820 trx_t* trx, /*!< in: transaction */
1821 dict_table_t* table) /*!< in: table has the FTS index */
1822 {
1823 dberr_t error;
1824 fts_table_t fts_table;
1825
1826 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1827
1828 /* TODO: This is not atomic and can cause problems during recovery. */
1829
1830 error = fts_drop_common_tables(trx, &fts_table);
1831
1832 if (error == DB_SUCCESS) {
1833 error = fts_drop_all_index_tables(trx, table->fts);
1834 }
1835
1836 return(error);
1837 }
1838
1839 /*********************************************************************//**
1840 Prepare the SQL, so that all '%s' are replaced by the common prefix.
1841 @return sql string, use mem_free() to free the memory */
1842 static
1843 char*
fts_prepare_sql(fts_table_t * fts_table,const char * my_template)1844 fts_prepare_sql(
1845 /*============*/
1846 fts_table_t* fts_table, /*!< in: table name info */
1847 const char* my_template) /*!< in: sql template */
1848 {
1849 char* sql;
1850 char* name_prefix;
1851
1852 name_prefix = fts_get_table_name_prefix(fts_table);
1853 sql = ut_strreplace(my_template, "%s", name_prefix);
1854 mem_free(name_prefix);
1855
1856 return(sql);
1857 }
1858
1859 /*********************************************************************//**
1860 Creates the common ancillary tables needed for supporting an FTS index
1861 on the given table. row_mysql_lock_data_dictionary must have been called
1862 before this.
1863 @return DB_SUCCESS if succeed */
1864 UNIV_INTERN
1865 dberr_t
fts_create_common_tables(trx_t * trx,const dict_table_t * table,const char * name,bool skip_doc_id_index)1866 fts_create_common_tables(
1867 /*=====================*/
1868 trx_t* trx, /*!< in: transaction */
1869 const dict_table_t* table, /*!< in: table with FTS index */
1870 const char* name, /*!< in: table name normalized.*/
1871 bool skip_doc_id_index)/*!< in: Skip index on doc id */
1872 {
1873 char* sql;
1874 dberr_t error;
1875 que_t* graph;
1876 fts_table_t fts_table;
1877 mem_heap_t* heap = mem_heap_create(1024);
1878 pars_info_t* info;
1879
1880 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1881
1882 error = fts_drop_common_tables(trx, &fts_table);
1883
1884 if (error != DB_SUCCESS) {
1885
1886 goto func_exit;
1887 }
1888
1889 /* Create the FTS tables that are common to an FTS index. */
1890 sql = fts_prepare_sql(&fts_table, fts_create_common_tables_sql);
1891 graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql);
1892 mem_free(sql);
1893
1894 error = fts_eval_sql(trx, graph);
1895
1896 que_graph_free(graph);
1897
1898 if (error != DB_SUCCESS) {
1899
1900 goto func_exit;
1901 }
1902
1903 /* Write the default settings to the config table. */
1904 fts_table.suffix = "CONFIG";
1905 graph = fts_parse_sql_no_dict_lock(
1906 &fts_table, NULL, fts_config_table_insert_values_sql);
1907
1908 error = fts_eval_sql(trx, graph);
1909
1910 que_graph_free(graph);
1911
1912 if (error != DB_SUCCESS || skip_doc_id_index) {
1913
1914 goto func_exit;
1915 }
1916
1917 info = pars_info_create();
1918
1919 pars_info_bind_id(info, TRUE, "table_name", name);
1920 pars_info_bind_id(info, TRUE, "index_name", FTS_DOC_ID_INDEX_NAME);
1921 pars_info_bind_id(info, TRUE, "doc_id_col_name", FTS_DOC_ID_COL_NAME);
1922
1923 /* Create the FTS DOC_ID index on the hidden column. Currently this
1924 is common for any FT index created on the table. */
1925 graph = fts_parse_sql_no_dict_lock(
1926 NULL,
1927 info,
1928 mem_heap_printf(
1929 heap,
1930 "BEGIN\n"
1931 ""
1932 "CREATE UNIQUE INDEX $index_name ON $table_name("
1933 "$doc_id_col_name);\n"));
1934
1935 error = fts_eval_sql(trx, graph);
1936 que_graph_free(graph);
1937
1938 func_exit:
1939 if (error != DB_SUCCESS) {
1940 /* We have special error handling here */
1941
1942 trx->error_state = DB_SUCCESS;
1943
1944 trx_rollback_to_savepoint(trx, NULL);
1945
1946 row_drop_table_for_mysql(table->name, trx, FALSE);
1947
1948 trx->error_state = DB_SUCCESS;
1949 }
1950
1951 mem_heap_free(heap);
1952
1953 return(error);
1954 }
1955
1956 /*************************************************************//**
1957 Wrapper function of fts_create_index_tables_low(), create auxiliary
1958 tables for an FTS index
1959 @return: DB_SUCCESS or error code */
1960 static
1961 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)1962 fts_create_one_index_table(
1963 /*=======================*/
1964 trx_t* trx, /*!< in: transaction */
1965 const dict_index_t*
1966 index, /*!< in: the index instance */
1967 fts_table_t* fts_table, /*!< in: fts_table structure */
1968 mem_heap_t* heap) /*!< in: heap */
1969 {
1970 dict_field_t* field;
1971 dict_table_t* new_table = NULL;
1972 char* table_name = fts_get_table_name(fts_table);
1973 dberr_t error;
1974 CHARSET_INFO* charset;
1975 ulint flags2 = 0;
1976
1977 ut_ad(index->type & DICT_FTS);
1978
1979 if (srv_file_per_table) {
1980 flags2 = DICT_TF2_USE_TABLESPACE;
1981 }
1982
1983 new_table = dict_mem_table_create(table_name, 0, 5, 1, flags2);
1984
1985 field = dict_index_get_nth_field(index, 0);
1986 charset = innobase_get_fts_charset(
1987 (int)(field->col->prtype & DATA_MYSQL_TYPE_MASK),
1988 (uint) dtype_get_charset_coll(field->col->prtype));
1989
1990 if (strcmp(charset->name, "latin1_swedish_ci") == 0) {
1991 dict_mem_table_add_col(new_table, heap, "word", DATA_VARCHAR,
1992 field->col->prtype, FTS_MAX_WORD_LEN);
1993 } else {
1994 dict_mem_table_add_col(new_table, heap, "word", DATA_VARMYSQL,
1995 field->col->prtype, FTS_MAX_WORD_LEN);
1996 }
1997
1998 dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
1999 DATA_NOT_NULL | DATA_UNSIGNED,
2000 sizeof(doc_id_t));
2001
2002 dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2003 DATA_NOT_NULL | DATA_UNSIGNED,
2004 sizeof(doc_id_t));
2005
2006 dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2007 DATA_NOT_NULL | DATA_UNSIGNED, 4);
2008
2009 dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB,
2010 4130048, 0);
2011
2012 error = row_create_table_for_mysql(new_table, trx, false);
2013
2014 if (error != DB_SUCCESS) {
2015 trx->error_state = error;
2016 dict_mem_table_free(new_table);
2017 new_table = NULL;
2018 ib_logf(IB_LOG_LEVEL_WARN,
2019 "Fail to create FTS index table %s", table_name);
2020 }
2021
2022 mem_free(table_name);
2023
2024 return(new_table);
2025 }
2026
2027 /*************************************************************//**
2028 Wrapper function of fts_create_index_tables_low(), create auxiliary
2029 tables for an FTS index
2030 @return: DB_SUCCESS or error code */
2031 UNIV_INTERN
2032 dberr_t
fts_create_index_tables_low(trx_t * trx,const dict_index_t * index,const char * table_name,table_id_t table_id)2033 fts_create_index_tables_low(
2034 /*========================*/
2035 trx_t* trx, /*!< in: transaction */
2036 const dict_index_t*
2037 index, /*!< in: the index instance */
2038 const char* table_name, /*!< in: the table name */
2039 table_id_t table_id) /*!< in: the table id */
2040
2041 {
2042 ulint i;
2043 que_t* graph;
2044 fts_table_t fts_table;
2045 dberr_t error = DB_SUCCESS;
2046 mem_heap_t* heap = mem_heap_create(1024);
2047
2048 fts_table.type = FTS_INDEX_TABLE;
2049 fts_table.index_id = index->id;
2050 fts_table.table_id = table_id;
2051 fts_table.parent = table_name;
2052 fts_table.table = index->table;
2053
2054 #ifdef FTS_DOC_STATS_DEBUG
2055 char* sql;
2056
2057 /* Create the FTS auxiliary tables that are specific
2058 to an FTS index. */
2059 sql = fts_prepare_sql(&fts_table, fts_create_index_tables_sql);
2060
2061 graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql);
2062 mem_free(sql);
2063
2064 error = fts_eval_sql(trx, graph);
2065 que_graph_free(graph);
2066 #endif /* FTS_DOC_STATS_DEBUG */
2067
2068 for (i = 0; fts_index_selector[i].value && error == DB_SUCCESS; ++i) {
2069 dict_table_t* new_table;
2070
2071 /* Create the FTS auxiliary tables that are specific
2072 to an FTS index. We need to preserve the table_id %s
2073 which fts_parse_sql_no_dict_lock() will fill in for us. */
2074 fts_table.suffix = fts_get_suffix(i);
2075
2076 new_table = fts_create_one_index_table(
2077 trx, index, &fts_table, heap);
2078
2079 if (!new_table) {
2080 error = DB_FAIL;
2081 break;
2082 }
2083
2084 graph = fts_parse_sql_no_dict_lock(
2085 &fts_table, NULL, fts_create_index_sql);
2086
2087 error = fts_eval_sql(trx, graph);
2088 que_graph_free(graph);
2089 }
2090
2091 if (error != DB_SUCCESS) {
2092 /* We have special error handling here */
2093
2094 trx->error_state = DB_SUCCESS;
2095
2096 trx_rollback_to_savepoint(trx, NULL);
2097
2098 row_drop_table_for_mysql(table_name, trx, FALSE);
2099
2100 trx->error_state = DB_SUCCESS;
2101 }
2102
2103 mem_heap_free(heap);
2104
2105 return(error);
2106 }
2107
2108 /******************************************************************//**
2109 Creates the column specific ancillary tables needed for supporting an
2110 FTS index on the given table. row_mysql_lock_data_dictionary must have
2111 been called before this.
2112 @return DB_SUCCESS or error code */
2113 UNIV_INTERN
2114 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index)2115 fts_create_index_tables(
2116 /*====================*/
2117 trx_t* trx, /*!< in: transaction */
2118 const dict_index_t* index) /*!< in: the index instance */
2119 {
2120 dberr_t err;
2121 dict_table_t* table;
2122
2123 table = dict_table_get_low(index->table_name);
2124 ut_a(table != NULL);
2125
2126 err = fts_create_index_tables_low(trx, index, table->name, table->id);
2127
2128 if (err == DB_SUCCESS) {
2129 trx_commit(trx);
2130 }
2131
2132 return(err);
2133 }
#if 0
/** Return string representation of state. Currently compiled out.
@param[in]	state	state
@return string representation of state, "UNKNOWN" for any other value */
static
const char*
fts_get_state_str(
	fts_row_state	state)
{
	const char*	str;

	switch (state) {
	case FTS_INSERT:
		str = "INSERT";
		break;

	case FTS_MODIFY:
		str = "MODIFY";
		break;

	case FTS_DELETE:
		str = "DELETE";
		break;

	case FTS_NOTHING:
		str = "NOTHING";
		break;

	case FTS_INVALID:
		str = "INVALID";
		break;

	default:
		str = "UNKNOWN";
		break;
	}

	return(str);
}
#endif
2165
2166 /******************************************************************//**
2167 Calculate the new state of a row given the existing state and a new event.
2168 @return new state of row */
2169 static
2170 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2171 fts_trx_row_get_new_state(
2172 /*======================*/
2173 fts_row_state old_state, /*!< in: existing state of row */
2174 fts_row_state event) /*!< in: new event */
2175 {
2176 /* The rules for transforming states:
2177
2178 I = inserted
2179 M = modified
2180 D = deleted
2181 N = nothing
2182
2183 M+D -> D:
2184
2185 If the row existed before the transaction started and it is modified
2186 during the transaction, followed by a deletion of the row, only the
2187 deletion will be signaled.
2188
2189 M+ -> M:
2190
2191 If the row existed before the transaction started and it is modified
2192 more than once during the transaction, only the last modification
2193 will be signaled.
2194
2195 IM*D -> N:
2196
2197 If a new row is added during the transaction (and possibly modified
2198 after its initial insertion) but it is deleted before the end of the
2199 transaction, nothing will be signaled.
2200
2201 IM* -> I:
2202
2203 If a new row is added during the transaction and modified after its
2204 initial insertion, only the addition will be signaled.
2205
2206 M*DI -> M:
2207
2208 If the row existed before the transaction started and it is deleted,
2209 then re-inserted, only a modification will be signaled. Note that
2210 this case is only possible if the table is using the row's primary
2211 key for FTS row ids, since those can be re-inserted by the user,
2212 which is not true for InnoDB generated row ids.
2213
2214 It is easily seen that the above rules decompose such that we do not
2215 need to store the row's entire history of events. Instead, we can
2216 store just one state for the row and update that when new events
2217 arrive. Then we can implement the above rules as a two-dimensional
2218 look-up table, and get checking of invalid combinations "for free"
2219 in the process. */
2220
2221 /* The lookup table for transforming states. old_state is the
2222 Y-axis, event is the X-axis. */
2223 static const fts_row_state table[4][4] = {
2224 /* I M D N */
2225 /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
2226 /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
2227 /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
2228 /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2229 };
2230
2231 fts_row_state result;
2232
2233 ut_a(old_state < FTS_INVALID);
2234 ut_a(event < FTS_INVALID);
2235
2236 result = table[(int) old_state][(int) event];
2237 ut_a(result != FTS_INVALID);
2238
2239 return(result);
2240 }
2241
2242 /******************************************************************//**
2243 Create a savepoint instance.
2244 @return savepoint instance */
2245 static
2246 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2247 fts_savepoint_create(
2248 /*=================*/
2249 ib_vector_t* savepoints, /*!< out: InnoDB transaction */
2250 const char* name, /*!< in: savepoint name */
2251 mem_heap_t* heap) /*!< in: heap */
2252 {
2253 fts_savepoint_t* savepoint;
2254
2255 savepoint = static_cast<fts_savepoint_t*>(
2256 ib_vector_push(savepoints, NULL));
2257
2258 memset(savepoint, 0x0, sizeof(*savepoint));
2259
2260 if (name) {
2261 savepoint->name = mem_heap_strdup(heap, name);
2262 }
2263
2264 savepoint->tables = rbt_create(
2265 sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2266
2267 return(savepoint);
2268 }
2269
2270 /******************************************************************//**
2271 Create an FTS trx.
2272 @return FTS trx */
2273 static
2274 fts_trx_t*
fts_trx_create(trx_t * trx)2275 fts_trx_create(
2276 /*===========*/
2277 trx_t* trx) /*!< in/out: InnoDB
2278 transaction */
2279 {
2280 fts_trx_t* ftt;
2281 ib_alloc_t* heap_alloc;
2282 mem_heap_t* heap = mem_heap_create(1024);
2283 trx_named_savept_t* savep;
2284
2285 ut_a(trx->fts_trx == NULL);
2286
2287 ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2288 ftt->trx = trx;
2289 ftt->heap = heap;
2290
2291 heap_alloc = ib_heap_allocator_create(heap);
2292
2293 ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2294 heap_alloc, sizeof(fts_savepoint_t), 4));
2295
2296 ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2297 heap_alloc, sizeof(fts_savepoint_t), 4));
2298
2299 /* Default instance has no name and no heap. */
2300 fts_savepoint_create(ftt->savepoints, NULL, NULL);
2301 fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2302
2303 /* Copy savepoints that already set before. */
2304 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2305 savep != NULL;
2306 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2307
2308 fts_savepoint_take(trx, ftt, savep->name);
2309 }
2310
2311 return(ftt);
2312 }
2313
2314 /******************************************************************//**
2315 Create an FTS trx table.
2316 @return FTS trx table */
2317 static
2318 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2319 fts_trx_table_create(
2320 /*=================*/
2321 fts_trx_t* fts_trx, /*!< in: FTS trx */
2322 dict_table_t* table) /*!< in: table */
2323 {
2324 fts_trx_table_t* ftt;
2325
2326 ftt = static_cast<fts_trx_table_t*>(
2327 mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2328
2329 memset(ftt, 0x0, sizeof(*ftt));
2330
2331 ftt->table = table;
2332 ftt->fts_trx = fts_trx;
2333
2334 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2335
2336 return(ftt);
2337 }
2338
2339 /******************************************************************//**
2340 Clone an FTS trx table.
2341 @return FTS trx table */
2342 static
2343 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2344 fts_trx_table_clone(
2345 /*=================*/
2346 const fts_trx_table_t* ftt_src) /*!< in: FTS trx */
2347 {
2348 fts_trx_table_t* ftt;
2349
2350 ftt = static_cast<fts_trx_table_t*>(
2351 mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2352
2353 memset(ftt, 0x0, sizeof(*ftt));
2354
2355 ftt->table = ftt_src->table;
2356 ftt->fts_trx = ftt_src->fts_trx;
2357
2358 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2359
2360 /* Copy the rb tree values to the new savepoint. */
2361 rbt_merge_uniq(ftt->rows, ftt_src->rows);
2362
2363 /* These are only added on commit. At this stage we only have
2364 the updated row state. */
2365 ut_a(ftt_src->added_doc_ids == NULL);
2366
2367 return(ftt);
2368 }
2369
2370 /******************************************************************//**
2371 Initialize the FTS trx instance.
2372 @return FTS trx instance */
2373 static
2374 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2375 fts_trx_init(
2376 /*=========*/
2377 trx_t* trx, /*!< in: transaction */
2378 dict_table_t* table, /*!< in: FTS table instance */
2379 ib_vector_t* savepoints) /*!< in: Savepoints */
2380 {
2381 fts_trx_table_t* ftt;
2382 ib_rbt_bound_t parent;
2383 ib_rbt_t* tables;
2384 fts_savepoint_t* savepoint;
2385
2386 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2387
2388 tables = savepoint->tables;
2389 rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2390
2391 if (parent.result == 0) {
2392 fts_trx_table_t** fttp;
2393
2394 fttp = rbt_value(fts_trx_table_t*, parent.last);
2395 ftt = *fttp;
2396 } else {
2397 ftt = fts_trx_table_create(trx->fts_trx, table);
2398 rbt_add_node(tables, &parent, &ftt);
2399 }
2400
2401 ut_a(ftt->table == table);
2402
2403 return(ftt);
2404 }
2405
2406 /******************************************************************//**
2407 Notify the FTS system about an operation on an FTS-indexed table. */
2408 static
2409 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2410 fts_trx_table_add_op(
2411 /*=================*/
2412 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2413 doc_id_t doc_id, /*!< in: doc id */
2414 fts_row_state state, /*!< in: state of the row */
2415 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
2416 {
2417 ib_rbt_t* rows;
2418 ib_rbt_bound_t parent;
2419
2420 rows = ftt->rows;
2421 rbt_search(rows, &parent, &doc_id);
2422
2423 /* Row id found, update state, and if new state is FTS_NOTHING,
2424 we delete the row from our tree. */
2425 if (parent.result == 0) {
2426 fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
2427
2428 row->state = fts_trx_row_get_new_state(row->state, state);
2429
2430 if (row->state == FTS_NOTHING) {
2431 if (row->fts_indexes) {
2432 ib_vector_free(row->fts_indexes);
2433 }
2434
2435 ut_free(rbt_remove_node(rows, parent.last));
2436 row = NULL;
2437 } else if (row->fts_indexes != NULL) {
2438 ib_vector_free(row->fts_indexes);
2439 row->fts_indexes = fts_indexes;
2440 }
2441
2442 } else { /* Row-id not found, create a new one. */
2443 fts_trx_row_t row;
2444
2445 row.doc_id = doc_id;
2446 row.state = state;
2447 row.fts_indexes = fts_indexes;
2448
2449 rbt_add_node(rows, &parent, &row);
2450 }
2451 }
2452
2453 /******************************************************************//**
2454 Notify the FTS system about an operation on an FTS-indexed table. */
2455 UNIV_INTERN
2456 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2457 fts_trx_add_op(
2458 /*===========*/
2459 trx_t* trx, /*!< in: InnoDB transaction */
2460 dict_table_t* table, /*!< in: table */
2461 doc_id_t doc_id, /*!< in: new doc id */
2462 fts_row_state state, /*!< in: state of the row */
2463 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
2464 (NULL=all) */
2465 {
2466 fts_trx_table_t* tran_ftt;
2467 fts_trx_table_t* stmt_ftt;
2468
2469 if (!trx->fts_trx) {
2470 trx->fts_trx = fts_trx_create(trx);
2471 }
2472
2473 tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2474 stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2475
2476 fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2477 fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2478 }
2479
2480 /******************************************************************//**
2481 Fetch callback that converts a textual document id to a binary value and
2482 stores it in the given place.
2483 @return always returns NULL */
2484 static
2485 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2486 fts_fetch_store_doc_id(
2487 /*===================*/
2488 void* row, /*!< in: sel_node_t* */
2489 void* user_arg) /*!< in: doc_id_t* to store
2490 doc_id in */
2491 {
2492 int n_parsed;
2493 sel_node_t* node = static_cast<sel_node_t*>(row);
2494 doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
2495 dfield_t* dfield = que_node_get_val(node->select_list);
2496 dtype_t* type = dfield_get_type(dfield);
2497 ulint len = dfield_get_len(dfield);
2498
2499 char buf[32];
2500
2501 ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2502 ut_a(len > 0 && len < sizeof(buf));
2503
2504 memcpy(buf, dfield_get_data(dfield), len);
2505 buf[len] = '\0';
2506
2507 n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2508 ut_a(n_parsed == 1);
2509
2510 return(FALSE);
2511 }
2512
#ifdef FTS_CACHE_SIZE_DEBUG
/** Get the max cache size in bytes. If there is an error reading the
value we simply print an error message here and return the default
value to the caller. A configured value outside
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB] is
reported and clamped to the nearest limit.
@param[in,out]	trx		transaction
@param[in]	fts_table	table instance
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
	trx_t*		trx,
	fts_table_t*	fts_table)
{
	dberr_t		error;
	fts_string_t	value;
	ulint		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;

	/* ut_malloc() returns an untyped pointer, so the conversion to
	byte* has to be explicit in C++. */
	value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (error == DB_SUCCESS) {

		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		/* The %lu conversions below require unsigned long
		arguments, hence the explicit casts. */
		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ut_print_timestamp(stderr);
			fprintf(stderr, " InnoDB: Warning: FTS max cache size "
				" (%lu) out of range. Minimum value is "
				"%luMB and the maximum values is %luMB, "
				"setting cache size to upper limit\n",
				static_cast<unsigned long>(cache_size_in_mb),
				static_cast<unsigned long>(
					FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB),
				static_cast<unsigned long>(
					FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB));

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if (cache_size_in_mb
			   < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			ut_print_timestamp(stderr);
			fprintf(stderr, " InnoDB: Warning: FTS max cache size "
				" (%lu) out of range. Minimum value is "
				"%luMB and the maximum values is %luMB, "
				"setting cache size to lower limit\n",
				static_cast<unsigned long>(cache_size_in_mb),
				static_cast<unsigned long>(
					FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB),
				static_cast<unsigned long>(
					FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB));

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		/* Report the error by name: dberr_t must not be passed to
		a %lu conversion. */
		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: Error: (%s) reading max cache "
			"config value from config table\n", ut_strerr(error));
	}

	ut_free(value.f_str);

	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2585
#ifdef FTS_DOC_STATS_DEBUG
/** Get the total number of words in the FTS for a particular FTS index.
The count is read from the FTS_TOTAL_WORD_COUNT config value of the index
and parsed as a decimal number. On error a message is printed and *total
stays 0.
@param[in,out]	trx	transaction
@param[in]	index	for this index
@param[out]	total	total words
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
dberr_t
fts_get_total_word_count(
	trx_t*		trx,
	dict_index_t*	index,
	ulint*		total)
{
	fts_string_t	buf;

	*total = 0;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	buf.f_n_char = 0;
	buf.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	buf.f_str = static_cast<byte*>(ut_malloc(buf.f_len + 1));

	const dberr_t	status = fts_config_get_index_value(
		trx, index, FTS_TOTAL_WORD_COUNT, &buf);

	if (status != DB_SUCCESS) {
		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: Error: (%s) reading total words "
			"value from config table\n", ut_strerr(status));
	} else {
		/* Terminate the text before parsing it. */
		buf.f_str[buf.f_len] = 0;
		*total = strtoul((char*) buf.f_str, NULL, 10);
	}

	ut_free(buf.f_str);

	return(status);
}
#endif /* FTS_DOC_STATS_DEBUG */
2627
2628 /*********************************************************************//**
2629 Update the next and last Doc ID in the CONFIG table to be the input
2630 "doc_id" value (+ 1). We would do so after each FTS index build or
2631 table truncate */
2632 UNIV_INTERN
2633 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2634 fts_update_next_doc_id(
2635 /*===================*/
2636 trx_t* trx, /*!< in/out: transaction */
2637 const dict_table_t* table, /*!< in: table */
2638 const char* table_name, /*!< in: table name, or NULL */
2639 doc_id_t doc_id) /*!< in: DOC ID to set */
2640 {
2641 table->fts->cache->synced_doc_id = doc_id;
2642 table->fts->cache->next_doc_id = doc_id + 1;
2643
2644 table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2645
2646 fts_update_sync_doc_id(
2647 table, table_name, table->fts->cache->synced_doc_id, trx);
2648
2649 }
2650
2651 /*********************************************************************//**
2652 Get the next available document id.
2653 @return DB_SUCCESS if OK */
2654 UNIV_INTERN
2655 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2656 fts_get_next_doc_id(
2657 /*================*/
2658 const dict_table_t* table, /*!< in: table */
2659 doc_id_t* doc_id) /*!< out: new document id */
2660 {
2661 fts_cache_t* cache = table->fts->cache;
2662
2663 /* If the Doc ID system has not yet been initialized, we
2664 will consult the CONFIG table and user table to re-establish
2665 the initial value of the Doc ID */
2666
2667 if (cache->first_doc_id != 0 || !fts_init_doc_id(table)) {
2668 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2669 *doc_id = FTS_NULL_DOC_ID;
2670 return(DB_SUCCESS);
2671 }
2672
2673 /* Otherwise, simply increment the value in cache */
2674 mutex_enter(&cache->doc_id_lock);
2675 *doc_id = ++cache->next_doc_id;
2676 mutex_exit(&cache->doc_id_lock);
2677 } else {
2678 mutex_enter(&cache->doc_id_lock);
2679 *doc_id = cache->next_doc_id;
2680 mutex_exit(&cache->doc_id_lock);
2681 }
2682
2683 return(DB_SUCCESS);
2684 }
2685
2686 /*********************************************************************//**
2687 This function fetch the Doc ID from CONFIG table, and compare with
2688 the Doc ID supplied. And store the larger one to the CONFIG table.
2689 @return DB_SUCCESS if OK */
2690 static MY_ATTRIBUTE((nonnull))
2691 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2692 fts_cmp_set_sync_doc_id(
2693 /*====================*/
2694 const dict_table_t* table, /*!< in: table */
2695 doc_id_t doc_id_cmp, /*!< in: Doc ID to compare */
2696 ibool read_only, /*!< in: TRUE if read the
2697 synced_doc_id only */
2698 doc_id_t* doc_id) /*!< out: larger document id
2699 after comparing "doc_id_cmp"
2700 to the one stored in CONFIG
2701 table */
2702 {
2703 trx_t* trx;
2704 pars_info_t* info;
2705 dberr_t error;
2706 fts_table_t fts_table;
2707 que_t* graph = NULL;
2708 fts_cache_t* cache = table->fts->cache;
2709 retry:
2710 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2711
2712 fts_table.suffix = "CONFIG";
2713 fts_table.table_id = table->id;
2714 fts_table.type = FTS_COMMON_TABLE;
2715 fts_table.table = table;
2716
2717 fts_table.parent = table->name;
2718
2719 trx = trx_allocate_for_background();
2720
2721 trx->op_info = "update the next FTS document id";
2722
2723 info = pars_info_create();
2724
2725 pars_info_bind_function(
2726 info, "my_func", fts_fetch_store_doc_id, doc_id);
2727
2728 graph = fts_parse_sql(
2729 &fts_table, info,
2730 "DECLARE FUNCTION my_func;\n"
2731 "DECLARE CURSOR c IS SELECT value FROM \"%s\""
2732 " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2733 "BEGIN\n"
2734 ""
2735 "OPEN c;\n"
2736 "WHILE 1 = 1 LOOP\n"
2737 " FETCH c INTO my_func();\n"
2738 " IF c % NOTFOUND THEN\n"
2739 " EXIT;\n"
2740 " END IF;\n"
2741 "END LOOP;\n"
2742 "CLOSE c;");
2743
2744 *doc_id = 0;
2745
2746 error = fts_eval_sql(trx, graph);
2747
2748 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2749
2750 // FIXME: We need to retry deadlock errors
2751 if (error != DB_SUCCESS) {
2752 goto func_exit;
2753 }
2754
2755 if (read_only) {
2756 goto func_exit;
2757 }
2758
2759 if (doc_id_cmp == 0 && *doc_id) {
2760 cache->synced_doc_id = *doc_id - 1;
2761 } else {
2762 cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2763 }
2764
2765 mutex_enter(&cache->doc_id_lock);
2766 /* For each sync operation, we will add next_doc_id by 1,
2767 so to mark a sync operation */
2768 if (cache->next_doc_id < cache->synced_doc_id + 1) {
2769 cache->next_doc_id = cache->synced_doc_id + 1;
2770 }
2771 mutex_exit(&cache->doc_id_lock);
2772
2773 if (doc_id_cmp > *doc_id) {
2774 error = fts_update_sync_doc_id(
2775 table, table->name, cache->synced_doc_id, trx);
2776 }
2777
2778 *doc_id = cache->next_doc_id;
2779
2780 func_exit:
2781
2782 if (error == DB_SUCCESS) {
2783 fts_sql_commit(trx);
2784 } else {
2785 *doc_id = 0;
2786
2787 ut_print_timestamp(stderr);
2788 fprintf(stderr, " InnoDB: Error: (%s) "
2789 "while getting next doc id.\n", ut_strerr(error));
2790
2791 fts_sql_rollback(trx);
2792
2793 if (error == DB_DEADLOCK) {
2794 os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2795 goto retry;
2796 }
2797 }
2798
2799 trx_free_for_background(trx);
2800
2801 return(error);
2802 }
2803
2804 /*********************************************************************//**
2805 Update the last document id. This function could create a new
2806 transaction to update the last document id.
2807 @return DB_SUCCESS if OK */
2808 static
2809 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2810 fts_update_sync_doc_id(
2811 /*===================*/
2812 const dict_table_t* table, /*!< in: table */
2813 const char* table_name, /*!< in: table name, or NULL */
2814 doc_id_t doc_id, /*!< in: last document id */
2815 trx_t* trx) /*!< in: update trx, or NULL */
2816 {
2817 byte id[FTS_MAX_ID_LEN];
2818 pars_info_t* info;
2819 fts_table_t fts_table;
2820 ulint id_len;
2821 que_t* graph = NULL;
2822 dberr_t error;
2823 ibool local_trx = FALSE;
2824 fts_cache_t* cache = table->fts->cache;
2825
2826 fts_table.suffix = "CONFIG";
2827 fts_table.table_id = table->id;
2828 fts_table.type = FTS_COMMON_TABLE;
2829 fts_table.table = table;
2830 if (table_name) {
2831 fts_table.parent = table_name;
2832 } else {
2833 fts_table.parent = table->name;
2834 }
2835
2836 if (!trx) {
2837 trx = trx_allocate_for_background();
2838
2839 trx->op_info = "setting last FTS document id";
2840 local_trx = TRUE;
2841 }
2842
2843 info = pars_info_create();
2844
2845 id_len = ut_snprintf(
2846 (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2847
2848 pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2849
2850 graph = fts_parse_sql(
2851 &fts_table, info,
2852 "BEGIN "
2853 "UPDATE \"%s\" SET value = :doc_id"
2854 " WHERE key = 'synced_doc_id';");
2855
2856 error = fts_eval_sql(trx, graph);
2857
2858 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2859
2860 if (local_trx) {
2861 if (error == DB_SUCCESS) {
2862 fts_sql_commit(trx);
2863 cache->synced_doc_id = doc_id;
2864 } else {
2865
2866 ib_logf(IB_LOG_LEVEL_ERROR,
2867 "(%s) while updating last doc id.",
2868 ut_strerr(error));
2869
2870 fts_sql_rollback(trx);
2871 }
2872 trx_free_for_background(trx);
2873 }
2874
2875 return(error);
2876 }
2877
2878 /*********************************************************************//**
2879 Create a new fts_doc_ids_t.
2880 @return new fts_doc_ids_t */
2881 UNIV_INTERN
2882 fts_doc_ids_t*
fts_doc_ids_create(void)2883 fts_doc_ids_create(void)
2884 /*====================*/
2885 {
2886 fts_doc_ids_t* fts_doc_ids;
2887 mem_heap_t* heap = mem_heap_create(512);
2888
2889 fts_doc_ids = static_cast<fts_doc_ids_t*>(
2890 mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2891
2892 fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2893
2894 fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2895 fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2896
2897 return(fts_doc_ids);
2898 }
2899
2900 /*********************************************************************//**
2901 Free a fts_doc_ids_t. */
2902
2903 void
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)2904 fts_doc_ids_free(
2905 /*=============*/
2906 fts_doc_ids_t* fts_doc_ids)
2907 {
2908 mem_heap_t* heap = static_cast<mem_heap_t*>(
2909 fts_doc_ids->self_heap->arg);
2910
2911 memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
2912
2913 mem_heap_free(heap);
2914 }
2915
2916 /*********************************************************************//**
2917 Do commit-phase steps necessary for the insertion of a new row. */
2918 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)2919 fts_add(
2920 /*====*/
2921 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2922 fts_trx_row_t* row) /*!< in: row */
2923 {
2924 dict_table_t* table = ftt->table;
2925 doc_id_t doc_id = row->doc_id;
2926
2927 ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
2928
2929 fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
2930
2931 mutex_enter(&table->fts->cache->deleted_lock);
2932 ++table->fts->cache->added;
2933 mutex_exit(&table->fts->cache->deleted_lock);
2934
2935 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
2936 && doc_id >= table->fts->cache->next_doc_id) {
2937 table->fts->cache->next_doc_id = doc_id + 1;
2938 }
2939 }
2940
2941 /*********************************************************************//**
2942 Do commit-phase steps necessary for the deletion of a row.
2943 @return DB_SUCCESS or error code */
2944 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2945 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)2946 fts_delete(
2947 /*=======*/
2948 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2949 fts_trx_row_t* row) /*!< in: row */
2950 {
2951 que_t* graph;
2952 fts_table_t fts_table;
2953 dberr_t error = DB_SUCCESS;
2954 doc_id_t write_doc_id;
2955 dict_table_t* table = ftt->table;
2956 doc_id_t doc_id = row->doc_id;
2957 trx_t* trx = ftt->fts_trx->trx;
2958 pars_info_t* info = pars_info_create();
2959 fts_cache_t* cache = table->fts->cache;
2960
2961 /* we do not index Documents whose Doc ID value is 0 */
2962 if (doc_id == FTS_NULL_DOC_ID) {
2963 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
2964 return(error);
2965 }
2966
2967 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2968
2969 FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
2970
2971 /* Convert to "storage" byte order. */
2972 fts_write_doc_id((byte*) &write_doc_id, doc_id);
2973 fts_bind_doc_id(info, "doc_id", &write_doc_id);
2974
2975 /* It is possible we update a record that has not yet been sync-ed
2976 into cache from last crash (delete Doc will not initialize the
2977 sync). Avoid any added counter accounting until the FTS cache
2978 is re-established and sync-ed */
2979 if (table->fts->fts_status & ADDED_TABLE_SYNCED
2980 && doc_id > cache->synced_doc_id) {
2981 mutex_enter(&table->fts->cache->deleted_lock);
2982
2983 /* The Doc ID could belong to those left in
2984 ADDED table from last crash. So need to check
2985 if it is less than first_doc_id when we initialize
2986 the Doc ID system after reboot */
2987 if (doc_id >= table->fts->cache->first_doc_id
2988 && table->fts->cache->added > 0) {
2989 --table->fts->cache->added;
2990 }
2991
2992 mutex_exit(&table->fts->cache->deleted_lock);
2993
2994 /* Only if the row was really deleted. */
2995 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2996 }
2997
2998 /* Note the deleted document for OPTIMIZE to purge. */
2999 if (error == DB_SUCCESS) {
3000
3001 trx->op_info = "adding doc id to FTS DELETED";
3002
3003 info->graph_owns_us = TRUE;
3004
3005 fts_table.suffix = "DELETED";
3006
3007 graph = fts_parse_sql(
3008 &fts_table,
3009 info,
3010 "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);");
3011
3012 error = fts_eval_sql(trx, graph);
3013
3014 fts_que_graph_free(graph);
3015 } else {
3016 pars_info_free(info);
3017 }
3018
3019 /* Increment the total deleted count, this is used to calculate the
3020 number of documents indexed. */
3021 if (error == DB_SUCCESS) {
3022 mutex_enter(&table->fts->cache->deleted_lock);
3023
3024 ++table->fts->cache->deleted;
3025
3026 mutex_exit(&table->fts->cache->deleted_lock);
3027 }
3028
3029 return(error);
3030 }
3031
3032 /*********************************************************************//**
3033 Do commit-phase steps necessary for the modification of a row.
3034 @return DB_SUCCESS or error code */
3035 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3036 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3037 fts_modify(
3038 /*=======*/
3039 fts_trx_table_t* ftt, /*!< in: FTS trx table */
3040 fts_trx_row_t* row) /*!< in: row */
3041 {
3042 dberr_t error;
3043
3044 ut_a(row->state == FTS_MODIFY);
3045
3046 error = fts_delete(ftt, row);
3047
3048 if (error == DB_SUCCESS) {
3049 fts_add(ftt, row);
3050 }
3051
3052 return(error);
3053 }
3054
3055 /*********************************************************************//**
3056 Create a new document id.
3057 @return DB_SUCCESS if all went well else error */
3058 UNIV_INTERN
3059 dberr_t
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3060 fts_create_doc_id(
3061 /*==============*/
3062 dict_table_t* table, /*!< in: row is of this table. */
3063 dtuple_t* row, /* in/out: add doc id value to this
3064 row. This is the current row that is
3065 being inserted. */
3066 mem_heap_t* heap) /*!< in: heap */
3067 {
3068 doc_id_t doc_id;
3069 dberr_t error = DB_SUCCESS;
3070
3071 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3072
3073 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3074 if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3075 error = fts_get_next_doc_id(table, &doc_id);
3076 }
3077 return(error);
3078 }
3079
3080 error = fts_get_next_doc_id(table, &doc_id);
3081
3082 if (error == DB_SUCCESS) {
3083 dfield_t* dfield;
3084 doc_id_t* write_doc_id;
3085
3086 ut_a(doc_id > 0);
3087
3088 dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3089 write_doc_id = static_cast<doc_id_t*>(
3090 mem_heap_alloc(heap, sizeof(*write_doc_id)));
3091
3092 ut_a(doc_id != FTS_NULL_DOC_ID);
3093 ut_a(sizeof(doc_id) == dfield->type.len);
3094 fts_write_doc_id((byte*) write_doc_id, doc_id);
3095
3096 dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3097 }
3098
3099 return(error);
3100 }
3101
3102 /*********************************************************************//**
3103 The given transaction is about to be committed; do whatever is necessary
3104 from the FTS system's POV.
3105 @return DB_SUCCESS or error code */
3106 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3107 dberr_t
fts_commit_table(fts_trx_table_t * ftt)3108 fts_commit_table(
3109 /*=============*/
3110 fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
3111 {
3112 const ib_rbt_node_t* node;
3113 ib_rbt_t* rows;
3114 dberr_t error = DB_SUCCESS;
3115 fts_cache_t* cache = ftt->table->fts->cache;
3116 trx_t* trx = trx_allocate_for_background();
3117
3118 rows = ftt->rows;
3119
3120 ftt->fts_trx->trx = trx;
3121
3122 if (cache->get_docs == NULL) {
3123 rw_lock_x_lock(&cache->init_lock);
3124 if (cache->get_docs == NULL) {
3125 cache->get_docs = fts_get_docs_create(cache);
3126 }
3127 rw_lock_x_unlock(&cache->init_lock);
3128 }
3129
3130 for (node = rbt_first(rows);
3131 node != NULL && error == DB_SUCCESS;
3132 node = rbt_next(rows, node)) {
3133
3134 fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
3135
3136 switch (row->state) {
3137 case FTS_INSERT:
3138 fts_add(ftt, row);
3139 break;
3140
3141 case FTS_MODIFY:
3142 error = fts_modify(ftt, row);
3143 break;
3144
3145 case FTS_DELETE:
3146 error = fts_delete(ftt, row);
3147 break;
3148
3149 default:
3150 ut_error;
3151 }
3152 }
3153
3154 fts_sql_commit(trx);
3155
3156 trx_free_for_background(trx);
3157
3158 return(error);
3159 }
3160
3161 /*********************************************************************//**
3162 The given transaction is about to be committed; do whatever is necessary
3163 from the FTS system's POV.
3164 @return DB_SUCCESS or error code */
3165 UNIV_INTERN
3166 dberr_t
fts_commit(trx_t * trx)3167 fts_commit(
3168 /*=======*/
3169 trx_t* trx) /*!< in: transaction */
3170 {
3171 const ib_rbt_node_t* node;
3172 dberr_t error;
3173 ib_rbt_t* tables;
3174 fts_savepoint_t* savepoint;
3175
3176 savepoint = static_cast<fts_savepoint_t*>(
3177 ib_vector_last(trx->fts_trx->savepoints));
3178 tables = savepoint->tables;
3179
3180 for (node = rbt_first(tables), error = DB_SUCCESS;
3181 node != NULL && error == DB_SUCCESS;
3182 node = rbt_next(tables, node)) {
3183
3184 fts_trx_table_t** ftt;
3185
3186 ftt = rbt_value(fts_trx_table_t*, node);
3187
3188 error = fts_commit_table(*ftt);
3189 }
3190
3191 return(error);
3192 }
3193
3194 /*********************************************************************//**
3195 Initialize a document. */
3196 UNIV_INTERN
3197 void
fts_doc_init(fts_doc_t * doc)3198 fts_doc_init(
3199 /*=========*/
3200 fts_doc_t* doc) /*!< in: doc to initialize */
3201 {
3202 mem_heap_t* heap = mem_heap_create(32);
3203
3204 memset(doc, 0, sizeof(*doc));
3205
3206 doc->self_heap = ib_heap_allocator_create(heap);
3207 }
3208
3209 /*********************************************************************//**
3210 Free document. */
3211 UNIV_INTERN
3212 void
fts_doc_free(fts_doc_t * doc)3213 fts_doc_free(
3214 /*=========*/
3215 fts_doc_t* doc) /*!< in: document */
3216 {
3217 mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3218
3219 if (doc->tokens) {
3220 rbt_free(doc->tokens);
3221 }
3222
3223 #ifdef UNIV_DEBUG
3224 memset(doc, 0, sizeof(*doc));
3225 #endif /* UNIV_DEBUG */
3226
3227 mem_heap_free(heap);
3228 }
3229
3230 /*********************************************************************//**
3231 Callback function for fetch that stores a row id to the location pointed.
3232 The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8.
3233 @return always returns NULL */
3234 UNIV_INTERN
3235 void*
fts_fetch_row_id(void * row,void * user_arg)3236 fts_fetch_row_id(
3237 /*=============*/
3238 void* row, /*!< in: sel_node_t* */
3239 void* user_arg) /*!< in: data pointer */
3240 {
3241 sel_node_t* node = static_cast<sel_node_t*>(row);
3242
3243 dfield_t* dfield = que_node_get_val(node->select_list);
3244 dtype_t* type = dfield_get_type(dfield);
3245 ulint len = dfield_get_len(dfield);
3246
3247 ut_a(dtype_get_mtype(type) == DATA_FIXBINARY);
3248 ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE);
3249 ut_a(len == 8);
3250
3251 memcpy(user_arg, dfield_get_data(dfield), 8);
3252
3253 return(NULL);
3254 }
3255
3256 /*********************************************************************//**
3257 Callback function for fetch that stores the text of an FTS document,
3258 converting each column to UTF-16.
3259 @return always FALSE */
3260 UNIV_INTERN
3261 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3262 fts_query_expansion_fetch_doc(
3263 /*==========================*/
3264 void* row, /*!< in: sel_node_t* */
3265 void* user_arg) /*!< in: fts_doc_t* */
3266 {
3267 que_node_t* exp;
3268 sel_node_t* node = static_cast<sel_node_t*>(row);
3269 fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
3270 dfield_t* dfield;
3271 ulint len;
3272 ulint doc_len;
3273 fts_doc_t doc;
3274 CHARSET_INFO* doc_charset = NULL;
3275 ulint field_no = 0;
3276
3277 len = 0;
3278
3279 fts_doc_init(&doc);
3280 doc.found = TRUE;
3281
3282 exp = node->select_list;
3283 doc_len = 0;
3284
3285 doc_charset = result_doc->charset;
3286
3287 /* Copy each indexed column content into doc->text.f_str */
3288 while (exp) {
3289 dfield = que_node_get_val(exp);
3290 len = dfield_get_len(dfield);
3291
3292 /* NULL column */
3293 if (len == UNIV_SQL_NULL) {
3294 exp = que_node_get_next(exp);
3295 continue;
3296 }
3297
3298 if (!doc_charset) {
3299 ulint prtype = dfield->type.prtype;
3300 doc_charset = innobase_get_fts_charset(
3301 (int)(prtype & DATA_MYSQL_TYPE_MASK),
3302 (uint) dtype_get_charset_coll(prtype));
3303 }
3304
3305 doc.charset = doc_charset;
3306
3307 if (dfield_is_ext(dfield)) {
3308 /* We ignore columns that are stored externally, this
3309 could result in too many words to search */
3310 exp = que_node_get_next(exp);
3311 continue;
3312 } else {
3313 doc.text.f_n_char = 0;
3314
3315 doc.text.f_str = static_cast<byte*>(
3316 dfield_get_data(dfield));
3317
3318 doc.text.f_len = len;
3319 }
3320
3321 if (field_no == 0) {
3322 fts_tokenize_document(&doc, result_doc);
3323 } else {
3324 fts_tokenize_document_next(&doc, doc_len, result_doc);
3325 }
3326
3327 exp = que_node_get_next(exp);
3328
3329 doc_len += (exp) ? len + 1 : len;
3330
3331 field_no++;
3332 }
3333
3334 ut_ad(doc_charset);
3335
3336 if (!result_doc->charset) {
3337 result_doc->charset = doc_charset;
3338 }
3339
3340 fts_doc_free(&doc);
3341
3342 return(FALSE);
3343 }
3344
3345 /*********************************************************************//**
3346 fetch and tokenize the document. */
3347 static
3348 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3349 fts_fetch_doc_from_rec(
3350 /*===================*/
3351 fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
3352 dict_index_t* clust_index, /*!< in: cluster index */
3353 btr_pcur_t* pcur, /*!< in: cursor whose position
3354 has been stored */
3355 ulint* offsets, /*!< in: offsets */
3356 fts_doc_t* doc) /*!< out: fts doc to hold parsed
3357 documents */
3358 {
3359 dict_index_t* index;
3360 dict_table_t* table;
3361 const rec_t* clust_rec;
3362 ulint num_field;
3363 const dict_field_t* ifield;
3364 const dict_col_t* col;
3365 ulint clust_pos;
3366 ulint i;
3367 ulint doc_len = 0;
3368 ulint processed_doc = 0;
3369
3370 if (!get_doc) {
3371 return;
3372 }
3373
3374 index = get_doc->index_cache->index;
3375 table = get_doc->index_cache->index->table;
3376
3377 clust_rec = btr_pcur_get_rec(pcur);
3378
3379 num_field = dict_index_get_n_fields(index);
3380
3381 for (i = 0; i < num_field; i++) {
3382 ifield = dict_index_get_nth_field(index, i);
3383 col = dict_field_get_col(ifield);
3384 clust_pos = dict_col_get_clust_pos(col, clust_index);
3385
3386 if (!get_doc->index_cache->charset) {
3387 ulint prtype = ifield->col->prtype;
3388
3389 get_doc->index_cache->charset =
3390 innobase_get_fts_charset(
3391 (int) (prtype & DATA_MYSQL_TYPE_MASK),
3392 (uint) dtype_get_charset_coll(prtype));
3393 }
3394
3395 if (rec_offs_nth_extern(offsets, clust_pos)) {
3396 doc->text.f_str =
3397 btr_rec_copy_externally_stored_field(
3398 clust_rec, offsets,
3399 dict_table_zip_size(table),
3400 clust_pos, &doc->text.f_len,
3401 static_cast<mem_heap_t*>(
3402 doc->self_heap->arg));
3403 } else {
3404 doc->text.f_str = (byte*) rec_get_nth_field(
3405 clust_rec, offsets, clust_pos,
3406 &doc->text.f_len);
3407 }
3408
3409 doc->found = TRUE;
3410 doc->charset = get_doc->index_cache->charset;
3411
3412 /* Null Field */
3413 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3414 continue;
3415 }
3416
3417 if (processed_doc == 0) {
3418 fts_tokenize_document(doc, NULL);
3419 } else {
3420 fts_tokenize_document_next(doc, doc_len, NULL);
3421 }
3422
3423 processed_doc++;
3424 doc_len += doc->text.f_len + 1;
3425 }
3426 }
3427
3428 /*********************************************************************//**
3429 This function fetches the document inserted during the committing
3430 transaction, and tokenize the inserted text data and insert into
3431 FTS auxiliary table and its cache.
3432 @return TRUE if successful */
3433 static
3434 ulint
fts_add_doc_by_id(fts_trx_table_t * ftt,doc_id_t doc_id,ib_vector_t * fts_indexes MY_ATTRIBUTE ((unused)))3435 fts_add_doc_by_id(
3436 /*==============*/
3437 fts_trx_table_t*ftt, /*!< in: FTS trx table */
3438 doc_id_t doc_id, /*!< in: doc id */
3439 ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)))
3440 /*!< in: affected fts indexes */
3441 {
3442 mtr_t mtr;
3443 mem_heap_t* heap;
3444 btr_pcur_t pcur;
3445 dict_table_t* table;
3446 dtuple_t* tuple;
3447 dfield_t* dfield;
3448 fts_get_doc_t* get_doc;
3449 doc_id_t temp_doc_id;
3450 dict_index_t* clust_index;
3451 dict_index_t* fts_id_index;
3452 ibool is_id_cluster;
3453 fts_cache_t* cache = ftt->table->fts->cache;
3454
3455 ut_ad(cache->get_docs);
3456
3457 /* If Doc ID has been supplied by the user, then the table
3458 might not yet be sync-ed */
3459
3460 if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
3461 fts_init_index(ftt->table, FALSE);
3462 }
3463
3464 /* Get the first FTS index's get_doc */
3465 get_doc = static_cast<fts_get_doc_t*>(
3466 ib_vector_get(cache->get_docs, 0));
3467 ut_ad(get_doc);
3468
3469 table = get_doc->index_cache->index->table;
3470
3471 heap = mem_heap_create(512);
3472
3473 clust_index = dict_table_get_first_index(table);
3474 fts_id_index = dict_table_get_index_on_name(
3475 table, FTS_DOC_ID_INDEX_NAME);
3476
3477 /* Check whether the index on FTS_DOC_ID is cluster index */
3478 is_id_cluster = (clust_index == fts_id_index);
3479
3480 mtr_start(&mtr);
3481 btr_pcur_init(&pcur);
3482
3483 /* Search based on Doc ID. Here, we'll need to consider the case
3484 when there is no primary index on Doc ID */
3485 tuple = dtuple_create(heap, 1);
3486 dfield = dtuple_get_nth_field(tuple, 0);
3487 dfield->type.mtype = DATA_INT;
3488 dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;
3489
3490 mach_write_to_8((byte*) &temp_doc_id, doc_id);
3491 dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));
3492
3493 btr_pcur_open_with_no_init(
3494 fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
3495 &pcur, 0, &mtr);
3496
3497 /* If we have a match, add the data to doc structure */
3498 if (btr_pcur_get_low_match(&pcur) == 1) {
3499 const rec_t* rec;
3500 btr_pcur_t* doc_pcur;
3501 const rec_t* clust_rec;
3502 btr_pcur_t clust_pcur;
3503 ulint* offsets = NULL;
3504 ulint num_idx = ib_vector_size(cache->get_docs);
3505
3506 rec = btr_pcur_get_rec(&pcur);
3507
3508 /* Doc could be deleted */
3509 if (page_rec_is_infimum(rec)
3510 || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
3511
3512 goto func_exit;
3513 }
3514
3515 if (is_id_cluster) {
3516 clust_rec = rec;
3517 doc_pcur = &pcur;
3518 } else {
3519 dtuple_t* clust_ref;
3520 ulint n_fields;
3521
3522 btr_pcur_init(&clust_pcur);
3523 n_fields = dict_index_get_n_unique(clust_index);
3524
3525 clust_ref = dtuple_create(heap, n_fields);
3526 dict_index_copy_types(clust_ref, clust_index, n_fields);
3527
3528 row_build_row_ref_in_tuple(
3529 clust_ref, rec, fts_id_index, NULL, NULL);
3530
3531 btr_pcur_open_with_no_init(
3532 clust_index, clust_ref, PAGE_CUR_LE,
3533 BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);
3534
3535 doc_pcur = &clust_pcur;
3536 clust_rec = btr_pcur_get_rec(&clust_pcur);
3537
3538 }
3539
3540 offsets = rec_get_offsets(clust_rec, clust_index,
3541 NULL, ULINT_UNDEFINED, &heap);
3542
3543 for (ulint i = 0; i < num_idx; ++i) {
3544 fts_doc_t doc;
3545 dict_table_t* table;
3546 fts_get_doc_t* get_doc;
3547
3548 get_doc = static_cast<fts_get_doc_t*>(
3549 ib_vector_get(cache->get_docs, i));
3550
3551 table = get_doc->index_cache->index->table;
3552
3553 fts_doc_init(&doc);
3554
3555 fts_fetch_doc_from_rec(
3556 get_doc, clust_index, doc_pcur, offsets, &doc);
3557
3558 if (doc.found) {
3559 ibool success MY_ATTRIBUTE((unused));
3560
3561 btr_pcur_store_position(doc_pcur, &mtr);
3562 mtr_commit(&mtr);
3563
3564 DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
3565 rw_lock_x_lock(&table->fts->cache->lock);
3566
3567 if (table->fts->cache->stopword_info.status
3568 & STOPWORD_NOT_INIT) {
3569 fts_load_stopword(table, NULL, NULL,
3570 NULL, TRUE, TRUE);
3571 }
3572
3573 fts_cache_add_doc(
3574 table->fts->cache,
3575 get_doc->index_cache,
3576 doc_id, doc.tokens);
3577
3578 bool need_sync = false;
3579 if ((cache->total_size > fts_max_cache_size / 10
3580 || fts_need_sync)
3581 && !cache->sync->in_progress) {
3582 need_sync = true;
3583 }
3584
3585 rw_lock_x_unlock(&table->fts->cache->lock);
3586
3587 DBUG_EXECUTE_IF(
3588 "fts_instrument_sync_cache_wait",
3589 srv_fatal_semaphore_wait_threshold = 25;
3590 fts_max_cache_size = 100;
3591 fts_sync(cache->sync, true, true, false);
3592 );
3593
3594 DBUG_EXECUTE_IF(
3595 "fts_instrument_sync",
3596 fts_optimize_request_sync_table(table);
3597 os_event_wait(cache->sync->event);
3598 );
3599
3600 DBUG_EXECUTE_IF(
3601 "fts_instrument_sync_debug",
3602 fts_sync(cache->sync, true, true, false);
3603 );
3604
3605 DEBUG_SYNC_C("fts_instrument_sync_request");
3606 DBUG_EXECUTE_IF(
3607 "fts_instrument_sync_request",
3608 fts_optimize_request_sync_table(table);
3609 );
3610
3611 if (need_sync) {
3612 fts_optimize_request_sync_table(table);
3613 }
3614
3615 mtr_start(&mtr);
3616
3617 if (i < num_idx - 1) {
3618
3619 success = btr_pcur_restore_position(
3620 BTR_SEARCH_LEAF, doc_pcur,
3621 &mtr);
3622
3623 ut_ad(success);
3624 }
3625 }
3626
3627 fts_doc_free(&doc);
3628 }
3629
3630 if (!is_id_cluster) {
3631 btr_pcur_close(doc_pcur);
3632 }
3633 }
3634 func_exit:
3635 mtr_commit(&mtr);
3636
3637 btr_pcur_close(&pcur);
3638
3639 mem_heap_free(heap);
3640 return(TRUE);
3641 }
3642
3643
3644 /*********************************************************************//**
3645 Callback function to read a single ulint column.
3646 return always returns TRUE */
3647 static
3648 ibool
fts_read_ulint(void * row,void * user_arg)3649 fts_read_ulint(
3650 /*===========*/
3651 void* row, /*!< in: sel_node_t* */
3652 void* user_arg) /*!< in: pointer to ulint */
3653 {
3654 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
3655 ulint* value = static_cast<ulint*>(user_arg);
3656 que_node_t* exp = sel_node->select_list;
3657 dfield_t* dfield = que_node_get_val(exp);
3658 void* data = dfield_get_data(dfield);
3659
3660 *value = static_cast<ulint>(mach_read_from_4(
3661 static_cast<const byte*>(data)));
3662
3663 return(TRUE);
3664 }
3665
3666 /*********************************************************************//**
3667 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3668 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3669 UNIV_INTERN
3670 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3671 fts_get_max_doc_id(
3672 /*===============*/
3673 dict_table_t* table) /*!< in: user table */
3674 {
3675 dict_index_t* index;
3676 dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL;
3677 doc_id_t doc_id = 0;
3678 mtr_t mtr;
3679 btr_pcur_t pcur;
3680
3681 index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
3682
3683 if (!index) {
3684 return(0);
3685 }
3686
3687 dfield = dict_index_get_nth_field(index, 0);
3688
3689 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3690 ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3691 #endif
3692
3693 mtr_start(&mtr);
3694
3695 /* fetch the largest indexes value */
3696 btr_pcur_open_at_index_side(
3697 false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3698
3699 if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3700 const rec_t* rec = NULL;
3701 ulint offsets_[REC_OFFS_NORMAL_SIZE];
3702 ulint* offsets = offsets_;
3703 mem_heap_t* heap = NULL;
3704 ulint len;
3705 const void* data;
3706
3707 rec_offs_init(offsets_);
3708
3709 do {
3710 rec = btr_pcur_get_rec(&pcur);
3711
3712 if (page_rec_is_user_rec(rec)) {
3713 break;
3714 }
3715 } while (btr_pcur_move_to_prev(&pcur, &mtr));
3716
3717 if (!rec) {
3718 goto func_exit;
3719 }
3720
3721 offsets = rec_get_offsets(
3722 rec, index, offsets, ULINT_UNDEFINED, &heap);
3723
3724 data = rec_get_nth_field(rec, offsets, 0, &len);
3725
3726 doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3727 static_cast<const byte*>(data)));
3728 }
3729
3730 func_exit:
3731 btr_pcur_close(&pcur);
3732 mtr_commit(&mtr);
3733 return(doc_id);
3734 }
3735
3736 /*********************************************************************//**
3737 Fetch document with the given document id.
3738 @return DB_SUCCESS if OK else error */
3739 UNIV_INTERN
3740 dberr_t
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3741 fts_doc_fetch_by_doc_id(
3742 /*====================*/
3743 fts_get_doc_t* get_doc, /*!< in: state */
3744 doc_id_t doc_id, /*!< in: id of document to
3745 fetch */
3746 dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
3747 or NULL */
3748 ulint option, /*!< in: search option, if it is
3749 greater than doc_id or equal */
3750 fts_sql_callback
3751 callback, /*!< in: callback to read */
3752 void* arg) /*!< in: callback arg */
3753 {
3754 pars_info_t* info;
3755 dberr_t error;
3756 const char* select_str;
3757 doc_id_t write_doc_id;
3758 dict_index_t* index;
3759 trx_t* trx = trx_allocate_for_background();
3760 que_t* graph;
3761
3762 trx->op_info = "fetching indexed FTS document";
3763
3764 /* The FTS index can be supplied by caller directly with
3765 "index_to_use", otherwise, get it from "get_doc" */
3766 index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3767
3768 if (get_doc && get_doc->get_document_graph) {
3769 info = get_doc->get_document_graph->info;
3770 } else {
3771 info = pars_info_create();
3772 }
3773
3774 /* Convert to "storage" byte order. */
3775 fts_write_doc_id((byte*) &write_doc_id, doc_id);
3776 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3777 pars_info_bind_function(info, "my_func", callback, arg);
3778
3779 select_str = fts_get_select_columns_str(index, info, info->heap);
3780 pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3781
3782 if (!get_doc || !get_doc->get_document_graph) {
3783 if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3784 graph = fts_parse_sql(
3785 NULL,
3786 info,
3787 mem_heap_printf(info->heap,
3788 "DECLARE FUNCTION my_func;\n"
3789 "DECLARE CURSOR c IS"
3790 " SELECT %s FROM $table_name"
3791 " WHERE %s = :doc_id;\n"
3792 "BEGIN\n"
3793 ""
3794 "OPEN c;\n"
3795 "WHILE 1 = 1 LOOP\n"
3796 " FETCH c INTO my_func();\n"
3797 " IF c %% NOTFOUND THEN\n"
3798 " EXIT;\n"
3799 " END IF;\n"
3800 "END LOOP;\n"
3801 "CLOSE c;",
3802 select_str, FTS_DOC_ID_COL_NAME));
3803 } else {
3804 ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3805
3806 /* This is used for crash recovery of table with
3807 hidden DOC ID or FTS indexes. We will scan the table
3808 to re-processing user table rows whose DOC ID or
3809 FTS indexed documents have not been sync-ed to disc
3810 during recent crash.
3811 In the case that all fulltext indexes are dropped
3812 for a table, we will keep the "hidden" FTS_DOC_ID
3813 column, and this scan is to retreive the largest
3814 DOC ID being used in the table to determine the
3815 appropriate next DOC ID.
3816 In the case of there exists fulltext index(es), this
3817 operation will re-tokenize any docs that have not
3818 been sync-ed to the disk, and re-prime the FTS
3819 cached */
3820 graph = fts_parse_sql(
3821 NULL,
3822 info,
3823 mem_heap_printf(info->heap,
3824 "DECLARE FUNCTION my_func;\n"
3825 "DECLARE CURSOR c IS"
3826 " SELECT %s, %s FROM $table_name"
3827 " WHERE %s > :doc_id;\n"
3828 "BEGIN\n"
3829 ""
3830 "OPEN c;\n"
3831 "WHILE 1 = 1 LOOP\n"
3832 " FETCH c INTO my_func();\n"
3833 " IF c %% NOTFOUND THEN\n"
3834 " EXIT;\n"
3835 " END IF;\n"
3836 "END LOOP;\n"
3837 "CLOSE c;",
3838 FTS_DOC_ID_COL_NAME,
3839 select_str, FTS_DOC_ID_COL_NAME));
3840 }
3841 if (get_doc) {
3842 get_doc->get_document_graph = graph;
3843 }
3844 } else {
3845 graph = get_doc->get_document_graph;
3846 }
3847
3848 error = fts_eval_sql(trx, graph);
3849
3850 if (error == DB_SUCCESS) {
3851 fts_sql_commit(trx);
3852 } else {
3853 fts_sql_rollback(trx);
3854 }
3855
3856 trx_free_for_background(trx);
3857
3858 if (!get_doc) {
3859 fts_que_graph_free(graph);
3860 }
3861
3862 return(error);
3863 }
3864
3865 /*********************************************************************//**
3866 Write out a single word's data as new entry/entries in the INDEX table.
3867 @return DB_SUCCESS if all OK. */
3868 UNIV_INTERN
3869 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3870 fts_write_node(
3871 /*===========*/
3872 trx_t* trx, /*!< in: transaction */
3873 que_t** graph, /*!< in: query graph */
3874 fts_table_t* fts_table, /*!< in: aux table */
3875 fts_string_t* word, /*!< in: word in UTF-8 */
3876 fts_node_t* node) /*!< in: node columns */
3877 {
3878 pars_info_t* info;
3879 dberr_t error;
3880 ib_uint32_t doc_count;
3881 ib_time_t start_time;
3882 doc_id_t last_doc_id;
3883 doc_id_t first_doc_id;
3884
3885 if (*graph) {
3886 info = (*graph)->info;
3887 } else {
3888 info = pars_info_create();
3889 }
3890
3891 pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3892
3893 /* Convert to "storage" byte order. */
3894 fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3895 fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3896
3897 /* Convert to "storage" byte order. */
3898 fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3899 fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3900
3901 ut_a(node->last_doc_id >= node->first_doc_id);
3902
3903 /* Convert to "storage" byte order. */
3904 mach_write_to_4((byte*) &doc_count, node->doc_count);
3905 pars_info_bind_int4_literal(
3906 info, "doc_count", (const ib_uint32_t*) &doc_count);
3907
3908 /* Set copy_name to FALSE since it's a static. */
3909 pars_info_bind_literal(
3910 info, "ilist", node->ilist, node->ilist_size,
3911 DATA_BLOB, DATA_BINARY_TYPE);
3912
3913 if (!*graph) {
3914 *graph = fts_parse_sql(
3915 fts_table,
3916 info,
3917 "BEGIN\n"
3918 "INSERT INTO \"%s\" VALUES "
3919 "(:token, :first_doc_id,"
3920 " :last_doc_id, :doc_count, :ilist);");
3921 }
3922
3923 start_time = ut_time();
3924 error = fts_eval_sql(trx, *graph);
3925 elapsed_time += ut_time() - start_time;
3926 ++n_nodes;
3927
3928 return(error);
3929 }
3930
3931 /*********************************************************************//**
3932 Add rows to the DELETED_CACHE table.
3933 @return DB_SUCCESS if all went well else error code*/
3934 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3935 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)3936 fts_sync_add_deleted_cache(
3937 /*=======================*/
3938 fts_sync_t* sync, /*!< in: sync state */
3939 ib_vector_t* doc_ids) /*!< in: doc ids to add */
3940 {
3941 ulint i;
3942 pars_info_t* info;
3943 que_t* graph;
3944 fts_table_t fts_table;
3945 doc_id_t dummy = 0;
3946 dberr_t error = DB_SUCCESS;
3947 ulint n_elems = ib_vector_size(doc_ids);
3948
3949 ut_a(ib_vector_size(doc_ids) > 0);
3950
3951 ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
3952
3953 info = pars_info_create();
3954
3955 fts_bind_doc_id(info, "doc_id", &dummy);
3956
3957 FTS_INIT_FTS_TABLE(
3958 &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
3959
3960 graph = fts_parse_sql(
3961 &fts_table,
3962 info,
3963 "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);");
3964
3965 for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
3966 fts_update_t* update;
3967 doc_id_t write_doc_id;
3968
3969 update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i));
3970
3971 /* Convert to "storage" byte order. */
3972 fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
3973 fts_bind_doc_id(info, "doc_id", &write_doc_id);
3974
3975 error = fts_eval_sql(sync->trx, graph);
3976 }
3977
3978 fts_que_graph_free(graph);
3979
3980 return(error);
3981 }
3982
3983 /** Write the words and ilist to disk.
3984 @param[in,out] trx transaction
3985 @param[in] index_cache index cache
3986 @param[in] unlock_cache whether unlock cache when write node
3987 @param[in] sync_start_time Holds the timestamp of start of sync
3988 for deducing the length of sync time
3989 @return DB_SUCCESS if all went well else error code */
3990 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3991 dberr_t
fts_sync_write_words(trx_t * trx,fts_index_cache_t * index_cache,bool unlock_cache,ib_time_t sync_start_time)3992 fts_sync_write_words(
3993 trx_t* trx,
3994 fts_index_cache_t* index_cache,
3995 bool unlock_cache,
3996 ib_time_t sync_start_time)
3997 {
3998 fts_table_t fts_table;
3999 ulint n_nodes = 0;
4000 ulint n_words = 0;
4001 const ib_rbt_node_t* rbt_node;
4002 dberr_t error = DB_SUCCESS;
4003 ibool print_error = FALSE;
4004 dict_table_t* table = index_cache->index->table;
4005 /* We use this to deduce threshold value of time
4006 that we can let sync to go on holding cache lock */
4007 const float cutoff = 0.98;
4008 ulint lock_threshold =
4009 (srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION)
4010 * cutoff;
4011 bool timeout_extended = false;
4012 #ifdef FTS_DOC_STATS_DEBUG
4013 ulint n_new_words = 0;
4014 #endif /* FTS_DOC_STATS_DEBUG */
4015
4016 FTS_INIT_INDEX_TABLE(
4017 &fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
4018
4019 n_words = rbt_size(index_cache->words);
4020
4021 /* We iterate over the entire tree, even if there is an error,
4022 since we want to free the memory used during caching. */
4023 for (rbt_node = rbt_first(index_cache->words);
4024 rbt_node;
4025 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4026
4027 ulint i;
4028 ulint selected;
4029 fts_tokenizer_word_t* word;
4030
4031 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4032
4033 selected = fts_select_index(
4034 index_cache->charset, word->text.f_str,
4035 word->text.f_len);
4036
4037 fts_table.suffix = fts_get_suffix(selected);
4038
4039 #ifdef FTS_DOC_STATS_DEBUG
4040 /* Check if the word exists in the FTS index and if not
4041 then we need to increment the total word count stats. */
4042 if (error == DB_SUCCESS && fts_enable_diag_print) {
4043 ibool found = FALSE;
4044
4045 error = fts_is_word_in_index(
4046 trx,
4047 &index_cache->sel_graph[selected],
4048 &fts_table,
4049 &word->text, &found);
4050
4051 if (error == DB_SUCCESS && !found) {
4052
4053 ++n_new_words;
4054 }
4055 }
4056 #endif /* FTS_DOC_STATS_DEBUG */
4057
4058 /* We iterate over all the nodes even if there was an error */
4059 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
4060
4061 fts_node_t* fts_node = static_cast<fts_node_t*>(
4062 ib_vector_get(word->nodes, i));
4063
4064 if (fts_node->synced) {
4065 continue;
4066 } else {
4067 fts_node->synced = true;
4068 }
4069
4070 /*FIXME: we need to handle the error properly. */
4071 if (error == DB_SUCCESS) {
4072 DBUG_EXECUTE_IF("fts_instrument_sync_write",
4073 os_thread_sleep(10000000););
4074
4075 if (!unlock_cache) {
4076 ulint cache_lock_time = ut_time() - sync_start_time;
4077 if (cache_lock_time > lock_threshold) {
4078 if (!timeout_extended) {
4079 os_atomic_increment_ulint(
4080 &srv_fatal_semaphore_wait_threshold,
4081 SRV_SEMAPHORE_WAIT_EXTENSION);
4082 timeout_extended = true;
4083 lock_threshold +=
4084 SRV_SEMAPHORE_WAIT_EXTENSION;
4085 } else {
4086 unlock_cache = true;
4087 os_atomic_decrement_ulint(
4088 &srv_fatal_semaphore_wait_threshold,
4089 SRV_SEMAPHORE_WAIT_EXTENSION);
4090 timeout_extended = false;
4091
4092 }
4093 }
4094 }
4095
4096 if (unlock_cache) {
4097 rw_lock_x_unlock(
4098 &table->fts->cache->lock);
4099 }
4100
4101 error = fts_write_node(
4102 trx,
4103 &index_cache->ins_graph[selected],
4104 &fts_table, &word->text, fts_node);
4105
4106 DBUG_EXECUTE_IF("fts_instrument_sync_write",
4107 os_thread_sleep(15000000););
4108
4109 DEBUG_SYNC_C("fts_write_node");
4110 DBUG_EXECUTE_IF("fts_write_node_crash",
4111 DBUG_SUICIDE(););
4112
4113 DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
4114 os_thread_sleep(1000000);
4115 );
4116
4117 if (unlock_cache) {
4118 rw_lock_x_lock(
4119 &table->fts->cache->lock);
4120 }
4121 }
4122 }
4123
4124 n_nodes += ib_vector_size(word->nodes);
4125
4126 if (error != DB_SUCCESS && !print_error) {
4127 ut_print_timestamp(stderr);
4128 fprintf(stderr, " InnoDB: Error (%s) writing "
4129 "word node to FTS auxiliary index "
4130 "table.\n", ut_strerr(error));
4131
4132 print_error = TRUE;
4133 }
4134 }
4135
4136 #ifdef FTS_DOC_STATS_DEBUG
4137 if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
4138 fts_table_t fts_table;
4139
4140 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
4141
4142 /* Increment the total number of words in the FTS index */
4143 error = fts_config_increment_index_value(
4144 trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
4145 n_new_words);
4146 }
4147 #endif /* FTS_DOC_STATS_DEBUG */
4148
4149 if (fts_enable_diag_print) {
4150 printf("Avg number of nodes: %lf\n",
4151 (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
4152 }
4153
4154 return(error);
4155 }
4156
4157 #ifdef FTS_DOC_STATS_DEBUG
4158 /*********************************************************************//**
4159 Write a single documents statistics to disk.
4160 @return DB_SUCCESS if all went well else error code */
4161 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4162 dberr_t
fts_sync_write_doc_stat(trx_t * trx,dict_index_t * index,que_t ** graph,const fts_doc_stats_t * doc_stat)4163 fts_sync_write_doc_stat(
4164 /*====================*/
4165 trx_t* trx, /*!< in: transaction */
4166 dict_index_t* index, /*!< in: index */
4167 que_t** graph, /* out: query graph */
4168 const fts_doc_stats_t* doc_stat) /*!< in: doc stats to write */
4169 {
4170 pars_info_t* info;
4171 doc_id_t doc_id;
4172 dberr_t error = DB_SUCCESS;
4173 ib_uint32_t word_count;
4174
4175 if (*graph) {
4176 info = (*graph)->info;
4177 } else {
4178 info = pars_info_create();
4179 }
4180
4181 /* Convert to "storage" byte order. */
4182 mach_write_to_4((byte*) &word_count, doc_stat->word_count);
4183 pars_info_bind_int4_literal(
4184 info, "count", (const ib_uint32_t*) &word_count);
4185
4186 /* Convert to "storage" byte order. */
4187 fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id);
4188 fts_bind_doc_id(info, "doc_id", &doc_id);
4189
4190 if (!*graph) {
4191 fts_table_t fts_table;
4192
4193 FTS_INIT_INDEX_TABLE(
4194 &fts_table, "DOC_ID", FTS_INDEX_TABLE, index);
4195
4196 *graph = fts_parse_sql(
4197 &fts_table,
4198 info,
4199 "BEGIN INSERT INTO \"%s\" VALUES (:doc_id, :count);");
4200 }
4201
4202 for (;;) {
4203 error = fts_eval_sql(trx, *graph);
4204
4205 if (error == DB_SUCCESS) {
4206
4207 break; /* Exit the loop. */
4208 } else {
4209 ut_print_timestamp(stderr);
4210
4211 if (error == DB_LOCK_WAIT_TIMEOUT) {
4212 fprintf(stderr, " InnoDB: Warning: lock wait "
4213 "timeout writing to FTS doc_id. "
4214 "Retrying!\n");
4215
4216 trx->error_state = DB_SUCCESS;
4217 } else {
4218 fprintf(stderr, " InnoDB: Error: (%s) "
4219 "while writing to FTS doc_id.\n",
4220 ut_strerr(error));
4221
4222 break; /* Exit the loop. */
4223 }
4224 }
4225 }
4226
4227 return(error);
4228 }
4229
4230 /*********************************************************************//**
4231 Write document statistics to disk.
4232 @return DB_SUCCESS if all OK */
4233 static
4234 ulint
fts_sync_write_doc_stats(trx_t * trx,const fts_index_cache_t * index_cache)4235 fts_sync_write_doc_stats(
4236 /*=====================*/
4237 trx_t* trx, /*!< in: transaction */
4238 const fts_index_cache_t*index_cache) /*!< in: index cache */
4239 {
4240 dberr_t error = DB_SUCCESS;
4241 que_t* graph = NULL;
4242 fts_doc_stats_t* doc_stat;
4243
4244 if (ib_vector_is_empty(index_cache->doc_stats)) {
4245 return(DB_SUCCESS);
4246 }
4247
4248 doc_stat = static_cast<ts_doc_stats_t*>(
4249 ib_vector_pop(index_cache->doc_stats));
4250
4251 while (doc_stat) {
4252 error = fts_sync_write_doc_stat(
4253 trx, index_cache->index, &graph, doc_stat);
4254
4255 if (error != DB_SUCCESS) {
4256 break;
4257 }
4258
4259 if (ib_vector_is_empty(index_cache->doc_stats)) {
4260 break;
4261 }
4262
4263 doc_stat = static_cast<ts_doc_stats_t*>(
4264 ib_vector_pop(index_cache->doc_stats));
4265 }
4266
4267 if (graph != NULL) {
4268 fts_que_graph_free_check_lock(NULL, index_cache, graph);
4269 }
4270
4271 return(error);
4272 }
4273
4274 /*********************************************************************//**
4275 Callback to check the existince of a word.
4276 @return always return NULL */
4277 static
4278 ibool
fts_lookup_word(void * row,void * user_arg)4279 fts_lookup_word(
4280 /*============*/
4281 void* row, /*!< in: sel_node_t* */
4282 void* user_arg) /*!< in: fts_doc_t* */
4283 {
4284
4285 que_node_t* exp;
4286 sel_node_t* node = static_cast<sel_node_t*>(row);
4287 ibool* found = static_cast<ibool*>(user_arg);
4288
4289 exp = node->select_list;
4290
4291 while (exp) {
4292 dfield_t* dfield = que_node_get_val(exp);
4293 ulint len = dfield_get_len(dfield);
4294
4295 if (len != UNIV_SQL_NULL && len != 0) {
4296 *found = TRUE;
4297 }
4298
4299 exp = que_node_get_next(exp);
4300 }
4301
4302 return(FALSE);
4303 }
4304
4305 /*********************************************************************//**
4306 Check whether a particular word (term) exists in the FTS index.
4307 @return DB_SUCCESS if all went well else error code */
4308 static
4309 dberr_t
fts_is_word_in_index(trx_t * trx,que_t ** graph,fts_table_t * fts_table,const fts_string_t * word,ibool * found)4310 fts_is_word_in_index(
4311 /*=================*/
4312 trx_t* trx, /*!< in: FTS query state */
4313 que_t** graph, /* out: Query graph */
4314 fts_table_t* fts_table, /*!< in: table instance */
4315 const fts_string_t*
4316 word, /*!< in: the word to check */
4317 ibool* found) /* out: TRUE if exists */
4318 {
4319 pars_info_t* info;
4320 dberr_t error;
4321
4322 trx->op_info = "looking up word in FTS index";
4323
4324 if (*graph) {
4325 info = (*graph)->info;
4326 } else {
4327 info = pars_info_create();
4328 }
4329
4330 pars_info_bind_function(info, "my_func", fts_lookup_word, found);
4331 pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
4332
4333 if (*graph == NULL) {
4334 *graph = fts_parse_sql(
4335 fts_table,
4336 info,
4337 "DECLARE FUNCTION my_func;\n"
4338 "DECLARE CURSOR c IS"
4339 " SELECT doc_count\n"
4340 " FROM \"%s\"\n"
4341 " WHERE word = :word "
4342 " ORDER BY first_doc_id;\n"
4343 "BEGIN\n"
4344 "\n"
4345 "OPEN c;\n"
4346 "WHILE 1 = 1 LOOP\n"
4347 " FETCH c INTO my_func();\n"
4348 " IF c % NOTFOUND THEN\n"
4349 " EXIT;\n"
4350 " END IF;\n"
4351 "END LOOP;\n"
4352 "CLOSE c;");
4353 }
4354
4355 for (;;) {
4356 error = fts_eval_sql(trx, *graph);
4357
4358 if (error == DB_SUCCESS) {
4359
4360 break; /* Exit the loop. */
4361 } else {
4362 ut_print_timestamp(stderr);
4363
4364 if (error == DB_LOCK_WAIT_TIMEOUT) {
4365 fprintf(stderr, " InnoDB: Warning: lock wait "
4366 "timeout reading FTS index. "
4367 "Retrying!\n");
4368
4369 trx->error_state = DB_SUCCESS;
4370 } else {
4371 fprintf(stderr, " InnoDB: Error: (%s) "
4372 "while reading FTS index.\n",
4373 ut_strerr(error));
4374
4375 break; /* Exit the loop. */
4376 }
4377 }
4378 }
4379
4380 return(error);
4381 }
4382 #endif /* FTS_DOC_STATS_DEBUG */
4383
4384 /*********************************************************************//**
4385 Begin Sync, create transaction, acquire locks, etc. */
4386 static
4387 void
fts_sync_begin(fts_sync_t * sync)4388 fts_sync_begin(
4389 /*===========*/
4390 fts_sync_t* sync) /*!< in: sync state */
4391 {
4392 fts_cache_t* cache = sync->table->fts->cache;
4393
4394 n_nodes = 0;
4395 elapsed_time = 0;
4396
4397 sync->start_time = ut_time();
4398
4399 sync->trx = trx_allocate_for_background();
4400
4401 if (fts_enable_diag_print) {
4402 ib_logf(IB_LOG_LEVEL_INFO,
4403 "FTS SYNC for table %s, deleted count: %ld size: "
4404 "%lu bytes",
4405 sync->table->name,
4406 ib_vector_size(cache->deleted_doc_ids),
4407 cache->total_size);
4408 }
4409 }
4410
4411 /*********************************************************************//**
4412 Run SYNC on the table, i.e., write out data from the index specific
4413 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4414 @return DB_SUCCESS if all OK */
4415 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4416 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4417 fts_sync_index(
4418 /*===========*/
4419 fts_sync_t* sync, /*!< in: sync state */
4420 fts_index_cache_t* index_cache) /*!< in: index cache */
4421 {
4422 trx_t* trx = sync->trx;
4423 dberr_t error = DB_SUCCESS;
4424
4425 trx->op_info = "doing SYNC index";
4426
4427 if (fts_enable_diag_print) {
4428 ib_logf(IB_LOG_LEVEL_INFO,
4429 "SYNC words: %ld", rbt_size(index_cache->words));
4430 }
4431
4432 ut_ad(rbt_validate(index_cache->words));
4433
4434 error = fts_sync_write_words(sync->trx, index_cache, sync->unlock_cache,
4435 sync->start_time);
4436
4437 #ifdef FTS_DOC_STATS_DEBUG
4438 /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
4439 is not used currently for ranking. We disable fts_sync_write_doc_stats()
4440 for now */
4441 /* Write the per doc statistics that will be used for ranking. */
4442 if (error == DB_SUCCESS) {
4443
4444 error = fts_sync_write_doc_stats(trx, index_cache);
4445 }
4446 #endif /* FTS_DOC_STATS_DEBUG */
4447
4448 return(error);
4449 }
4450
4451 /** Check if index cache has been synced completely
4452 @param[in,out] index_cache index cache
4453 @return true if index is synced, otherwise false. */
4454 static
4455 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4456 fts_sync_index_check(
4457 fts_index_cache_t* index_cache)
4458 {
4459 const ib_rbt_node_t* rbt_node;
4460
4461 for (rbt_node = rbt_first(index_cache->words);
4462 rbt_node != NULL;
4463 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4464
4465 fts_tokenizer_word_t* word;
4466 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4467
4468 fts_node_t* fts_node;
4469 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4470
4471 if (!fts_node->synced) {
4472 return(false);
4473 }
4474 }
4475
4476 return(true);
4477 }
4478
4479 /** Reset synced flag in index cache when rollback
4480 @param[in,out] index_cache index cache */
4481 static
4482 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4483 fts_sync_index_reset(
4484 fts_index_cache_t* index_cache)
4485 {
4486 const ib_rbt_node_t* rbt_node;
4487
4488 for (rbt_node = rbt_first(index_cache->words);
4489 rbt_node != NULL;
4490 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4491
4492 fts_tokenizer_word_t* word;
4493 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4494
4495 fts_node_t* fts_node;
4496 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4497
4498 fts_node->synced = false;
4499 }
4500 }
4501
4502 /** Commit the SYNC, change state of processed doc ids etc.
4503 @param[in,out] sync sync state
4504 @return DB_SUCCESS if all OK */
4505 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4506 dberr_t
fts_sync_commit(fts_sync_t * sync)4507 fts_sync_commit(
4508 fts_sync_t* sync)
4509 {
4510 dberr_t error;
4511 trx_t* trx = sync->trx;
4512 fts_cache_t* cache = sync->table->fts->cache;
4513 doc_id_t last_doc_id;
4514
4515 trx->op_info = "doing SYNC commit";
4516
4517 /* After each Sync, update the CONFIG table about the max doc id
4518 we just sync-ed to index table */
4519 error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4520 &last_doc_id);
4521
4522 /* Get the list of deleted documents that are either in the
4523 cache or were headed there but were deleted before the add
4524 thread got to them. */
4525
4526 if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4527
4528 error = fts_sync_add_deleted_cache(
4529 sync, cache->deleted_doc_ids);
4530 }
4531
4532 /* We need to do this within the deleted lock since fts_delete() can
4533 attempt to add a deleted doc id to the cache deleted id array. */
4534 fts_cache_clear(cache);
4535 DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4536 fts_cache_init(cache);
4537 rw_lock_x_unlock(&cache->lock);
4538
4539 if (error == DB_SUCCESS) {
4540
4541 fts_sql_commit(trx);
4542
4543 } else if (error != DB_SUCCESS) {
4544
4545 fts_sql_rollback(trx);
4546
4547 ut_print_timestamp(stderr);
4548 fprintf(stderr, " InnoDB: Error: (%s) during SYNC.\n",
4549 ut_strerr(error));
4550 }
4551
4552 if (fts_enable_diag_print && elapsed_time) {
4553 ib_logf(IB_LOG_LEVEL_INFO,
4554 "SYNC for table %s: SYNC time : %lu secs: "
4555 "elapsed %lf ins/sec",
4556 sync->table->name,
4557 (ulong) (ut_time() - sync->start_time),
4558 (double) n_nodes/ (double) elapsed_time);
4559 }
4560
4561 /* Avoid assertion in trx_free(). */
4562 trx->dict_operation_lock_mode = 0;
4563 trx_free_for_background(trx);
4564
4565 return(error);
4566 }
4567
4568 /** Rollback a sync operation
4569 @param[in,out] sync sync state */
4570 static
4571 void
fts_sync_rollback(fts_sync_t * sync)4572 fts_sync_rollback(
4573 fts_sync_t* sync)
4574 {
4575 trx_t* trx = sync->trx;
4576 fts_cache_t* cache = sync->table->fts->cache;
4577
4578 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4579 ulint j;
4580 fts_index_cache_t* index_cache;
4581
4582 index_cache = static_cast<fts_index_cache_t*>(
4583 ib_vector_get(cache->indexes, i));
4584
4585 /* Reset synced flag so nodes will not be skipped
4586 in the next sync, see fts_sync_write_words(). */
4587 fts_sync_index_reset(index_cache);
4588
4589 for (j = 0; fts_index_selector[j].value; ++j) {
4590
4591 if (index_cache->ins_graph[j] != NULL) {
4592
4593 fts_que_graph_free_check_lock(
4594 NULL, index_cache,
4595 index_cache->ins_graph[j]);
4596
4597 index_cache->ins_graph[j] = NULL;
4598 }
4599
4600 if (index_cache->sel_graph[j] != NULL) {
4601
4602 fts_que_graph_free_check_lock(
4603 NULL, index_cache,
4604 index_cache->sel_graph[j]);
4605
4606 index_cache->sel_graph[j] = NULL;
4607 }
4608 }
4609 }
4610
4611 rw_lock_x_unlock(&cache->lock);
4612
4613 fts_sql_rollback(trx);
4614
4615 /* Avoid assertion in trx_free(). */
4616 trx->dict_operation_lock_mode = 0;
4617 trx_free_for_background(trx);
4618 }
4619
4620 /** Run SYNC on the table, i.e., write out data from the cache to the
4621 FTS auxiliary INDEX table and clear the cache at the end.
4622 @param[in,out] sync sync state
4623 @param[in] unlock_cache whether unlock cache lock when write node
4624 @param[in] wait whether wait when a sync is in progress
4625 @param[in] has_dict whether has dict operation lock
4626 @return DB_SUCCESS if all OK */
4627 static
4628 dberr_t
fts_sync(fts_sync_t * sync,bool unlock_cache,bool wait,bool has_dict)4629 fts_sync(
4630 fts_sync_t* sync,
4631 bool unlock_cache,
4632 bool wait,
4633 bool has_dict)
4634 {
4635 ulint i;
4636 dberr_t error = DB_SUCCESS;
4637 fts_cache_t* cache = sync->table->fts->cache;
4638
4639 rw_lock_x_lock(&cache->lock);
4640
4641 /* Check if cache is being synced.
4642 Note: we release cache lock in fts_sync_write_words() to
4643 avoid long wait for the lock by other threads. */
4644 while (sync->in_progress) {
4645 rw_lock_x_unlock(&cache->lock);
4646
4647 if (wait) {
4648 os_event_wait(sync->event);
4649 } else {
4650 return(DB_SUCCESS);
4651 }
4652
4653 rw_lock_x_lock(&cache->lock);
4654 }
4655
4656 sync->unlock_cache = unlock_cache;
4657 sync->in_progress = true;
4658
4659 DEBUG_SYNC_C("fts_sync_begin");
4660 fts_sync_begin(sync);
4661
4662 /* When sync in background, we hold dict operation lock
4663 to prevent DDL like DROP INDEX, etc. */
4664 if (has_dict) {
4665 sync->trx->dict_operation_lock_mode = RW_S_LATCH;
4666 }
4667
4668 begin_sync:
4669 if (cache->total_size > fts_max_cache_size) {
4670 /* Avoid the case: sync never finish when
4671 insert/update keeps comming. */
4672 ut_ad(sync->unlock_cache);
4673 sync->unlock_cache = false;
4674 }
4675 DEBUG_SYNC_C("fts_instrument_sync");
4676 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4677 fts_index_cache_t* index_cache;
4678
4679 index_cache = static_cast<fts_index_cache_t*>(
4680 ib_vector_get(cache->indexes, i));
4681
4682 if (index_cache->index->to_be_dropped
4683 || index_cache->index->table->to_be_dropped) {
4684 continue;
4685 }
4686
4687 index_cache->index->index_fts_syncing = true;
4688 DBUG_EXECUTE_IF("fts_instrument_sync_sleep_drop_waits",
4689 os_thread_sleep(10000000);
4690 );
4691
4692 error = fts_sync_index(sync, index_cache);
4693
4694 if (error != DB_SUCCESS && !sync->interrupted) {
4695
4696 goto end_sync;
4697 }
4698 }
4699
4700 DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
4701 sync->interrupted = true;
4702 error = DB_INTERRUPTED;
4703 goto end_sync;
4704 );
4705
4706 /* Make sure all the caches are synced. */
4707 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4708 fts_index_cache_t* index_cache;
4709
4710 index_cache = static_cast<fts_index_cache_t*>(
4711 ib_vector_get(cache->indexes, i));
4712
4713 if (index_cache->index->to_be_dropped
4714 || fts_sync_index_check(index_cache)) {
4715 continue;
4716 }
4717
4718 goto begin_sync;
4719 }
4720
4721 end_sync:
4722 if (error == DB_SUCCESS && !sync->interrupted) {
4723 error = fts_sync_commit(sync);
4724 if (error == DB_SUCCESS) {
4725 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4726 fts_index_cache_t* index_cache;
4727 index_cache = static_cast<fts_index_cache_t*>(
4728 ib_vector_get(cache->indexes, i));
4729 if (index_cache->index->index_fts_syncing) {
4730 index_cache->index->index_fts_syncing
4731 = false;
4732 }
4733 }
4734 }
4735 } else {
4736 fts_sync_rollback(sync);
4737 }
4738
4739 rw_lock_x_lock(&cache->lock);
4740 /* Clear fts syncing flags of any indexes incase sync is
4741 interrupeted */
4742 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4743 fts_index_cache_t* index_cache;
4744 index_cache = static_cast<fts_index_cache_t*>(
4745 ib_vector_get(cache->indexes, i));
4746 if (index_cache->index->index_fts_syncing == true) {
4747 index_cache->index->index_fts_syncing = false;
4748 }
4749 }
4750
4751 sync->interrupted = false;
4752 sync->in_progress = false;
4753 os_event_set(sync->event);
4754 rw_lock_x_unlock(&cache->lock);
4755
4756 /* We need to check whether an optimize is required, for that
4757 we make copies of the two variables that control the trigger. These
4758 variables can change behind our back and we don't want to hold the
4759 lock for longer than is needed. */
4760 mutex_enter(&cache->deleted_lock);
4761
4762 cache->added = 0;
4763 cache->deleted = 0;
4764
4765 mutex_exit(&cache->deleted_lock);
4766
4767 return(error);
4768 }
4769
4770 /** Run SYNC on the table, i.e., write out data from the cache to the
4771 FTS auxiliary INDEX table and clear the cache at the end.
4772 @param[in,out] table fts table
4773 @param[in] unlock_cache whether unlock cache when write node
4774 @param[in] wait whether wait for existing sync to finish
4775 @param[in] has_dict whether has dict operation lock
4776 @return DB_SUCCESS on success, error code on failure. */
4777 UNIV_INTERN
4778 dberr_t
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4779 fts_sync_table(
4780 dict_table_t* table,
4781 bool unlock_cache,
4782 bool wait,
4783 bool has_dict)
4784 {
4785 dberr_t err = DB_SUCCESS;
4786
4787 ut_ad(table->fts);
4788
4789 if (!dict_table_is_discarded(table) && table->fts->cache) {
4790 err = fts_sync(table->fts->cache->sync,
4791 unlock_cache, wait, has_dict);
4792 }
4793
4794 return(err);
4795 }
4796
4797 /********************************************************************
4798 Process next token from document starting at the given position, i.e., add
4799 the token's start position to the token's list of positions.
4800 @return number of characters handled in this call */
4801 static
4802 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)4803 fts_process_token(
4804 /*==============*/
4805 fts_doc_t* doc, /* in/out: document to
4806 tokenize */
4807 fts_doc_t* result, /* out: if provided, save
4808 result here */
4809 ulint start_pos, /*!< in: start position in text */
4810 ulint add_pos) /*!< in: add this position to all
4811 tokens from this tokenization */
4812 {
4813 ulint ret;
4814 fts_string_t str;
4815 ulint offset = 0;
4816 fts_doc_t* result_doc;
4817
4818 /* Determine where to save the result. */
4819 result_doc = (result) ? result : doc;
4820
4821 /* The length of a string in characters is set here only. */
4822 ret = innobase_mysql_fts_get_token(
4823 doc->charset, doc->text.f_str + start_pos,
4824 doc->text.f_str + doc->text.f_len, &str, &offset);
4825
4826 /* Ignore string whose character number is less than
4827 "fts_min_token_size" or more than "fts_max_token_size" */
4828
4829 if (str.f_n_char >= fts_min_token_size
4830 && str.f_n_char <= fts_max_token_size) {
4831
4832 mem_heap_t* heap;
4833 fts_string_t t_str;
4834 fts_token_t* token;
4835 ib_rbt_bound_t parent;
4836 ulint newlen;
4837
4838 heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
4839
4840 t_str.f_n_char = str.f_n_char;
4841
4842 t_str.f_len = str.f_len * doc->charset->casedn_multiply + 1;
4843
4844 t_str.f_str = static_cast<byte*>(
4845 mem_heap_alloc(heap, t_str.f_len));
4846
4847 /* For binary collations, a case sensitive search is
4848 performed. Hence don't convert to lower case. */
4849 if (my_binary_compare(result_doc->charset)) {
4850 memcpy(t_str.f_str, str.f_str, str.f_len);
4851 t_str.f_str[str.f_len]= 0;
4852 newlen= str.f_len;
4853 } else {
4854 newlen = innobase_fts_casedn_str(
4855 doc->charset, (char*) str.f_str, str.f_len,
4856 (char*) t_str.f_str, t_str.f_len);
4857 }
4858
4859 t_str.f_len = newlen;
4860 t_str.f_str[newlen] = 0;
4861
4862 /* Add the word to the document statistics. If the word
4863 hasn't been seen before we create a new entry for it. */
4864 if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4865 fts_token_t new_token;
4866
4867 new_token.text.f_len = newlen;
4868 new_token.text.f_str = t_str.f_str;
4869 new_token.text.f_n_char = t_str.f_n_char;
4870
4871 new_token.positions = ib_vector_create(
4872 result_doc->self_heap, sizeof(ulint), 32);
4873
4874 ut_a(new_token.text.f_n_char >= fts_min_token_size);
4875 ut_a(new_token.text.f_n_char <= fts_max_token_size);
4876
4877 parent.last = rbt_add_node(
4878 result_doc->tokens, &parent, &new_token);
4879
4880 ut_ad(rbt_validate(result_doc->tokens));
4881 }
4882
4883 #ifdef FTS_CHARSET_DEBUG
4884 offset += start_pos + add_pos;
4885 #endif /* FTS_CHARSET_DEBUG */
4886
4887 offset += start_pos + ret - str.f_len + add_pos;
4888
4889 token = rbt_value(fts_token_t, parent.last);
4890 ib_vector_push(token->positions, &offset);
4891 }
4892
4893 return(ret);
4894 }
4895
4896 /******************************************************************//**
4897 Tokenize a document. */
4898 UNIV_INTERN
4899 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result)4900 fts_tokenize_document(
4901 /*==================*/
4902 fts_doc_t* doc, /* in/out: document to
4903 tokenize */
4904 fts_doc_t* result) /* out: if provided, save
4905 the result token here */
4906 {
4907 ulint inc;
4908
4909 ut_a(!doc->tokens);
4910 ut_a(doc->charset);
4911
4912 doc->tokens = rbt_create_arg_cmp(
4913 sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
4914
4915 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4916 inc = fts_process_token(doc, result, i, 0);
4917 ut_a(inc > 0);
4918 }
4919 }
4920
4921 /******************************************************************//**
4922 Continue to tokenize a document. */
4923 UNIV_INTERN
4924 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result)4925 fts_tokenize_document_next(
4926 /*=======================*/
4927 fts_doc_t* doc, /*!< in/out: document to
4928 tokenize */
4929 ulint add_pos, /*!< in: add this position to all
4930 tokens from this tokenization */
4931 fts_doc_t* result) /*!< out: if provided, save
4932 the result token here */
4933 {
4934 ulint inc;
4935
4936 ut_a(doc->tokens);
4937
4938 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4939 inc = fts_process_token(doc, result, i, add_pos);
4940 ut_a(inc > 0);
4941 }
4942 }
4943
4944 /********************************************************************
4945 Create the vector of fts_get_doc_t instances. */
4946 UNIV_INTERN
4947 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)4948 fts_get_docs_create(
4949 /*================*/
4950 /* out: vector of
4951 fts_get_doc_t instances */
4952 fts_cache_t* cache) /*!< in: fts cache */
4953 {
4954 ulint i;
4955 ib_vector_t* get_docs;
4956
4957 #ifdef UNIV_SYNC_DEBUG
4958 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
4959 #endif
4960 /* We need one instance of fts_get_doc_t per index. */
4961 get_docs = ib_vector_create(
4962 cache->self_heap, sizeof(fts_get_doc_t), 4);
4963
4964 /* Create the get_doc instance, we need one of these
4965 per FTS index. */
4966 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4967
4968 dict_index_t** index;
4969 fts_get_doc_t* get_doc;
4970
4971 index = static_cast<dict_index_t**>(
4972 ib_vector_get(cache->indexes, i));
4973
4974 get_doc = static_cast<fts_get_doc_t*>(
4975 ib_vector_push(get_docs, NULL));
4976
4977 memset(get_doc, 0x0, sizeof(*get_doc));
4978
4979 get_doc->index_cache = fts_get_index_cache(cache, *index);
4980 get_doc->cache = cache;
4981
4982 /* Must find the index cache. */
4983 ut_a(get_doc->index_cache != NULL);
4984 }
4985
4986 return(get_docs);
4987 }
4988
4989 /********************************************************************
4990 Release any resources held by the fts_get_doc_t instances. */
4991 static
4992 void
fts_get_docs_clear(ib_vector_t * get_docs)4993 fts_get_docs_clear(
4994 /*===============*/
4995 ib_vector_t* get_docs) /*!< in: Doc retrieval vector */
4996 {
4997 ulint i;
4998
4999 /* Release the get doc graphs if any. */
5000 for (i = 0; i < ib_vector_size(get_docs); ++i) {
5001
5002 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(
5003 ib_vector_get(get_docs, i));
5004
5005 if (get_doc->get_document_graph != NULL) {
5006
5007 ut_a(get_doc->index_cache);
5008
5009 fts_que_graph_free(get_doc->get_document_graph);
5010 get_doc->get_document_graph = NULL;
5011 }
5012 }
5013 }
5014
5015 /*********************************************************************//**
5016 Get the initial Doc ID by consulting the CONFIG table
5017 @return initial Doc ID */
5018 UNIV_INTERN
5019 doc_id_t
fts_init_doc_id(const dict_table_t * table)5020 fts_init_doc_id(
5021 /*============*/
5022 const dict_table_t* table) /*!< in: table */
5023 {
5024 doc_id_t max_doc_id = 0;
5025
5026 rw_lock_x_lock(&table->fts->cache->lock);
5027
5028 /* Return if the table is already initialized for DOC ID */
5029 if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
5030 rw_lock_x_unlock(&table->fts->cache->lock);
5031 return(0);
5032 }
5033
5034 DEBUG_SYNC_C("fts_initialize_doc_id");
5035
5036 /* Then compare this value with the ID value stored in the CONFIG
5037 table. The larger one will be our new initial Doc ID */
5038 fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
5039
5040 /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
5041 creating index (and add doc id column. No need to recovery
5042 documents */
5043 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
5044 fts_init_index((dict_table_t*) table, TRUE);
5045 }
5046
5047 table->fts->fts_status |= ADDED_TABLE_SYNCED;
5048
5049 table->fts->cache->first_doc_id = max_doc_id;
5050
5051 rw_lock_x_unlock(&table->fts->cache->lock);
5052
5053 ut_ad(max_doc_id > 0);
5054
5055 return(max_doc_id);
5056 }
5057
5058 #ifdef FTS_MULT_INDEX
5059 /*********************************************************************//**
5060 Check if the index is in the affected set.
5061 @return TRUE if index is updated */
5062 static
5063 ibool
fts_is_index_updated(const ib_vector_t * fts_indexes,const fts_get_doc_t * get_doc)5064 fts_is_index_updated(
5065 /*=================*/
5066 const ib_vector_t* fts_indexes, /*!< in: affected FTS indexes */
5067 const fts_get_doc_t* get_doc) /*!< in: info for reading
5068 document */
5069 {
5070 ulint i;
5071 dict_index_t* index = get_doc->index_cache->index;
5072
5073 for (i = 0; i < ib_vector_size(fts_indexes); ++i) {
5074 const dict_index_t* updated_fts_index;
5075
5076 updated_fts_index = static_cast<const dict_index_t*>(
5077 ib_vector_getp_const(fts_indexes, i));
5078
5079 ut_a(updated_fts_index != NULL);
5080
5081 if (updated_fts_index == index) {
5082 return(TRUE);
5083 }
5084 }
5085
5086 return(FALSE);
5087 }
5088 #endif
5089
5090 /*********************************************************************//**
5091 Fetch COUNT(*) from specified table.
5092 @return the number of rows in the table */
5093 UNIV_INTERN
5094 ulint
fts_get_rows_count(fts_table_t * fts_table)5095 fts_get_rows_count(
5096 /*===============*/
5097 fts_table_t* fts_table) /*!< in: fts table to read */
5098 {
5099 trx_t* trx;
5100 pars_info_t* info;
5101 que_t* graph;
5102 dberr_t error;
5103 ulint count = 0;
5104
5105 trx = trx_allocate_for_background();
5106
5107 trx->op_info = "fetching FT table rows count";
5108
5109 info = pars_info_create();
5110
5111 pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
5112
5113 graph = fts_parse_sql(
5114 fts_table,
5115 info,
5116 "DECLARE FUNCTION my_func;\n"
5117 "DECLARE CURSOR c IS"
5118 " SELECT COUNT(*) "
5119 " FROM \"%s\";\n"
5120 "BEGIN\n"
5121 "\n"
5122 "OPEN c;\n"
5123 "WHILE 1 = 1 LOOP\n"
5124 " FETCH c INTO my_func();\n"
5125 " IF c % NOTFOUND THEN\n"
5126 " EXIT;\n"
5127 " END IF;\n"
5128 "END LOOP;\n"
5129 "CLOSE c;");
5130
5131 for (;;) {
5132 error = fts_eval_sql(trx, graph);
5133
5134 if (error == DB_SUCCESS) {
5135 fts_sql_commit(trx);
5136
5137 break; /* Exit the loop. */
5138 } else {
5139 fts_sql_rollback(trx);
5140
5141 ut_print_timestamp(stderr);
5142
5143 if (error == DB_LOCK_WAIT_TIMEOUT) {
5144 fprintf(stderr, " InnoDB: Warning: lock wait "
5145 "timeout reading FTS table. "
5146 "Retrying!\n");
5147
5148 trx->error_state = DB_SUCCESS;
5149 } else {
5150 fprintf(stderr, " InnoDB: Error: (%s) "
5151 "while reading FTS table.\n",
5152 ut_strerr(error));
5153
5154 break; /* Exit the loop. */
5155 }
5156 }
5157 }
5158
5159 fts_que_graph_free(graph);
5160
5161 trx_free_for_background(trx);
5162
5163 return(count);
5164 }
5165
5166 #ifdef FTS_CACHE_SIZE_DEBUG
5167 /*********************************************************************//**
5168 Read the max cache size parameter from the config table. */
5169 static
5170 void
fts_update_max_cache_size(fts_sync_t * sync)5171 fts_update_max_cache_size(
5172 /*======================*/
5173 fts_sync_t* sync) /*!< in: sync state */
5174 {
5175 trx_t* trx;
5176 fts_table_t fts_table;
5177
5178 trx = trx_allocate_for_background();
5179
5180 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);
5181
5182 /* The size returned is in bytes. */
5183 sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table);
5184
5185 fts_sql_commit(trx);
5186
5187 trx_free_for_background(trx);
5188 }
5189 #endif /* FTS_CACHE_SIZE_DEBUG */
5190
5191 /*********************************************************************//**
5192 Free the modified rows of a table. */
5193 UNIV_INLINE
5194 void
fts_trx_table_rows_free(ib_rbt_t * rows)5195 fts_trx_table_rows_free(
5196 /*====================*/
5197 ib_rbt_t* rows) /*!< in: rbt of rows to free */
5198 {
5199 const ib_rbt_node_t* node;
5200
5201 for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5202 fts_trx_row_t* row;
5203
5204 row = rbt_value(fts_trx_row_t, node);
5205
5206 if (row->fts_indexes != NULL) {
5207 /* This vector shouldn't be using the
5208 heap allocator. */
5209 ut_a(row->fts_indexes->allocator->arg == NULL);
5210
5211 ib_vector_free(row->fts_indexes);
5212 row->fts_indexes = NULL;
5213 }
5214
5215 ut_free(rbt_remove_node(rows, node));
5216 }
5217
5218 ut_a(rbt_empty(rows));
5219 rbt_free(rows);
5220 }
5221
5222 /*********************************************************************//**
5223 Free an FTS savepoint instance. */
5224 UNIV_INLINE
5225 void
fts_savepoint_free(fts_savepoint_t * savepoint)5226 fts_savepoint_free(
5227 /*===============*/
5228 fts_savepoint_t* savepoint) /*!< in: savepoint instance */
5229 {
5230 const ib_rbt_node_t* node;
5231 ib_rbt_t* tables = savepoint->tables;
5232
5233 /* Nothing to free! */
5234 if (tables == NULL) {
5235 return;
5236 }
5237
5238 for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5239 fts_trx_table_t* ftt;
5240 fts_trx_table_t** fttp;
5241
5242 fttp = rbt_value(fts_trx_table_t*, node);
5243 ftt = *fttp;
5244
5245 /* This can be NULL if a savepoint was released. */
5246 if (ftt->rows != NULL) {
5247 fts_trx_table_rows_free(ftt->rows);
5248 ftt->rows = NULL;
5249 }
5250
5251 /* This can be NULL if a savepoint was released. */
5252 if (ftt->added_doc_ids != NULL) {
5253 fts_doc_ids_free(ftt->added_doc_ids);
5254 ftt->added_doc_ids = NULL;
5255 }
5256
5257 /* The default savepoint name must be NULL. */
5258 if (ftt->docs_added_graph) {
5259 fts_que_graph_free(ftt->docs_added_graph);
5260 }
5261
5262 /* NOTE: We are responsible for free'ing the node */
5263 ut_free(rbt_remove_node(tables, node));
5264 }
5265
5266 ut_a(rbt_empty(tables));
5267 rbt_free(tables);
5268 savepoint->tables = NULL;
5269 }
5270
5271 /*********************************************************************//**
5272 Free an FTS trx. */
5273 UNIV_INTERN
5274 void
fts_trx_free(fts_trx_t * fts_trx)5275 fts_trx_free(
5276 /*=========*/
5277 fts_trx_t* fts_trx) /* in, own: FTS trx */
5278 {
5279 ulint i;
5280
5281 for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5282 fts_savepoint_t* savepoint;
5283
5284 savepoint = static_cast<fts_savepoint_t*>(
5285 ib_vector_get(fts_trx->savepoints, i));
5286
5287 /* The default savepoint name must be NULL. */
5288 if (i == 0) {
5289 ut_a(savepoint->name == NULL);
5290 }
5291
5292 fts_savepoint_free(savepoint);
5293 }
5294
5295 for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5296 fts_savepoint_t* savepoint;
5297
5298 savepoint = static_cast<fts_savepoint_t*>(
5299 ib_vector_get(fts_trx->last_stmt, i));
5300
5301 /* The default savepoint name must be NULL. */
5302 if (i == 0) {
5303 ut_a(savepoint->name == NULL);
5304 }
5305
5306 fts_savepoint_free(savepoint);
5307 }
5308
5309 if (fts_trx->heap) {
5310 mem_heap_free(fts_trx->heap);
5311 }
5312 }
5313
5314 /*********************************************************************//**
5315 Extract the doc id from the FTS hidden column.
5316 @return doc id that was extracted from rec */
5317 UNIV_INTERN
5318 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5319 fts_get_doc_id_from_row(
5320 /*====================*/
5321 dict_table_t* table, /*!< in: table */
5322 dtuple_t* row) /*!< in: row whose FTS doc id we
5323 want to extract.*/
5324 {
5325 dfield_t* field;
5326 doc_id_t doc_id = 0;
5327
5328 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5329
5330 field = dtuple_get_nth_field(row, table->fts->doc_col);
5331
5332 ut_a(dfield_get_len(field) == sizeof(doc_id));
5333 ut_a(dfield_get_type(field)->mtype == DATA_INT);
5334
5335 doc_id = fts_read_doc_id(
5336 static_cast<const byte*>(dfield_get_data(field)));
5337
5338 return(doc_id);
5339 }
5340
5341 /*********************************************************************//**
5342 Extract the doc id from the FTS hidden column.
5343 @return doc id that was extracted from rec */
5344 UNIV_INTERN
5345 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,mem_heap_t * heap)5346 fts_get_doc_id_from_rec(
5347 /*====================*/
5348 dict_table_t* table, /*!< in: table */
5349 const rec_t* rec, /*!< in: rec */
5350 mem_heap_t* heap) /*!< in: heap */
5351 {
5352 ulint len;
5353 const byte* data;
5354 ulint col_no;
5355 doc_id_t doc_id = 0;
5356 dict_index_t* clust_index;
5357 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5358 ulint* offsets = offsets_;
5359 mem_heap_t* my_heap = heap;
5360
5361 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5362
5363 clust_index = dict_table_get_first_index(table);
5364
5365 rec_offs_init(offsets_);
5366
5367 offsets = rec_get_offsets(
5368 rec, clust_index, offsets, ULINT_UNDEFINED, &my_heap);
5369
5370 col_no = dict_col_get_clust_pos(
5371 &table->cols[table->fts->doc_col], clust_index);
5372 ut_ad(col_no != ULINT_UNDEFINED);
5373
5374 data = rec_get_nth_field(rec, offsets, col_no, &len);
5375
5376 ut_a(len == 8);
5377 ut_ad(8 == sizeof(doc_id));
5378 doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5379
5380 if (my_heap && !heap) {
5381 mem_heap_free(my_heap);
5382 }
5383
5384 return(doc_id);
5385 }
5386
5387 /*********************************************************************//**
5388 Search the index specific cache for a particular FTS index.
5389 @return the index specific cache else NULL */
5390 UNIV_INTERN
5391 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5392 fts_find_index_cache(
5393 /*=================*/
5394 const fts_cache_t* cache, /*!< in: cache to search */
5395 const dict_index_t* index) /*!< in: index to search for */
5396 {
5397 /* We cast away the const because our internal function, takes
5398 non-const cache arg and returns a non-const pointer. */
5399 return(static_cast<fts_index_cache_t*>(
5400 fts_get_index_cache((fts_cache_t*) cache, index)));
5401 }
5402
5403 /*********************************************************************//**
5404 Search cache for word.
5405 @return the word node vector if found else NULL */
5406 UNIV_INTERN
5407 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5408 fts_cache_find_word(
5409 /*================*/
5410 const fts_index_cache_t*index_cache, /*!< in: cache to search */
5411 const fts_string_t* text) /*!< in: word to search for */
5412 {
5413 ib_rbt_bound_t parent;
5414 const ib_vector_t* nodes = NULL;
5415 #ifdef UNIV_SYNC_DEBUG
5416 dict_table_t* table = index_cache->index->table;
5417 fts_cache_t* cache = table->fts->cache;
5418
5419 ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX));
5420 #endif
5421
5422 /* Lookup the word in the rb tree */
5423 if (rbt_search(index_cache->words, &parent, text) == 0) {
5424 const fts_tokenizer_word_t* word;
5425
5426 word = rbt_value(fts_tokenizer_word_t, parent.last);
5427
5428 nodes = word->nodes;
5429 }
5430
5431 return(nodes);
5432 }
5433
5434 /*********************************************************************//**
5435 Check cache for deleted doc id.
5436 @return TRUE if deleted */
5437 UNIV_INTERN
5438 ibool
fts_cache_is_deleted_doc_id(const fts_cache_t * cache,doc_id_t doc_id)5439 fts_cache_is_deleted_doc_id(
5440 /*========================*/
5441 const fts_cache_t* cache, /*!< in: cache ito search */
5442 doc_id_t doc_id) /*!< in: doc id to search for */
5443 {
5444 ulint i;
5445
5446 #ifdef UNIV_SYNC_DEBUG
5447 ut_ad(mutex_own(&cache->deleted_lock));
5448 #endif
5449
5450 for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5451 const fts_update_t* update;
5452
5453 update = static_cast<const fts_update_t*>(
5454 ib_vector_get_const(cache->deleted_doc_ids, i));
5455
5456 if (doc_id == update->doc_id) {
5457
5458 return(TRUE);
5459 }
5460 }
5461
5462 return(FALSE);
5463 }
5464
5465 /*********************************************************************//**
5466 Append deleted doc ids to vector. */
5467 UNIV_INTERN
5468 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5469 fts_cache_append_deleted_doc_ids(
5470 /*=============================*/
5471 const fts_cache_t* cache, /*!< in: cache to use */
5472 ib_vector_t* vector) /*!< in: append to this vector */
5473 {
5474 ulint i;
5475
5476 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
5477
5478 if (cache->deleted_doc_ids == NULL) {
5479 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5480 return;
5481 }
5482
5483
5484 for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5485 fts_update_t* update;
5486
5487 update = static_cast<fts_update_t*>(
5488 ib_vector_get(cache->deleted_doc_ids, i));
5489
5490 ib_vector_push(vector, &update->doc_id);
5491 }
5492
5493 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5494 }
5495
5496 /*********************************************************************//**
5497 Wait for the background thread to start. We poll to detect change
5498 of state, which is acceptable, since the wait should happen only
5499 once during startup.
5500 @return true if the thread started else FALSE (i.e timed out) */
5501 UNIV_INTERN
5502 ibool
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5503 fts_wait_for_background_thread_to_start(
5504 /*====================================*/
5505 dict_table_t* table, /*!< in: table to which the thread
5506 is attached */
5507 ulint max_wait) /*!< in: time in microseconds, if
5508 set to 0 then it disables
5509 timeout checking */
5510 {
5511 ulint count = 0;
5512 ibool done = FALSE;
5513
5514 ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5515
5516 for (;;) {
5517 fts_t* fts = table->fts;
5518
5519 mutex_enter(&fts->bg_threads_mutex);
5520
5521 if (fts->fts_status & BG_THREAD_READY) {
5522
5523 done = TRUE;
5524 }
5525
5526 mutex_exit(&fts->bg_threads_mutex);
5527
5528 if (!done) {
5529 os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5530
5531 if (max_wait > 0) {
5532
5533 max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5534
5535 /* We ignore the residual value. */
5536 if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5537 break;
5538 }
5539 }
5540
5541 ++count;
5542 } else {
5543 break;
5544 }
5545
5546 if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5547 ut_print_timestamp(stderr);
5548 fprintf(stderr, " InnoDB: Error the background thread "
5549 "for the FTS table %s refuses to start\n",
5550 table->name);
5551
5552 count = 0;
5553 }
5554 }
5555
5556 return(done);
5557 }
5558
5559 /*********************************************************************//**
5560 Add the FTS document id hidden column. */
5561 UNIV_INTERN
5562 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5563 fts_add_doc_id_column(
5564 /*==================*/
5565 dict_table_t* table, /*!< in/out: Table with FTS index */
5566 mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
5567 {
5568 dict_mem_table_add_col(
5569 table, heap,
5570 FTS_DOC_ID_COL_NAME,
5571 DATA_INT,
5572 dtype_form_prtype(
5573 DATA_NOT_NULL | DATA_UNSIGNED
5574 | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5575 sizeof(doc_id_t));
5576 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5577 }
5578
5579 /*********************************************************************//**
5580 Update the query graph with a new document id.
5581 @return Doc ID used */
5582 UNIV_INTERN
5583 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5584 fts_update_doc_id(
5585 /*==============*/
5586 dict_table_t* table, /*!< in: table */
5587 upd_field_t* ufield, /*!< out: update node */
5588 doc_id_t* next_doc_id) /*!< in/out: buffer for writing */
5589 {
5590 doc_id_t doc_id;
5591 dberr_t error = DB_SUCCESS;
5592
5593 if (*next_doc_id) {
5594 doc_id = *next_doc_id;
5595 } else {
5596 /* Get the new document id that will be added. */
5597 error = fts_get_next_doc_id(table, &doc_id);
5598 }
5599
5600 if (error == DB_SUCCESS) {
5601 dict_index_t* clust_index;
5602
5603 ufield->exp = NULL;
5604
5605 ufield->new_val.len = sizeof(doc_id);
5606
5607 clust_index = dict_table_get_first_index(table);
5608
5609 ufield->field_no = dict_col_get_clust_pos(
5610 &table->cols[table->fts->doc_col], clust_index);
5611
5612 /* It is possible we update record that has
5613 not yet be sync-ed from last crash. */
5614
5615 /* Convert to storage byte order. */
5616 ut_a(doc_id != FTS_NULL_DOC_ID);
5617 fts_write_doc_id((byte*) next_doc_id, doc_id);
5618
5619 ufield->new_val.data = next_doc_id;
5620 }
5621
5622 return(doc_id);
5623 }
5624
5625 /*********************************************************************//**
5626 Check if the table has an FTS index. This is the non-inline version
5627 of dict_table_has_fts_index().
5628 @return TRUE if table has an FTS index */
5629 UNIV_INTERN
5630 ibool
fts_dict_table_has_fts_index(dict_table_t * table)5631 fts_dict_table_has_fts_index(
5632 /*=========================*/
5633 dict_table_t* table) /*!< in: table */
5634 {
5635 return(dict_table_has_fts_index(table));
5636 }
5637
5638 /*********************************************************************//**
5639 Create an instance of fts_t.
5640 @return instance of fts_t */
5641 UNIV_INTERN
5642 fts_t*
fts_create(dict_table_t * table)5643 fts_create(
5644 /*=======*/
5645 dict_table_t* table) /*!< in/out: table with FTS indexes */
5646 {
5647 fts_t* fts;
5648 ib_alloc_t* heap_alloc;
5649 mem_heap_t* heap;
5650
5651 ut_a(!table->fts);
5652
5653 heap = mem_heap_create(512);
5654
5655 fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
5656
5657 memset(fts, 0x0, sizeof(*fts));
5658
5659 fts->fts_heap = heap;
5660
5661 fts->doc_col = ULINT_UNDEFINED;
5662
5663 mutex_create(
5664 fts_bg_threads_mutex_key, &fts->bg_threads_mutex,
5665 SYNC_FTS_BG_THREADS);
5666
5667 heap_alloc = ib_heap_allocator_create(heap);
5668 fts->indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
5669 dict_table_get_all_fts_indexes(table, fts->indexes);
5670
5671 return(fts);
5672 }
5673
5674 /*********************************************************************//**
5675 Free the FTS resources. */
5676 UNIV_INTERN
5677 void
fts_free(dict_table_t * table)5678 fts_free(
5679 /*=====*/
5680 dict_table_t* table) /*!< in/out: table with FTS indexes */
5681 {
5682 fts_t* fts = table->fts;
5683
5684 mutex_free(&fts->bg_threads_mutex);
5685
5686 ut_ad(!fts->add_wq);
5687
5688 if (fts->cache) {
5689 fts_cache_clear(fts->cache);
5690 fts_cache_destroy(fts->cache);
5691 fts->cache = NULL;
5692 }
5693
5694 mem_heap_free(fts->fts_heap);
5695
5696 table->fts = NULL;
5697 }
5698
5699 /*********************************************************************//**
5700 Signal FTS threads to initiate shutdown. */
5701 UNIV_INTERN
5702 void
fts_start_shutdown(dict_table_t * table,fts_t * fts)5703 fts_start_shutdown(
5704 /*===============*/
5705 dict_table_t* table, /*!< in: table with FTS indexes */
5706 fts_t* fts) /*!< in: fts instance that needs
5707 to be informed about shutdown */
5708 {
5709 mutex_enter(&fts->bg_threads_mutex);
5710
5711 fts->fts_status |= BG_THREAD_STOP;
5712
5713 mutex_exit(&fts->bg_threads_mutex);
5714
5715 }
5716
5717 /*********************************************************************//**
5718 Wait for FTS threads to shutdown. */
5719 UNIV_INTERN
5720 void
fts_shutdown(dict_table_t * table,fts_t * fts)5721 fts_shutdown(
5722 /*=========*/
5723 dict_table_t* table, /*!< in: table with FTS indexes */
5724 fts_t* fts) /*!< in: fts instance to shutdown */
5725 {
5726 mutex_enter(&fts->bg_threads_mutex);
5727
5728 ut_a(fts->fts_status & BG_THREAD_STOP);
5729
5730 dict_table_wait_for_bg_threads_to_exit(table, 20000);
5731
5732 mutex_exit(&fts->bg_threads_mutex);
5733 }
5734
5735 /*********************************************************************//**
5736 Take a FTS savepoint. */
5737 UNIV_INLINE
5738 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)5739 fts_savepoint_copy(
5740 /*===============*/
5741 const fts_savepoint_t* src, /*!< in: source savepoint */
5742 fts_savepoint_t* dst) /*!< out: destination savepoint */
5743 {
5744 const ib_rbt_node_t* node;
5745 const ib_rbt_t* tables;
5746
5747 tables = src->tables;
5748
5749 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5750
5751 fts_trx_table_t* ftt_dst;
5752 const fts_trx_table_t** ftt_src;
5753
5754 ftt_src = rbt_value(const fts_trx_table_t*, node);
5755
5756 ftt_dst = fts_trx_table_clone(*ftt_src);
5757
5758 rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5759 }
5760 }
5761
5762 /*********************************************************************//**
5763 Take a FTS savepoint. */
5764 UNIV_INTERN
5765 void
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)5766 fts_savepoint_take(
5767 /*===============*/
5768 trx_t* trx, /*!< in: transaction */
5769 fts_trx_t* fts_trx, /*!< in: fts transaction */
5770 const char* name) /*!< in: savepoint name */
5771 {
5772 mem_heap_t* heap;
5773 fts_savepoint_t* savepoint;
5774 fts_savepoint_t* last_savepoint;
5775
5776 ut_a(name != NULL);
5777
5778 heap = fts_trx->heap;
5779
5780 /* The implied savepoint must exist. */
5781 ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5782
5783 last_savepoint = static_cast<fts_savepoint_t*>(
5784 ib_vector_last(fts_trx->savepoints));
5785 savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5786
5787 if (last_savepoint->tables != NULL) {
5788 fts_savepoint_copy(last_savepoint, savepoint);
5789 }
5790 }
5791
5792 /*********************************************************************//**
5793 Lookup a savepoint instance by name.
5794 @return ULINT_UNDEFINED if not found */
5795 UNIV_INLINE
5796 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)5797 fts_savepoint_lookup(
5798 /*==================*/
5799 ib_vector_t* savepoints, /*!< in: savepoints */
5800 const char* name) /*!< in: savepoint name */
5801 {
5802 ulint i;
5803
5804 ut_a(ib_vector_size(savepoints) > 0);
5805
5806 for (i = 1; i < ib_vector_size(savepoints); ++i) {
5807 fts_savepoint_t* savepoint;
5808
5809 savepoint = static_cast<fts_savepoint_t*>(
5810 ib_vector_get(savepoints, i));
5811
5812 if (strcmp(name, savepoint->name) == 0) {
5813 return(i);
5814 }
5815 }
5816
5817 return(ULINT_UNDEFINED);
5818 }
5819
5820 /*********************************************************************//**
5821 Release the savepoint data identified by name. All savepoints created
5822 after the named savepoint are kept.
5823 @return DB_SUCCESS or error code */
5824 UNIV_INTERN
5825 void
fts_savepoint_release(trx_t * trx,const char * name)5826 fts_savepoint_release(
5827 /*==================*/
5828 trx_t* trx, /*!< in: transaction */
5829 const char* name) /*!< in: savepoint name */
5830 {
5831 ut_a(name != NULL);
5832
5833 ib_vector_t* savepoints = trx->fts_trx->savepoints;
5834
5835 ut_a(ib_vector_size(savepoints) > 0);
5836
5837 ulint i = fts_savepoint_lookup(savepoints, name);
5838 if (i != ULINT_UNDEFINED) {
5839 ut_a(i >= 1);
5840
5841 fts_savepoint_t* savepoint;
5842 savepoint = static_cast<fts_savepoint_t*>(
5843 ib_vector_get(savepoints, i));
5844
5845 if (i == ib_vector_size(savepoints) - 1) {
5846 /* If the savepoint is the last, we save its
5847 tables to the previous savepoint. */
5848 fts_savepoint_t* prev_savepoint;
5849 prev_savepoint = static_cast<fts_savepoint_t*>(
5850 ib_vector_get(savepoints, i - 1));
5851
5852 ib_rbt_t* tables = savepoint->tables;
5853 savepoint->tables = prev_savepoint->tables;
5854 prev_savepoint->tables = tables;
5855 }
5856
5857 fts_savepoint_free(savepoint);
5858 ib_vector_remove(savepoints, *(void**)savepoint);
5859
5860 /* Make sure we don't delete the implied savepoint. */
5861 ut_a(ib_vector_size(savepoints) > 0);
5862 }
5863 }
5864
5865 /**********************************************************************//**
5866 Refresh last statement savepoint. */
5867 UNIV_INTERN
5868 void
fts_savepoint_laststmt_refresh(trx_t * trx)5869 fts_savepoint_laststmt_refresh(
5870 /*===========================*/
5871 trx_t* trx) /*!< in: transaction */
5872 {
5873
5874 fts_trx_t* fts_trx;
5875 fts_savepoint_t* savepoint;
5876
5877 fts_trx = trx->fts_trx;
5878
5879 savepoint = static_cast<fts_savepoint_t*>(
5880 ib_vector_pop(fts_trx->last_stmt));
5881 fts_savepoint_free(savepoint);
5882
5883 ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5884 savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
5885 }
5886
5887 /********************************************************************
5888 Undo the Doc ID add/delete operations in last stmt */
5889 static
5890 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)5891 fts_undo_last_stmt(
5892 /*===============*/
5893 fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
5894 fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
5895 {
5896 ib_rbt_t* s_rows;
5897 ib_rbt_t* l_rows;
5898 const ib_rbt_node_t* node;
5899
5900 l_rows = l_ftt->rows;
5901 s_rows = s_ftt->rows;
5902
5903 for (node = rbt_first(l_rows);
5904 node;
5905 node = rbt_next(l_rows, node)) {
5906 fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
5907 ib_rbt_bound_t parent;
5908
5909 rbt_search(s_rows, &parent, &(l_row->doc_id));
5910
5911 if (parent.result == 0) {
5912 fts_trx_row_t* s_row = rbt_value(
5913 fts_trx_row_t, parent.last);
5914
5915 switch (l_row->state) {
5916 case FTS_INSERT:
5917 ut_free(rbt_remove_node(s_rows, parent.last));
5918 break;
5919
5920 case FTS_DELETE:
5921 if (s_row->state == FTS_NOTHING) {
5922 s_row->state = FTS_INSERT;
5923 } else if (s_row->state == FTS_DELETE) {
5924 ut_free(rbt_remove_node(
5925 s_rows, parent.last));
5926 }
5927 break;
5928
5929 /* FIXME: Check if FTS_MODIFY need to be addressed */
5930 case FTS_MODIFY:
5931 case FTS_NOTHING:
5932 break;
5933 default:
5934 ut_error;
5935 }
5936 }
5937 }
5938 }
5939
5940 /**********************************************************************//**
5941 Rollback to savepoint indentified by name.
5942 @return DB_SUCCESS or error code */
5943 UNIV_INTERN
5944 void
fts_savepoint_rollback_last_stmt(trx_t * trx)5945 fts_savepoint_rollback_last_stmt(
5946 /*=============================*/
5947 trx_t* trx) /*!< in: transaction */
5948 {
5949 ib_vector_t* savepoints;
5950 fts_savepoint_t* savepoint;
5951 fts_savepoint_t* last_stmt;
5952 fts_trx_t* fts_trx;
5953 ib_rbt_bound_t parent;
5954 const ib_rbt_node_t* node;
5955 ib_rbt_t* l_tables;
5956 ib_rbt_t* s_tables;
5957
5958 fts_trx = trx->fts_trx;
5959 savepoints = fts_trx->savepoints;
5960
5961 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
5962 last_stmt = static_cast<fts_savepoint_t*>(
5963 ib_vector_last(fts_trx->last_stmt));
5964
5965 l_tables = last_stmt->tables;
5966 s_tables = savepoint->tables;
5967
5968 for (node = rbt_first(l_tables);
5969 node;
5970 node = rbt_next(l_tables, node)) {
5971
5972 fts_trx_table_t** l_ftt;
5973
5974 l_ftt = rbt_value(fts_trx_table_t*, node);
5975
5976 rbt_search_cmp(
5977 s_tables, &parent, &(*l_ftt)->table->id,
5978 fts_trx_table_id_cmp, NULL);
5979
5980 if (parent.result == 0) {
5981 fts_trx_table_t** s_ftt;
5982
5983 s_ftt = rbt_value(fts_trx_table_t*, parent.last);
5984
5985 fts_undo_last_stmt(*s_ftt, *l_ftt);
5986 }
5987 }
5988 }
5989
5990 /**********************************************************************//**
5991 Rollback to savepoint indentified by name.
5992 @return DB_SUCCESS or error code */
5993 UNIV_INTERN
5994 void
fts_savepoint_rollback(trx_t * trx,const char * name)5995 fts_savepoint_rollback(
5996 /*===================*/
5997 trx_t* trx, /*!< in: transaction */
5998 const char* name) /*!< in: savepoint name */
5999 {
6000 ulint i;
6001 ib_vector_t* savepoints;
6002
6003 ut_a(name != NULL);
6004
6005 savepoints = trx->fts_trx->savepoints;
6006
6007 /* We pop all savepoints from the the top of the stack up to
6008 and including the instance that was found. */
6009 i = fts_savepoint_lookup(savepoints, name);
6010
6011 if (i != ULINT_UNDEFINED) {
6012 fts_savepoint_t* savepoint;
6013
6014 ut_a(i > 0);
6015
6016 while (ib_vector_size(savepoints) > i) {
6017 fts_savepoint_t* savepoint;
6018
6019 savepoint = static_cast<fts_savepoint_t*>(
6020 ib_vector_pop(savepoints));
6021
6022 if (savepoint->name != NULL) {
6023 /* Since name was allocated on the heap, the
6024 memory will be released when the transaction
6025 completes. */
6026 savepoint->name = NULL;
6027
6028 fts_savepoint_free(savepoint);
6029 }
6030 }
6031
6032 /* Pop all a elements from the top of the stack that may
6033 have been released. We have to be careful that we don't
6034 delete the implied savepoint. */
6035
6036 for (savepoint = static_cast<fts_savepoint_t*>(
6037 ib_vector_last(savepoints));
6038 ib_vector_size(savepoints) > 1
6039 && savepoint->name == NULL;
6040 savepoint = static_cast<fts_savepoint_t*>(
6041 ib_vector_last(savepoints))) {
6042
6043 ib_vector_pop(savepoints);
6044 }
6045
6046 /* Make sure we don't delete the implied savepoint. */
6047 ut_a(ib_vector_size(savepoints) > 0);
6048
6049 /* Restore the savepoint. */
6050 fts_savepoint_take(trx, trx->fts_trx, name);
6051 }
6052 }
6053
6054 /**********************************************************************//**
6055 Check if a table is an FTS auxiliary table name.
6056 @return TRUE if the name matches an auxiliary table name pattern */
6057 static
6058 ibool
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)6059 fts_is_aux_table_name(
6060 /*==================*/
6061 fts_aux_table_t*table, /*!< out: table info */
6062 const char* name, /*!< in: table name */
6063 ulint len) /*!< in: length of table name */
6064 {
6065 const char* ptr;
6066 char* end;
6067 char my_name[MAX_FULL_NAME_LEN + 1];
6068
6069 ut_ad(len <= MAX_FULL_NAME_LEN);
6070 ut_memcpy(my_name, name, len);
6071 my_name[len] = 0;
6072 end = my_name + len;
6073
6074 ptr = static_cast<const char*>(memchr(my_name, '/', len));
6075
6076 if (ptr != NULL) {
6077 /* We will start the match after the '/' */
6078 ++ptr;
6079 len = end - ptr;
6080 }
6081
6082 /* All auxiliary tables are prefixed with "FTS_" and the name
6083 length will be at the very least greater than 20 bytes. */
6084 if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
6085 ulint i;
6086
6087 /* Skip the prefix. */
6088 ptr += 4;
6089 len -= 4;
6090
6091 /* Try and read the table id. */
6092 if (!fts_read_object_id(&table->parent_id, ptr)) {
6093 return(FALSE);
6094 }
6095
6096 /* Skip the table id. */
6097 ptr = static_cast<const char*>(memchr(ptr, '_', len));
6098
6099 if (ptr == NULL) {
6100 return(FALSE);
6101 }
6102
6103 /* Skip the underscore. */
6104 ++ptr;
6105 ut_a(end > ptr);
6106 len = end - ptr;
6107
6108 /* First search the common table suffix array. */
6109 for (i = 0; fts_common_tables[i] != NULL; ++i) {
6110
6111 if (strncmp(ptr, fts_common_tables[i], len) == 0) {
6112 return(TRUE);
6113 }
6114 }
6115
6116 /* Could be obsolete common tables. */
6117 if (strncmp(ptr, "ADDED", len) == 0
6118 || strncmp(ptr, "STOPWORDS", len) == 0) {
6119 return(true);
6120 }
6121
6122 /* Try and read the index id. */
6123 if (!fts_read_object_id(&table->index_id, ptr)) {
6124 return(FALSE);
6125 }
6126
6127 /* Skip the table id. */
6128 ptr = static_cast<const char*>(memchr(ptr, '_', len));
6129
6130 if (ptr == NULL) {
6131 return(FALSE);
6132 }
6133
6134 /* Skip the underscore. */
6135 ++ptr;
6136 ut_a(end > ptr);
6137 len = end - ptr;
6138
6139 /* Search the FT index specific array. */
6140 for (i = 0; fts_index_selector[i].value; ++i) {
6141
6142 if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
6143 return(TRUE);
6144 }
6145 }
6146
6147 /* Other FT index specific table(s). */
6148 if (strncmp(ptr, "DOC_ID", len) == 0) {
6149 return(TRUE);
6150 }
6151 }
6152
6153 return(FALSE);
6154 }
6155
6156 /**********************************************************************//**
6157 Callback function to read a single table ID column.
6158 @return Always return TRUE */
6159 static
6160 ibool
fts_read_tables(void * row,void * user_arg)6161 fts_read_tables(
6162 /*============*/
6163 void* row, /*!< in: sel_node_t* */
6164 void* user_arg) /*!< in: pointer to ib_vector_t */
6165 {
6166 int i;
6167 fts_aux_table_t*table;
6168 mem_heap_t* heap;
6169 ibool done = FALSE;
6170 ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg);
6171 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
6172 que_node_t* exp = sel_node->select_list;
6173
6174 /* Must be a heap allocated vector. */
6175 ut_a(tables->allocator->arg != NULL);
6176
6177 /* We will use this heap for allocating strings. */
6178 heap = static_cast<mem_heap_t*>(tables->allocator->arg);
6179 table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
6180
6181 memset(table, 0x0, sizeof(*table));
6182
6183 /* Iterate over the columns and read the values. */
6184 for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
6185
6186 dfield_t* dfield = que_node_get_val(exp);
6187 void* data = dfield_get_data(dfield);
6188 ulint len = dfield_get_len(dfield);
6189
6190 ut_a(len != UNIV_SQL_NULL);
6191
6192 /* Note: The column numbers below must match the SELECT */
6193 switch (i) {
6194 case 0: /* NAME */
6195
6196 if (!fts_is_aux_table_name(
6197 table, static_cast<const char*>(data), len)) {
6198 ib_vector_pop(tables);
6199 done = TRUE;
6200 break;
6201 }
6202
6203 table->name = static_cast<char*>(
6204 mem_heap_alloc(heap, len + 1));
6205 memcpy(table->name, data, len);
6206 table->name[len] = 0;
6207 break;
6208
6209 case 1: /* ID */
6210 ut_a(len == 8);
6211 table->id = mach_read_from_8(
6212 static_cast<const byte*>(data));
6213 break;
6214
6215 default:
6216 ut_error;
6217 }
6218 }
6219
6220 return(TRUE);
6221 }
6222
6223 /******************************************************************//**
6224 Callback that sets a hex formatted FTS table's flags2 in
6225 SYS_TABLES. The flags is stored in MIX_LEN column.
6226 @return FALSE if all OK */
6227 static
6228 ibool
fts_set_hex_format(void * row,void * user_arg)6229 fts_set_hex_format(
6230 /*===============*/
6231 void* row, /*!< in: sel_node_t* */
6232 void* user_arg) /*!< in: bool set/unset flag */
6233 {
6234 sel_node_t* node = static_cast<sel_node_t*>(row);
6235 dfield_t* dfield = que_node_get_val(node->select_list);
6236
6237 ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
6238 ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t));
6239 /* There should be at most one matching record. So the value
6240 must be the default value. */
6241 ut_ad(mach_read_from_4(static_cast<byte*>(user_arg))
6242 == ULINT32_UNDEFINED);
6243
6244 ulint flags2 = mach_read_from_4(
6245 static_cast<byte*>(dfield_get_data(dfield)));
6246
6247 flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
6248
6249 mach_write_to_4(static_cast<byte*>(user_arg), flags2);
6250
6251 return(FALSE);
6252 }
6253
6254 /*****************************************************************//**
6255 Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
6256 @return DB_SUCCESS or error code. */
6257 UNIV_INTERN
6258 dberr_t
fts_update_hex_format_flag(trx_t * trx,table_id_t table_id,bool dict_locked)6259 fts_update_hex_format_flag(
6260 /*=======================*/
6261 trx_t* trx, /*!< in/out: transaction that
6262 covers the update */
6263 table_id_t table_id, /*!< in: Table for which we want
6264 to set the root table->flags2 */
6265 bool dict_locked) /*!< in: set to true if the
6266 caller already owns the
6267 dict_sys_t::mutex. */
6268 {
6269 pars_info_t* info;
6270 ib_uint32_t flags2;
6271
6272 static const char sql[] =
6273 "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
6274 "DECLARE FUNCTION my_func;\n"
6275 "DECLARE CURSOR c IS\n"
6276 " SELECT MIX_LEN "
6277 " FROM SYS_TABLES "
6278 " WHERE ID = :table_id FOR UPDATE;"
6279 "\n"
6280 "BEGIN\n"
6281 "OPEN c;\n"
6282 "WHILE 1 = 1 LOOP\n"
6283 " FETCH c INTO my_func();\n"
6284 " IF c % NOTFOUND THEN\n"
6285 " EXIT;\n"
6286 " END IF;\n"
6287 "END LOOP;\n"
6288 "UPDATE SYS_TABLES"
6289 " SET MIX_LEN = :flags2"
6290 " WHERE ID = :table_id;\n"
6291 "CLOSE c;\n"
6292 "END;\n";
6293
6294 flags2 = ULINT32_UNDEFINED;
6295
6296 info = pars_info_create();
6297
6298 pars_info_add_ull_literal(info, "table_id", table_id);
6299 pars_info_bind_int4_literal(info, "flags2", &flags2);
6300
6301 pars_info_bind_function(
6302 info, "my_func", fts_set_hex_format, &flags2);
6303
6304 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6305 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6306 }
6307
6308 dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
6309
6310 ut_a(flags2 != ULINT32_UNDEFINED);
6311
6312 return (err);
6313 }
6314
6315 /*********************************************************************//**
6316 Rename an aux table to HEX format. It's called when "%016llu" is used
6317 to format an object id in table name, which only happens in Windows. */
6318 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6319 dberr_t
fts_rename_one_aux_table_to_hex_format(trx_t * trx,const fts_aux_table_t * aux_table,const dict_table_t * parent_table)6320 fts_rename_one_aux_table_to_hex_format(
6321 /*===================================*/
6322 trx_t* trx, /*!< in: transaction */
6323 const fts_aux_table_t* aux_table, /*!< in: table info */
6324 const dict_table_t* parent_table) /*!< in: parent table name */
6325 {
6326 const char* ptr;
6327 fts_table_t fts_table;
6328 char* new_name;
6329 dberr_t error;
6330
6331 ptr = strchr(aux_table->name, '/');
6332 ut_a(ptr != NULL);
6333 ++ptr;
6334 /* Skip "FTS_", table id and underscore */
6335 for (ulint i = 0; i < 2; ++i) {
6336 ptr = strchr(ptr, '_');
6337 ut_a(ptr != NULL);
6338 ++ptr;
6339 }
6340
6341 fts_table.suffix = NULL;
6342 if (aux_table->index_id == 0) {
6343 fts_table.type = FTS_COMMON_TABLE;
6344
6345 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
6346 if (strcmp(ptr, fts_common_tables[i]) == 0) {
6347 fts_table.suffix = fts_common_tables[i];
6348 break;
6349 }
6350 }
6351 } else {
6352 fts_table.type = FTS_INDEX_TABLE;
6353
6354 /* Skip index id and underscore */
6355 ptr = strchr(ptr, '_');
6356 ut_a(ptr != NULL);
6357 ++ptr;
6358
6359 for (ulint i = 0; fts_index_selector[i].value; ++i) {
6360 if (strcmp(ptr, fts_get_suffix(i)) == 0) {
6361 fts_table.suffix = fts_get_suffix(i);
6362 break;
6363 }
6364 }
6365 }
6366
6367 ut_a(fts_table.suffix != NULL);
6368
6369 fts_table.parent = parent_table->name;
6370 fts_table.table_id = aux_table->parent_id;
6371 fts_table.index_id = aux_table->index_id;
6372 fts_table.table = parent_table;
6373
6374 new_name = fts_get_table_name(&fts_table);
6375 ut_ad(strcmp(new_name, aux_table->name) != 0);
6376
6377 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6378 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6379 }
6380
6381 error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
6382 FALSE);
6383
6384 if (error != DB_SUCCESS) {
6385 ib_logf(IB_LOG_LEVEL_WARN,
6386 "Failed to rename aux table \'%s\' to "
6387 "new format \'%s\'. ",
6388 aux_table->name, new_name);
6389 } else {
6390 ib_logf(IB_LOG_LEVEL_INFO,
6391 "Renamed aux table \'%s\' to \'%s\'.",
6392 aux_table->name, new_name);
6393 }
6394
6395 mem_free(new_name);
6396
6397 return (error);
6398 }
6399
6400 /**********************************************************************//**
6401 Rename all aux tables of a parent table to HEX format. Also set aux tables'
6402 flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
6403 It's called when "%016llu" is used to format an object id in table name,
6404 which only happens in Windows.
6405 Note the ids in tables are correct but the names are old ambiguous ones.
6406
6407 This function should make sure that either all the parent table and aux tables
6408 are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */
6409 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6410 dberr_t
fts_rename_aux_tables_to_hex_format_low(trx_t * trx,dict_table_t * parent_table,ib_vector_t * tables)6411 fts_rename_aux_tables_to_hex_format_low(
6412 /*====================================*/
6413 trx_t* trx, /*!< in: transaction */
6414 dict_table_t* parent_table, /*!< in: parent table */
6415 ib_vector_t* tables) /*!< in: aux tables to rename. */
6416 {
6417 dberr_t error;
6418 ulint count;
6419
6420 ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
6421 ut_ad(!ib_vector_is_empty(tables));
6422
6423 error = fts_update_hex_format_flag(trx, parent_table->id, true);
6424
6425 if (error != DB_SUCCESS) {
6426 ib_logf(IB_LOG_LEVEL_WARN,
6427 "Setting parent table %s to hex format failed.",
6428 parent_table->name);
6429
6430 fts_sql_rollback(trx);
6431 return (error);
6432 }
6433
6434 DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6435
6436 for (count = 0; count < ib_vector_size(tables); ++count) {
6437 dict_table_t* table;
6438 fts_aux_table_t* aux_table;
6439
6440 aux_table = static_cast<fts_aux_table_t*>(
6441 ib_vector_get(tables, count));
6442
6443 table = dict_table_open_on_id(aux_table->id, TRUE,
6444 DICT_TABLE_OP_NORMAL);
6445
6446 ut_ad(table != NULL);
6447 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));
6448
6449 /* Set HEX_NAME flag here to make sure we can get correct
6450 new table name in following function */
6451 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6452 error = fts_rename_one_aux_table_to_hex_format(trx,
6453 aux_table, parent_table);
6454 /* We will rollback the trx if the error != DB_SUCCESS,
6455 so setting the flag here is the same with setting it in
6456 row_rename_table_for_mysql */
6457 DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);
6458
6459 if (error != DB_SUCCESS) {
6460 dict_table_close(table, TRUE, FALSE);
6461
6462 ib_logf(IB_LOG_LEVEL_WARN,
6463 "Failed to rename one aux table %s "
6464 "Will revert all successful rename "
6465 "operations.", aux_table->name);
6466
6467 fts_sql_rollback(trx);
6468 break;
6469 }
6470
6471 error = fts_update_hex_format_flag(trx, aux_table->id, true);
6472 dict_table_close(table, TRUE, FALSE);
6473
6474 if (error != DB_SUCCESS) {
6475 ib_logf(IB_LOG_LEVEL_WARN,
6476 "Setting aux table %s to hex format failed.",
6477 aux_table->name);
6478
6479 fts_sql_rollback(trx);
6480 break;
6481 }
6482 }
6483
6484 if (error != DB_SUCCESS) {
6485 ut_ad(count != ib_vector_size(tables));
6486 /* If rename fails, thr trx would be rolled back, we can't
6487 use it any more, we'll start a new background trx to do
6488 the reverting. */
6489 ut_a(trx->state == TRX_STATE_NOT_STARTED);
6490 bool not_rename = false;
6491
6492 /* Try to revert those succesful rename operations
6493 in order to revert the ibd file rename. */
6494 for (ulint i = 0; i <= count; ++i) {
6495 dict_table_t* table;
6496 fts_aux_table_t* aux_table;
6497 trx_t* trx_bg;
6498 dberr_t err;
6499
6500 aux_table = static_cast<fts_aux_table_t*>(
6501 ib_vector_get(tables, i));
6502
6503 table = dict_table_open_on_id(aux_table->id, TRUE,
6504 DICT_TABLE_OP_NORMAL);
6505 ut_ad(table != NULL);
6506
6507 if (not_rename) {
6508 DICT_TF2_FLAG_UNSET(table,
6509 DICT_TF2_FTS_AUX_HEX_NAME);
6510 }
6511
6512 if (!DICT_TF2_FLAG_IS_SET(table,
6513 DICT_TF2_FTS_AUX_HEX_NAME)) {
6514 dict_table_close(table, TRUE, FALSE);
6515 continue;
6516 }
6517
6518 trx_bg = trx_allocate_for_background();
6519 trx_bg->op_info = "Revert half done rename";
6520 trx_bg->dict_operation_lock_mode = RW_X_LATCH;
6521 trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
6522
6523 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6524 err = row_rename_table_for_mysql(table->name,
6525 aux_table->name,
6526 trx_bg, FALSE);
6527
6528 trx_bg->dict_operation_lock_mode = 0;
6529 dict_table_close(table, TRUE, FALSE);
6530
6531 if (err != DB_SUCCESS) {
6532 ib_logf(IB_LOG_LEVEL_WARN, "Failed to revert "
6533 "table %s. Please revert manually.",
6534 table->name);
6535 fts_sql_rollback(trx_bg);
6536 trx_free_for_background(trx_bg);
6537 /* Continue to clear aux tables' flags2 */
6538 not_rename = true;
6539 continue;
6540 }
6541
6542 fts_sql_commit(trx_bg);
6543 trx_free_for_background(trx_bg);
6544 }
6545
6546 DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6547 }
6548
6549 return (error);
6550 }
6551
6552 /**********************************************************************//**
6553 Convert an id, which is actually a decimal number but was regard as a HEX
6554 from a string, to its real value. */
6555 static
6556 ib_id_t
fts_fake_hex_to_dec(ib_id_t id)6557 fts_fake_hex_to_dec(
6558 /*================*/
6559 ib_id_t id) /*!< in: number to convert */
6560 {
6561 ib_id_t dec_id = 0;
6562 char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
6563 int ret MY_ATTRIBUTE((unused));
6564
6565 ret = sprintf(tmp_id, UINT64PFx, id);
6566 ut_ad(ret == 16);
6567 #ifdef _WIN32
6568 ret = sscanf(tmp_id, "%016llu", &dec_id);
6569 #else
6570 ret = sscanf(tmp_id, "%016" PRIu64, &dec_id);
6571 #endif /* _WIN32 */
6572 ut_ad(ret == 1);
6573
6574 return dec_id;
6575 }
6576
6577 /*********************************************************************//**
6578 Compare two fts_aux_table_t parent_ids.
6579 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
6580 UNIV_INLINE
6581 int
fts_check_aux_table_parent_id_cmp(const void * p1,const void * p2)6582 fts_check_aux_table_parent_id_cmp(
6583 /*==============================*/
6584 const void* p1, /*!< in: id1 */
6585 const void* p2) /*!< in: id2 */
6586 {
6587 const fts_aux_table_t* fa1 = static_cast<const fts_aux_table_t*>(p1);
6588 const fts_aux_table_t* fa2 = static_cast<const fts_aux_table_t*>(p2);
6589
6590 return static_cast<int>(fa1->parent_id - fa2->parent_id);
6591 }
6592
6593 /** Mark all the fts index associated with the parent table as corrupted.
6594 @param[in] trx transaction
6595 @param[in, out] parent_table fts index associated with this parent table
6596 will be marked as corrupted. */
6597 static
6598 void
fts_parent_all_index_set_corrupt(trx_t * trx,dict_table_t * parent_table)6599 fts_parent_all_index_set_corrupt(
6600 trx_t* trx,
6601 dict_table_t* parent_table)
6602 {
6603 fts_t* fts = parent_table->fts;
6604
6605 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6606 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6607 }
6608
6609 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6610 dict_index_t* index = static_cast<dict_index_t*>(
6611 ib_vector_getp_const(fts->indexes, j));
6612 dict_set_corrupted(index,
6613 trx, "DROP ORPHANED TABLE");
6614 }
6615 }
6616
6617 /** Mark the fts index which index id matches the id as corrupted.
6618 @param[in] trx transaction
6619 @param[in] id index id to search
6620 @param[in, out] parent_table parent table to check with all
6621 the index. */
6622 static
6623 void
fts_set_index_corrupt(trx_t * trx,index_id_t id,dict_table_t * table)6624 fts_set_index_corrupt(
6625 trx_t* trx,
6626 index_id_t id,
6627 dict_table_t* table)
6628 {
6629 fts_t* fts = table->fts;
6630
6631 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6632 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6633 }
6634
6635 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6636 dict_index_t* index = static_cast<dict_index_t*>(
6637 ib_vector_getp_const(fts->indexes, j));
6638 if (index->id == id) {
6639 dict_set_corrupted(index, trx,
6640 "DROP ORPHANED TABLE");
6641 break;
6642 }
6643 }
6644 }
6645
6646 /** Check the index for the aux table is corrupted.
6647 @param[in] aux_table auxiliary table
6648 @retval nonzero if index is corrupted, zero for valid index */
6649 static
6650 ulint
fts_check_corrupt_index(fts_aux_table_t * aux_table)6651 fts_check_corrupt_index(
6652 fts_aux_table_t* aux_table)
6653 {
6654 dict_table_t* table;
6655 dict_index_t* index;
6656 table = dict_table_open_on_id(
6657 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6658
6659 if (table == NULL) {
6660 return(0);
6661 }
6662
6663 for (index = UT_LIST_GET_FIRST(table->indexes);
6664 index;
6665 index = UT_LIST_GET_NEXT(indexes, index)) {
6666 if (index->id == aux_table->index_id) {
6667 ut_ad(index->type & DICT_FTS);
6668 dict_table_close(table, true, false);
6669 return(dict_index_is_corrupted(index));
6670 }
6671 }
6672
6673 dict_table_close(table, true, false);
6674 return(0);
6675 }
6676
6677 /* Get parent table name if it's a fts aux table
6678 @param[in] aux_table_name aux table name
6679 @param[in] aux_table_len aux table length
6680 @return parent table name, or NULL */
6681 char*
fts_get_parent_table_name(const char * aux_table_name,ulint aux_table_len)6682 fts_get_parent_table_name(
6683 const char* aux_table_name,
6684 ulint aux_table_len)
6685 {
6686 fts_aux_table_t aux_table;
6687 char* parent_table_name = NULL;
6688
6689 if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
6690 dict_table_t* parent_table;
6691
6692 parent_table = dict_table_open_on_id(
6693 aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6694
6695 if (parent_table != NULL) {
6696 parent_table_name = mem_strdupl(
6697 parent_table->name,
6698 strlen(parent_table->name));
6699
6700 dict_table_close(parent_table, TRUE, FALSE);
6701 }
6702 }
6703
6704 return(parent_table_name);
6705 }
6706
6707 /** Check the validity of the parent table.
6708 @param[in] aux_table auxiliary table
6709 @return true if it is a valid table or false if it is not */
6710 static
6711 bool
fts_valid_parent_table(const fts_aux_table_t * aux_table)6712 fts_valid_parent_table(
6713 const fts_aux_table_t* aux_table)
6714 {
6715 dict_table_t* parent_table;
6716 bool valid = false;
6717
6718 parent_table = dict_table_open_on_id(
6719 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6720
6721 if (parent_table != NULL && parent_table->fts != NULL) {
6722 if (aux_table->index_id == 0) {
6723 valid = true;
6724 } else {
6725 index_id_t id = aux_table->index_id;
6726 dict_index_t* index;
6727
6728 /* Search for the FT index in the table's list. */
6729 for (index = UT_LIST_GET_FIRST(parent_table->indexes);
6730 index;
6731 index = UT_LIST_GET_NEXT(indexes, index)) {
6732 if (index->id == id) {
6733 valid = true;
6734 break;
6735 }
6736
6737 }
6738 }
6739 }
6740
6741 if (parent_table) {
6742 dict_table_close(parent_table, TRUE, FALSE);
6743 }
6744
6745 return(valid);
6746 }
6747
6748 /** Try to rename all aux tables of the specified parent table.
6749 @param[in] aux_tables aux_tables to be renamed
6750 @param[in] parent_table parent table of all aux
6751 tables stored in tables. */
6752 static
6753 void
fts_rename_aux_tables_to_hex_format(ib_vector_t * aux_tables,dict_table_t * parent_table)6754 fts_rename_aux_tables_to_hex_format(
6755 ib_vector_t* aux_tables,
6756 dict_table_t* parent_table)
6757 {
6758 dberr_t err;
6759 trx_t* trx_rename = trx_allocate_for_background();
6760 trx_rename->op_info = "Rename aux tables to hex format";
6761 trx_rename->dict_operation_lock_mode = RW_X_LATCH;
6762 trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
6763
6764 err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
6765 parent_table, aux_tables);
6766
6767 trx_rename->dict_operation_lock_mode = 0;
6768
6769 if (err != DB_SUCCESS) {
6770
6771 ib_logf(IB_LOG_LEVEL_WARN,
6772 "Rollback operations on all aux tables of table %s. "
6773 "All the fts index associated with the table are "
6774 "marked as corrupted. Please rebuild the "
6775 "index again.", parent_table->name);
6776 fts_sql_rollback(trx_rename);
6777
6778 /* Corrupting the fts index related to parent table. */
6779 trx_t* trx_corrupt;
6780 trx_corrupt = trx_allocate_for_background();
6781 trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
6782 trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
6783 fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
6784 trx_corrupt->dict_operation_lock_mode = 0;
6785 fts_sql_commit(trx_corrupt);
6786 trx_free_for_background(trx_corrupt);
6787 } else {
6788 fts_sql_commit(trx_rename);
6789 }
6790
6791 trx_free_for_background(trx_rename);
6792 ib_vector_reset(aux_tables);
6793 }
6794
6795 /** Set the hex format flag for the parent table.
6796 @param[in, out] parent_table parent table
6797 @param[in] trx transaction */
6798 static
6799 void
fts_set_parent_hex_format_flag(dict_table_t * parent_table,trx_t * trx)6800 fts_set_parent_hex_format_flag(
6801 dict_table_t* parent_table,
6802 trx_t* trx)
6803 {
6804 if (!DICT_TF2_FLAG_IS_SET(parent_table,
6805 DICT_TF2_FTS_AUX_HEX_NAME)) {
6806 DBUG_EXECUTE_IF("parent_table_flag_fail",
6807 ib_logf(IB_LOG_LEVEL_FATAL,
6808 "Setting parent table %s to hex format "
6809 "failed. Please try to restart the server "
6810 "again, if it doesn't work, the system "
6811 "tables might be corrupted.",
6812 parent_table->name);
6813 return;);
6814
6815 dberr_t err = fts_update_hex_format_flag(
6816 trx, parent_table->id, true);
6817
6818 if (err != DB_SUCCESS) {
6819 ib_logf(IB_LOG_LEVEL_FATAL,
6820 "Setting parent table %s to hex format "
6821 "failed. Please try to restart the server "
6822 "again, if it doesn't work, the system "
6823 "tables might be corrupted.",
6824 parent_table->name);
6825 } else {
6826 DICT_TF2_FLAG_SET(
6827 parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6828 }
6829 }
6830 }
6831
6832 /** Drop the obsolete auxilary table.
6833 @param[in] tables tables to be dropped. */
6834 static
6835 void
fts_drop_obsolete_aux_table_from_vector(ib_vector_t * tables)6836 fts_drop_obsolete_aux_table_from_vector(
6837 ib_vector_t* tables)
6838 {
6839 dberr_t err;
6840
6841 for (ulint count = 0; count < ib_vector_size(tables);
6842 ++count) {
6843
6844 fts_aux_table_t* aux_drop_table;
6845 aux_drop_table = static_cast<fts_aux_table_t*>(
6846 ib_vector_get(tables, count));
6847 trx_t* trx_drop = trx_allocate_for_background();
6848 trx_drop->op_info = "Drop obsolete aux tables";
6849 trx_drop->dict_operation_lock_mode = RW_X_LATCH;
6850 trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
6851
6852 err = row_drop_table_for_mysql(
6853 aux_drop_table->name, trx_drop, false, true);
6854
6855 trx_drop->dict_operation_lock_mode = 0;
6856
6857 if (err != DB_SUCCESS) {
6858 /* We don't need to worry about the
6859 failure, since server would try to
6860 drop it on next restart, even if
6861 the table was broken. */
6862 ib_logf(IB_LOG_LEVEL_WARN,
6863 "Fail to drop obsolete aux table '%s', which "
6864 "is harmless. will try to drop it on next "
6865 "restart.", aux_drop_table->name);
6866 fts_sql_rollback(trx_drop);
6867 } else {
6868 ib_logf(IB_LOG_LEVEL_INFO,
6869 "Dropped obsolete aux table '%s'.",
6870 aux_drop_table->name);
6871
6872 fts_sql_commit(trx_drop);
6873 }
6874
6875 trx_free_for_background(trx_drop);
6876 }
6877 }
6878
6879 /** Drop all the auxiliary table present in the vector.
6880 @param[in] trx transaction
6881 @param[in] tables tables to be dropped */
6882 static
6883 void
fts_drop_aux_table_from_vector(trx_t * trx,ib_vector_t * tables)6884 fts_drop_aux_table_from_vector(
6885 trx_t* trx,
6886 ib_vector_t* tables)
6887 {
6888 for (ulint count = 0; count < ib_vector_size(tables);
6889 ++count) {
6890 fts_aux_table_t* aux_drop_table;
6891 aux_drop_table = static_cast<fts_aux_table_t*>(
6892 ib_vector_get(tables, count));
6893
6894 /* Check for the validity of the parent table */
6895 if (!fts_valid_parent_table(aux_drop_table)) {
6896 ib_logf(IB_LOG_LEVEL_WARN,
6897 "Parent table of FTS auxiliary table %s not "
6898 "found.", aux_drop_table->name);
6899 dberr_t err = fts_drop_table(trx, aux_drop_table->name);
6900 if (err == DB_FAIL) {
6901 char* path = fil_make_ibd_name(
6902 aux_drop_table->name, false);
6903 os_file_delete_if_exists(innodb_file_data_key,
6904 path);
6905 mem_free(path);
6906 }
6907 }
6908 }
6909 }
6910
6911 /**********************************************************************//**
6912 Check and drop all orphaned FTS auxiliary tables, those that don't have
6913 a parent table or FTS index defined on them.
6914 @return DB_SUCCESS or error code */
6915 static MY_ATTRIBUTE((nonnull))
6916 void
fts_check_and_drop_orphaned_tables(trx_t * trx,ib_vector_t * tables)6917 fts_check_and_drop_orphaned_tables(
6918 /*===============================*/
6919 trx_t* trx, /*!< in: transaction */
6920 ib_vector_t* tables) /*!< in: tables to check */
6921 {
6922 mem_heap_t* heap;
6923 ib_vector_t* aux_tables_to_rename;
6924 ib_vector_t* invalid_aux_tables;
6925 ib_vector_t* valid_aux_tables;
6926 ib_vector_t* drop_aux_tables;
6927 ib_vector_t* obsolete_aux_tables;
6928 ib_alloc_t* heap_alloc;
6929
6930 heap = mem_heap_create(1024);
6931 heap_alloc = ib_heap_allocator_create(heap);
6932
6933 /* We store all aux tables belonging to the same parent table here,
6934 and rename all these tables in a batch mode. */
6935 aux_tables_to_rename = ib_vector_create(heap_alloc,
6936 sizeof(fts_aux_table_t), 128);
6937
6938 /* We store all fake auxiliary table and orphaned table here. */
6939 invalid_aux_tables = ib_vector_create(heap_alloc,
6940 sizeof(fts_aux_table_t), 128);
6941
6942 /* We store all valid aux tables. We use this to filter the
6943 fake auxiliary table from invalid auxiliary tables. */
6944 valid_aux_tables = ib_vector_create(heap_alloc,
6945 sizeof(fts_aux_table_t), 128);
6946
6947 /* We store all auxiliary tables to be dropped. */
6948 drop_aux_tables = ib_vector_create(heap_alloc,
6949 sizeof(fts_aux_table_t), 128);
6950
6951 /* We store all obsolete auxiliary tables to be dropped. */
6952 obsolete_aux_tables = ib_vector_create(heap_alloc,
6953 sizeof(fts_aux_table_t), 128);
6954
6955 /* Sort by parent_id first, in case rename will fail */
6956 ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
6957
6958 for (ulint i = 0; i < ib_vector_size(tables); ++i) {
6959 dict_table_t* parent_table;
6960 fts_aux_table_t* aux_table;
6961 bool drop = false;
6962 dict_table_t* table;
6963 fts_aux_table_t* next_aux_table = NULL;
6964 ib_id_t orig_parent_id = 0;
6965 ib_id_t orig_index_id = 0;
6966 bool rename = false;
6967
6968 aux_table = static_cast<fts_aux_table_t*>(
6969 ib_vector_get(tables, i));
6970
6971 table = dict_table_open_on_id(
6972 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
6973 orig_parent_id = aux_table->parent_id;
6974 orig_index_id = aux_table->index_id;
6975
6976 if (table == NULL || strcmp(table->name, aux_table->name)) {
6977
6978 bool fake_aux = false;
6979
6980 if (table != NULL) {
6981 dict_table_close(table, TRUE, FALSE);
6982 }
6983
6984 if (i + 1 < ib_vector_size(tables)) {
6985 next_aux_table = static_cast<fts_aux_table_t*>(
6986 ib_vector_get(tables, i + 1));
6987 }
6988
6989 /* To know whether aux table is fake fts or
6990 orphan fts table. */
6991 for (ulint count = 0;
6992 count < ib_vector_size(valid_aux_tables);
6993 count++) {
6994 fts_aux_table_t* valid_aux;
6995 valid_aux = static_cast<fts_aux_table_t*>(
6996 ib_vector_get(valid_aux_tables, count));
6997 if (strcmp(valid_aux->name,
6998 aux_table->name) == 0) {
6999 fake_aux = true;
7000 break;
7001 }
7002 }
7003
7004 /* All aux tables of parent table, whose id is
7005 last_parent_id, have been checked, try to rename
7006 them if necessary. */
7007 if ((next_aux_table == NULL
7008 || orig_parent_id != next_aux_table->parent_id)
7009 && (!ib_vector_is_empty(aux_tables_to_rename))) {
7010
7011 ulint parent_id = fts_fake_hex_to_dec(
7012 aux_table->parent_id);
7013
7014 parent_table = dict_table_open_on_id(
7015 parent_id, TRUE,
7016 DICT_TABLE_OP_NORMAL);
7017
7018 fts_rename_aux_tables_to_hex_format(
7019 aux_tables_to_rename, parent_table);
7020
7021 dict_table_close(parent_table, TRUE,
7022 FALSE);
7023 }
7024
7025 /* If the aux table is fake aux table. Skip it. */
7026 if (!fake_aux) {
7027 ib_vector_push(invalid_aux_tables, aux_table);
7028 }
7029
7030 continue;
7031 } else if (!DICT_TF2_FLAG_IS_SET(table,
7032 DICT_TF2_FTS_AUX_HEX_NAME)) {
7033
7034 aux_table->parent_id = fts_fake_hex_to_dec(
7035 aux_table->parent_id);
7036
7037 if (aux_table->index_id != 0) {
7038 aux_table->index_id = fts_fake_hex_to_dec(
7039 aux_table->index_id);
7040 }
7041
7042 ut_ad(aux_table->id > aux_table->parent_id);
7043
7044 /* Check whether parent table id and index id
7045 are stored as decimal format. */
7046 if (fts_valid_parent_table(aux_table)) {
7047
7048 parent_table = dict_table_open_on_id(
7049 aux_table->parent_id, true,
7050 DICT_TABLE_OP_NORMAL);
7051
7052 ut_ad(parent_table != NULL);
7053 ut_ad(parent_table->fts != NULL);
7054
7055 if (!DICT_TF2_FLAG_IS_SET(
7056 parent_table,
7057 DICT_TF2_FTS_AUX_HEX_NAME)) {
7058 rename = true;
7059 }
7060
7061 dict_table_close(parent_table, TRUE, FALSE);
7062 }
7063
7064 if (!rename) {
7065 /* Reassign the original value of
7066 aux table if it is not in decimal format */
7067 aux_table->parent_id = orig_parent_id;
7068 aux_table->index_id = orig_index_id;
7069 }
7070 }
7071
7072 if (table != NULL) {
7073 dict_table_close(table, true, false);
7074 }
7075
7076 if (!rename) {
7077 /* Check the validity of the parent table. */
7078 if (!fts_valid_parent_table(aux_table)) {
7079 drop = true;
7080 }
7081 }
7082
7083 /* Filter out the fake aux table by comparing with the
7084 current valid auxiliary table name . */
7085 for (ulint count = 0;
7086 count < ib_vector_size(invalid_aux_tables); count++) {
7087 fts_aux_table_t* invalid_aux;
7088 invalid_aux = static_cast<fts_aux_table_t*>(
7089 ib_vector_get(invalid_aux_tables, count));
7090 if (strcmp(invalid_aux->name, aux_table->name) == 0) {
7091 ib_vector_remove(
7092 invalid_aux_tables,
7093 *reinterpret_cast<void**>(invalid_aux));
7094 break;
7095 }
7096 }
7097
7098 ib_vector_push(valid_aux_tables, aux_table);
7099
7100 /* If the index associated with aux table is corrupted,
7101 skip it. */
7102 if (fts_check_corrupt_index(aux_table) > 0) {
7103
7104 if (i + 1 < ib_vector_size(tables)) {
7105 next_aux_table = static_cast<fts_aux_table_t*>(
7106 ib_vector_get(tables, i + 1));
7107 }
7108
7109 if (next_aux_table == NULL
7110 || orig_parent_id != next_aux_table->parent_id) {
7111
7112 parent_table = dict_table_open_on_id(
7113 aux_table->parent_id, TRUE,
7114 DICT_TABLE_OP_NORMAL);
7115
7116 if (!ib_vector_is_empty(aux_tables_to_rename)) {
7117 fts_rename_aux_tables_to_hex_format(
7118 aux_tables_to_rename, parent_table);
7119
7120 } else {
7121 fts_set_parent_hex_format_flag(
7122 parent_table, trx);
7123 }
7124
7125 dict_table_close(parent_table, TRUE, FALSE);
7126 }
7127
7128 continue;
7129 }
7130
7131 parent_table = dict_table_open_on_id(
7132 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7133
7134 if (drop) {
7135 ib_vector_push(drop_aux_tables, aux_table);
7136 } else {
7137 if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
7138
7139 /* Current table could be one of the three
7140 obsolete tables, in this case, we should
7141 always try to drop it but not rename it.
7142 This could happen when we try to upgrade
7143 from older server to later one, which doesn't
7144 contain these obsolete tables. */
7145 ib_vector_push(obsolete_aux_tables, aux_table);
7146 continue;
7147 }
7148 }
7149
7150 /* If the aux table is in decimal format, we should
7151 rename it, so push it to aux_tables_to_rename */
7152 if (!drop && rename) {
7153 ib_vector_push(aux_tables_to_rename, aux_table);
7154 }
7155
7156 if (i + 1 < ib_vector_size(tables)) {
7157 next_aux_table = static_cast<fts_aux_table_t*>(
7158 ib_vector_get(tables, i + 1));
7159 }
7160
7161 if ((next_aux_table == NULL
7162 || orig_parent_id != next_aux_table->parent_id)
7163 && !ib_vector_is_empty(aux_tables_to_rename)) {
7164 /* All aux tables of parent table, whose id is
7165 last_parent_id, have been checked, try to rename
7166 them if necessary. We had better use a new background
7167 trx to rename rather than the original trx, in case
7168 any failure would cause a complete rollback. */
7169 ut_ad(rename);
7170 ut_ad(!DICT_TF2_FLAG_IS_SET(
7171 parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
7172
7173 fts_rename_aux_tables_to_hex_format(
7174 aux_tables_to_rename,parent_table);
7175 }
7176
7177 /* The IDs are already in correct hex format. */
7178 if (!drop && !rename) {
7179 dict_table_t* table;
7180
7181 table = dict_table_open_on_id(
7182 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7183 if (table != NULL
7184 && strcmp(table->name, aux_table->name)) {
7185 dict_table_close(table, TRUE, FALSE);
7186 table = NULL;
7187 }
7188
7189 if (table != NULL
7190 && !DICT_TF2_FLAG_IS_SET(
7191 table,
7192 DICT_TF2_FTS_AUX_HEX_NAME)) {
7193
7194 DBUG_EXECUTE_IF("aux_table_flag_fail",
7195 ib_logf(IB_LOG_LEVEL_WARN,
7196 "Setting aux table %s to hex "
7197 "format failed.", table->name);
7198 fts_set_index_corrupt(
7199 trx, aux_table->index_id,
7200 parent_table);
7201 goto table_exit;);
7202
7203 dberr_t err = fts_update_hex_format_flag(
7204 trx, table->id, true);
7205
7206 if (err != DB_SUCCESS) {
7207 ib_logf(IB_LOG_LEVEL_WARN,
7208 "Setting aux table %s to hex "
7209 "format failed.", table->name);
7210
7211 fts_set_index_corrupt(
7212 trx, aux_table->index_id,
7213 parent_table);
7214 } else {
7215 DICT_TF2_FLAG_SET(table,
7216 DICT_TF2_FTS_AUX_HEX_NAME);
7217 }
7218 }
7219 #ifndef DBUG_OFF
7220 table_exit:
7221 #endif /* !DBUG_OFF */
7222
7223 if (table != NULL) {
7224 dict_table_close(table, TRUE, FALSE);
7225 }
7226
7227 ut_ad(parent_table != NULL);
7228
7229 fts_set_parent_hex_format_flag(
7230 parent_table, trx);
7231 }
7232
7233 if (parent_table != NULL) {
7234 dict_table_close(parent_table, TRUE, FALSE);
7235 }
7236 }
7237
7238 fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
7239 fts_drop_aux_table_from_vector(trx, drop_aux_tables);
7240 fts_sql_commit(trx);
7241
7242 fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
7243
7244 /* Free the memory allocated at the beginning */
7245 if (heap != NULL) {
7246 mem_heap_free(heap);
7247 }
7248 }
7249
7250 /**********************************************************************//**
7251 Drop all orphaned FTS auxiliary tables, those that don't have a parent
7252 table or FTS index defined on them. */
7253 UNIV_INTERN
7254 void
fts_drop_orphaned_tables(void)7255 fts_drop_orphaned_tables(void)
7256 /*==========================*/
7257 {
7258 trx_t* trx;
7259 pars_info_t* info;
7260 mem_heap_t* heap;
7261 que_t* graph;
7262 ib_vector_t* tables;
7263 ib_alloc_t* heap_alloc;
7264 space_name_list_t space_name_list;
7265 dberr_t error = DB_SUCCESS;
7266
7267 /* Note: We have to free the memory after we are done with the list. */
7268 error = fil_get_space_names(space_name_list);
7269
7270 if (error == DB_OUT_OF_MEMORY) {
7271 ib_logf(IB_LOG_LEVEL_ERROR, "Out of memory");
7272 ut_error;
7273 }
7274
7275 heap = mem_heap_create(1024);
7276 heap_alloc = ib_heap_allocator_create(heap);
7277
7278 /* We store the table ids of all the FTS indexes that were found. */
7279 tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
7280
7281 /* Get the list of all known .ibd files and check for orphaned
7282 FTS auxiliary files in that list. We need to remove them because
7283 users can't map them back to table names and this will create
7284 unnecessary clutter. */
7285
7286 for (space_name_list_t::iterator it = space_name_list.begin();
7287 it != space_name_list.end();
7288 ++it) {
7289
7290 fts_aux_table_t* fts_aux_table;
7291
7292 fts_aux_table = static_cast<fts_aux_table_t*>(
7293 ib_vector_push(tables, NULL));
7294
7295 memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
7296
7297 if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
7298 ib_vector_pop(tables);
7299 } else {
7300 ulint len = strlen(*it);
7301
7302 fts_aux_table->id = fil_get_space_id_for_table(*it);
7303
7304 /* We got this list from fil0fil.cc. The tablespace
7305 with this name must exist. */
7306 ut_a(fts_aux_table->id != ULINT_UNDEFINED);
7307
7308 fts_aux_table->name = static_cast<char*>(
7309 mem_heap_dup(heap, *it, len + 1));
7310
7311 fts_aux_table->name[len] = 0;
7312 }
7313 }
7314
7315 trx = trx_allocate_for_background();
7316 trx->op_info = "dropping orphaned FTS tables";
7317 row_mysql_lock_data_dictionary(trx);
7318
7319 info = pars_info_create();
7320
7321 pars_info_bind_function(info, "my_func", fts_read_tables, tables);
7322
7323 graph = fts_parse_sql_no_dict_lock(
7324 NULL,
7325 info,
7326 "DECLARE FUNCTION my_func;\n"
7327 "DECLARE CURSOR c IS"
7328 " SELECT NAME, ID "
7329 " FROM SYS_TABLES;\n"
7330 "BEGIN\n"
7331 "\n"
7332 "OPEN c;\n"
7333 "WHILE 1 = 1 LOOP\n"
7334 " FETCH c INTO my_func();\n"
7335 " IF c % NOTFOUND THEN\n"
7336 " EXIT;\n"
7337 " END IF;\n"
7338 "END LOOP;\n"
7339 "CLOSE c;");
7340
7341 for (;;) {
7342 error = fts_eval_sql(trx, graph);
7343
7344 if (error == DB_SUCCESS) {
7345 fts_check_and_drop_orphaned_tables(trx, tables);
7346 break; /* Exit the loop. */
7347 } else {
7348 ib_vector_reset(tables);
7349
7350 fts_sql_rollback(trx);
7351
7352 ut_print_timestamp(stderr);
7353
7354 if (error == DB_LOCK_WAIT_TIMEOUT) {
7355 ib_logf(IB_LOG_LEVEL_WARN,
7356 "lock wait timeout reading SYS_TABLES. "
7357 "Retrying!");
7358
7359 trx->error_state = DB_SUCCESS;
7360 } else {
7361 ib_logf(IB_LOG_LEVEL_ERROR,
7362 "(%s) while reading SYS_TABLES.",
7363 ut_strerr(error));
7364
7365 break; /* Exit the loop. */
7366 }
7367 }
7368 }
7369
7370 que_graph_free(graph);
7371
7372 row_mysql_unlock_data_dictionary(trx);
7373
7374 trx_free_for_background(trx);
7375
7376 if (heap != NULL) {
7377 mem_heap_free(heap);
7378 }
7379
7380 /** Free the memory allocated to store the .ibd names. */
7381 for (space_name_list_t::iterator it = space_name_list.begin();
7382 it != space_name_list.end();
7383 ++it) {
7384
7385 delete[] *it;
7386 }
7387 }
7388
7389 /**********************************************************************//**
7390 Check whether user supplied stopword table is of the right format.
7391 Caller is responsible to hold dictionary locks.
7392 @return the stopword column charset if qualifies */
7393 UNIV_INTERN
7394 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)7395 fts_valid_stopword_table(
7396 /*=====================*/
7397 const char* stopword_table_name) /*!< in: Stopword table
7398 name */
7399 {
7400 dict_table_t* table;
7401 dict_col_t* col = NULL;
7402
7403 if (!stopword_table_name) {
7404 return(NULL);
7405 }
7406
7407 table = dict_table_get_low(stopword_table_name);
7408
7409 if (!table) {
7410 fprintf(stderr,
7411 "InnoDB: user stopword table %s does not exist.\n",
7412 stopword_table_name);
7413
7414 return(NULL);
7415 } else {
7416 const char* col_name;
7417
7418 col_name = dict_table_get_col_name(table, 0);
7419
7420 if (ut_strcmp(col_name, "value")) {
7421 fprintf(stderr,
7422 "InnoDB: invalid column name for stopword "
7423 "table %s. Its first column must be named as "
7424 "'value'.\n", stopword_table_name);
7425
7426 return(NULL);
7427 }
7428
7429 col = dict_table_get_nth_col(table, 0);
7430
7431 if (col->mtype != DATA_VARCHAR
7432 && col->mtype != DATA_VARMYSQL) {
7433 fprintf(stderr,
7434 "InnoDB: invalid column type for stopword "
7435 "table %s. Its first column must be of "
7436 "varchar type\n", stopword_table_name);
7437
7438 return(NULL);
7439 }
7440 }
7441
7442 ut_ad(col);
7443
7444 return(innobase_get_fts_charset(
7445 static_cast<int>(col->prtype & DATA_MYSQL_TYPE_MASK),
7446 static_cast<uint>(dtype_get_charset_coll(col->prtype))));
7447 }
7448
7449 /**********************************************************************//**
7450 This function loads the stopword into the FTS cache. It also
7451 records/fetches stopword configuration to/from FTS configure
7452 table, depending on whether we are creating or reloading the
7453 FTS.
7454 @return TRUE if load operation is successful */
7455 UNIV_INTERN
7456 ibool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)7457 fts_load_stopword(
7458 /*==============*/
7459 const dict_table_t*
7460 table, /*!< in: Table with FTS */
7461 trx_t* trx, /*!< in: Transactions */
7462 const char* global_stopword_table, /*!< in: Global stopword table
7463 name */
7464 const char* session_stopword_table, /*!< in: Session stopword table
7465 name */
7466 ibool stopword_is_on, /*!< in: Whether stopword
7467 option is turned on/off */
7468 ibool reload) /*!< in: Whether it is
7469 for reloading FTS table */
7470 {
7471 fts_table_t fts_table;
7472 fts_string_t str;
7473 dberr_t error = DB_SUCCESS;
7474 ulint use_stopword;
7475 fts_cache_t* cache;
7476 const char* stopword_to_use = NULL;
7477 ibool new_trx = FALSE;
7478 byte str_buffer[MAX_FULL_NAME_LEN + 1];
7479
7480 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
7481
7482 cache = table->fts->cache;
7483
7484 if (!reload && !(cache->stopword_info.status
7485 & STOPWORD_NOT_INIT)) {
7486 return(TRUE);
7487 }
7488
7489 if (!trx) {
7490 trx = trx_allocate_for_background();
7491 trx->op_info = "upload FTS stopword";
7492 new_trx = TRUE;
7493 }
7494
7495 /* First check whether stopword filtering is turned off */
7496 if (reload) {
7497 error = fts_config_get_ulint(
7498 trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
7499 } else {
7500 use_stopword = (ulint) stopword_is_on;
7501
7502 error = fts_config_set_ulint(
7503 trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
7504 }
7505
7506 if (error != DB_SUCCESS) {
7507 goto cleanup;
7508 }
7509
7510 /* If stopword is turned off, no need to continue to load the
7511 stopword into cache, but still need to do initialization */
7512 if (!use_stopword) {
7513 cache->stopword_info.status = STOPWORD_OFF;
7514 goto cleanup;
7515 }
7516
7517 if (reload) {
7518 /* Fetch the stopword table name from FTS config
7519 table */
7520 str.f_n_char = 0;
7521 str.f_str = str_buffer;
7522 str.f_len = sizeof(str_buffer) - 1;
7523
7524 error = fts_config_get_value(
7525 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7526
7527 if (error != DB_SUCCESS) {
7528 goto cleanup;
7529 }
7530
7531 if (strlen((char*) str.f_str) > 0) {
7532 stopword_to_use = (const char*) str.f_str;
7533 }
7534 } else {
7535 stopword_to_use = (session_stopword_table)
7536 ? session_stopword_table : global_stopword_table;
7537 }
7538
7539 if (stopword_to_use
7540 && fts_load_user_stopword(table->fts, stopword_to_use,
7541 &cache->stopword_info)) {
7542 /* Save the stopword table name to the configure
7543 table */
7544 if (!reload) {
7545 str.f_n_char = 0;
7546 str.f_str = (byte*) stopword_to_use;
7547 str.f_len = ut_strlen(stopword_to_use);
7548
7549 error = fts_config_set_value(
7550 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7551 }
7552 } else {
7553 /* Load system default stopword list */
7554 fts_load_default_stopword(&cache->stopword_info);
7555 }
7556
7557 cleanup:
7558 if (new_trx) {
7559 if (error == DB_SUCCESS) {
7560 fts_sql_commit(trx);
7561 } else {
7562 fts_sql_rollback(trx);
7563 }
7564
7565 trx_free_for_background(trx);
7566 }
7567
7568 if (!cache->stopword_info.cached_stopword) {
7569 cache->stopword_info.cached_stopword = rbt_create(
7570 sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
7571 }
7572
7573 return(error == DB_SUCCESS);
7574 }
7575
7576 /**********************************************************************//**
7577 Callback function when we initialize the FTS at the start up
7578 time. It recovers the maximum Doc IDs presented in the current table.
7579 @return: always returns TRUE */
7580 static
7581 ibool
fts_init_get_doc_id(void * row,void * user_arg)7582 fts_init_get_doc_id(
7583 /*================*/
7584 void* row, /*!< in: sel_node_t* */
7585 void* user_arg) /*!< in: fts cache */
7586 {
7587 doc_id_t doc_id = FTS_NULL_DOC_ID;
7588 sel_node_t* node = static_cast<sel_node_t*>(row);
7589 que_node_t* exp = node->select_list;
7590 fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
7591
7592 ut_ad(ib_vector_is_empty(cache->get_docs));
7593
7594 /* Copy each indexed column content into doc->text.f_str */
7595 if (exp) {
7596 dfield_t* dfield = que_node_get_val(exp);
7597 dtype_t* type = dfield_get_type(dfield);
7598 void* data = dfield_get_data(dfield);
7599
7600 ut_a(dtype_get_mtype(type) == DATA_INT);
7601
7602 doc_id = static_cast<doc_id_t>(mach_read_from_8(
7603 static_cast<const byte*>(data)));
7604
7605 if (doc_id >= cache->next_doc_id) {
7606 cache->next_doc_id = doc_id + 1;
7607 }
7608 }
7609
7610 return(TRUE);
7611 }
7612
7613 /**********************************************************************//**
7614 Callback function when we initialize the FTS at the start up
7615 time. It recovers Doc IDs that have not sync-ed to the auxiliary
7616 table, and require to bring them back into FTS index.
7617 @return: always returns TRUE */
7618 static
7619 ibool
fts_init_recover_doc(void * row,void * user_arg)7620 fts_init_recover_doc(
7621 /*=================*/
7622 void* row, /*!< in: sel_node_t* */
7623 void* user_arg) /*!< in: fts cache */
7624 {
7625
7626 fts_doc_t doc;
7627 ulint doc_len = 0;
7628 ulint field_no = 0;
7629 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
7630 doc_id_t doc_id = FTS_NULL_DOC_ID;
7631 sel_node_t* node = static_cast<sel_node_t*>(row);
7632 que_node_t* exp = node->select_list;
7633 fts_cache_t* cache = get_doc->cache;
7634
7635 fts_doc_init(&doc);
7636 doc.found = TRUE;
7637
7638 ut_ad(cache);
7639
7640 /* Copy each indexed column content into doc->text.f_str */
7641 while (exp) {
7642 dfield_t* dfield = que_node_get_val(exp);
7643 ulint len = dfield_get_len(dfield);
7644
7645 if (field_no == 0) {
7646 dtype_t* type = dfield_get_type(dfield);
7647 void* data = dfield_get_data(dfield);
7648
7649 ut_a(dtype_get_mtype(type) == DATA_INT);
7650
7651 doc_id = static_cast<doc_id_t>(mach_read_from_8(
7652 static_cast<const byte*>(data)));
7653
7654 field_no++;
7655 exp = que_node_get_next(exp);
7656 continue;
7657 }
7658
7659 if (len == UNIV_SQL_NULL) {
7660 exp = que_node_get_next(exp);
7661 continue;
7662 }
7663
7664 ut_ad(get_doc);
7665
7666 if (!get_doc->index_cache->charset) {
7667 ulint prtype = dfield->type.prtype;
7668
7669 get_doc->index_cache->charset =
7670 innobase_get_fts_charset(
7671 (int)(prtype & DATA_MYSQL_TYPE_MASK),
7672 (uint) dtype_get_charset_coll(prtype));
7673 }
7674
7675 doc.charset = get_doc->index_cache->charset;
7676
7677 if (dfield_is_ext(dfield)) {
7678 dict_table_t* table = cache->sync->table;
7679 ulint zip_size = dict_table_zip_size(table);
7680
7681 doc.text.f_str = btr_copy_externally_stored_field(
7682 &doc.text.f_len,
7683 static_cast<byte*>(dfield_get_data(dfield)),
7684 zip_size, len,
7685 static_cast<mem_heap_t*>(doc.self_heap->arg));
7686 } else {
7687 doc.text.f_str = static_cast<byte*>(
7688 dfield_get_data(dfield));
7689
7690 doc.text.f_len = len;
7691 }
7692
7693 if (field_no == 1) {
7694 fts_tokenize_document(&doc, NULL);
7695 } else {
7696 fts_tokenize_document_next(&doc, doc_len, NULL);
7697 }
7698
7699 exp = que_node_get_next(exp);
7700
7701 doc_len += (exp) ? len + 1 : len;
7702
7703 field_no++;
7704 }
7705
7706 fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
7707
7708 fts_doc_free(&doc);
7709
7710 cache->added++;
7711
7712 if (doc_id >= cache->next_doc_id) {
7713 cache->next_doc_id = doc_id + 1;
7714 }
7715
7716 return(TRUE);
7717 }
7718
7719 /**********************************************************************//**
7720 This function brings FTS index in sync when FTS index is first
7721 used. There are documents that have not yet sync-ed to auxiliary
7722 tables from last server abnormally shutdown, we will need to bring
7723 such document into FTS cache before any further operations
7724 @return TRUE if all OK */
7725 UNIV_INTERN
7726 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)7727 fts_init_index(
7728 /*===========*/
7729 dict_table_t* table, /*!< in: Table with FTS */
7730 ibool has_cache_lock) /*!< in: Whether we already have
7731 cache lock */
7732 {
7733 dict_index_t* index;
7734 doc_id_t start_doc;
7735 fts_get_doc_t* get_doc = NULL;
7736 fts_cache_t* cache = table->fts->cache;
7737 bool need_init = false;
7738
7739 ut_ad(!mutex_own(&dict_sys->mutex));
7740
7741 /* First check cache->get_docs is initialized */
7742 if (!has_cache_lock) {
7743 rw_lock_x_lock(&cache->lock);
7744 }
7745
7746 rw_lock_x_lock(&cache->init_lock);
7747 if (cache->get_docs == NULL) {
7748 cache->get_docs = fts_get_docs_create(cache);
7749 }
7750 rw_lock_x_unlock(&cache->init_lock);
7751
7752 if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
7753 goto func_exit;
7754 }
7755
7756 need_init = true;
7757
7758 start_doc = cache->synced_doc_id;
7759
7760 if (!start_doc) {
7761 fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
7762 cache->synced_doc_id = start_doc;
7763 }
7764
7765 /* No FTS index, this is the case when previous FTS index
7766 dropped, and we re-initialize the Doc ID system for subsequent
7767 insertion */
7768 if (ib_vector_is_empty(cache->get_docs)) {
7769 index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
7770
7771 ut_a(index);
7772
7773 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
7774 FTS_FETCH_DOC_BY_ID_LARGE,
7775 fts_init_get_doc_id, cache);
7776 } else {
7777 if (table->fts->cache->stopword_info.status
7778 & STOPWORD_NOT_INIT) {
7779 fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
7780 }
7781
7782 for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
7783 get_doc = static_cast<fts_get_doc_t*>(
7784 ib_vector_get(cache->get_docs, i));
7785
7786 index = get_doc->index_cache->index;
7787
7788 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
7789 FTS_FETCH_DOC_BY_ID_LARGE,
7790 fts_init_recover_doc, get_doc);
7791 }
7792 }
7793
7794 table->fts->fts_status |= ADDED_TABLE_SYNCED;
7795
7796 fts_get_docs_clear(cache->get_docs);
7797
7798 func_exit:
7799 if (!has_cache_lock) {
7800 rw_lock_x_unlock(&cache->lock);
7801 }
7802
7803 if (need_init) {
7804 mutex_enter(&dict_sys->mutex);
7805 /* Register the table with the optimize thread. */
7806 fts_optimize_add_table(table);
7807 mutex_exit(&dict_sys->mutex);
7808 }
7809
7810 return(TRUE);
7811 }
7812